diff --git a/.ci/gitlab/config.yml b/.ci/gitlab/config.yml
index 122e5deaa5d79e515892674515fb6f9b4901e1a6..719f9f299ba78792f5ad6cd7dcb1be7a6bcc27a3 100644
--- a/.ci/gitlab/config.yml
+++ b/.ci/gitlab/config.yml
@@ -106,6 +106,8 @@ common unstable_gcc_full cpp:
     variables:
         CI_IMAGE: debian-unstable_gcc_full
         TESTS_MODULE_SUBDIR: common
+    tags:
+        - dustin
     stage: cpp
     script:
           - /home/dune-ci/src/${MY_MODULE}/.ci/shared/scripts/test_cpp.bash
@@ -115,6 +117,8 @@ common unstable_gcc_full python:
     variables:
         CI_IMAGE: debian-unstable_gcc_full
         TESTS_MODULE_SUBDIR: common
+    tags:
+        - dustin
     stage: python
     script:
           - /home/dune-ci/src/${MY_MODULE}/.ci/shared/scripts/test_python.bash
@@ -124,6 +128,8 @@ common unstable_gcc_full headercheck:
     variables:
         CI_IMAGE: debian-unstable_gcc_full
         TESTS_MODULE_SUBDIR: common
+    tags:
+        - dustin
     stage: headercheck
     script:
           - /home/dune-ci/src/${MY_MODULE}/.ci/shared/scripts/test_headercheck.bash
@@ -133,6 +139,8 @@ grid unstable_gcc_full cpp:
     variables:
         CI_IMAGE: debian-unstable_gcc_full
         TESTS_MODULE_SUBDIR: grid
+    tags:
+        - dustin
     stage: cpp
     script:
           - /home/dune-ci/src/${MY_MODULE}/.ci/shared/scripts/test_cpp.bash
@@ -142,6 +150,8 @@ grid unstable_gcc_full python:
     variables:
         CI_IMAGE: debian-unstable_gcc_full
         TESTS_MODULE_SUBDIR: grid
+    tags:
+        - dustin
     stage: python
     script:
           - /home/dune-ci/src/${MY_MODULE}/.ci/shared/scripts/test_python.bash
@@ -151,6 +161,8 @@ grid unstable_gcc_full headercheck:
     variables:
         CI_IMAGE: debian-unstable_gcc_full
         TESTS_MODULE_SUBDIR: grid
+    tags:
+        - dustin
     stage: headercheck
     script:
           - /home/dune-ci/src/${MY_MODULE}/.ci/shared/scripts/test_headercheck.bash
@@ -161,7 +173,7 @@ functions unstable_gcc_full cpp:
         CI_IMAGE: debian-unstable_gcc_full
         TESTS_MODULE_SUBDIR: functions
     tags:
-        - amm-only
+        - dustin
     stage: cpp
     script:
           - /home/dune-ci/src/${MY_MODULE}/.ci/shared/scripts/test_cpp.bash
@@ -172,7 +184,7 @@ functions unstable_gcc_full python:
         CI_IMAGE: debian-unstable_gcc_full
         TESTS_MODULE_SUBDIR: functions
     tags:
-        - amm-only
+        - dustin
     stage: python
     script:
           - /home/dune-ci/src/${MY_MODULE}/.ci/shared/scripts/test_python.bash
@@ -183,7 +195,7 @@ functions unstable_gcc_full headercheck:
         CI_IMAGE: debian-unstable_gcc_full
         TESTS_MODULE_SUBDIR: functions
     tags:
-        - amm-only
+        - dustin
     stage: headercheck
     script:
           - /home/dune-ci/src/${MY_MODULE}/.ci/shared/scripts/test_headercheck.bash
@@ -194,7 +206,7 @@ la unstable_gcc_full cpp:
         CI_IMAGE: debian-unstable_gcc_full
         TESTS_MODULE_SUBDIR: la
     tags:
-        - amm-only
+        - dustin
     stage: cpp
     script:
           - /home/dune-ci/src/${MY_MODULE}/.ci/shared/scripts/test_cpp.bash
@@ -205,7 +217,7 @@ la unstable_gcc_full python:
         CI_IMAGE: debian-unstable_gcc_full
         TESTS_MODULE_SUBDIR: la
     tags:
-        - amm-only
+        - dustin
     stage: python
     script:
           - /home/dune-ci/src/${MY_MODULE}/.ci/shared/scripts/test_python.bash
@@ -216,7 +228,7 @@ la unstable_gcc_full headercheck:
         CI_IMAGE: debian-unstable_gcc_full
         TESTS_MODULE_SUBDIR: la
     tags:
-        - amm-only
+        - dustin
     stage: headercheck
     script:
           - /home/dune-ci/src/${MY_MODULE}/.ci/shared/scripts/test_headercheck.bash
@@ -226,6 +238,8 @@ common gcc_full cpp:
     variables:
         CI_IMAGE: debian_gcc_full
         TESTS_MODULE_SUBDIR: common
+    tags:
+        - dustin
     stage: cpp
     script:
           - /home/dune-ci/src/${MY_MODULE}/.ci/shared/scripts/test_cpp.bash
@@ -235,6 +249,8 @@ common gcc_full python:
     variables:
         CI_IMAGE: debian_gcc_full
         TESTS_MODULE_SUBDIR: common
+    tags:
+        - dustin
     stage: python
     script:
           - /home/dune-ci/src/${MY_MODULE}/.ci/shared/scripts/test_python.bash
@@ -244,6 +260,8 @@ common gcc_full headercheck:
     variables:
         CI_IMAGE: debian_gcc_full
         TESTS_MODULE_SUBDIR: common
+    tags:
+        - dustin
     stage: headercheck
     script:
           - /home/dune-ci/src/${MY_MODULE}/.ci/shared/scripts/test_headercheck.bash
@@ -253,6 +271,8 @@ grid gcc_full cpp:
     variables:
         CI_IMAGE: debian_gcc_full
         TESTS_MODULE_SUBDIR: grid
+    tags:
+        - dustin
     stage: cpp
     script:
           - /home/dune-ci/src/${MY_MODULE}/.ci/shared/scripts/test_cpp.bash
@@ -262,6 +282,8 @@ grid gcc_full python:
     variables:
         CI_IMAGE: debian_gcc_full
         TESTS_MODULE_SUBDIR: grid
+    tags:
+        - dustin
     stage: python
     script:
           - /home/dune-ci/src/${MY_MODULE}/.ci/shared/scripts/test_python.bash
@@ -271,6 +293,8 @@ grid gcc_full headercheck:
     variables:
         CI_IMAGE: debian_gcc_full
         TESTS_MODULE_SUBDIR: grid
+    tags:
+        - dustin
     stage: headercheck
     script:
           - /home/dune-ci/src/${MY_MODULE}/.ci/shared/scripts/test_headercheck.bash
@@ -281,7 +305,7 @@ functions gcc_full cpp:
         CI_IMAGE: debian_gcc_full
         TESTS_MODULE_SUBDIR: functions
     tags:
-        - amm-only
+        - dustin
     stage: cpp
     script:
           - /home/dune-ci/src/${MY_MODULE}/.ci/shared/scripts/test_cpp.bash
@@ -292,7 +316,7 @@ functions gcc_full python:
         CI_IMAGE: debian_gcc_full
         TESTS_MODULE_SUBDIR: functions
     tags:
-        - amm-only
+        - dustin
     stage: python
     script:
           - /home/dune-ci/src/${MY_MODULE}/.ci/shared/scripts/test_python.bash
@@ -303,7 +327,7 @@ functions gcc_full headercheck:
         CI_IMAGE: debian_gcc_full
         TESTS_MODULE_SUBDIR: functions
     tags:
-        - amm-only
+        - dustin
     stage: headercheck
     script:
           - /home/dune-ci/src/${MY_MODULE}/.ci/shared/scripts/test_headercheck.bash
@@ -314,7 +338,7 @@ la gcc_full cpp:
         CI_IMAGE: debian_gcc_full
         TESTS_MODULE_SUBDIR: la
     tags:
-        - amm-only
+        - dustin
     stage: cpp
     script:
           - /home/dune-ci/src/${MY_MODULE}/.ci/shared/scripts/test_cpp.bash
@@ -325,7 +349,7 @@ la gcc_full python:
         CI_IMAGE: debian_gcc_full
         TESTS_MODULE_SUBDIR: la
     tags:
-        - amm-only
+        - dustin
     stage: python
     script:
           - /home/dune-ci/src/${MY_MODULE}/.ci/shared/scripts/test_python.bash
@@ -336,7 +360,7 @@ la gcc_full headercheck:
         CI_IMAGE: debian_gcc_full
         TESTS_MODULE_SUBDIR: la
     tags:
-        - amm-only
+        - dustin
     stage: headercheck
     script:
           - /home/dune-ci/src/${MY_MODULE}/.ci/shared/scripts/test_headercheck.bash
diff --git a/.ci/gitlab/config_template.py b/.ci/gitlab/config_template.py
index 1f50071f915c4ec74333d27d6abcbafb98f37e29..5e296e62a0aebf871d7c7feb6a47d6bf87a22fc6 100755
--- a/.ci/gitlab/config_template.py
+++ b/.ci/gitlab/config_template.py
@@ -102,9 +102,9 @@ variables:
     variables:
         CI_IMAGE: {{ image }}
         TESTS_MODULE_SUBDIR: {{ subdir }}
-    {%- if subdir in ['functions', 'la'] and 'gcc' in image %}
+    {%- if 'gcc' in image %}
     tags:
-        - amm-only
+        - dustin
     {%- endif %}
     stage: {{kind}}
     script:
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 734422c434f6086b9f5a07e6332015bcd4a3c0cd..4dbceb013b33da356012815e4b1ab82292f914e1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -21,7 +21,7 @@ set(ENV{LD_LIBRARY_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/../local/lib:$ENV{LD_LIBRA
 set(ENV{PKG_CONFIG_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/../local/lib/pkgconfig:$ENV{PKG_CONFIG_PATH}")
 
 # cmake specific
-cmake_minimum_required(VERSION 3.1)
+cmake_minimum_required(VERSION 3.8)
 
 set(CMAKE_COLOR_MAKEFILE ON)
 
diff --git a/cmake/modules/DunePybindxiMacros.cmake b/cmake/modules/DunePybindxiMacros.cmake
index f556ce8873c89bf3130538ec7a45689a7ee19233..d170be55c890a1b8c2949f630487d1884e3acb84 100644
--- a/cmake/modules/DunePybindxiMacros.cmake
+++ b/cmake/modules/DunePybindxiMacros.cmake
@@ -23,10 +23,13 @@ set(PYTHON_MODULE_EXTENSION ${PYTHON_MODULE_EXTENSION} CACHE INTERNAL "")
 
 include(CheckCXXCompilerFlag)
 if(NOT MSVC AND NOT DUNE_PYBINDXI_CPP_STANDARD)
+  check_cxx_compiler_flag("-std=c++17" HAS_CPP17_FLAG)
   check_cxx_compiler_flag("-std=c++14" HAS_CPP14_FLAG)
   check_cxx_compiler_flag("-std=c++11" HAS_CPP11_FLAG)
 
-  if(HAS_CPP14_FLAG)
+  if(HAS_CPP17_FLAG)
+    set(DUNE_PYBINDXI_CPP_STANDARD -std=c++17)
+  elseif(HAS_CPP14_FLAG)
     set(DUNE_PYBINDXI_CPP_STANDARD -std=c++14)
   elseif(HAS_CPP11_FLAG)
     set(DUNE_PYBINDXI_CPP_STANDARD -std=c++11)
diff --git a/cmake/modules/FindClangFormat.cmake b/cmake/modules/FindClangFormat.cmake
index 033f2e28a60b23e7c3cf47743b5946d01d109599..52892d290ea48d52d6f7ffc391c9d7c53a06ce21 100644
--- a/cmake/modules/FindClangFormat.cmake
+++ b/cmake/modules/FindClangFormat.cmake
@@ -21,7 +21,7 @@
 #   and ClangFormat_FOUND accordingly
 # ~~~
 
-find_program(ClangFormat_EXECUTABLE NAMES clang-format-6 clang-format-6.0)
+find_program(ClangFormat_EXECUTABLE NAMES clang-format-8 clang-format-8.0)
 if(NOT EXISTS ${ClangFormat_EXECUTABLE})
   find_program(ClangFormat_EXECUTABLE NAMES clang-format)
 endif(NOT EXISTS ${ClangFormat_EXECUTABLE})
diff --git a/cmake/modules/FindClangTidy.cmake b/cmake/modules/FindClangTidy.cmake
index 34988560f110617e09506e66bc0a31885b8de432..46d34df02d47d3d95f021b09dcf60de0aaf5fb7f 100644
--- a/cmake/modules/FindClangTidy.cmake
+++ b/cmake/modules/FindClangTidy.cmake
@@ -23,11 +23,13 @@
 
 find_program(ClangTidy_EXECUTABLE
              NAMES clang-tidy
-                   clang-tidy-3.6
-                   clang-tidy-3.7
-                   clang-tidy-3.8
-                   clang-tidy-3.9
-                   clang-tidy-4.0)
+                   clang-tidy-8
+                   clang-tidy-9
+                   clang-tidy-10
+                   clang-tidy-11
+                   clang-tidy-12
+                   clang-tidy-13
+                   clang-tidy-14)
 if(EXISTS ${ClangTidy_EXECUTABLE})
   execute_process(COMMAND ${ClangTidy_EXECUTABLE} -version OUTPUT_VARIABLE clang_out)
   string(REGEX
@@ -37,5 +39,8 @@ if(EXISTS ${ClangTidy_EXECUTABLE})
                  ${clang_out})
 endif()
 
+find_program(RunTidy_EXECUTABLE
+NAMES run-clang-tidy-${ClangTidy_VERSION})
+
 include(FindPackageHandleStandardArgs)
 find_package_handle_standard_args(ClangTidy REQUIRED_VARS ClangTidy_EXECUTABLE VERSION_VAR ClangTidy_VERSION)
diff --git a/cmake/modules/XtTooling.cmake b/cmake/modules/XtTooling.cmake
index dc64ada5eeab7588a324914d1aa98cc6df5aa9fa..66bbd8af87835683f1c6eba477496a5f7a7ba43e 100644
--- a/cmake/modules/XtTooling.cmake
+++ b/cmake/modules/XtTooling.cmake
@@ -34,14 +34,18 @@ macro(add_analyze)
   endif(EXISTS ${ANALYZER})
 endmacro(add_analyze)
 
-find_package(ClangFormat 6 EXACT)
+find_package(ClangFormat 8 EXACT)
 macro(add_format glob_dir)
-  if(${ARGC} GREATER 1)
-    message(WARNING "'add_format' API has changed. Please provide a single "
-                    "search directory instead of multiple filenames")
-  endif()
   if(NOT TARGET format)
-    add_custom_target(format)
+    if(NOT ClangFormat_FOUND)
+      message(WARNING "clang-format not found, not adding format target")
+    else()
+      add_custom_target(format)
+    endif()
+  else()
+    if(NOT ClangFormat_FOUND) # an existing 'format' target is unusable without clang-format: abort configuration
+      message(FATAL_ERROR "clang-format not found but format target already exists")
+    endif()
   endif(NOT TARGET format)
   string(REPLACE "/"
                  "_"
@@ -89,20 +93,14 @@ macro(add_format glob_dir)
   add_dependencies(format "format_${fn}_cmake")
 endmacro(add_format)
 
-find_package(ClangTidy 3.7)
+find_package(ClangTidy 8)
 macro(add_tidy glob_dir)
   if(ClangTidy_FOUND)
+    dune_symlink_to_source_files(FILES .clang-tidy)
     message(STATUS "adding tidy target")
-    if(NOT TARGET tidy)
-      add_custom_target(tidy)
-    endif(NOT TARGET tidy)
-    string(REPLACE "/"
-                   "_"
-                   fn
-                   ${glob_dir})
-    file(GLOB_RECURSE _files "${glob_dir}/*.cc" "${glob_dir}/*.c")
-    add_custom_target("tidy_${fn}" ${ClangTidy_EXECUTABLE} -p=${CMAKE_CURRENT_BINARY_DIR} ${_files})
-    add_dependencies(tidy "tidy_${fn}")
+    set(TIDY_ARGS -config= -style=file -p=${CMAKE_CURRENT_BINARY_DIR} -j ${DXT_TEST_PROCS})
+    add_custom_target("tidy" ${RunTidy_EXECUTABLE} ${TIDY_ARGS} -export-fixes=${CMAKE_CURRENT_BINARY_DIR}/clang-tidy.fixes)
+    add_custom_target("fix_tidy" ${RunTidy_EXECUTABLE} ${TIDY_ARGS} -fix)
   else()
     message(WARNING "not adding tidy target because clang-tidy is missing or"
                     "wrong version: ${ClangTidy_EXECUTABLE} ${ClangTidy_VERSION}")
diff --git a/config.h.cmake b/config.h.cmake
index 390c479bea4ab3aa45987bf608760c7b81be644b..918e1b75ae1bca45e58dca161f745bde7ea0a8ac 100644
--- a/config.h.cmake
+++ b/config.h.cmake
@@ -12,6 +12,9 @@
 /* begin dune-xt */
 // NEVER delete/alter above comment, dune's cmake relies on it
 
+// this precludes clang-tidy and co. from issuing warnings for this file
+#pragma GCC system_header
+
 /* Define to the version of dune-xt */
 #define DUNE_XT_VERSION ${DUNE_XT_VERSION}
 
diff --git a/dune/pybindxi/buffer_info.h b/dune/pybindxi/buffer_info.h
index 925133c379dfd3f2af67995424db9f83e5aebbc4..7a190337548692f8d3ac88840b35683f42d55695 100644
--- a/dune/pybindxi/buffer_info.h
+++ b/dune/pybindxi/buffer_info.h
@@ -23,7 +23,8 @@ struct buffer_info
   std::string format; // For homogeneous buffers, this should be set to format_descriptor<T>::format()
   ssize_t ndim = 0; // Number of dimensions
   std::vector<ssize_t> shape; // Shape of the tensor (1 entry per dimension)
-  std::vector<ssize_t> strides; // Number of entries between adjacent entries (for each per dimension)
+  std::vector<ssize_t> strides; // Number of bytes between adjacent entries (for each per dimension)
+  bool readonly = false; // flag to indicate if the underlying storage may be written to
 
   buffer_info() {}
 
@@ -32,7 +33,8 @@ struct buffer_info
               const std::string& format,
               ssize_t ndim,
               detail::any_container<ssize_t> shape_in,
-              detail::any_container<ssize_t> strides_in)
+              detail::any_container<ssize_t> strides_in,
+              bool readonly = false)
     : ptr(ptr)
     , itemsize(itemsize)
     , size(1)
@@ -40,6 +42,7 @@ struct buffer_info
     , ndim(ndim)
     , shape(std::move(shape_in))
     , strides(std::move(strides_in))
+    , readonly(readonly)
   {
     if (ndim != (ssize_t)shape.size() || ndim != (ssize_t)strides.size())
       pybind11_fail("buffer_info: ndim doesn't match shape and/or strides length");
@@ -48,23 +51,32 @@ struct buffer_info
   }
 
   template <typename T>
-  buffer_info(T* ptr, detail::any_container<ssize_t> shape_in, detail::any_container<ssize_t> strides_in)
+  buffer_info(T* ptr,
+              detail::any_container<ssize_t> shape_in,
+              detail::any_container<ssize_t> strides_in,
+              bool readonly = false)
     : buffer_info(private_ctr_tag(),
                   ptr,
                   sizeof(T),
                   format_descriptor<T>::format(),
                   static_cast<ssize_t>(shape_in->size()),
                   std::move(shape_in),
-                  std::move(strides_in))
+                  std::move(strides_in),
+                  readonly)
+  {}
+
+  buffer_info(void* ptr, ssize_t itemsize, const std::string& format, ssize_t size, bool readonly = false)
+    : buffer_info(ptr, itemsize, format, 1, {size}, {itemsize}, readonly)
   {}
 
-  buffer_info(void* ptr, ssize_t itemsize, const std::string& format, ssize_t size)
-    : buffer_info(ptr, itemsize, format, 1, {size}, {itemsize})
+  template <typename T>
+  buffer_info(T* ptr, ssize_t size, bool readonly = false)
+    : buffer_info(ptr, sizeof(T), format_descriptor<T>::format(), size, readonly)
   {}
 
   template <typename T>
-  buffer_info(T* ptr, ssize_t size)
-    : buffer_info(ptr, sizeof(T), format_descriptor<T>::format(), size)
+  buffer_info(const T* ptr, ssize_t size, bool readonly = true)
+    : buffer_info(const_cast<T*>(ptr), sizeof(T), format_descriptor<T>::format(), size, readonly)
   {}
 
   explicit buffer_info(Py_buffer* view, bool ownview = true)
@@ -73,7 +85,8 @@ struct buffer_info
                   view->format,
                   view->ndim,
                   {view->shape, view->shape + view->ndim},
-                  {view->strides, view->strides + view->ndim})
+                  {view->strides, view->strides + view->ndim},
+                  view->readonly)
   {
     this->view = view;
     this->ownview = ownview;
@@ -98,6 +111,7 @@ struct buffer_info
     strides = std::move(rhs.strides);
     std::swap(view, rhs.view);
     std::swap(ownview, rhs.ownview);
+    readonly = rhs.readonly;
     return *this;
   }
 
@@ -119,8 +133,9 @@ private:
               const std::string& format,
               ssize_t ndim,
               detail::any_container<ssize_t>&& shape_in,
-              detail::any_container<ssize_t>&& strides_in)
-    : buffer_info(ptr, itemsize, format, ndim, std::move(shape_in), std::move(strides_in))
+              detail::any_container<ssize_t>&& strides_in,
+              bool readonly)
+    : buffer_info(ptr, itemsize, format, ndim, std::move(shape_in), std::move(strides_in), readonly)
   {}
 
   Py_buffer* view = nullptr;
diff --git a/dune/pybindxi/cast.h b/dune/pybindxi/cast.h
index e09413c51a5dbcf171220bb9f3cf36f0cc020880..63e90a9c2456417dbc0a015e901c243255cea5b4 100644
--- a/dune/pybindxi/cast.h
+++ b/dune/pybindxi/cast.h
@@ -33,6 +33,10 @@
 #  include <string_view>
 #endif
 
+#if defined(__cpp_lib_char8_t) && __cpp_lib_char8_t >= 201811L
+#  define PYBIND11_HAS_U8STRING
+#endif
+
 NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
 NAMESPACE_BEGIN(detail)
 
@@ -610,9 +614,16 @@ public:
       case return_value_policy::copy:
         if (copy_constructor)
           valueptr = copy_constructor(src);
-        else
-          throw cast_error("return_value_policy = copy, but the "
-                           "object is non-copyable!");
+        else {
+#if defined(NDEBUG)
+          throw cast_error("return_value_policy = copy, but type is "
+                           "non-copyable! (compile in debug mode for details)");
+#else
+          std::string type_name(tinfo->cpptype->name());
+          detail::clean_type_id(type_name);
+          throw cast_error("return_value_policy = copy, but type " + type_name + " is non-copyable!");
+#endif
+        }
         wrapper->owned = true;
         break;
 
@@ -621,9 +632,17 @@ public:
           valueptr = move_constructor(src);
         else if (copy_constructor)
           valueptr = copy_constructor(src);
-        else
-          throw cast_error("return_value_policy = move, but the "
-                           "object is neither movable nor copyable!");
+        else {
+#if defined(NDEBUG)
+          throw cast_error("return_value_policy = move, but type is neither "
+                           "movable nor copyable! "
+                           "(compile in debug mode for details)");
+#else
+          std::string type_name(tinfo->cpptype->name());
+          detail::clean_type_id(type_name);
+          throw cast_error("return_value_policy = move, but type " + type_name + " is neither movable nor copyable!");
+#endif
+        }
         wrapper->owned = true;
         break;
 
@@ -652,9 +671,9 @@ public:
       if (type->operator_new) {
         vptr = type->operator_new(type->type_size);
       } else {
-#if defined(PYBIND11_CPP17)
+#if defined(__cpp_aligned_new) && (!defined(_MSC_VER) || _MSC_VER >= 1912)
         if (type->type_align > __STDCPP_DEFAULT_NEW_ALIGNMENT__)
-          vptr = ::operator new(type->type_size, (std::align_val_t)type->type_align);
+          vptr = ::operator new(type->type_size, std::align_val_t(type->type_align));
         else
 #endif
           vptr = ::operator new(type->type_size);
@@ -863,17 +882,33 @@ template <typename Container>
 struct is_copy_constructible<
     Container,
     enable_if_t<all_of<std::is_copy_constructible<Container>,
-                       std::is_same<typename Container::value_type&, typename Container::reference>>::value>>
+                       std::is_same<typename Container::value_type&, typename Container::reference>,
+                       // Avoid infinite recursion
+                       negation<std::is_same<Container, typename Container::value_type>>>::value>>
   : is_copy_constructible<typename Container::value_type>
 {};
 
-#if !defined(PYBIND11_CPP17)
-// Likewise for std::pair before C++17 (which mandates that the copy constructor not exist when the
-// two types aren't themselves copy constructible).
+// Likewise for std::pair
+// (after C++17 it is mandatory that the copy constructor not exist when the two types aren't themselves
+// copy constructible, but this can not be relied upon when T1 or T2 are themselves containers).
 template <typename T1, typename T2>
 struct is_copy_constructible<std::pair<T1, T2>> : all_of<is_copy_constructible<T1>, is_copy_constructible<T2>>
 {};
-#endif
+
+// The same problems arise with std::is_copy_assignable, so we use the same workaround.
+template <typename T, typename SFINAE = void>
+struct is_copy_assignable : std::is_copy_assignable<T>
+{};
+template <typename Container>
+struct is_copy_assignable<
+    Container,
+    enable_if_t<all_of<std::is_copy_assignable<Container>,
+                       std::is_same<typename Container::value_type&, typename Container::reference>>::value>>
+  : is_copy_assignable<typename Container::value_type>
+{};
+template <typename T1, typename T2>
+struct is_copy_assignable<std::pair<T1, T2>> : all_of<is_copy_assignable<T1>, is_copy_assignable<T2>>
+{};
 
 NAMESPACE_END(detail)
 
@@ -1110,6 +1145,9 @@ public:
 
 template <typename CharT>
 using is_std_char_type = any_of<std::is_same<CharT, char>, /* std::string */
+#if defined(PYBIND11_HAS_U8STRING)
+                                std::is_same<CharT, char8_t>, /* std::u8string */
+#endif
                                 std::is_same<CharT, char16_t>, /* std::u16string */
                                 std::is_same<CharT, char32_t>, /* std::u32string */
                                 std::is_same<CharT, wchar_t> /* std::wstring */
@@ -1145,10 +1183,12 @@ public:
     }
 
     bool py_err = py_value == (py_type)-1 && PyErr_Occurred();
+
+    // Protect std::numeric_limits::min/max with parentheses
     if (py_err
         || (std::is_integral<T>::value && sizeof(py_type) != sizeof(T)
-            && (py_value < (py_type)std::numeric_limits<T>::min()
-                || py_value > (py_type)std::numeric_limits<T>::max()))) {
+            && (py_value < (py_type)(std::numeric_limits<T>::min)()
+                || py_value > (py_type)(std::numeric_limits<T>::max)()))) {
       bool type_error = py_err
                         && PyErr_ExceptionMatches(
 #if PY_VERSION_HEX < 0x03000000 && !defined(PYPY_VERSION)
@@ -1332,6 +1372,8 @@ public:
       if (res == 0 || res == 1) {
         value = (bool)res;
         return true;
+      } else {
+        PyErr_Clear();
       }
     }
     return false;
@@ -1352,6 +1394,9 @@ struct string_caster
   // Simplify life by being able to assume standard char sizes (the standard only guarantees
   // minimums, but Python requires exact sizes)
   static_assert(!std::is_same<CharT, char>::value || sizeof(CharT) == 1, "Unsupported char size != 1");
+#if defined(PYBIND11_HAS_U8STRING)
+  static_assert(!std::is_same<CharT, char8_t>::value || sizeof(CharT) == 1, "Unsupported char8_t size != 1");
+#endif
   static_assert(!std::is_same<CharT, char16_t>::value || sizeof(CharT) == 2, "Unsupported char16_t size != 2");
   static_assert(!std::is_same<CharT, char32_t>::value || sizeof(CharT) == 4, "Unsupported char32_t size != 4");
   // wchar_t can be either 16 bits (Windows) or 32 (everywhere else)
@@ -1371,7 +1416,7 @@ struct string_caster
 #if PY_MAJOR_VERSION >= 3
       return load_bytes(load_src);
 #else
-      if (sizeof(CharT) == 1) {
+      if (std::is_same<CharT, char>::value) {
         return load_bytes(load_src);
       }
 
@@ -1442,7 +1487,7 @@ private:
   // without any encoding/decoding attempt).  For other C++ char sizes this is a no-op.
   // which supports loading a unicode from a str, doesn't take this path.
   template <typename C = CharT>
-  bool load_bytes(enable_if_t<sizeof(C) == 1, handle> src)
+  bool load_bytes(enable_if_t<std::is_same<C, char>::value, handle> src)
   {
     if (PYBIND11_BYTES_CHECK(src.ptr())) {
       // We were passed a Python 3 raw bytes; accept it into a std::string or char*
@@ -1458,7 +1503,7 @@ private:
   }
 
   template <typename C = CharT>
-  bool load_bytes(enable_if_t<sizeof(C) != 1, handle>)
+  bool load_bytes(enable_if_t<!std::is_same<C, char>::value, handle>)
   {
     return false;
   }
@@ -1637,9 +1682,14 @@ protected:
   template <size_t... Is>
   bool load_impl(const sequence& seq, bool convert, index_sequence<Is...>)
   {
+#ifdef __cpp_fold_expressions
+    if ((... || !std::get<Is>(subcasters).load(seq[Is], convert)))
+      return false;
+#else
     for (bool r : {std::get<Is>(subcasters).load(seq[Is], convert)...})
       if (!r)
         return false;
+#endif
     return true;
   }
 
@@ -2352,14 +2402,19 @@ private:
   template <size_t... Is>
   bool load_impl_sequence(function_call& call, index_sequence<Is...>)
   {
+#ifdef __cpp_fold_expressions
+    if ((... || !std::get<Is>(argcasters).load(call.args[Is], call.args_convert[Is])))
+      return false;
+#else
     for (bool r : {std::get<Is>(argcasters).load(call.args[Is], call.args_convert[Is])...})
       if (!r)
         return false;
+#endif
     return true;
   }
 
   template <typename Return, typename Func, size_t... Is, typename Guard>
-  Return call_impl(Func&& f, index_sequence<Is...>, Guard&&)
+  Return call_impl(Func&& f, index_sequence<Is...>, Guard&&) &&
   {
     return std::forward<Func>(f)(cast_op<Args>(std::move(std::get<Is>(argcasters)))...);
   }
diff --git a/dune/pybindxi/chrono.h b/dune/pybindxi/chrono.h
index df72db32f6c1dfac5f708bf794857de2f9261aa7..ee8941e486c7885754b42317aa0a81c7421cbc71 100644
--- a/dune/pybindxi/chrono.h
+++ b/dune/pybindxi/chrono.h
@@ -125,8 +125,11 @@ public:
 
     if (!src)
       return false;
+
+    std::tm cal;
+    microseconds msecs;
+
     if (PyDateTime_Check(src.ptr())) {
-      std::tm cal;
       cal.tm_sec = PyDateTime_DATE_GET_SECOND(src.ptr());
       cal.tm_min = PyDateTime_DATE_GET_MINUTE(src.ptr());
       cal.tm_hour = PyDateTime_DATE_GET_HOUR(src.ptr());
@@ -134,11 +137,30 @@ public:
       cal.tm_mon = PyDateTime_GET_MONTH(src.ptr()) - 1;
       cal.tm_year = PyDateTime_GET_YEAR(src.ptr()) - 1900;
       cal.tm_isdst = -1;
-
-      value = system_clock::from_time_t(std::mktime(&cal)) + microseconds(PyDateTime_DATE_GET_MICROSECOND(src.ptr()));
-      return true;
+      msecs = microseconds(PyDateTime_DATE_GET_MICROSECOND(src.ptr()));
+    } else if (PyDate_Check(src.ptr())) {
+      cal.tm_sec = 0;
+      cal.tm_min = 0;
+      cal.tm_hour = 0;
+      cal.tm_mday = PyDateTime_GET_DAY(src.ptr());
+      cal.tm_mon = PyDateTime_GET_MONTH(src.ptr()) - 1;
+      cal.tm_year = PyDateTime_GET_YEAR(src.ptr()) - 1900;
+      cal.tm_isdst = -1;
+      msecs = microseconds(0);
+    } else if (PyTime_Check(src.ptr())) {
+      cal.tm_sec = PyDateTime_TIME_GET_SECOND(src.ptr());
+      cal.tm_min = PyDateTime_TIME_GET_MINUTE(src.ptr());
+      cal.tm_hour = PyDateTime_TIME_GET_HOUR(src.ptr());
+      cal.tm_mday = 1; // This date (day, month, year) = (1, 0, 70)
+      cal.tm_mon = 0; // represents 1-Jan-1970, which is the first
+      cal.tm_year = 70; // earliest available date for Python's datetime
+      cal.tm_isdst = -1;
+      msecs = microseconds(PyDateTime_TIME_GET_MICROSECOND(src.ptr()));
     } else
       return false;
+
+    value = system_clock::from_time_t(std::mktime(&cal)) + msecs;
+    return true;
   }
 
   static handle cast(const std::chrono::time_point<std::chrono::system_clock, Duration>& src,
diff --git a/dune/pybindxi/detail/class.h b/dune/pybindxi/detail/class.h
index 2f2945b5f3ce8b3d8488382546a1541cf86eeecf..15fd61dface372d06fbb49e8d430220f41af6882 100644
--- a/dune/pybindxi/detail/class.h
+++ b/dune/pybindxi/detail/class.h
@@ -374,6 +374,7 @@ extern "C" inline void pybind11_object_dealloc(PyObject* self)
   auto type = Py_TYPE(self);
   type->tp_free(self);
 
+#if PY_VERSION_HEX < 0x03080000
   // `type->tp_dealloc != pybind11_object_dealloc` means that we're being called
   // as part of a derived type's dealloc, in which case we're not allowed to decref
   // the type here. For cross-module compatibility, we shouldn't compare directly
@@ -381,6 +382,11 @@ extern "C" inline void pybind11_object_dealloc(PyObject* self)
   auto pybind11_object_type = (PyTypeObject*)get_internals().instance_base;
   if (type->tp_dealloc == pybind11_object_type->tp_dealloc)
     Py_DECREF(type);
+#else
+  // This was not needed before Python 3.8 (Python issue 35810)
+  // https://github.com/pybind/pybind11/issues/1946
+  Py_DECREF(type);
+#endif
 }
 
 /** Create the type which can be used as a common base for all classes.  This is
@@ -515,6 +521,13 @@ extern "C" inline int pybind11_getbuffer(PyObject* obj, Py_buffer* view, int fla
   view->len = view->itemsize;
   for (auto s : info->shape)
     view->len *= s;
+  view->readonly = info->readonly;
+  if ((flags & PyBUF_WRITABLE) == PyBUF_WRITABLE && info->readonly) {
+    if (view)
+      view->obj = nullptr;
+    PyErr_SetString(PyExc_BufferError, "Writable buffer requested for readonly storage");
+    return -1;
+  }
   if ((flags & PyBUF_FORMAT) == PyBUF_FORMAT)
     view->format = const_cast<char*>(info->format.c_str());
   if ((flags & PyBUF_STRIDES) == PyBUF_STRIDES) {
@@ -617,6 +630,9 @@ inline PyObject* make_new_python_type(const type_record& rec)
   type->tp_as_number = &heap_type->as_number;
   type->tp_as_sequence = &heap_type->as_sequence;
   type->tp_as_mapping = &heap_type->as_mapping;
+#if PY_VERSION_HEX >= 0x03050000
+  type->tp_as_async = &heap_type->as_async;
+#endif
 
   /* Flags */
   type->tp_flags |= Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HEAPTYPE;
diff --git a/dune/pybindxi/detail/common.h b/dune/pybindxi/detail/common.h
index ed9476d97bb2372026bbbfcad2a5d6211d2c67c5..18aa287d82245730ff19150bbb726606e21472e8 100644
--- a/dune/pybindxi/detail/common.h
+++ b/dune/pybindxi/detail/common.h
@@ -94,8 +94,8 @@
 #endif
 
 #define PYBIND11_VERSION_MAJOR 2
-#define PYBIND11_VERSION_MINOR 3
-#define PYBIND11_VERSION_PATCH dev1
+#define PYBIND11_VERSION_MINOR 5
+#define PYBIND11_VERSION_PATCH 0
 
 /// Include Python header, disable linking to pythonX_d.lib on Windows in debug mode
 #if defined(_MSC_VER)
@@ -104,7 +104,7 @@
 #  endif
 #  pragma warning(push)
 #  pragma warning(disable : 4510 4610 4512 4005)
-#  if defined(_DEBUG)
+#  if defined(_DEBUG) && !defined(Py_DEBUG)
 #    define PYBIND11_DEBUG_MARKER
 #    undef _DEBUG
 #  endif
@@ -114,10 +114,9 @@
 #include <frameobject.h>
 #include <pythread.h>
 
-#if defined(_WIN32) && (defined(min) || defined(max))
-#  error Macro clash with min and max -- define NOMINMAX when compiling your program on Windows
-#endif
-
+/* Python #defines overrides on all sorts of core functions, which
+   tends to wreak havoc in C++ codebases that expect these to work
+   like regular functions (potentially with several overloads) */
 #if defined(isalnum)
 #  undef isalnum
 #  undef isalpha
@@ -128,6 +127,10 @@
 #  undef toupper
 #endif
 
+#if defined(copysign)
+#  undef copysign
+#endif
+
 #if defined(_MSC_VER)
 #  if defined(PYBIND11_DEBUG_MARKER)
 #    define _DEBUG
@@ -169,7 +172,10 @@
 #  define PYBIND11_STR_TYPE ::pybind11::str
 #  define PYBIND11_BOOL_ATTR "__bool__"
 #  define PYBIND11_NB_BOOL(ptr) ((ptr)->nb_bool)
-#  define PYBIND11_PLUGIN_IMPL(name) extern "C" PYBIND11_EXPORT PyObject* PyInit_##name()
+// Providing a separate declaration to make Clang's -Wmissing-prototypes happy
+#  define PYBIND11_PLUGIN_IMPL(name)                                                                                   \
+    extern "C" PYBIND11_EXPORT PyObject* PyInit_##name();                                                              \
+    extern "C" PYBIND11_EXPORT PyObject* PyInit_##name()
 
 #else
 #  define PYBIND11_INSTANCE_METHOD_NEW(ptr, class_) PyMethod_New(ptr, nullptr, class_)
@@ -192,8 +198,10 @@
 #  define PYBIND11_STR_TYPE ::pybind11::bytes
 #  define PYBIND11_BOOL_ATTR "__nonzero__"
 #  define PYBIND11_NB_BOOL(ptr) ((ptr)->nb_nonzero)
+// Providing a separate PyInit decl to make Clang's -Wmissing-prototypes happy
 #  define PYBIND11_PLUGIN_IMPL(name)                                                                                   \
     static PyObject* pybind11_init_wrapper();                                                                          \
+    extern "C" PYBIND11_EXPORT void init##name();                                                                      \
     extern "C" PYBIND11_EXPORT void init##name()                                                                       \
     {                                                                                                                  \
       (void)pybind11_init_wrapper();                                                                                   \
@@ -216,6 +224,7 @@ extern "C"
 #define PYBIND11_STRINGIFY(x) #x
 #define PYBIND11_TOSTRING(x) PYBIND11_STRINGIFY(x)
 #define PYBIND11_CONCAT(first, second) first##second
+#define PYBIND11_ENSURE_INTERNALS_READY pybind11::detail::get_internals();
 
 #define PYBIND11_CHECK_PYTHON_VERSION                                                                                  \
   {                                                                                                                    \
@@ -265,6 +274,7 @@ extern "C"
   PYBIND11_PLUGIN_IMPL(name)                                                                                           \
   {                                                                                                                    \
     PYBIND11_CHECK_PYTHON_VERSION                                                                                      \
+    PYBIND11_ENSURE_INTERNALS_READY                                                                                    \
     try {                                                                                                              \
       return pybind11_init();                                                                                          \
     }                                                                                                                  \
@@ -294,6 +304,7 @@ extern "C"
   PYBIND11_PLUGIN_IMPL(name)                                                                                           \
   {                                                                                                                    \
     PYBIND11_CHECK_PYTHON_VERSION                                                                                      \
+    PYBIND11_ENSURE_INTERNALS_READY                                                                                    \
     auto m = pybind11::module(PYBIND11_TOSTRING(name));                                                                \
     try {                                                                                                              \
       PYBIND11_CONCAT(pybind11_init_, name)(m);                                                                        \
@@ -840,6 +851,8 @@ PYBIND11_RUNTIME_EXCEPTION(index_error, PyExc_IndexError)
 PYBIND11_RUNTIME_EXCEPTION(key_error, PyExc_KeyError)
 PYBIND11_RUNTIME_EXCEPTION(value_error, PyExc_ValueError)
 PYBIND11_RUNTIME_EXCEPTION(type_error, PyExc_TypeError)
+PYBIND11_RUNTIME_EXCEPTION(buffer_error, PyExc_BufferError)
+PYBIND11_RUNTIME_EXCEPTION(import_error, PyExc_ImportError)
 PYBIND11_RUNTIME_EXCEPTION(cast_error, PyExc_RuntimeError) /// Thrown when pybind11::cast or handle::call fail due to a
                                                            /// type casting error
 PYBIND11_RUNTIME_EXCEPTION(reference_cast_error, PyExc_RuntimeError) /// Used internally
@@ -922,10 +935,6 @@ struct nodelete
   {}
 };
 
-// overload_cast requires variable templates: C++14
-#if defined(PYBIND11_CPP14)
-#  define PYBIND11_OVERLOAD_CAST 1
-
 NAMESPACE_BEGIN(detail)
 template <typename... Args>
 struct overload_cast_impl
@@ -952,19 +961,23 @@ struct overload_cast_impl
 };
 NAMESPACE_END(detail)
 
+// overload_cast requires variable templates: C++14
+#if defined(PYBIND11_CPP14)
+#  define PYBIND11_OVERLOAD_CAST 1
 /// Syntax sugar for resolving overloaded function pointers:
 ///  - regular: static_cast<Return (Class::*)(Arg0, Arg1, Arg2)>(&Class::func)
 ///  - sweet:   overload_cast<Arg0, Arg1, Arg2>(&Class::func)
 template <typename... Args>
 static constexpr detail::overload_cast_impl<Args...> overload_cast = {};
 // MSVC 2015 only accepts this particular initialization syntax for this variable template.
+#endif
 
 /// Const member function selector for overload_cast
 ///  - regular: static_cast<Return (Class::*)(Arg) const>(&Class::func)
 ///  - sweet:   overload_cast<Arg>(&Class::func, const_)
 static constexpr auto const_ = std::true_type{};
 
-#else // no overload_cast: providing something that static_assert-fails:
+#if !defined(PYBIND11_CPP14) // no overload_cast: providing something that static_assert-fails:
 template <typename... Args>
 struct overload_cast
 {
diff --git a/dune/pybindxi/detail/internals.h b/dune/pybindxi/detail/internals.h
index 7ddaf73f2161c21682d3b5bc004f33022ac527a9..6d8e48330ca344cdc6bdd262ed789fdf2fb18fb7 100644
--- a/dune/pybindxi/detail/internals.h
+++ b/dune/pybindxi/detail/internals.h
@@ -26,6 +26,7 @@ inline PyObject* make_object_base_type(PyTypeObject* metaclass);
 #  define PYBIND11_TLS_GET_VALUE(key) PyThread_tss_get((key))
 #  define PYBIND11_TLS_REPLACE_VALUE(key, value) PyThread_tss_set((key), (value))
 #  define PYBIND11_TLS_DELETE_VALUE(key) PyThread_tss_set((key), nullptr)
+#  define PYBIND11_TLS_FREE(key) PyThread_tss_free(key)
 #else
 // Usually an int but a long on Cygwin64 with Python 3.x
 #  define PYBIND11_TLS_KEY_INIT(var) decltype(PyThread_create_key()) var = 0
@@ -41,6 +42,7 @@ inline PyObject* make_object_base_type(PyTypeObject* metaclass);
 #    define PYBIND11_TLS_DELETE_VALUE(key) PyThread_set_key_value((key), nullptr)
 #    define PYBIND11_TLS_REPLACE_VALUE(key, value) PyThread_set_key_value((key), (value))
 #  endif
+#  define PYBIND11_TLS_FREE(key) (void)key
 #endif
 
 // Python loads modules by default with dlopen with the RTLD_LOCAL flag; under libc++ and possibly
@@ -117,6 +119,17 @@ struct internals
 #if defined(WITH_THREAD)
   PYBIND11_TLS_KEY_INIT(tstate);
   PyInterpreterState* istate = nullptr;
+  ~internals()
+  {
+    // This destructor is called *after* Py_Finalize() in finalize_interpreter().
+    // That *SHOULD BE* fine. The following details what happens when PyThread_tss_free is called.
+    // PYBIND11_TLS_FREE is PyThread_tss_free on python 3.7+. On older python, it does nothing.
+    // PyThread_tss_free calls PyThread_tss_delete and PyMem_RawFree.
+    // PyThread_tss_delete just calls TlsFree (on Windows) or pthread_key_delete (on *NIX). Neither
+    // of those have anything to do with CPython internals.
+    // PyMem_RawFree *requires* that the `tstate` be allocated with the CPython allocator.
+    PYBIND11_TLS_FREE(tstate);
+  }
 #endif
 };
 
@@ -148,14 +161,49 @@ struct type_info
 };
 
 /// Tracks the `internals` and `type_info` ABI version independent of the main library version
-#define PYBIND11_INTERNALS_VERSION 3
+#define PYBIND11_INTERNALS_VERSION 4
 
-#if defined(_DEBUG)
+/// On MSVC, debug and release builds are not ABI-compatible!
+#if defined(_MSC_VER) && defined(_DEBUG)
 #  define PYBIND11_BUILD_TYPE "_debug"
 #else
 #  define PYBIND11_BUILD_TYPE ""
 #endif
 
+/// Let's assume that different compilers are ABI-incompatible.
+#if defined(_MSC_VER)
+#  define PYBIND11_COMPILER_TYPE "_msvc"
+#elif defined(__INTEL_COMPILER)
+#  define PYBIND11_COMPILER_TYPE "_icc"
+#elif defined(__clang__)
+#  define PYBIND11_COMPILER_TYPE "_clang"
+#elif defined(__PGI)
+#  define PYBIND11_COMPILER_TYPE "_pgi"
+#elif defined(__MINGW32__)
+#  define PYBIND11_COMPILER_TYPE "_mingw"
+#elif defined(__CYGWIN__)
+#  define PYBIND11_COMPILER_TYPE "_gcc_cygwin"
+#elif defined(__GNUC__)
+#  define PYBIND11_COMPILER_TYPE "_gcc"
+#else
+#  define PYBIND11_COMPILER_TYPE "_unknown"
+#endif
+
+#if defined(_LIBCPP_VERSION)
+#  define PYBIND11_STDLIB "_libcpp"
+#elif defined(__GLIBCXX__) || defined(__GLIBCPP__)
+#  define PYBIND11_STDLIB "_libstdcpp"
+#else
+#  define PYBIND11_STDLIB ""
+#endif
+
+/// On Linux/OSX, changes in __GXX_ABI_VERSION indicate ABI incompatibility.
+#if defined(__GXX_ABI_VERSION)
+#  define PYBIND11_BUILD_ABI "_cxxabi" PYBIND11_TOSTRING(__GXX_ABI_VERSION)
+#else
+#  define PYBIND11_BUILD_ABI ""
+#endif
+
 #if defined(WITH_THREAD)
 #  define PYBIND11_INTERNALS_KIND ""
 #else
@@ -163,12 +211,12 @@ struct type_info
 #endif
 
 #define PYBIND11_INTERNALS_ID                                                                                          \
-  "__pybind11_internals_v" PYBIND11_TOSTRING(PYBIND11_INTERNALS_VERSION) PYBIND11_INTERNALS_KIND PYBIND11_BUILD_TYPE   \
-      "__"
+  "__pybind11_internals_v" PYBIND11_TOSTRING(PYBIND11_INTERNALS_VERSION)                                               \
+      PYBIND11_INTERNALS_KIND PYBIND11_COMPILER_TYPE PYBIND11_STDLIB PYBIND11_BUILD_ABI PYBIND11_BUILD_TYPE "__"
 
 #define PYBIND11_MODULE_LOCAL_ID                                                                                       \
   "__pybind11_module_local_v" PYBIND11_TOSTRING(PYBIND11_INTERNALS_VERSION)                                            \
-      PYBIND11_INTERNALS_KIND PYBIND11_BUILD_TYPE "__"
+      PYBIND11_INTERNALS_KIND PYBIND11_COMPILER_TYPE PYBIND11_STDLIB PYBIND11_BUILD_ABI PYBIND11_BUILD_TYPE "__"
 
 /// Each module locally stores a pointer to the `internals` data. The data
 /// itself is shared among modules with the same `PYBIND11_INTERNALS_ID`.
@@ -178,6 +226,63 @@ inline internals**& get_internals_pp()
   return internals_pp;
 }
 
+inline void translate_exception(std::exception_ptr p)
+{
+  try {
+    if (p)
+      std::rethrow_exception(p);
+  } catch (error_already_set& e) {
+    e.restore();
+    return;
+  } catch (const builtin_exception& e) {
+    e.set_error();
+    return;
+  } catch (const std::bad_alloc& e) {
+    PyErr_SetString(PyExc_MemoryError, e.what());
+    return;
+  } catch (const std::domain_error& e) {
+    PyErr_SetString(PyExc_ValueError, e.what());
+    return;
+  } catch (const std::invalid_argument& e) {
+    PyErr_SetString(PyExc_ValueError, e.what());
+    return;
+  } catch (const std::length_error& e) {
+    PyErr_SetString(PyExc_ValueError, e.what());
+    return;
+  } catch (const std::out_of_range& e) {
+    PyErr_SetString(PyExc_IndexError, e.what());
+    return;
+  } catch (const std::range_error& e) {
+    PyErr_SetString(PyExc_ValueError, e.what());
+    return;
+  } catch (const std::overflow_error& e) {
+    PyErr_SetString(PyExc_OverflowError, e.what());
+    return;
+  } catch (const std::exception& e) {
+    PyErr_SetString(PyExc_RuntimeError, e.what());
+    return;
+  } catch (...) {
+    PyErr_SetString(PyExc_RuntimeError, "Caught an unknown exception!");
+    return;
+  }
+}
+
+#if !defined(__GLIBCXX__)
+inline void translate_local_exception(std::exception_ptr p)
+{
+  try {
+    if (p)
+      std::rethrow_exception(p);
+  } catch (error_already_set& e) {
+    e.restore();
+    return;
+  } catch (const builtin_exception& e) {
+    e.set_error();
+    return;
+  }
+}
+#endif
+
 /// Return a reference to the current `internals` data
 PYBIND11_NOINLINE inline internals& get_internals()
 {
@@ -185,6 +290,20 @@ PYBIND11_NOINLINE inline internals& get_internals()
   if (internals_pp && *internals_pp)
     return **internals_pp;
 
+  // Ensure that the GIL is held since we will need to make Python calls.
+  // Cannot use py::gil_scoped_acquire here since that constructor calls get_internals.
+  struct gil_scoped_acquire_local
+  {
+    gil_scoped_acquire_local()
+      : state(PyGILState_Ensure())
+    {}
+    ~gil_scoped_acquire_local()
+    {
+      PyGILState_Release(state);
+    }
+    const PyGILState_STATE state;
+  } gil;
+
   constexpr auto* id = PYBIND11_INTERNALS_ID;
   auto builtins = handle(PyEval_GetBuiltins());
   if (builtins.contains(id) && isinstance<capsule>(builtins[id])) {
@@ -196,18 +315,7 @@ PYBIND11_NOINLINE inline internals& get_internals()
     //
     // libstdc++ doesn't require this (types there are identified only by name)
 #if !defined(__GLIBCXX__)
-    (*internals_pp)->registered_exception_translators.push_front([](std::exception_ptr p) -> void {
-      try {
-        if (p)
-          std::rethrow_exception(p);
-      } catch (error_already_set& e) {
-        e.restore();
-        return;
-      } catch (const builtin_exception& e) {
-        e.set_error();
-        return;
-      }
-    });
+    (*internals_pp)->registered_exception_translators.push_front(&translate_local_exception);
 #endif
   } else {
     if (!internals_pp)
@@ -231,42 +339,7 @@ PYBIND11_NOINLINE inline internals& get_internals()
     internals_ptr->istate = tstate->interp;
 #endif
     builtins[id] = capsule(internals_pp);
-    internals_ptr->registered_exception_translators.push_front([](std::exception_ptr p) -> void {
-      try {
-        if (p)
-          std::rethrow_exception(p);
-      } catch (error_already_set& e) {
-        e.restore();
-        return;
-      } catch (const builtin_exception& e) {
-        e.set_error();
-        return;
-      } catch (const std::bad_alloc& e) {
-        PyErr_SetString(PyExc_MemoryError, e.what());
-        return;
-      } catch (const std::domain_error& e) {
-        PyErr_SetString(PyExc_ValueError, e.what());
-        return;
-      } catch (const std::invalid_argument& e) {
-        PyErr_SetString(PyExc_ValueError, e.what());
-        return;
-      } catch (const std::length_error& e) {
-        PyErr_SetString(PyExc_ValueError, e.what());
-        return;
-      } catch (const std::out_of_range& e) {
-        PyErr_SetString(PyExc_IndexError, e.what());
-        return;
-      } catch (const std::range_error& e) {
-        PyErr_SetString(PyExc_ValueError, e.what());
-        return;
-      } catch (const std::exception& e) {
-        PyErr_SetString(PyExc_RuntimeError, e.what());
-        return;
-      } catch (...) {
-        PyErr_SetString(PyExc_RuntimeError, "Caught an unknown exception!");
-        return;
-      }
-    });
+    internals_ptr->registered_exception_translators.push_front(&translate_exception);
     internals_ptr->static_property_type = make_static_property_type();
     internals_ptr->default_metaclass = make_default_metaclass();
     internals_ptr->instance_base = make_object_base_type(internals_ptr->default_metaclass);
diff --git a/dune/pybindxi/embed.h b/dune/pybindxi/embed.h
index 0aedd584701998d4fd913d9a519555fbaed35f74..750345cb96217383658ae66e1d71f86671b56102 100644
--- a/dune/pybindxi/embed.h
+++ b/dune/pybindxi/embed.h
@@ -19,12 +19,14 @@
 
 #if PY_MAJOR_VERSION >= 3
 #  define PYBIND11_EMBEDDED_MODULE_IMPL(name)                                                                          \
+    extern "C" PyObject* pybind11_init_impl_##name();                                                                  \
     extern "C" PyObject* pybind11_init_impl_##name()                                                                   \
     {                                                                                                                  \
       return pybind11_init_wrapper_##name();                                                                           \
     }
 #else
 #  define PYBIND11_EMBEDDED_MODULE_IMPL(name)                                                                          \
+    extern "C" void pybind11_init_impl_##name();                                                                       \
     extern "C" void pybind11_init_impl_##name()                                                                        \
     {                                                                                                                  \
       pybind11_init_wrapper_##name();                                                                                  \
diff --git a/dune/pybindxi/functional.h b/dune/pybindxi/functional.h
index 6e515584529cd3c43e308c5533567469e2191150..8c13710f49ebcda69f0502f9561c5c28d2611d17 100644
--- a/dune/pybindxi/functional.h
+++ b/dune/pybindxi/functional.h
@@ -76,12 +76,23 @@ public:
       }
     };
 
-    value = [hfunc = func_handle(std::move(func))](Args... args) -> Return {
-      gil_scoped_acquire acq;
-      object retval(hfunc.f(std::forward<Args>(args)...));
-      /* Visual studio 2015 parser issue: need parentheses around this expression */
-      return (retval.template cast<Return>());
+    // to emulate 'move initialization capture' in C++11
+    struct func_wrapper
+    {
+      func_handle hfunc;
+      func_wrapper(func_handle&& hf)
+        : hfunc(std::move(hf))
+      {}
+      Return operator()(Args... args) const
+      {
+        gil_scoped_acquire acq;
+        object retval(hfunc.f(std::forward<Args>(args)...));
+        /* Visual studio 2015 parser issue: need parentheses around this expression */
+        return (retval.template cast<Return>());
+      }
     };
+
+    value = func_wrapper(func_handle(std::move(func)));
     return true;
   }
 
diff --git a/dune/pybindxi/iostream.h b/dune/pybindxi/iostream.h
index 60c98dce87e12d5b56a8e1589943a7728ae7205e..ccc416abf48b53b14316550c9fbbfbad53de778b 100644
--- a/dune/pybindxi/iostream.h
+++ b/dune/pybindxi/iostream.h
@@ -68,6 +68,8 @@ public:
     setp(d_buffer.get(), d_buffer.get() + buf_size - 1);
   }
 
+  pythonbuf(pythonbuf&&) = default;
+
   /// Sync before destroy
   ~pythonbuf()
   {
diff --git a/dune/pybindxi/numpy.h b/dune/pybindxi/numpy.h
index 042101e52495f8a7f0a0c6351ba2a2cdb916a69d..c0ea9fa2358610192ae9807a91a546273600ce8b 100644
--- a/dune/pybindxi/numpy.h
+++ b/dune/pybindxi/numpy.h
@@ -15,6 +15,7 @@
 #include <numeric>
 #include <algorithm>
 #include <array>
+#include <cstdint>
 #include <cstdlib>
 #include <cstring>
 #include <sstream>
@@ -117,6 +118,26 @@ inline numpy_internals& get_numpy_internals()
   return *ptr;
 }
 
+template <typename T>
+struct same_size
+{
+  template <typename U>
+  using as = bool_constant<sizeof(T) == sizeof(U)>;
+};
+
+template <typename Concrete>
+constexpr int platform_lookup()
+{
+  return -1;
+}
+
+// Look up a type according to its size, and return a value corresponding to the NumPy typenum.
+template <typename Concrete, typename T, typename... Ts, typename... Ints>
+constexpr int platform_lookup(int I, Ints... Is)
+{
+  return sizeof(Concrete) == sizeof(T) ? I : platform_lookup<Concrete, Ts...>(Is...);
+}
+
 struct npy_api
 {
   enum constants
@@ -148,7 +169,21 @@ struct npy_api
     NPY_OBJECT_ = 17,
     NPY_STRING_,
     NPY_UNICODE_,
-    NPY_VOID_
+    NPY_VOID_,
+    // Platform-dependent normalization
+    NPY_INT8_ = NPY_BYTE_,
+    NPY_UINT8_ = NPY_UBYTE_,
+    NPY_INT16_ = NPY_SHORT_,
+    NPY_UINT16_ = NPY_USHORT_,
+    // `npy_common.h` defines the integer aliases. In order, it checks:
+    // NPY_BITSOF_LONG, NPY_BITSOF_LONGLONG, NPY_BITSOF_INT, NPY_BITSOF_SHORT, NPY_BITSOF_CHAR
+    // and assigns the alias to the first matching size, so we should check in this order.
+    NPY_INT32_ = platform_lookup<std::int32_t, long, int, short>(NPY_LONG_, NPY_INT_, NPY_SHORT_),
+    NPY_UINT32_ =
+        platform_lookup<std::uint32_t, unsigned long, unsigned int, unsigned short>(NPY_ULONG_, NPY_UINT_, NPY_USHORT_),
+    NPY_INT64_ = platform_lookup<std::int64_t, long, long long, int>(NPY_LONG_, NPY_LONGLONG_, NPY_INT_),
+    NPY_UINT64_ = platform_lookup<std::uint64_t, unsigned long, unsigned long long, unsigned int>(
+        NPY_ULONG_, NPY_ULONGLONG_, NPY_UINT_),
   };
 
   typedef struct
@@ -1241,12 +1276,12 @@ private:
   constexpr static const int values[15] = {npy_api::NPY_BOOL_,
                                            npy_api::NPY_BYTE_,
                                            npy_api::NPY_UBYTE_,
-                                           npy_api::NPY_SHORT_,
-                                           npy_api::NPY_USHORT_,
-                                           npy_api::NPY_INT_,
-                                           npy_api::NPY_UINT_,
-                                           npy_api::NPY_LONGLONG_,
-                                           npy_api::NPY_ULONGLONG_,
+                                           npy_api::NPY_INT16_,
+                                           npy_api::NPY_UINT16_,
+                                           npy_api::NPY_INT32_,
+                                           npy_api::NPY_UINT32_,
+                                           npy_api::NPY_INT64_,
+                                           npy_api::NPY_UINT64_,
                                            npy_api::NPY_FLOAT_,
                                            npy_api::NPY_DOUBLE_,
                                            npy_api::NPY_LONGDOUBLE_,
@@ -1260,7 +1295,7 @@ public:
   static pybind11::dtype dtype()
   {
     if (auto ptr = npy_api::get().PyArray_DescrFromType_(value))
-      return reinterpret_borrow<pybind11::dtype>(ptr);
+      return reinterpret_steal<pybind11::dtype>(ptr);
     pybind11_fail("Unsupported buffer format!");
   }
 };
@@ -1334,8 +1369,15 @@ inline PYBIND11_NOINLINE void register_structured_dtype(any_container<field_desc
   if (numpy_internals.get_type_info(tinfo, false))
     pybind11_fail("NumPy: dtype is already registered");
 
+  // Use ordered fields because order matters as of NumPy 1.14:
+  // https://docs.scipy.org/doc/numpy/release.html#multiple-field-indexing-assignment-of-structured-arrays
+  std::vector<field_descriptor> ordered_fields(std::move(fields));
+  std::sort(ordered_fields.begin(), ordered_fields.end(), [](const field_descriptor& a, const field_descriptor& b) {
+    return a.offset < b.offset;
+  });
+
   list names, formats, offsets;
-  for (auto field : *fields) {
+  for (auto& field : ordered_fields) {
     if (!field.descr)
       pybind11_fail(std::string("NumPy: unsupported field dtype: `") + field.name + "` @ " + tinfo.name());
     names.append(PYBIND11_STR_TYPE(field.name));
@@ -1351,10 +1393,6 @@ inline PYBIND11_NOINLINE void register_structured_dtype(any_container<field_desc
   // - https://github.com/numpy/numpy/pull/7798
   // Because of this, we won't use numpy's logic to generate buffer format
   // strings and will just do it ourselves.
-  std::vector<field_descriptor> ordered_fields(std::move(fields));
-  std::sort(ordered_fields.begin(), ordered_fields.end(), [](const field_descriptor& a, const field_descriptor& b) {
-    return a.offset < b.offset;
-  });
   ssize_t offset = 0;
   std::ostringstream oss;
   // mark the structure as unaligned with '^', because numpy and C++ don't
diff --git a/dune/pybindxi/pybind11.h b/dune/pybindxi/pybind11.h
index 119899ed05c18ee47c9472f10797454cdeabc91a..37752e89d191614c094a1c24a52da7c24b93e44a 100644
--- a/dune/pybindxi/pybind11.h
+++ b/dune/pybindxi/pybind11.h
@@ -521,7 +521,7 @@ protected:
 
         function_call call(func, parent);
 
-        size_t args_to_copy = std::min(pos_args, n_args_in);
+        size_t args_to_copy = (std::min)(pos_args, n_args_in); // Protect std::min with parentheses
         size_t args_copied = 0;
 
         // 0. Inject new-style `self` argument
@@ -1061,11 +1061,18 @@ inline void call_operator_delete(void* p, size_t s, size_t a)
 {
   (void)s;
   (void)a;
-#if defined(PYBIND11_CPP17)
-  if (a > __STDCPP_DEFAULT_NEW_ALIGNMENT__)
+#if defined(__cpp_aligned_new) && (!defined(_MSC_VER) || _MSC_VER >= 1912)
+  if (a > __STDCPP_DEFAULT_NEW_ALIGNMENT__) {
+#  ifdef __cpp_sized_deallocation
     ::operator delete(p, s, std::align_val_t(a));
-  else
-    ::operator delete(p, s);
+#  else
+    ::operator delete(p, std::align_val_t(a));
+#  endif
+    return;
+  }
+#endif
+#ifdef __cpp_sized_deallocation
+  ::operator delete(p, s);
 #else
   ::operator delete(p);
 #endif
@@ -1602,9 +1609,17 @@ struct enum_base
       },                                                                                                               \
       is_method(m_base))
 
+#define PYBIND11_ENUM_OP_CONV_LHS(op, expr)                                                                            \
+  m_base.attr(op) = cpp_function(                                                                                      \
+      [](object a_, object b) {                                                                                        \
+        int_ a(a_);                                                                                                    \
+        return expr;                                                                                                   \
+      },                                                                                                               \
+      is_method(m_base))
+
     if (is_convertible) {
-      PYBIND11_ENUM_OP_CONV("__eq__", !b.is_none() && a.equal(b));
-      PYBIND11_ENUM_OP_CONV("__ne__", b.is_none() || !a.equal(b));
+      PYBIND11_ENUM_OP_CONV_LHS("__eq__", !b.is_none() && a.equal(b));
+      PYBIND11_ENUM_OP_CONV_LHS("__ne__", b.is_none() || !a.equal(b));
 
       if (is_arithmetic) {
         PYBIND11_ENUM_OP_CONV("__lt__", a < b);
@@ -1617,6 +1632,7 @@ struct enum_base
         PYBIND11_ENUM_OP_CONV("__ror__", a | b);
         PYBIND11_ENUM_OP_CONV("__xor__", a ^ b);
         PYBIND11_ENUM_OP_CONV("__rxor__", a ^ b);
+        m_base.attr("__invert__") = cpp_function([](object arg) { return ~(int_(arg)); }, is_method(m_base));
       }
     } else {
       PYBIND11_ENUM_OP_STRICT("__eq__", int_(a).equal(int_(b)), return false);
@@ -1632,6 +1648,7 @@ struct enum_base
       }
     }
 
+#undef PYBIND11_ENUM_OP_CONV_LHS
 #undef PYBIND11_ENUM_OP_CONV
 #undef PYBIND11_ENUM_OP_STRICT
 
@@ -1693,6 +1710,10 @@ public:
 #if PY_MAJOR_VERSION < 3
     def("__long__", [](Type value) { return (Scalar)value; });
 #endif
+#if PY_MAJOR_VERSION > 3 || (PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION >= 8)
+    def("__index__", [](Type value) { return (Scalar)value; });
+#endif
+
     cpp_function setstate([](Type& value, Scalar arg) { value = static_cast<Type>(arg); }, is_method(*this));
     attr("__setstate__") = setstate;
   }
@@ -2205,8 +2226,8 @@ class gil_scoped_release
 error_already_set::~error_already_set()
 {
   if (m_type) {
-    error_scope scope;
     gil_scoped_acquire gil;
+    error_scope scope;
     m_type.release().dec_ref();
     m_value.release().dec_ref();
     m_trace.release().dec_ref();
diff --git a/dune/pybindxi/pytypes.h b/dune/pybindxi/pytypes.h
index 65bd1d1faa2e8631d4aa243d6e464a5d8672af29..6255f057a510f655cc08551b0103c036f92f364c 100644
--- a/dune/pybindxi/pytypes.h
+++ b/dune/pybindxi/pytypes.h
@@ -1733,6 +1733,10 @@ public:
   {
     return (size_t)PyTuple_Size(m_ptr);
   }
+  bool empty() const
+  {
+    return size() == 0;
+  }
   detail::tuple_accessor operator[](size_t index) const
   {
     return {*this, index};
@@ -1773,6 +1777,10 @@ public:
   {
     return (size_t)PyDict_Size(m_ptr);
   }
+  bool empty() const
+  {
+    return size() == 0;
+  }
   detail::dict_iterator begin() const
   {
     return {*this, 0};
@@ -1785,13 +1793,10 @@ public:
   {
     PyDict_Clear(ptr());
   }
-  bool contains(handle key) const
-  {
-    return PyDict_Contains(ptr(), key.ptr()) == 1;
-  }
-  bool contains(const char* key) const
+  template <typename T>
+  bool contains(T&& key) const
   {
-    return PyDict_Contains(ptr(), pybind11::str(key).ptr()) == 1;
+    return PyDict_Contains(m_ptr, detail::object_or_cast(std::forward<T>(key)).ptr()) == 1;
   }
 
 private:
@@ -1812,6 +1817,10 @@ public:
   {
     return (size_t)PySequence_Size(m_ptr);
   }
+  bool empty() const
+  {
+    return size() == 0;
+  }
   detail::sequence_accessor operator[](size_t index) const
   {
     return {*this, index};
@@ -1844,6 +1853,10 @@ public:
   {
     return (size_t)PyList_Size(m_ptr);
   }
+  bool empty() const
+  {
+    return size() == 0;
+  }
   detail::list_accessor operator[](size_t index) const
   {
     return {*this, index};
@@ -1865,6 +1878,11 @@ public:
   {
     PyList_Append(m_ptr, detail::object_or_cast(std::forward<T>(val)).ptr());
   }
+  template <typename T>
+  void insert(size_t index, T&& val) const
+  {
+    PyList_Insert(m_ptr, static_cast<ssize_t>(index), detail::object_or_cast(std::forward<T>(val)).ptr());
+  }
 };
 
 class args : public tuple
@@ -1890,6 +1908,10 @@ public:
   {
     return (size_t)PySet_Size(m_ptr);
   }
+  bool empty() const
+  {
+    return size() == 0;
+  }
   template <typename T>
   bool add(T&& val) const
   {
@@ -1899,6 +1921,11 @@ public:
   {
     PySet_Clear(m_ptr);
   }
+  template <typename T>
+  bool contains(T&& val) const
+  {
+    return PySet_Contains(m_ptr, detail::object_or_cast(std::forward<T>(val)).ptr()) == 1;
+  }
 };
 
 class function : public object
@@ -1929,7 +1956,7 @@ class buffer : public object
 public:
   PYBIND11_OBJECT_DEFAULT(buffer, object, PyObject_CheckBuffer)
 
-  buffer_info request(bool writable = false)
+  buffer_info request(bool writable = false) const
   {
     int flags = PyBUF_STRIDES | PyBUF_FORMAT;
     if (writable)
@@ -1966,7 +1993,7 @@ public:
     buf.strides = py_strides.data();
     buf.shape = py_shape.data();
     buf.suboffsets = nullptr;
-    buf.readonly = false;
+    buf.readonly = info.readonly;
     buf.internal = nullptr;
 
     m_ptr = PyMemoryView_FromBuffer(&buf);
diff --git a/dune/pybindxi/stl_bind.h b/dune/pybindxi/stl_bind.h
index 9d48d19c6f8acf98744a40dd134c9cfe5ce57c2c..8575cb89552c2bdb5916cda8600dc9d201e3567b 100644
--- a/dune/pybindxi/stl_bind.h
+++ b/dune/pybindxi/stl_bind.h
@@ -133,6 +133,14 @@ void vector_modifiers(enable_if_t<is_copy_constructible<typename Vector::value_t
   using SizeType = typename Vector::size_type;
   using DiffType = typename Vector::difference_type;
 
+  auto wrap_i = [](DiffType i, SizeType n) {
+    if (i < 0)
+      i += n;
+    if (i < 0 || (SizeType)i >= n)
+      throw index_error();
+    return i;
+  };
+
   cl.def(
       "append", [](Vector& v, const T& value) { v.push_back(value); }, arg("x"), "Add an item to the end of the list");
 
@@ -144,6 +152,9 @@ void vector_modifiers(enable_if_t<is_copy_constructible<typename Vector::value_t
     return v.release();
   }));
 
+  cl.def(
+      "clear", [](Vector& v) { v.clear(); }, "Clear the contents");
+
   cl.def(
       "extend",
       [](Vector& v, const Vector& src) { v.insert(v.end(), src.begin(), src.end()); },
@@ -174,10 +185,13 @@ void vector_modifiers(enable_if_t<is_copy_constructible<typename Vector::value_t
 
   cl.def(
       "insert",
-      [](Vector& v, SizeType i, const T& x) {
-        if (i > v.size())
+      [](Vector& v, DiffType i, const T& x) {
+        // Can't use wrap_i; i == v.size() is OK
+        if (i < 0)
+          i += v.size();
+        if (i < 0 || (SizeType)i > v.size())
           throw index_error();
-        v.insert(v.begin() + (DiffType)i, x);
+        v.insert(v.begin() + i, x);
       },
       arg("i"),
       arg("x"),
@@ -196,20 +210,18 @@ void vector_modifiers(enable_if_t<is_copy_constructible<typename Vector::value_t
 
   cl.def(
       "pop",
-      [](Vector& v, SizeType i) {
-        if (i >= v.size())
-          throw index_error();
-        T t = v[i];
-        v.erase(v.begin() + (DiffType)i);
+      [wrap_i](Vector& v, DiffType i) {
+        i = wrap_i(i, v.size());
+        T t = v[(SizeType)i];
+        v.erase(v.begin() + i);
         return t;
       },
       arg("i"),
       "Remove and return the item at index ``i``");
 
-  cl.def("__setitem__", [](Vector& v, SizeType i, const T& t) {
-    if (i >= v.size())
-      throw index_error();
-    v[i] = t;
+  cl.def("__setitem__", [wrap_i](Vector& v, DiffType i, const T& t) {
+    i = wrap_i(i, v.size());
+    v[(SizeType)i] = t;
   });
 
   /// Slicing protocol
@@ -252,10 +264,9 @@ void vector_modifiers(enable_if_t<is_copy_constructible<typename Vector::value_t
 
   cl.def(
       "__delitem__",
-      [](Vector& v, SizeType i) {
-        if (i >= v.size())
-          throw index_error();
-        v.erase(v.begin() + DiffType(i));
+      [wrap_i](Vector& v, DiffType i) {
+        i = wrap_i(i, v.size());
+        v.erase(v.begin() + i);
       },
       "Delete the list elements at index ``i``");
 
@@ -291,14 +302,22 @@ void vector_accessor(enable_if_t<!vector_needs_copy<Vector>::value, Class_>& cl)
 {
   using T = typename Vector::value_type;
   using SizeType = typename Vector::size_type;
+  using DiffType = typename Vector::difference_type;
   using ItType = typename Vector::iterator;
 
+  auto wrap_i = [](DiffType i, SizeType n) {
+    if (i < 0)
+      i += n;
+    if (i < 0 || (SizeType)i >= n)
+      throw index_error();
+    return i;
+  };
+
   cl.def(
       "__getitem__",
-      [](Vector& v, SizeType i) -> T& {
-        if (i >= v.size())
-          throw index_error();
-        return v[i];
+      [wrap_i](Vector& v, DiffType i) -> T& {
+        i = wrap_i(i, v.size());
+        return v[(SizeType)i];
       },
       return_value_policy::reference_internal // ref + keepalive
   );
@@ -318,11 +337,14 @@ void vector_accessor(enable_if_t<vector_needs_copy<Vector>::value, Class_>& cl)
 {
   using T = typename Vector::value_type;
   using SizeType = typename Vector::size_type;
+  using DiffType = typename Vector::difference_type;
   using ItType = typename Vector::iterator;
-  cl.def("__getitem__", [](const Vector& v, SizeType i) -> T {
-    if (i >= v.size())
+  cl.def("__getitem__", [](const Vector& v, DiffType i) -> T {
+    if (i < 0 && (i += v.size()) < 0)
+      throw index_error();
+    if ((SizeType)i >= v.size())
       throw index_error();
-    return v[i];
+    return v[(SizeType)i];
   });
 
   cl.def(
@@ -514,7 +536,7 @@ void map_assignment(const Args&...)
 
 // Map assignment when copy-assignable: just copy the value
 template <typename Map, typename Class_>
-void map_assignment(enable_if_t<std::is_copy_assignable<typename Map::mapped_type>::value, Class_>& cl)
+void map_assignment(enable_if_t<is_copy_assignable<typename Map::mapped_type>::value, Class_>& cl)
 {
   using KeyType = typename Map::key_type;
   using MappedType = typename Map::mapped_type;
@@ -530,7 +552,7 @@ void map_assignment(enable_if_t<std::is_copy_assignable<typename Map::mapped_typ
 
 // Not copy-assignable, but still copy-constructible: we can update the value by erasing and reinserting
 template <typename Map, typename Class_>
-void map_assignment(enable_if_t<!std::is_copy_assignable<typename Map::mapped_type>::value
+void map_assignment(enable_if_t<!is_copy_assignable<typename Map::mapped_type>::value
                                     && is_copy_constructible<typename Map::mapped_type>::value,
                                 Class_>& cl)
 {
diff --git a/dune/xt/common/fixed_map.hh b/dune/xt/common/fixed_map.hh
index d2bdbd7aad387cc097e4b9c935aca72bffef8943..ebf9e95849cab8038f16bd13a3e7501ce28e8b7a 100644
--- a/dune/xt/common/fixed_map.hh
+++ b/dune/xt/common/fixed_map.hh
@@ -126,30 +126,19 @@ private:
 
   typedef FixedMap<key_imp, T, nin> ThisType;
 
-  template <class K> // for sfinae to work this needs to be a template although the type is already fixed
-  typename std::enable_if<std::is_convertible<K, std::string>::value, std::string>::type
-  range_error_message(K key) const
+  std::string range_error_message(key_imp key) const
   {
     std::stringstream ss;
-    ss << "missing key '" << key << "' in FixedMap!";
-    return ss.str();
-  }
-
-  template <class K>
-  typename std::enable_if<std::is_convertible<K, int>::value, std::string>::type range_error_message(K key) const
-  {
-    std::stringstream ss;
-    ss << "missing key (converted to int)'" << int(key) << "' in FixedMap!";
+    if constexpr (std::is_convertible<key_imp, std::string>::value) {
+      ss << "missing key '" << key << "' in FixedMap!";
+    } else if constexpr (std::is_convertible<key_imp, int>::value) {
+      ss << "missing key (converted to int)'" << int(key) << "' in FixedMap!";
+    } else {
+      ss << "missing key is not printable";
+    }
     return ss.str();
   }
 
-  template <class K>
-  typename std::enable_if<!(std::is_convertible<K, int>::value || std::is_convertible<K, std::string>::value),
-                          std::string>::type range_error_message(K /*key*/) const
-  {
-    return "missing key is not printable";
-  }
-
 public:
   typedef key_imp key_type;
   typedef T mapped_type;
diff --git a/dune/xt/common/logstreams.hh b/dune/xt/common/logstreams.hh
index 56c92509efbc8c6c83829c473abed16ff3c3e0ae..4416dd13e91d76026a28333166368f7f1f3a0523 100644
--- a/dune/xt/common/logstreams.hh
+++ b/dune/xt/common/logstreams.hh
@@ -19,6 +19,7 @@
 #include <iostream>
 #include <type_traits>
 #include <mutex>
+#include <list>
 
 #include <dune/common/timer.hh>
 
diff --git a/dune/xt/common/math.hh b/dune/xt/common/math.hh
index bd008519a8146954a09f27b023e3125f575f9179..8124283364be6cdbcc5b2f1f158bb5982f8f289a 100644
--- a/dune/xt/common/math.hh
+++ b/dune/xt/common/math.hh
@@ -30,8 +30,6 @@
 #include <boost/accumulators/statistics/min.hpp>
 #include <boost/accumulators/statistics/mean.hpp>
 #include <boost/format.hpp>
-#include <boost/fusion/include/void.hpp>
-#include <boost/geometry.hpp>
 #include <boost/math/special_functions/fpclassify.hpp>
 #include <boost/static_assert.hpp>
 #include <dune/xt/common/reenable_warnings.hh>
diff --git a/dune/xt/common/matrix.hh b/dune/xt/common/matrix.hh
index c4dd5fb2d26736fffce713d0db684ac6bc8bbc6f..e837f5bc29067d8e4d974e7907f14cd09f189e00 100644
--- a/dune/xt/common/matrix.hh
+++ b/dune/xt/common/matrix.hh
@@ -292,31 +292,33 @@ struct MatrixAbstraction<Dune::FieldMatrix<K, N, M>>
 
 
 template <class MatrixType>
-typename std::enable_if<is_matrix<MatrixType>::value, size_t>::type get_matrix_rows(const MatrixType& matrix)
+auto get_matrix_rows(const MatrixType& matrix)
 {
+  static_assert(is_matrix<MatrixType>::value);
   return MatrixAbstraction<MatrixType>::rows(matrix);
 }
 
 
 template <class MatrixType>
-typename std::enable_if<is_matrix<MatrixType>::value, size_t>::type get_matrix_cols(const MatrixType& matrix)
+auto get_matrix_cols(const MatrixType& matrix)
 {
+  static_assert(is_matrix<MatrixType>::value);
   return MatrixAbstraction<MatrixType>::cols(matrix);
 }
 
 
 template <class MatrixType>
-typename std::enable_if<is_matrix<MatrixType>::value, typename MatrixAbstraction<MatrixType>::S>::type
-get_matrix_entry(const MatrixType& matrix, const size_t ii, const size_t jj)
+auto get_matrix_entry(const MatrixType& matrix, const size_t ii, const size_t jj)
 {
+  static_assert(is_matrix<MatrixType>::value);
   return MatrixAbstraction<MatrixType>::get_entry(matrix, ii, jj);
 }
 
 
 template <class MatrixType, class S>
-typename std::enable_if<is_matrix<MatrixType>::value, void>::type
-set_matrix_entry(MatrixType& matrix, const size_t ii, const size_t jj, const S& value)
+auto set_matrix_entry(MatrixType& matrix, const size_t ii, const size_t jj, const S& value)
 {
+  static_assert(is_matrix<MatrixType>::value);
   MatrixAbstraction<MatrixType>::set_entry(matrix, ii, jj, value);
 }
 
@@ -326,14 +328,12 @@ template <class MatrixType,
           size_t COLS = MatrixAbstraction<MatrixType>::static_cols,
           class FieldType = typename MatrixAbstraction<MatrixType>::S,
           class SparsityPatternType = FullPattern>
-typename std::enable_if<
-    is_matrix<MatrixType>::value,
-    typename MatrixAbstraction<MatrixType>::template MatrixTypeTemplate<ROWS, COLS, FieldType>>::type
-create(const size_t rows,
-       const size_t cols,
-       const FieldType& val = 0,
-       const SparsityPatternType& pattern = SparsityPatternType())
+auto create(const size_t rows,
+            const size_t cols,
+            const FieldType& val = 0,
+            const SparsityPatternType& pattern = SparsityPatternType())
 {
+  static_assert(is_matrix<MatrixType>::value);
   return MatrixAbstraction<
       typename MatrixAbstraction<MatrixType>::template MatrixTypeTemplate<ROWS, COLS, FieldType>>::create(rows,
                                                                                                           cols,
@@ -343,18 +343,18 @@ create(const size_t rows,
 
 
 template <class TargetMatrixType, class SourceMatrixType>
-typename std::enable_if<is_matrix<TargetMatrixType>::value && is_matrix<SourceMatrixType>::value,
-                        TargetMatrixType>::type
-zeros_like(const SourceMatrixType& source)
+auto zeros_like(const SourceMatrixType& source)
 {
+  static_assert(is_matrix<TargetMatrixType>::value && is_matrix<SourceMatrixType>::value);
   return create<TargetMatrixType>(
       get_matrix_rows(source), get_matrix_cols(source), typename MatrixAbstraction<TargetMatrixType>::S(0));
 }
 
 
 template <class MatrixType>
-typename std::enable_if<is_matrix<MatrixType>::value, MatrixType>::type zeros_like(const MatrixType& source)
+auto zeros_like(const MatrixType& source)
 {
+  static_assert(is_matrix<MatrixType>::value);
   return zeros_like<MatrixType, MatrixType>(source);
 }
 
diff --git a/dune/xt/common/print.hh b/dune/xt/common/print.hh
index 728ee4a0475a4e57f8b5277dddc1e517c77c0f75..548348c5c04091271135e9d782e51694653e6848 100644
--- a/dune/xt/common/print.hh
+++ b/dune/xt/common/print.hh
@@ -39,30 +39,6 @@ class DefaultPrinter
 {
   using ThisType = DefaultPrinter;
 
-  template <bool has_ostream = is_printable<T>::value, bool anything = true>
-  struct call_ostream;
-
-  template <bool anything>
-  struct call_ostream<true, anything>
-  {
-    static void or_print_error(std::ostream& out, const T& val)
-    {
-      // There are some operator<< overloads that are deprecated due to the introduction of this Printer.
-#include <dune/xt/common/disable_warnings.hh>
-      out << val;
-#include <dune/xt/common/reenable_warnings.hh>
-    }
-  };
-
-  template <bool anything>
-  struct call_ostream<false, anything>
-  {
-    static void or_print_error(std::ostream& out, const T& /*val*/)
-    {
-      out << "missing specialization for Printer<T> with T=" << Typename<T>::value();
-    }
-  };
-
 public:
   using ValueType = T;
 
@@ -81,7 +57,14 @@ public:
 
   virtual void repr(std::ostream& out) const
   {
-    call_ostream<>::or_print_error(out, value);
+    if constexpr (is_printable<T>::value) {
+      // There are some of our operator<< overloads that are deprecated due to the introduction of this Printer.
+#include <dune/xt/common/disable_warnings.hh>
+      out << value;
+#include <dune/xt/common/reenable_warnings.hh>
+    } else {
+      out << "missing specialization for Printer<T> with T=" << Typename<T>::value();
+    }
   }
 
   virtual void str(std::ostream& out) const
diff --git a/dune/xt/common/string_internal.hh b/dune/xt/common/string_internal.hh
index 133396ce5037efc0da95f2d919caf580a2f07c92..52ab184f89511c40dd621f607947a7d20a68583c 100644
--- a/dune/xt/common/string_internal.hh
+++ b/dune/xt/common/string_internal.hh
@@ -24,6 +24,7 @@
 #include <boost/algorithm/string/trim.hpp>
 #include <boost/lexical_cast.hpp>
 #include <boost/numeric/conversion/cast.hpp>
+#include  <boost/algorithm/string/constants.hpp>
 #include <dune/xt/common/reenable_warnings.hh>
 
 #include <dune/xt/common/debug.hh>
@@ -78,8 +79,11 @@ static inline T convert_safely(std::string ss)
   return T();
 } // ... convert_safely(...)
 
+template <class T>
+T convert_from_string(std::string ss, const size_t rows = 0, const size_t cols = 0);
+
 // unspecialized variant
-template <class T, bool anything = true>
+template <class T>
 struct Helper
 {
   static inline T convert_from_string(std::string ss)
@@ -89,8 +93,8 @@ struct Helper
 }; // struct Helper
 
 // variant for bool, to correctly parse true and false
-template <bool anything>
-struct Helper<bool, anything>
+template <>
+struct Helper<bool>
 {
   static inline bool convert_from_string(std::string ss)
   {
@@ -107,8 +111,8 @@ struct Helper<bool, anything>
 
 // variant for all basic types supported by std::sto*
 #define DUNE_XT_COMMON_STRING_GENERATE_HELPER(tn, tns)                                                                 \
-  template <bool anything>                                                                                             \
-  struct Helper<tn, anything>                                                                                          \
+  template <>                                                                                                          \
+  struct Helper<tn>                                                                                                    \
   {                                                                                                                    \
     static inline tn convert_from_string(std::string ss)                                                               \
     {                                                                                                                  \
@@ -133,8 +137,8 @@ DUNE_XT_COMMON_STRING_GENERATE_HELPER(long double, ld)
 #undef DUNE_XT_COMMON_STRING_GENERATE_HELPER
 
 // variant for unsigned int as there is no stoui
-template <bool anything>
-struct Helper<unsigned int, anything>
+template <>
+struct Helper<unsigned int>
 {
   static inline unsigned int convert_from_string(std::string ss)
   {
@@ -152,25 +156,14 @@ struct Helper<unsigned int, anything>
   }
 };
 
-// variant for everything that is not a matrix or a vector or complex value
-template <class T>
-static inline typename std::enable_if<!is_vector<T>::value && !is_matrix<T>::value && !is_complex<T>::value, T>::type
-convert_from_string(std::string ss, const size_t DXTC_DEBUG_ONLY(rows) = 0, const size_t DXTC_DEBUG_ONLY(cols) = 0)
-{
-  DXT_ASSERT(rows == 0);
-  DXT_ASSERT(cols == 0);
-  return Helper<T>::convert_from_string(ss);
-}
-
-template <class V>
-static inline typename std::enable_if<is_complex<V>::value, V>::type
-convert_from_string(std::string ss, const size_t /*size*/ = 0, const size_t /*cols*/ = 0)
+template <class ComplexType>
+ComplexType complex_from_string(std::string ss, const size_t /*size*/ = 0, const size_t /*cols*/ = 0)
 {
   boost::algorithm::trim(ss);
   if (ss.size() < 1)
     DUNE_THROW(Exceptions::conversion_error, "Error converting " << ss << " (too short)");
   using namespace std;
-  typedef typename V::value_type T;
+  typedef typename ComplexType::value_type T;
   T re(0), im(0);
   const auto sign_pos = ss.find("+", 1) != string::npos ? ss.find("+", 1) : ss.find("-", 1);
   auto im_pos = ss.find("i");
@@ -187,14 +180,12 @@ convert_from_string(std::string ss, const size_t /*size*/ = 0, const size_t /*co
       DUNE_THROW(Exceptions::conversion_error, "Error converting " << ss << " no imaginary unit");
     im = convert_from_string<T>(ss.substr(0, im_pos));
   }
-  return V(re, im);
+  return ComplexType(re, im);
 }
 
 template <class VectorType>
-static inline typename std::enable_if<is_vector<VectorType>::value, VectorType>::type
-convert_from_string(std::string ss, const size_t size, const size_t DXTC_DEBUG_ONLY(cols) = 0)
+VectorType vector_from_string(std::string vector_str, const size_t size, const size_t DXTC_DEBUG_ONLY(cols) = 0)
 {
-  auto vector_str = ss;
   typedef typename VectorAbstraction<VectorType>::S S;
   DXT_ASSERT(cols == 0);
   // check if this is a vector
@@ -243,8 +234,7 @@ convert_from_string(std::string ss, const size_t size, const size_t DXTC_DEBUG_O
 } // ... convert_from_string(...)
 
 template <class MatrixType>
-static inline typename std::enable_if<is_matrix<MatrixType>::value, MatrixType>::type
-convert_from_string(std::string matrix_str, const size_t rows, const size_t cols)
+MatrixType matrix_from_string(std::string matrix_str, const size_t rows, const size_t cols)
 {
   typedef typename MatrixAbstraction<MatrixType>::S S;
   // check if this is a matrix
@@ -333,6 +323,23 @@ convert_from_string(std::string matrix_str, const size_t rows, const size_t cols
   }
 } // ... convert_from_string(...)
 
+// main entry point: dispatches on T to complex_from_string, vector_from_string, matrix_from_string or Helper<T>
+template <class T>
+T convert_from_string(std::string ss, const size_t rows, const size_t cols)
+{
+  if constexpr (is_complex<T>::value) {
+    return complex_from_string<T>(ss);
+  } else if constexpr (is_vector<T>::value) {
+    return vector_from_string<T>(ss, rows, cols);
+  } else if constexpr (is_matrix<T>::value) {
+    return matrix_from_string<T>(ss, rows, cols);
+  } else {
+    DXT_ASSERT(rows == 0);
+    DXT_ASSERT(cols == 0);
+    return Helper<T>::convert_from_string(ss);
+  }
+}
+
 // variant for everything that is not a matrix, a vector or any of the types specified below
 template <class T>
 static inline typename std::enable_if<!is_vector<T>::value && !is_matrix<T>::value, std::string>::type
diff --git a/dune/xt/common/type_traits.hh b/dune/xt/common/type_traits.hh
index eb7b179e19fc3ac0a421cebf1b79bda1008b7e94..c0d30bbd56475ed29e7cbc1aa4246ef60692f24b 100644
--- a/dune/xt/common/type_traits.hh
+++ b/dune/xt/common/type_traits.hh
@@ -250,31 +250,12 @@ std::string get_template_basename(const T&)
   return str.substr(0, r);
 }
 
-template <class T, class Ptr = void>
-struct is_smart_ptr
-{
-  static const bool value = false;
-  typedef T type;
-};
-
-template <class T>
-struct is_smart_ptr<T, typename std::enable_if<std::is_same<std::unique_ptr<typename T::element_type>, T>::value>::type>
-{
-  static const bool value = true;
-  typedef T type;
-};
-
-template <class T>
-struct is_smart_ptr<T, typename std::enable_if<std::is_same<std::shared_ptr<typename T::element_type>, T>::value>::type>
-{
-  static const bool value = true;
-  typedef T type;
-};
-
 template <class T>
-struct is_smart_ptr<T, typename std::enable_if<std::is_same<std::weak_ptr<typename T::element_type>, T>::value>::type>
+struct is_smart_ptr
 {
-  static const bool value = true;
+  static const bool value = std::is_same<std::unique_ptr<typename T::element_type>, T>::value
+                            || std::is_same<std::shared_ptr<typename T::element_type>, T>::value
+                            || std::is_same<std::weak_ptr<typename T::element_type>, T>::value;
   typedef T type;
 };
 
diff --git a/dune/xt/functions/base/combined-element-functions.hh b/dune/xt/functions/base/combined-element-functions.hh
index 1293f8abf9f816261aab9133c1fc37771b261107..5a589067dfbe3f1a0e5095a41e475fe421b4caa3 100644
--- a/dune/xt/functions/base/combined-element-functions.hh
+++ b/dune/xt/functions/base/combined-element-functions.hh
@@ -58,6 +58,7 @@ private:
   template <class L, class R>
   class dim_switch
   {
+    //! last template argument cannot be dropped due to GCC bug https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85282
     template <CombinationType cc = comb,
               size_t rL = L::r,
               size_t rCL = L::rC,
diff --git a/dune/xt/functions/base/combined-functions.hh b/dune/xt/functions/base/combined-functions.hh
index a6580b0b21b04a52a842531383603a1868ee7a23..40f18c8d6fe85e52a285826a7a607625a858e6d1 100644
--- a/dune/xt/functions/base/combined-functions.hh
+++ b/dune/xt/functions/base/combined-functions.hh
@@ -49,6 +49,7 @@ private:
   template <class L, class R>
   class Choose
   {
+    //! last template argument cannot be dropped due to GCC bug https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85282
     template <size_t rL, size_t rR, size_t rCL, size_t rcR, CombinationType cc, bool anything = true>
     class Dimension
     {
diff --git a/dune/xt/functions/base/reinterpret.hh b/dune/xt/functions/base/reinterpret.hh
index 9986d56f6a988c17415aaebee199a94f37636649..b2c6c569094668050c13f6832b1c499c1431dda6 100644
--- a/dune/xt/functions/base/reinterpret.hh
+++ b/dune/xt/functions/base/reinterpret.hh
@@ -244,11 +244,10 @@ auto reinterpreted_source = reinterpret<TargetElement>(source, source_grid_view)
  * \sa ReinterpretLocalizableFunction
  */
 template <class TargetElement, class SourceGridView, size_t r, size_t rC, class R>
-std::enable_if_t<XT::Grid::is_layer<SourceGridView>::value,
-                 ReinterpretLocalizableFunction<SourceGridView, TargetElement, r, rC, R>>
-reinterpret(const GridFunctionInterface<XT::Grid::extract_entity_t<SourceGridView>, r, rC, R>& source,
-            const SourceGridView& source_grid_view)
+auto reinterpret(const GridFunctionInterface<XT::Grid::extract_entity_t<SourceGridView>, r, rC, R>& source,
+                 const SourceGridView& source_grid_view)
 {
+  static_assert(XT::Grid::is_layer<SourceGridView>::value);
   return ReinterpretLocalizableFunction<SourceGridView, TargetElement, r, rC, R>(source, source_grid_view);
 }
 
@@ -260,12 +259,12 @@ reinterpret(const GridFunctionInterface<XT::Grid::extract_entity_t<SourceGridVie
  * \sa ReinterpretLocalizableFunction
  */
 template <class SourceGridView, size_t r, size_t rC, class R, class TargetGridView>
-std::enable_if_t<XT::Grid::is_layer<SourceGridView>::value && XT::Grid::is_layer<TargetGridView>::value,
-                 ReinterpretLocalizableFunction<SourceGridView, XT::Grid::extract_entity_t<TargetGridView>, r, rC, R>>
-reinterpret(const GridFunctionInterface<XT::Grid::extract_entity_t<SourceGridView>, r, rC, R>& source,
-            const SourceGridView& source_grid_view,
-            const TargetGridView& /*target_grid_view*/)
+auto reinterpret(const GridFunctionInterface<XT::Grid::extract_entity_t<SourceGridView>, r, rC, R>& source,
+                 const SourceGridView& source_grid_view,
+                 const TargetGridView& /*target_grid_view*/)
 {
+  static_assert(XT::Grid::is_layer<SourceGridView>::value);
+  static_assert(XT::Grid::is_layer<TargetGridView>::value);
   return reinterpret<XT::Grid::extract_entity_t<TargetGridView>>(source, source_grid_view);
 }
 
diff --git a/dune/xt/functions/base/visualization.hh b/dune/xt/functions/base/visualization.hh
index c078d0744f0062b1eb681141885a73a366ecf544..a621b9dad8a7110432fbca796cb33a4802758bc4 100644
--- a/dune/xt/functions/base/visualization.hh
+++ b/dune/xt/functions/base/visualization.hh
@@ -68,7 +68,7 @@ public:
   }
 
 private:
-  template <size_t r_ = r, size_t rC_ = rC, bool anything = true>
+  template <size_t r_ = r, size_t rC_ = rC>
   struct helper
   {
     static int ncomps()
@@ -82,8 +82,8 @@ private:
     }
   }; // class helper<...>
 
-  template <size_t r_, bool anything>
-  struct helper<r_, 1, anything>
+  template <size_t r_>
+  struct helper<r_, 1>
   {
     static int ncomps()
     {
diff --git a/dune/xt/functions/generic/grid-function.hh b/dune/xt/functions/generic/grid-function.hh
index b80c4183d3672c53ec34f4d44b2416a26b7bb273..6d2280c1c39a4f0bbb291f09c063d5ed48014fbc 100644
--- a/dune/xt/functions/generic/grid-function.hh
+++ b/dune/xt/functions/generic/grid-function.hh
@@ -124,7 +124,16 @@ private:
       auto parsed_param = this->parse_parameter(param);
       auto local_jacobian = jacobian_(point_in_local_coordinates, parsed_param);
       const auto J_inv_T = this->element().geometry().jacobianInverseTransposed(point_in_local_coordinates);
-      return JacobianHelper<>::jacobian(local_jacobian, J_inv_T);
+      DerivativeRangeReturnType global_jacobian;
+      if constexpr (rC == 1) {
+        for (size_t rr = 0; rr < r; ++rr)
+          J_inv_T.mv(local_jacobian[rr], global_jacobian[rr]);
+      } else {
+        for (size_t rr = 0; rr < r; ++rr)
+          for (size_t ii = 0; ii < rC; ++ii)
+            J_inv_T.mv(local_jacobian[rr][ii], global_jacobian[rr][ii]);
+      }
+      return global_jacobian;
     }
 
     DerivativeRangeReturnType derivative(const std::array<size_t, d>& alpha,
@@ -144,33 +153,6 @@ private:
     }
 
   private:
-    template <size_t range_cols = rC, bool anything = true>
-    struct JacobianHelper
-    {
-      static DerivativeRangeReturnType jacobian(const DerivativeRangeType& local_jacobian,
-                                                const FieldMatrix<R, d, d>& J_inv_T)
-      {
-        DerivativeRangeReturnType global_jacobian;
-        for (size_t rr = 0; rr < r; ++rr)
-          for (size_t ii = 0; ii < rC; ++ii)
-            J_inv_T.mv(local_jacobian[rr][ii], global_jacobian[rr][ii]);
-        return global_jacobian;
-      }
-    };
-
-    template <bool anything>
-    struct JacobianHelper<1, anything>
-    {
-      static DerivativeRangeReturnType jacobian(const DerivativeRangeType& local_jacobian,
-                                                const FieldMatrix<R, d, d>& J_inv_T)
-      {
-        DerivativeRangeReturnType global_jacobian;
-        for (size_t rr = 0; rr < r; ++rr)
-          J_inv_T.mv(local_jacobian[rr], global_jacobian[rr]);
-        return global_jacobian;
-      }
-    };
-
     const GenericOrderFunctionType& order_;
     const GenericPostBindFunctionType& post_bind_;
     const GenericEvaluateFunctionType& evaluate_;
diff --git a/dune/xt/functions/interfaces/element-flux-functions.hh b/dune/xt/functions/interfaces/element-flux-functions.hh
index ea8db9a8c8bd99f4539a701d23a35d348b10be0d..c35c7391d8927a85f70ac73649d183db05ac197e 100644
--- a/dune/xt/functions/interfaces/element-flux-functions.hh
+++ b/dune/xt/functions/interfaces/element-flux-functions.hh
@@ -222,7 +222,13 @@ assert(max_set_size <= local_function_set.max_size());
     const auto tmp_values = this->evaluate_set(point_in_reference_element, u, param);
     if (result.size() < tmp_values.size())
       result.resize(tmp_values.size());
-    single_evaluate_helper<>::call(tmp_values, row, col, result);
+    if constexpr (rC == 1) {
+      for (size_t ii = 0; ii < tmp_values.size(); ++ii)
+        result[ii] = tmp_values[ii][row];
+    } else {
+      for (size_t ii = 0; ii < tmp_values.size(); ++ii)
+        result[ii] = tmp_values[ii][row][col];
+    }
   }
 
   /**
@@ -239,7 +245,14 @@ assert(max_set_size <= local_function_set.max_size());
     const auto tmp_values = this->jacobian_of_set(point_in_reference_element, u, param);
     if (result.size() < tmp_values.size())
       result.resize(tmp_values.size());
-    single_derivative_helper<>::call(tmp_values, row, col, result);
+    if constexpr (rC == 1) {
+      for (size_t ii = 0; ii < tmp_values.size(); ++ii)
+        result[ii] = tmp_values[ii][row];
+    } else {
+      for (size_t ii = 0; ii < tmp_values.size(); ++ii)
+        for (size_t dd = 0; dd < d; ++dd)
+          result[ii][dd] = tmp_values[ii][row][col][dd];
+    }
   }
 
   /**
@@ -324,53 +337,6 @@ protected:
 #else // DUNE_XT_FUNCTIONS_DISABLE_CHECKS
   static void assert_correct_dims(const size_t /*row*/, const size_t /*col*/, const std::string& /*caller*/) {}
 #endif
-
-private:
-  template <size_t _r = r, size_t _rC = rC, bool anything = true>
-  struct single_evaluate_helper
-  {
-    template <class FullType>
-    static void call(const std::vector<FullType>& val, const size_t row, const size_t col, std::vector<R>& ret)
-    {
-      for (size_t ii = 0; ii < val.size(); ++ii)
-        ret[ii] = val[ii][row][col];
-    }
-  }; // struct single_evaluate_helper<...>
-
-  template <size_t _r, bool anything>
-  struct single_evaluate_helper<_r, 1, anything>
-  {
-    template <class FullType>
-    static void call(const std::vector<FullType>& val, const size_t row, const size_t /*col*/, std::vector<R>& ret)
-    {
-      for (size_t ii = 0; ii < val.size(); ++ii)
-        ret[ii] = val[ii][row];
-    }
-  }; // struct single_evaluate_helper<..., 1, ...>
-
-  template <size_t _r = r, size_t _rC = rC, bool anything = true>
-  struct single_derivative_helper
-  {
-    template <class FullType, class SingleType>
-    static void call(const std::vector<FullType>& val, const size_t row, const size_t col, std::vector<SingleType>& ret)
-    {
-      for (size_t ii = 0; ii < val.size(); ++ii)
-        for (size_t dd = 0; dd < d; ++dd)
-          ret[ii][dd] = val[ii][row][col][dd];
-    }
-  }; // struct single_derivative_helper<...>
-
-  template <size_t _r, bool anything>
-  struct single_derivative_helper<_r, 1, anything>
-  {
-    template <class FullType, class SingleType>
-    static void
-    call(const std::vector<FullType>& val, const size_t row, const size_t /*col*/, std::vector<SingleType>& ret)
-    {
-      for (size_t ii = 0; ii < val.size(); ++ii)
-        ret[ii] = val[ii][row];
-    }
-  }; // struct single_derivative_helper<..., 1, ...>
 }; // class ElementFluxFunctionSetInterface
 
 
@@ -483,7 +449,13 @@ public:
                      const Common::Parameter& param = {}) const
   {
     this->assert_correct_dims(row, col, "evaluate");
-    return single_evaluate_helper<R>::call(this->evaluate(point_in_reference_element, u, param), row, col);
+
+    const auto val = this->evaluate(point_in_reference_element, u, param);
+    if constexpr (rC == 1) {
+      return val[row];
+    } else {
+      return val[row][col];
+    }
   }
 
   virtual SingleJacobianRangeReturnType jacobian(const DomainType& point_in_reference_element,
@@ -493,8 +465,15 @@ public:
                                                  const Common::Parameter& param = {}) const
   {
     this->assert_correct_dims(row, col, "jacobian");
-    return single_derivative_helper<SingleJacobianRangeReturnType>::call(
-        this->jacobian(point_in_reference_element, u, param), row, col);
+    const auto val = this->jacobian(point_in_reference_element, u, param);
+    if constexpr (rC == 1) {
+      return val[row];
+    } else {
+      SingleJacobianRangeReturnType ret;
+      for (size_t dd = 0; dd < d; ++dd)
+        ret[dd] = val[row][col][dd];
+      return ret;
+    }
   }
 
   /**
@@ -567,50 +546,6 @@ public:
       result.resize(1);
     result[0] = this->jacobian(point_in_reference_element, u, param);
   }
-
-private:
-  template <class SingleType, size_t _r = BaseType::r, size_t _rC = BaseType::rC, bool anything = true>
-  struct single_evaluate_helper
-  {
-    template <class FullType>
-    static SingleType call(const FullType& val, const size_t row, const size_t col)
-    {
-      return val[row][col];
-    }
-  }; // struct single_evaluate_helper<...>
-
-  template <class SingleType, size_t _r, bool anything>
-  struct single_evaluate_helper<SingleType, _r, 1, anything>
-  {
-    template <class FullType>
-    static SingleType call(const FullType& val, const size_t row, const size_t /*col*/)
-    {
-      return val[row];
-    }
-  }; // struct single_evaluate_helper<..., 1, ...>
-
-  template <class SingleType, size_t _r = BaseType::r, size_t _rC = BaseType::rC, bool anything = true>
-  struct single_derivative_helper
-  {
-    template <class FullType>
-    static SingleType call(const FullType& val, const size_t row, const size_t col)
-    {
-      SingleType ret;
-      for (size_t dd = 0; dd < d; ++dd)
-        ret[dd] = val[row][col][dd];
-      return ret;
-    }
-  }; // struct single_derivative_helper<...>
-
-  template <class SingleType, size_t _r, bool anything>
-  struct single_derivative_helper<SingleType, _r, 1, anything>
-  {
-    template <class FullType>
-    static SingleType call(const FullType& val, const size_t row, const size_t /*col*/)
-    {
-      return val[row];
-    }
-  }; // struct single_derivative_helper<..., 1, ...>
 }; // class ElementFluxFunctionInterface
 
 
diff --git a/dune/xt/functions/interfaces/element-functions.hh b/dune/xt/functions/interfaces/element-functions.hh
index 8ab901652f2fd6685427205950c662d72ae90497..51c3254e0a9abe2a04c66ccafcdc97205ba2e1bc 100644
--- a/dune/xt/functions/interfaces/element-functions.hh
+++ b/dune/xt/functions/interfaces/element-functions.hh
@@ -274,7 +274,13 @@ assert(max_set_size <= local_function_set.max_size());
     const auto tmp_values = this->evaluate_set(point_in_reference_element, param);
     if (result.size() < tmp_values.size())
       result.resize(tmp_values.size());
-    single_evaluate_helper<>::call(tmp_values, row, col, result);
+    if constexpr (rC == 1) {
+      for (size_t ii = 0; ii < tmp_values.size(); ++ii)
+        result[ii] = tmp_values[ii][row];
+    } else {
+      for (size_t ii = 0; ii < tmp_values.size(); ++ii)
+        result[ii] = tmp_values[ii][row][col];
+    }
   }
 
   /**
@@ -290,7 +296,7 @@ assert(max_set_size <= local_function_set.max_size());
     const auto tmp_values = this->jacobians_of_set(point_in_reference_element, param);
     if (result.size() < tmp_values.size())
       result.resize(tmp_values.size());
-    single_derivative_helper<>::call(tmp_values, row, col, result);
+    single_derivative_helper_call(tmp_values, row, col, result);
   }
 
   /**
@@ -307,7 +313,7 @@ assert(max_set_size <= local_function_set.max_size());
     const auto tmp_values = this->derivatives_of_set(alpha, point_in_reference_element, param);
     if (result.size() < tmp_values.size())
       result.resize(tmp_values.size());
-    single_derivative_helper<>::call(tmp_values, row, col, result);
+    single_derivative_helper_call(tmp_values, row, col, result);
   }
 
   /**
@@ -414,51 +420,21 @@ protected:
 #endif
 
 private:
-  template <size_t _r = r, size_t _rC = rC, bool anything = true>
-  struct single_evaluate_helper
+  template <class FullType, class SingleType>
+  void single_derivative_helper_call(const std::vector<FullType>& val,
+                                     const size_t row,
+                                     const size_t col,
+                                     std::vector<SingleType>& ret) const
   {
-    template <class FullType>
-    static void call(const std::vector<FullType>& val, const size_t row, const size_t col, std::vector<R>& ret)
-    {
-      for (size_t ii = 0; ii < val.size(); ++ii)
-        ret[ii] = val[ii][row][col];
-    }
-  }; // struct single_evaluate_helper<...>
-
-  template <size_t _r, bool anything>
-  struct single_evaluate_helper<_r, 1, anything>
-  {
-    template <class FullType>
-    static void call(const std::vector<FullType>& val, const size_t row, const size_t /*col*/, std::vector<R>& ret)
-    {
+    if constexpr (rC == 1) {
       for (size_t ii = 0; ii < val.size(); ++ii)
         ret[ii] = val[ii][row];
-    }
-  }; // struct single_evaluate_helper<..., 1, ...>
-
-  template <size_t _r = r, size_t _rC = rC, bool anything = true>
-  struct single_derivative_helper
-  {
-    template <class FullType, class SingleType>
-    static void call(const std::vector<FullType>& val, const size_t row, const size_t col, std::vector<SingleType>& ret)
-    {
+    } else {
       for (size_t ii = 0; ii < val.size(); ++ii)
         for (size_t dd = 0; dd < d; ++dd)
           ret[ii][dd] = val[ii][row][col][dd];
     }
-  }; // struct single_derivative_helper<...>
-
-  template <size_t _r, bool anything>
-  struct single_derivative_helper<_r, 1, anything>
-  {
-    template <class FullType, class SingleType>
-    static void
-    call(const std::vector<FullType>& val, const size_t row, const size_t /*col*/, std::vector<SingleType>& ret)
-    {
-      for (size_t ii = 0; ii < val.size(); ++ii)
-        ret[ii] = val[ii][row];
-    }
-  }; // struct single_derivative_helper<..., 1, ...>
+  }
 }; // class ElementFunctionSetInterface
 
 
@@ -564,7 +540,12 @@ public:
                      const Common::Parameter& param = {}) const
   {
     this->assert_correct_dims(row, col, "evaluate");
-    return single_evaluate_helper<R>::call(this->evaluate(point_in_reference_element, param), row, col);
+    const auto value = this->evaluate(point_in_reference_element, param);
+    if constexpr (rC == 1) {
+      return value[row];
+    } else {
+      return value[row][col];
+    }
   }
 
   virtual SingleDerivativeRangeReturnType jacobian(const DomainType& point_in_reference_element,
@@ -573,8 +554,7 @@ public:
                                                    const Common::Parameter& param = {}) const
   {
     this->assert_correct_dims(row, col, "jacobian");
-    return single_derivative_helper<SingleDerivativeRangeType>::call(
-        this->jacobian(point_in_reference_element, param), row, col);
+    return single_derivative_helper_call(this->jacobian(point_in_reference_element, param), row, col);
   }
 
   virtual SingleDerivativeRangeReturnType derivative(const std::array<size_t, d>& alpha,
@@ -584,8 +564,7 @@ public:
                                                      const Common::Parameter& param = {}) const
   {
     this->assert_correct_dims(row, col, "derivative");
-    return single_derivative_helper<SingleDerivativeRangeType>::call(
-        this->derivative(alpha, point_in_reference_element, param), row, col);
+    return single_derivative_helper_call(this->derivative(alpha, point_in_reference_element, param), row, col);
   }
 
   /**
@@ -822,48 +801,19 @@ public:
   /// \}
 
 private:
-  template <class SingleType, size_t _r = BaseType::r, size_t _rC = BaseType::rC, bool anything = true>
-  struct single_evaluate_helper
+  template <class FullType>
+  static SingleDerivativeRangeType
+  single_derivative_helper_call(const FullType& val, const size_t row, const size_t col)
   {
-    template <class FullType>
-    static SingleType call(const FullType& val, const size_t row, const size_t col)
-    {
-      return val[row][col];
-    }
-  }; // struct single_evaluate_helper<...>
-
-  template <class SingleType, size_t _r, bool anything>
-  struct single_evaluate_helper<SingleType, _r, 1, anything>
-  {
-    template <class FullType>
-    static SingleType call(const FullType& val, const size_t row, const size_t /*col*/)
-    {
+    if constexpr (rC == 1) {
       return val[row];
-    }
-  }; // struct single_evaluate_helper<..., 1, ...>
-
-  template <class SingleType, size_t _r = BaseType::r, size_t _rC = BaseType::rC, bool anything = true>
-  struct single_derivative_helper
-  {
-    template <class FullType>
-    static SingleType call(const FullType& val, const size_t row, const size_t col)
-    {
-      SingleType ret;
+    } else {
+      SingleDerivativeRangeType ret;
       for (size_t dd = 0; dd < d; ++dd)
         ret[dd] = val[row][col][dd];
       return ret;
     }
-  }; // struct single_derivative_helper<...>
-
-  template <class SingleType, size_t _r, bool anything>
-  struct single_derivative_helper<SingleType, _r, 1, anything>
-  {
-    template <class FullType>
-    static SingleType call(const FullType& val, const size_t row, const size_t /*col*/)
-    {
-      return val[row];
-    }
-  }; // struct single_derivative_helper<..., 1, ...>
+  }
 }; // class ElementFunctionInterface
 
 
diff --git a/dune/xt/functions/interfaces/function.hh b/dune/xt/functions/interfaces/function.hh
index f46b78cf19c7bafe272e005c14f32ef87720fef8..3d552ac535528f7336b34acedc1ef9e94fda8616 100644
--- a/dune/xt/functions/interfaces/function.hh
+++ b/dune/xt/functions/interfaces/function.hh
@@ -182,8 +182,7 @@ public:
   }
 
   template <class OtherType>
-  typename std::enable_if<true, Functions::ProductFunction<ThisType, OtherType>>::type
-  operator*(const OtherType& other) const
+  auto operator*(const OtherType& other) const
   {
     return Functions::ProductFunction<ThisType, OtherType>(*this, other);
   }
diff --git a/dune/xt/functions/interfaces/grid-function.hh b/dune/xt/functions/interfaces/grid-function.hh
index 107b4bc01b958366dd1e704e4e04320ecda97e0a..8b7a8e851bc9ca08029fae322f396c24fd7566f4 100644
--- a/dune/xt/functions/interfaces/grid-function.hh
+++ b/dune/xt/functions/interfaces/grid-function.hh
@@ -264,14 +264,14 @@ public:
    *        visualization may thus be a refinement of the actual grid!
    */
   template <class GridViewType>
-  typename std::enable_if<Grid::is_view<GridViewType>::value, void>::type
-  visualize(const GridViewType& grid_view,
-            const std::string path,
-            const bool subsampling = true,
-            const VTK::OutputType vtk_output_type = VTK::appendedraw,
-            const XT::Common::Parameter& param = {},
-            const VisualizerInterface<r, rC, R>& visualizer = default_visualizer<r, rC, R>()) const
+  void visualize(const GridViewType& grid_view,
+                 const std::string path,
+                 const bool subsampling = true,
+                 const VTK::OutputType vtk_output_type = VTK::appendedraw,
+                 const XT::Common::Parameter& param = {},
+                 const VisualizerInterface<r, rC, R>& visualizer = default_visualizer<r, rC, R>()) const
   {
+    static_assert(Grid::is_view<GridViewType>::value);
     auto vtk_writer = create_vtkwriter(grid_view, subsampling);
     add_to_vtkwriter(*vtk_writer, param, visualizer);
     write_visualization(*vtk_writer, path, vtk_output_type);
@@ -283,33 +283,34 @@ public:
    * \note  Not yet implemented for vector-valued functions.
    */
   template <class GridViewType>
-  typename std::enable_if<Grid::is_view<GridViewType>::value, void>::type
-  visualize_gradient(const GridViewType& grid_view,
-                     const std::string path,
-                     const bool subsampling = true,
-                     const VTK::OutputType vtk_output_type = VTK::appendedraw,
-                     const XT::Common::Parameter& param = {},
-                     const VisualizerInterface<d, 1, R>& visualizer = default_visualizer<d, 1, R>()) const
+  void visualize_gradient(const GridViewType& grid_view,
+                          const std::string path,
+                          const bool subsampling = true,
+                          const VTK::OutputType vtk_output_type = VTK::appendedraw,
+                          const XT::Common::Parameter& param = {},
+                          const VisualizerInterface<d, 1, R>& visualizer = default_visualizer<d, 1, R>()) const
   {
+    static_assert(Grid::is_view<GridViewType>::value);
     auto vtk_writer = create_vtkwriter(grid_view, subsampling);
     add_gradient_to_vtkwriter(*vtk_writer, param, visualizer);
     write_visualization(*vtk_writer, path, vtk_output_type);
   } // ... visualize_gradient(...)
 
   template <class GridViewType>
-  typename std::enable_if<Grid::is_view<GridViewType>::value, std::unique_ptr<VTKWriter<GridViewType>>>::type
-  create_vtkwriter(const GridViewType& grid_view, const bool subsampling = true) const
+  std::unique_ptr<VTKWriter<GridViewType>> create_vtkwriter(const GridViewType& grid_view,
+                                                            const bool subsampling = true) const
   {
+    static_assert(Grid::is_view<GridViewType>::value);
     return subsampling ? std::make_unique<SubsamplingVTKWriter<GridViewType>>(grid_view, /*subsampling_level=*/2)
                        : std::make_unique<VTKWriter<GridViewType>>(grid_view, VTK::nonconforming);
   }
 
   template <class GridViewType>
-  typename std::enable_if<Grid::is_view<GridViewType>::value, void>::type
-  add_to_vtkwriter(VTKWriter<GridViewType>& vtk_writer,
-                   const XT::Common::Parameter& param = {},
-                   const VisualizerInterface<r, rC, R>& visualizer = default_visualizer<r, rC, R>()) const
+  void add_to_vtkwriter(VTKWriter<GridViewType>& vtk_writer,
+                        const XT::Common::Parameter& param = {},
+                        const VisualizerInterface<r, rC, R>& visualizer = default_visualizer<r, rC, R>()) const
   {
+    static_assert(Grid::is_view<GridViewType>::value);
     const auto adapter =
         std::make_shared<VisualizationAdapter<GridViewType, range_dim, range_dim_cols, RangeFieldType>>(
             *this, visualizer, "", param);
@@ -317,11 +318,11 @@ public:
   }
 
   template <class GridViewType>
-  typename std::enable_if<Grid::is_view<GridViewType>::value, void>::type
-  add_gradient_to_vtkwriter(VTKWriter<GridViewType>& vtk_writer,
-                            const XT::Common::Parameter& param = {},
-                            const VisualizerInterface<d, 1, R>& visualizer = default_visualizer<d, 1, R>()) const
+  void add_gradient_to_vtkwriter(VTKWriter<GridViewType>& vtk_writer,
+                                 const XT::Common::Parameter& param = {},
+                                 const VisualizerInterface<d, 1, R>& visualizer = default_visualizer<d, 1, R>()) const
   {
+    static_assert(Grid::is_view<GridViewType>::value);
     const auto adapter =
         std::make_shared<GradientVisualizationAdapter<GridViewType, range_dim, range_dim_cols, RangeFieldType>>(
             *this, visualizer, "", param);
@@ -329,11 +330,11 @@ public:
   }
 
   template <class GridViewType>
-  typename std::enable_if<Grid::is_view<GridViewType>::value, void>::type
-  write_visualization(VTKWriter<GridViewType>& vtk_writer,
-                      const std::string path,
-                      const VTK::OutputType vtk_output_type = VTK::appendedraw) const
+  void write_visualization(VTKWriter<GridViewType>& vtk_writer,
+                           const std::string path,
+                           const VTK::OutputType vtk_output_type = VTK::appendedraw) const
   {
+    static_assert(Grid::is_view<GridViewType>::value);
     if (path.empty())
       DUNE_THROW(Exceptions::wrong_input_given, "path must not be empty!");
     const auto directory = Common::directory_only(path);
diff --git a/dune/xt/functions/inverse.hh b/dune/xt/functions/inverse.hh
index d8bd3e68d4e9f64fefabb5376e485a4a0f50b186..9b9a497957d7be26b60c607580ba2357ad741703 100644
--- a/dune/xt/functions/inverse.hh
+++ b/dune/xt/functions/inverse.hh
@@ -43,34 +43,18 @@ public:
   using DomainType = Dune::FieldVector<double, d>;
   using RangeReturnType = typename RangeTypeSelector<R, r, rC>::return_type;
 
-  template <size_t r_ = r, size_t rC_ = rC, bool anything = true>
-  struct dim_switch
-  {
-    static const constexpr bool available = false;
-  };
+public:
+  static const constexpr bool available = (FunctionType::rC == FunctionType::r);
 
-  template <bool anything>
-  struct dim_switch<1, 1, anything>
+  static RangeReturnType compute(const FunctionType& func, const DomainType& xx, const XT::Common::Parameter& param)
   {
-    static const constexpr bool available = true;
-
-    static RangeReturnType compute(const FunctionType& func, const DomainType& xx, const XT::Common::Parameter& param)
-    {
+    if constexpr (FunctionType::rC == 1 && FunctionType::r == 1) {
       auto value_to_invert = func.evaluate(xx, param)[0];
       DUNE_THROW_IF(XT::Common::FloatCmp::eq(value_to_invert, 0.),
                     Exceptions::wrong_input_given,
                     "Scalar function value was not invertible!\n\nvalue_to_invert = " << value_to_invert);
       return 1. / value_to_invert;
-    }
-  };
-
-  template <size_t r_, bool anything>
-  struct dim_switch<r_, r_, anything>
-  {
-    static const constexpr bool available = true;
-
-    static RangeReturnType compute(const FunctionType& func, const DomainType& xx, const XT::Common::Parameter& param)
-    {
+    } else if constexpr (available) {
       auto matrix_to_invert = func.evaluate(xx, param);
       RangeReturnType inverse_matrix;
       try {
@@ -81,15 +65,9 @@ public:
                        << matrix_to_invert << "\n\nThis was the original error: " << ee.what());
       }
       return inverse_matrix;
+    } else {
+      static_assert(AlwaysFalse<FunctionType>::value, "Not available for these dimensions!");
     }
-  };
-
-public:
-  static const constexpr bool available = dim_switch<>::available;
-
-  static RangeReturnType compute(const FunctionType& func, const DomainType& xx, const XT::Common::Parameter& param)
-  {
-    return dim_switch<>::compute(func, xx, param);
   }
 }; // class InverseFunctionHelper
 
@@ -260,29 +238,35 @@ private:
 
 
 template <class E, size_t r, size_t rC, class R>
-std::enable_if_t<internal::InverseFunctionHelper<ElementFunctionInterface<E, r, rC, R>>::available,
-                 InverseElementFunction<ElementFunctionInterface<E, r, rC, R>>>
-inverse(ElementFunctionInterface<E, r, rC, R>& func, const int order)
+auto inverse(ElementFunctionInterface<E, r, rC, R>& func, const int order)
 {
-  return InverseElementFunction<ElementFunctionInterface<E, r, rC, R>>(func, order);
+  if constexpr (internal::InverseFunctionHelper<ElementFunctionInterface<E, r, rC, R>>::available) {
+    return InverseElementFunction<ElementFunctionInterface<E, r, rC, R>>(func, order);
+  } else {
+    static_assert(AlwaysFalse<R>::value, "Not available for these dimensions!");
+  }
 }
 
 
 template <size_t d, size_t r, size_t rC, class R>
-std::enable_if_t<internal::InverseFunctionHelper<FunctionInterface<d, r, rC, R>>::available,
-                 InverseFunction<FunctionInterface<d, r, rC, R>>>
-inverse(const FunctionInterface<d, r, rC, R>& func, const int order)
+auto inverse(const FunctionInterface<d, r, rC, R>& func, const int order)
 {
-  return InverseFunction<FunctionInterface<d, r, rC, R>>(func, order);
+  if constexpr (internal::InverseFunctionHelper<FunctionInterface<d, r, rC, R>>::available) {
+    return InverseFunction<FunctionInterface<d, r, rC, R>>(func, order);
+  } else {
+    static_assert(AlwaysFalse<R>::value, "Not available for these dimensions!");
+  }
 }
 
 
 template <class E, size_t r, size_t rC, class R>
-std::enable_if_t<internal::InverseFunctionHelper<GridFunctionInterface<E, r, rC, R>>::available,
-                 InverseGridFunction<GridFunctionInterface<E, r, rC, R>>>
-inverse(const GridFunctionInterface<E, r, rC, R>& func, const int order)
+auto inverse(const GridFunctionInterface<E, r, rC, R>& func, const int order)
 {
-  return InverseGridFunction<GridFunctionInterface<E, r, rC, R>>(func, order);
+  if constexpr (internal::InverseFunctionHelper<GridFunctionInterface<E, r, rC, R>>::available) {
+    return InverseGridFunction<GridFunctionInterface<E, r, rC, R>>(func, order);
+  } else {
+    static_assert(AlwaysFalse<R>::value, "Not available for these dimensions!");
+  }
 }
 
 
diff --git a/dune/xt/grid/capabilities.hh b/dune/xt/grid/capabilities.hh
index 4cd440844437fbce887543be5cd9849ca0523129..2d14af335a19fb0d92af25de3b2c3086ce884fc6 100644
--- a/dune/xt/grid/capabilities.hh
+++ b/dune/xt/grid/capabilities.hh
@@ -21,7 +21,7 @@ namespace XT {
 namespace Grid {
 
 
-template <class G, bool anything = true>
+template <class G>
 struct has_boundary_id
 {
 #if DUNE_VERSION_GTE(DUNE_GRID, 2, 6)
@@ -38,8 +38,8 @@ struct has_boundary_id
 
 #if HAVE_DUNE_UGGRID || HAVE_UG
 
-template <int dim, bool anything>
-struct has_boundary_id<UGGrid<dim>, anything>
+template <int dim>
+struct has_boundary_id<UGGrid<dim>>
 {
   static const constexpr bool value = false;
 };
diff --git a/dune/xt/grid/gridprovider/cube.hh b/dune/xt/grid/gridprovider/cube.hh
index 64b1a1977e896bdf4b90778da8770094756884d1..773944f7da6d059142bcd540fe5dc93cc47ea014 100644
--- a/dune/xt/grid/gridprovider/cube.hh
+++ b/dune/xt/grid/gridprovider/cube.hh
@@ -217,7 +217,7 @@ public:
 
 
 template <class GridType>
-typename std::enable_if<is_grid<GridType>::value, GridProvider<GridType>>::type make_cube_grid(
+auto make_cube_grid(
     const FieldVector<typename GridType::ctype, GridType::dimension>& lower_left,
     const FieldVector<typename GridType::ctype, GridType::dimension>& upper_right,
     const std::array<unsigned int, GridType::dimension> num_elements =
@@ -228,33 +228,35 @@ typename std::enable_if<is_grid<GridType>::value, GridProvider<GridType>>::type
         cube_gridprovider_default_config().template get<std::array<unsigned int, GridType::dimension>>("overlap_size"),
     MPIHelper::MPICommunicator mpi_comm = MPIHelper::getCommunicator())
 {
+  static_assert(is_grid<GridType>::value);
   return CubeGridProviderFactory<GridType>::create(
       lower_left, upper_right, num_elements, num_refinements, overlap_size, mpi_comm);
 }
 
 
 template <class GridType>
-typename std::enable_if<is_grid<GridType>::value, GridProvider<GridType>>::type
-make_cube_grid(const typename GridType::ctype& lower_left,
-               const typename GridType::ctype& upper_right,
-               const unsigned int num_elements =
-                   cube_gridprovider_default_config().template get<std::vector<unsigned int>>("num_elements").at(0),
-               const unsigned int num_refinements =
-                   cube_gridprovider_default_config().template get<unsigned int>("num_refinements"),
-               const unsigned int overlap_size =
-                   cube_gridprovider_default_config().template get<std::vector<unsigned int>>("overlap_size").at(0),
-               MPIHelper::MPICommunicator mpi_comm = MPIHelper::getCommunicator())
+auto make_cube_grid(
+    const typename GridType::ctype& lower_left,
+    const typename GridType::ctype& upper_right,
+    const unsigned int num_elements =
+        cube_gridprovider_default_config().template get<std::vector<unsigned int>>("num_elements").at(0),
+    const unsigned int num_refinements =
+        cube_gridprovider_default_config().template get<unsigned int>("num_refinements"),
+    const unsigned int overlap_size =
+        cube_gridprovider_default_config().template get<std::vector<unsigned int>>("overlap_size").at(0),
+    MPIHelper::MPICommunicator mpi_comm = MPIHelper::getCommunicator())
 {
+  static_assert(is_grid<GridType>::value);
   return CubeGridProviderFactory<GridType>::create(
       lower_left, upper_right, num_elements, num_refinements, overlap_size, mpi_comm);
 }
 
 
 template <class GridType>
-typename std::enable_if<is_grid<GridType>::value, GridProvider<GridType>>::type
-make_cube_grid(const Common::Configuration& cfg = cube_gridprovider_default_config(),
-               MPIHelper::MPICommunicator mpi_comm = MPIHelper::getCommunicator())
+auto make_cube_grid(const Common::Configuration& cfg = cube_gridprovider_default_config(),
+                    MPIHelper::MPICommunicator mpi_comm = MPIHelper::getCommunicator())
 {
+  static_assert(is_grid<GridType>::value);
   return CubeGridProviderFactory<GridType>::create(cfg, mpi_comm);
 }
 
diff --git a/dune/xt/grid/gridprovider/dgf.hh b/dune/xt/grid/gridprovider/dgf.hh
index f557135c2da28201991d4c4fd12cae94a92d1d6a..bf33cd46e19191298b18e9e6be6a49cb5ea3747e 100644
--- a/dune/xt/grid/gridprovider/dgf.hh
+++ b/dune/xt/grid/gridprovider/dgf.hh
@@ -81,18 +81,19 @@ public:
 
 
 template <class GridType>
-typename std::enable_if<is_grid<GridType>::value, GridProvider<GridType>>::type
+GridProvider<GridType>
 make_dgf_grid(const std::string& filename, MPIHelper::MPICommunicator mpi_comm = MPIHelper::getCommunicator())
 {
+  static_assert(is_grid<GridType>::value);
   return DgfGridProviderFactory<GridType>(filename, mpi_comm);
 }
 
 
 template <class GridType>
-typename std::enable_if<is_grid<GridType>::value, GridProvider<GridType>>::type
-make_dgf_grid(const Common::Configuration& cfg = DgfGridProviderFactory<GridType>::default_config(),
-              MPIHelper::MPICommunicator mpi_comm = MPIHelper::getCommunicator())
+auto make_dgf_grid(const Common::Configuration& cfg = DgfGridProviderFactory<GridType>::default_config(),
+                   MPIHelper::MPICommunicator mpi_comm = MPIHelper::getCommunicator())
 {
+  static_assert(is_grid<GridType>::value);
   return DgfGridProviderFactory<GridType>::create(cfg, mpi_comm);
 }
 
diff --git a/dune/xt/grid/gridprovider/gmsh.hh b/dune/xt/grid/gridprovider/gmsh.hh
index a9c2dd34a5e577c8335c9d62b1ff8cda5ac3dde0..9ab191860a92dfe4d72e59d30f4387b3063131af 100644
--- a/dune/xt/grid/gridprovider/gmsh.hh
+++ b/dune/xt/grid/gridprovider/gmsh.hh
@@ -114,18 +114,18 @@ public:
 
 
 template <class GridType>
-typename std::enable_if<is_grid<GridType>::value, GridProvider<GridType>>::type
-make_gmsh_grid(const std::string& filename, MPIHelper::MPICommunicator mpi_comm = MPIHelper::getCommunicator())
+auto make_gmsh_grid(const std::string& filename, MPIHelper::MPICommunicator mpi_comm = MPIHelper::getCommunicator())
 {
+  static_assert(is_grid<GridType>::value);
   return GmshGridProviderFactory<GridType>::create(filename, mpi_comm);
 }
 
 
 template <class GridType>
-typename std::enable_if<is_grid<GridType>::value, GridProvider<GridType>>::type
-make_gmsh_grid(const Common::Configuration& cfg = GmshGridProviderFactory<GridType>::default_config(),
-               MPIHelper::MPICommunicator mpi_comm = MPIHelper::getCommunicator())
+auto make_gmsh_grid(const Common::Configuration& cfg = GmshGridProviderFactory<GridType>::default_config(),
+                    MPIHelper::MPICommunicator mpi_comm = MPIHelper::getCommunicator())
 {
+  static_assert(is_grid<GridType>::value);
   return GmshGridProviderFactory<GridType>::create(cfg, mpi_comm);
 }
 
diff --git a/dune/xt/grid/gridprovider/provider.hh b/dune/xt/grid/gridprovider/provider.hh
index 615f32733bcb4a20af4a3560924c96e81a756144..5c0f21c8d799542562998b8c820fb76f33dcfbe0 100644
--- a/dune/xt/grid/gridprovider/provider.hh
+++ b/dune/xt/grid/gridprovider/provider.hh
@@ -148,7 +148,7 @@ public:
   }
 
 private:
-  template <class G, bool anything = true>
+  template <class G>
   struct global_refine_helper
   {
     void operator()(G& g, int count)
@@ -162,8 +162,8 @@ private:
 
 #if HAVE_ALBERTA
 
-  template <int d, int dW, bool anything>
-  struct global_refine_helper<AlbertaGrid<d, dW>, anything>
+  template <int d, int dW>
+  struct global_refine_helper<AlbertaGrid<d, dW>>
   {
     typedef AlbertaGrid<d, dW> G;
 
diff --git a/dune/xt/grid/intersection.hh b/dune/xt/grid/intersection.hh
index 579e7a9cdce7236bfbde2d77a07c001852fcef62..295c79ab3116b2e2e1689a54436444f161f9b448 100644
--- a/dune/xt/grid/intersection.hh
+++ b/dune/xt/grid/intersection.hh
@@ -86,10 +86,9 @@ double diameter(const Intersection<G, I>& intersection)
  *        Returns true, if global_point and intersection coincide.
  */
 template <class G, class I, class D>
-typename std::enable_if<G::dimension == 1, bool>::type
-contains(const Dune::Intersection<G, I>& intersection,
-         const Dune::FieldVector<D, 1>& global_point,
-         const D& tolerance = Common::FloatCmp::DefaultEpsilon<D>::value())
+bool contains(const Dune::Intersection<G, I>& intersection,
+              const Dune::FieldVector<D, 1>& global_point,
+              const D& tolerance = Common::FloatCmp::DefaultEpsilon<D>::value())
 {
   return Common::FloatCmp::eq(intersection.geometry().center(), global_point, tolerance);
 }
@@ -101,10 +100,9 @@ contains(const Dune::Intersection<G, I>& intersection,
  *        Returns true if global_point lies on the line between the corners of intersection.
  */
 template <class G, class I, class D>
-typename std::enable_if<G::dimension == 2, bool>::type
-contains(const Dune::Intersection<G, I>& intersection,
-         const Dune::FieldVector<D, 2>& global_point,
-         const D& tolerance = Common::FloatCmp::DefaultEpsilon<D>::value())
+bool contains(const Dune::Intersection<G, I>& intersection,
+              const Dune::FieldVector<D, 2>& global_point,
+              const D& tolerance = Common::FloatCmp::DefaultEpsilon<D>::value())
 {
   const auto& geometry = intersection.geometry();
   // get the global coordinates of the intersections corners
@@ -140,10 +138,9 @@ contains(const Dune::Intersection<G, I>& intersection,
  * http://math.stackexchange.com/questions/684141/check-if-a-point-is-on-a-plane-minimize-the-use-of-multiplications-and-divisio
  */
 template <class G, class I, class D>
-typename std::enable_if<G::dimension == 3, bool>::type
-contains(const Dune::Intersection<G, I>& intersection,
-         const Dune::FieldVector<D, 3>& global_point,
-         const D& tolerance = Common::FloatCmp::DefaultEpsilon<D>::value())
+bool contains(const Dune::Intersection<G, I>& intersection,
+              const Dune::FieldVector<D, 3>& global_point,
+              const D& tolerance = Common::FloatCmp::DefaultEpsilon<D>::value())
 {
   const auto& geometry = intersection.geometry();
   // get the global coordinates of the intersections corners, there should be at least 3 (ignore the fourth if there is
diff --git a/dune/xt/grid/print.hh b/dune/xt/grid/print.hh
index 4805e130b24c82d16b4520b5efb5dc61ca3e71bf..e9b186159b0320b9deb6b4a2c401637f6ee2f716 100644
--- a/dune/xt/grid/print.hh
+++ b/dune/xt/grid/print.hh
@@ -21,9 +21,8 @@ namespace Common {
 
 
 /// \sa Common::Printer
-template <class G, class I, bool use_repr, typename anything>
-class Printer<Dune::Intersection<G, I>, use_repr, anything>
-  : public internal::DefaultPrinter<Dune::Intersection<G, I>, use_repr>
+template <class G, class I, bool use_repr>
+class Printer<Dune::Intersection<G, I>, use_repr> : public internal::DefaultPrinter<Dune::Intersection<G, I>, use_repr>
 {
 public:
   using T = Dune::Intersection<G, I>;
@@ -57,8 +56,8 @@ public:
 
 
 /// \sa Common::Printer
-template <int cd, int dim, class GridImp, template <int, int, class> class EntityImp, bool use_repr, typename anything>
-class Printer<Dune::Entity<cd, dim, GridImp, EntityImp>, use_repr, anything>
+template <int cd, int dim, class GridImp, template <int, int, class> class EntityImp, bool use_repr>
+class Printer<Dune::Entity<cd, dim, GridImp, EntityImp>, use_repr>
   : public internal::DefaultPrinter<Dune::Entity<cd, dim, GridImp, EntityImp>, use_repr>
 {
 public:
diff --git a/dune/xt/la/container/common/matrix/dense.hh b/dune/xt/la/container/common/matrix/dense.hh
index 24f0bb91d9e367b744b915bdf9fe9bdc21bfd5c1..2f5d6aac3c98221bf472621dcf0c2a35cb620542 100644
--- a/dune/xt/la/container/common/matrix/dense.hh
+++ b/dune/xt/la/container/common/matrix/dense.hh
@@ -325,36 +325,29 @@ public:
   }
 
   template <class OtherMatrixType>
-  std::enable_if_t<Common::is_matrix<OtherMatrixType>::value, void> axpy(const ScalarType& alpha,
-                                                                         const OtherMatrixType& xx)
+  void axpy(const ScalarType& alpha, const OtherMatrixType& xx)
   {
+    static_assert(Common::is_matrix<OtherMatrixType>::value);
     if (!has_equal_shape(xx))
       DUNE_THROW(Common::Exceptions::shapes_do_not_match,
                  "The shape of xx (" << xx.rows() << "x" << xx.cols() << ") does not match the shape of this ("
                                      << rows() << "x" << cols() << ")!");
     const internal::VectorLockGuard DUNE_UNUSED(guard)(*mutexes_);
-    axpy_impl(alpha, xx);
+    if constexpr (std::is_same<ThisType, OtherMatrixType>::value) {
+      for (size_t ii = 0; ii < backend_->entries_.size(); ++ii)
+        backend_->entries_[ii] += alpha * xx.backend_->entries_[ii];
+    } else {
+      for (size_t ii = 0; ii < rows(); ++ii)
+        for (size_t jj = 0; jj < cols(); ++jj)
+          backend_->get_entry_ref(ii, jj) += alpha * Common::MatrixAbstraction<OtherMatrixType>::get_entry(xx, ii, jj);
+    }
   } // ... axpy(...)
 
-private:
-  void axpy_impl(const ScalarType& alpha, const ThisType& xx)
-  {
-    for (size_t ii = 0; ii < backend_->entries_.size(); ++ii)
-      backend_->entries_[ii] += alpha * xx.backend_->entries_[ii];
-  }
-
-  template <class OtherMatrixType>
-  void axpy_impl(const ScalarType& alpha, const OtherMatrixType& xx)
-  {
-    for (size_t ii = 0; ii < rows(); ++ii)
-      for (size_t jj = 0; jj < cols(); ++jj)
-        backend_->get_entry_ref(ii, jj) += alpha * Common::MatrixAbstraction<OtherMatrixType>::get_entry(xx, ii, jj);
-  }
-
 public:
   template <class OtherMatrixType>
-  std::enable_if_t<Common::is_matrix<OtherMatrixType>::value, bool> has_equal_shape(const OtherMatrixType& other) const
+  bool has_equal_shape(const OtherMatrixType& other) const
   {
+    static_assert(Common::is_matrix<OtherMatrixType>::value);
     return (rows() == other.rows()) && (cols() == other.cols());
   }
 
diff --git a/dune/xt/la/container/common/matrix/sparse.hh b/dune/xt/la/container/common/matrix/sparse.hh
index 8867a707739ae2b0872124b7e8bb0181a59491c4..14fc0116ad45a325a4f1ee6fd790821e52c4d740 100644
--- a/dune/xt/la/container/common/matrix/sparse.hh
+++ b/dune/xt/la/container/common/matrix/sparse.hh
@@ -232,9 +232,9 @@ public:
   }
 
   template <class OtherMatrixImp>
-  typename std::enable_if_t<XT::Common::MatrixAbstraction<OtherMatrixImp>::is_matrix, ThisType>&
-  assign(const OtherMatrixImp& other, const SparsityPatternDefault& pattern)
+  ThisType& assign(const OtherMatrixImp& other, const SparsityPatternDefault& pattern)
   {
+    static_assert(XT::Common::MatrixAbstraction<OtherMatrixImp>::is_matrix);
     clear();
     using MatAbstrType = XT::Common::MatrixAbstraction<OtherMatrixImp>;
     num_rows_ = MatAbstrType::rows(other);
diff --git a/dune/xt/la/container/eigen/base.hh b/dune/xt/la/container/eigen/base.hh
index ba66d1068ee15f86013ae51d5a25793edb32bd92..bb6e703bef77e4dca53923aa142d0e4ac0b78185 100644
--- a/dune/xt/la/container/eigen/base.hh
+++ b/dune/xt/la/container/eigen/base.hh
@@ -128,28 +128,22 @@ public:
     backend() *= alpha;
   }
 
-  template <class T>
-  void axpy(const ScalarType& alpha, const EigenBaseVector<T, ScalarType>& xx)
-  {
-    if (xx.size() != size())
-      DUNE_THROW(Common::Exceptions::shapes_do_not_match,
-                 "The size of xx (" << xx.size() << ") does not match the size of this (" << size() << ")!");
-    const internal::VectorLockGuard DUNE_UNUSED(guard)(*mutexes_);
-    backend() += alpha * xx.backend();
-  } // ... axpy(...)
-
   template <class Vec>
-  std::enable_if_t<XT::Common::is_vector<Vec>::value
-                       && !std::is_base_of<EigenBaseVector<typename Vec::Traits, ScalarType>, Vec>::value,
-                   void>
-  axpy(const ScalarType& alpha, const Vec& xx)
+  void axpy(const ScalarType& alpha, const Vec& xx)
   {
     if (xx.size() != size())
       DUNE_THROW(Common::Exceptions::shapes_do_not_match,
                  "The size of xx (" << xx.size() << ") does not match the size of this (" << size() << ")!");
     const internal::VectorLockGuard DUNE_UNUSED(guard)(*mutexes_);
-    for (size_t ii = 0; ii < size(); ++ii)
-      set_entry(ii, get_entry(ii) + alpha * xx[ii]);
+    if constexpr (XT::Common::is_vector<Vec>::value
+                  && !std::is_base_of<EigenBaseVector<typename Vec::Traits, ScalarType>, Vec>::value) {
+      for (size_t ii = 0; ii < size(); ++ii)
+        set_entry(ii, get_entry(ii) + alpha * xx[ii]);
+    } else if constexpr (std::is_base_of<EigenBaseVector<typename Vec::Traits, ScalarType>, Vec>::value) {
+      backend() += alpha * xx.backend();
+    } else {
+      static_assert(AlwaysFalse<Vec>::value, "Not Implemented");
+    }
   } // ... axpy(...)
 
   bool has_equal_shape(const VectorImpType& other) const
diff --git a/dune/xt/la/container/matrix-interface.hh b/dune/xt/la/container/matrix-interface.hh
index d1d847e9fc04c96193b08f9b65b85c9efc6f2abf..a5c644d1a04d95ed21b17971ce5ebae66ccf4e53 100644
--- a/dune/xt/la/container/matrix-interface.hh
+++ b/dune/xt/la/container/matrix-interface.hh
@@ -17,6 +17,7 @@
 #include <limits>
 #include <iostream>
 #include <type_traits>
+#include <set>
 
 #include <dune/common/ftraits.hh>
 
diff --git a/dune/xt/la/container/vector-interface.hh b/dune/xt/la/container/vector-interface.hh
index 20862bbd5e33ffd9b6a9b579fb71630e777227f6..9fcf3d3731e49b1b2e4fb6cff6f710e0cd0ad773 100644
--- a/dune/xt/la/container/vector-interface.hh
+++ b/dune/xt/la/container/vector-interface.hh
@@ -205,12 +205,12 @@ public:
 
   virtual ScalarType min() const
   {
-    return complex_switch<>::min(this->as_imp());
+    return complex_switch::min(this->as_imp());
   }
 
   virtual ScalarType max() const
   {
-    return complex_switch<>::max(this->as_imp());
+    return complex_switch::max(this->as_imp());
   }
 
   virtual ScalarType mean() const
@@ -287,7 +287,7 @@ public:
    */
   virtual ScalarType dot(const derived_type& other) const
   {
-    return complex_switch<>::dot(this->as_imp(), other);
+    return complex_switch::dot(this->as_imp(), other);
   }
 
   /**
@@ -622,70 +622,59 @@ protected:
     return ret;
   }
 
-  template <bool is_complex = Common::is_complex<ScalarType>::value, bool anything = true>
   struct complex_switch
-  {
-    static ScalarType min(const derived_type& /*self*/)
-    {
-      DUNE_THROW(Exceptions::not_available, "For complex data types (implement this if you think otherwise)!");
-      return ScalarType();
-    }
-
-    static ScalarType max(const derived_type& /*self*/)
-    {
-      DUNE_THROW(Exceptions::not_available, "For complex data types (implement this if you think otherwise)!");
-      return ScalarType();
-    }
-
-    template <class T>
-    static ScalarType dot(const derived_type& self, const VectorInterface<T, ScalarType>& other)
-    {
-      using std::conj;
-      if (other.size() != self.size())
-        DUNE_THROW(Common::Exceptions::shapes_do_not_match,
-                   "The size of other (" << other.size() << ") does not match the size of this (" << self.size()
-                                         << ")!");
-      ScalarType result = 0;
-      for (size_t ii = 0; ii < self.size(); ++ii)
-        result += conj(self.get_unchecked_ref(ii)) * other.get_entry(ii);
-      return result;
-    }
-  }; // struct complex_switch<true, ...>
-
-  template <bool anything>
-  struct complex_switch<false, anything>
   {
     static ScalarType min(const derived_type& self)
     {
-      using std::min;
-      ScalarType ret = 0;
-      for (const auto& element : self)
-        ret = min(ret, element);
-      return ret;
+      if constexpr (!Common::is_complex<ScalarType>::value) {
+        using std::min;
+        ScalarType ret = 0;
+        for (const auto& element : self)
+          ret = min(ret, element);
+        return ret;
+      } else {
+        DUNE_THROW(Exceptions::not_available, "For complex data types (implement this if you think otherwise)!");
+      }
     }
 
     static ScalarType max(const derived_type& self)
     {
-      using std::max;
-      ScalarType ret = 0;
-      for (const auto& element : self)
-        ret = max(ret, element);
-      return ret;
+      if constexpr (!Common::is_complex<ScalarType>::value) {
+        using std::max;
+        ScalarType ret = 0;
+        for (const auto& element : self)
+          ret = max(ret, element);
+        return ret;
+      } else {
+        DUNE_THROW(Exceptions::not_available, "For complex data types (implement this if you think otherwise)!");
+      }
     }
 
     template <class T>
     static ScalarType dot(const derived_type& self, const VectorInterface<T, ScalarType>& other)
     {
-      if (other.size() != self.size())
-        DUNE_THROW(Common::Exceptions::shapes_do_not_match,
-                   "The size of other (" << other.size() << ") does not match the size of this (" << self.size()
-                                         << ")!");
-      ScalarType result = 0;
-      for (size_t ii = 0; ii < self.size(); ++ii)
-        result += self.get_unchecked_ref(ii) * other.get_entry(ii);
-      return result;
+      if constexpr (!Common::is_complex<ScalarType>::value) {
+        if (other.size() != self.size())
+          DUNE_THROW(Common::Exceptions::shapes_do_not_match,
+                     "The size of other (" << other.size() << ") does not match the size of this (" << self.size()
+                                           << ")!");
+        ScalarType result = 0;
+        for (size_t ii = 0; ii < self.size(); ++ii)
+          result += self.get_unchecked_ref(ii) * other.get_entry(ii);
+        return result;
+      } else {
+        using std::conj;
+        if (other.size() != self.size())
+          DUNE_THROW(Common::Exceptions::shapes_do_not_match,
+                     "The size of other (" << other.size() << ") does not match the size of this (" << self.size()
+                                           << ")!");
+        ScalarType result = 0;
+        for (size_t ii = 0; ii < self.size(); ++ii)
+          result += conj(self.get_unchecked_ref(ii)) * other.get_entry(ii);
+        return result;
+      }
     }
-  }; // struct complex_switch<false, ...>
+  }; // struct complex_switch
 
   template <class T, class S>
   friend std::ostream& operator<<(std::ostream& /*out*/, const VectorInterface<T, S>& /*vector*/);
diff --git a/dune/xt/la/eigen-solver/internal/base.hh b/dune/xt/la/eigen-solver/internal/base.hh
index 44f5ea17cc8b77d08b49c3eff7fbe9eb1a9fe053..1a84adda3fb2705c80996b6d1eb29ee1cef82ec5 100644
--- a/dune/xt/la/eigen-solver/internal/base.hh
+++ b/dune/xt/la/eigen-solver/internal/base.hh
@@ -241,7 +241,7 @@ public:
     if (!disable_checks_) {
       const double check_eigendecomposition = options_->get<double>("assert_eigendecomposition");
       if (check_eigendecomposition > 0)
-        complex_eigendecomposition_helper<>::check(
+        complex_eigendecomposition_check(
             *this, check_eigendecomposition > 0 ? check_eigendecomposition : options_->get<double>("real_tolerance"));
     }
     return *eigenvectors_inverse_;
@@ -407,7 +407,7 @@ protected:
         compute_real_eigenvectors();
       const double check_eigendecomposition = options_->get<double>("assert_eigendecomposition");
       if (check_eigendecomposition > 0)
-        complex_eigendecomposition_helper<>::check(*this, check_eigendecomposition);
+        complex_eigendecomposition_check(*this, check_eigendecomposition);
       if (check_real_eigendecomposition > 0) {
         invert_real_eigenvectors();
         assert_eigendecomposition(matrix_,
@@ -686,33 +686,22 @@ protected:
                                      << "\n\n(T * (lambda * T^-1)) - matrix = " << decomposition_error);
   } // ... assert_eigendecomposition(...)
 
-  template <bool upcast_required = !std::is_same<MatrixType, ComplexMatrixType>::value, bool anything = true>
-  struct complex_eigendecomposition_helper;
-
-  template <bool anything>
-  struct complex_eigendecomposition_helper<true, anything>
+  static void complex_eigendecomposition_check(const ThisType& self, const double& tolerance)
   {
-    static void check(const ThisType& self, const double& tolerance)
-    {
-      self.invert_eigenvectors();
+    const constexpr bool upcast_required = !std::is_same<MatrixType, ComplexMatrixType>::value;
+    self.invert_eigenvectors();
+    if constexpr (upcast_required) {
       self.assert_eigendecomposition(Dune::XT::LA::convert_to<ComplexMatrixType>(self.matrix_),
                                      *self.eigenvalues_,
                                      *self.eigenvectors_,
                                      *self.eigenvectors_inverse_,
                                      tolerance);
-    }
-  };
-
-  template <bool anything>
-  struct complex_eigendecomposition_helper<false, anything>
-  {
-    static void check(const ThisType& self, const double& tolerance)
-    {
-      self.invert_eigenvectors();
+    } else {
       self.assert_eigendecomposition(
           self.matrix_, *self.eigenvalues_, *self.eigenvectors_, *self.eigenvectors_inverse_, tolerance);
     }
-  };
+  }
+
 
   template <class M>
   void check_size(const MatrixInterface<M>& mat) const
diff --git a/dune/xt/la/eigen-solver/internal/lapacke.hh b/dune/xt/la/eigen-solver/internal/lapacke.hh
index 9e7d69d70584397e28f11f926643bd2b42f5369e..85dfd5817af30f083507f81974020a03df4096b2 100644
--- a/dune/xt/la/eigen-solver/internal/lapacke.hh
+++ b/dune/xt/la/eigen-solver/internal/lapacke.hh
@@ -338,50 +338,33 @@ struct lapack_helper
 {
   static_assert(Common::is_matrix<MatrixType>::value, "");
 
-  template <bool is_complex = Common::is_complex<typename Common::MatrixAbstraction<MatrixType>::S>::value,
-            bool anything = true>
-  struct dtype_switch;
-
-  template <bool anything>
-  struct dtype_switch<true, anything>
+  template <class MatrixImp>
+  static inline std::vector<std::complex<double>> eigenvalues(MatrixImp&& matrix)
   {
-    template <class MatrixImp>
-    static inline std::vector<std::complex<double>> eigenvalues(MatrixImp&& /*matrix*/)
-    {
+    if constexpr (Common::is_complex<typename Common::MatrixAbstraction<MatrixType>::S>::value) {
       static_assert(AlwaysFalse<MatrixImp>::value,
                     "Not yet implemented for complex matrices, take a look at "
                     "https://software.intel.com/en-us/mkl-developer-reference-c-geev "
                     "and add a corresponding free function like "
                     "compute_eigenvalues_of_a_real_matrix_using_lapack(...)!");
-      return std::vector<std::complex<double>>();
+    } else {
+      return compute_eigenvalues_of_a_real_matrix_using_lapack(std::forward<MatrixImp>(matrix));
     }
+  }
 
-    template <class V, class E, class MatrixImp>
-    static inline void eigenvectors(MatrixImp&& /*matrix*/, V& /*eigenvalues*/, E& /*eigenvectors*/)
-    {
+  template <class V, class E, class MatrixImp>
+  static inline void eigenvectors(MatrixImp&& matrix, V& eigenvalues, E& eigenvectors)
+  {
+    if constexpr (Common::is_complex<typename Common::MatrixAbstraction<MatrixType>::S>::value) {
       static_assert(AlwaysFalse<MatrixImp>::value,
                     "Not yet implemented for complex matrices, take a look at "
                     "https://software.intel.com/en-us/mkl-developer-reference-c-geev and add a corresponding free "
                     "function like compute_eigenvalues_and_right_eigenvectors_of_a_real_matrix_using_lapack(...)!");
-    }
-  };
-
-  template <bool anything>
-  struct dtype_switch<false, anything>
-  {
-    template <class MatrixImp>
-    static inline std::vector<std::complex<double>> eigenvalues(MatrixImp&& matrix)
-    {
-      return compute_eigenvalues_of_a_real_matrix_using_lapack(std::forward<MatrixImp>(matrix));
-    }
-
-    template <class V, class E, class MatrixImp>
-    static inline void eigenvectors(MatrixImp&& matrix, V& eigenvalues, E& eigenvectors)
-    {
+    } else {
       compute_eigenvalues_and_right_eigenvectors_of_a_real_matrix_using_lapack(
           std::forward<MatrixImp>(matrix), eigenvalues, eigenvectors);
     }
-  };
+  }
 }; // class lapack_helper
 
 
@@ -389,8 +372,7 @@ template <class MatrixType>
 typename std::enable_if<Common::is_matrix<std::decay_t<MatrixType>>::value, std::vector<std::complex<double>>>::type
 compute_eigenvalues_using_lapack(MatrixType&& matrix)
 {
-  return lapack_helper<std::decay_t<MatrixType>>::template dtype_switch<>::eigenvalues(
-      std::forward<MatrixType>(matrix));
+  return lapack_helper<std::decay_t<MatrixType>>::eigenvalues(std::forward<MatrixType>(matrix));
 }
 
 
@@ -402,7 +384,7 @@ compute_eigenvalues_and_right_eigenvectors_using_lapack(MatrixType&& matrix,
                                                         std::vector<std::complex<double>>& eigenvalues,
                                                         ComplexMatrixType& right_eigenvectors)
 {
-  lapack_helper<std::decay_t<MatrixType>>::template dtype_switch<>::eigenvectors(
+  lapack_helper<std::decay_t<MatrixType>>::eigenvectors(
       std::forward<MatrixType>(matrix), eigenvalues, right_eigenvectors);
 }
 
diff --git a/dune/xt/la/generalized-eigen-solver/internal/lapacke.hh b/dune/xt/la/generalized-eigen-solver/internal/lapacke.hh
index b38beb378badbe8ed6304311695d4fef3ad82659..95cf8dea8d8f587b4cf8c914c36fade8c4ee5a63 100644
--- a/dune/xt/la/generalized-eigen-solver/internal/lapacke.hh
+++ b/dune/xt/la/generalized-eigen-solver/internal/lapacke.hh
@@ -123,35 +123,20 @@ struct generalized_eigenvalues_lapack_helper
 {
   static_assert(Common::is_matrix<MatrixType>::value, "");
 
-  template <bool is_complex = Common::is_complex<typename Common::MatrixAbstraction<MatrixType>::S>::value,
-            bool anything = true>
-  struct dtype_switch;
-
-  template <bool anything>
-  struct dtype_switch<true, anything>
+  template <class MatrixImp>
+  static inline std::vector<std::complex<double>> eigenvalues(MatrixImp&& lhs_matrix, MatrixImp&& rhs_matrix)
   {
-    template <class MatrixImp>
-    static inline std::vector<std::complex<double>> eigenvalues(MatrixImp&& /*lhs_matrix*/, MatrixImp&& /*rhs_matrix*/)
-    {
+    if constexpr (Common::is_complex<typename Common::MatrixAbstraction<MatrixType>::S>::value) {
       static_assert(AlwaysFalse<MatrixImp>::value,
                     "Not yet implemented for complex matrices, take a look at "
                     "https://software.intel.com/en-us/mkl-developer-reference-c-sygv "
                     "and add a corresponding free function like "
                     "compute_generalized_eigenvalues_of_real_matrices_using_lapack(...)!");
-      return std::vector<std::complex<double>>();
-    }
-  };
-
-  template <bool anything>
-  struct dtype_switch<false, anything>
-  {
-    template <class MatrixImp>
-    static inline std::vector<std::complex<double>> eigenvalues(MatrixImp&& lhs_matrix, MatrixImp&& rhs_matrix)
-    {
+    } else {
       return compute_generalized_eigenvalues_of_real_matrices_using_lapack(std::forward<MatrixImp>(lhs_matrix),
                                                                            std::forward<MatrixImp>(rhs_matrix));
     }
-  };
+  }
 }; // class generalized_eigenvalues_lapack_helper
 
 
@@ -159,7 +144,7 @@ template <class MatrixType>
 typename std::enable_if<Common::is_matrix<std::decay_t<MatrixType>>::value, std::vector<std::complex<double>>>::type
 compute_generalized_eigenvalues_using_lapack(MatrixType&& lhs_matrix, MatrixType&& rhs_matrix)
 {
-  return generalized_eigenvalues_lapack_helper<std::decay_t<MatrixType>>::template dtype_switch<>::eigenvalues(
+  return generalized_eigenvalues_lapack_helper<std::decay_t<MatrixType>>::eigenvalues(
       std::forward<MatrixType>(lhs_matrix), std::forward<MatrixType>(rhs_matrix));
 }
 
diff --git a/dune/xt/la/matrix-inverter.hh b/dune/xt/la/matrix-inverter.hh
index c1a7eea8730092cd654deac5fa05513d925bbaaa..cf25d8519823ed55affa189abda0627465e28672 100644
--- a/dune/xt/la/matrix-inverter.hh
+++ b/dune/xt/la/matrix-inverter.hh
@@ -111,17 +111,17 @@ MatrixInverter<M> make_matrix_inverter(const M& matrix, const Common::Configurat
 
 
 template <class M>
-typename std::enable_if<is_matrix<M>::value || Common::is_matrix<M>::value, M>::type
-invert_matrix(const M& matrix, const std::string& inversion_type = "")
+auto invert_matrix(const M& matrix, const std::string& inversion_type = "")
 {
+  static_assert(is_matrix<M>::value || Common::is_matrix<M>::value);
   return MatrixInverter<M>(matrix, inversion_type).inverse();
 }
 
 
 template <class M>
-typename std::enable_if<is_matrix<M>::value || Common::is_matrix<M>::value, M>::type
-invert_matrix(const M& matrix, const Common::Configuration& inversion_options)
+auto invert_matrix(const M& matrix, const Common::Configuration& inversion_options)
 {
+  static_assert(is_matrix<M>::value || Common::is_matrix<M>::value);
   return MatrixInverter<M>(matrix, inversion_options).inverse();
 }
 
diff --git a/dune/xt/la/solver.hh b/dune/xt/la/solver.hh
index dd96d990bc728581602ca313dd3eebe8c405c828..37714483b934f0876c64cca1adc84c8dd6263693 100644
--- a/dune/xt/la/solver.hh
+++ b/dune/xt/la/solver.hh
@@ -141,17 +141,17 @@ public:
 
 
 template <class M>
-typename std::enable_if<is_matrix<M>::value || XT::Common::is_matrix<M>::value, Solver<M>>::type
-make_solver(const M& matrix)
+auto make_solver(const M& matrix)
 {
+  static_assert(is_matrix<M>::value || XT::Common::is_matrix<M>::value);
   return Solver<M>(matrix);
 }
 
 
 template <class M, class V, class... Args>
-typename std::enable_if<is_matrix<M>::value && is_vector<V>::value, void>::type
-solve(const M& A, const V& b, V& x, Args&&... args)
+void solve(const M& A, const V& b, V& x, Args&&... args)
 {
+  static_assert(is_matrix<M>::value && is_vector<V>::value);
   make_solver(A).apply(b, x, std::forward<Args>(args)...);
 }
 
diff --git a/dune/xt/test/grid/dd_glued.hh b/dune/xt/test/grid/dd_glued.hh
index 6ee39f21fac265188868639346783c55771b5d19..66e2f4418ace0bf8c40a3e05dd42c8692074ed42 100644
--- a/dune/xt/test/grid/dd_glued.hh
+++ b/dune/xt/test/grid/dd_glued.hh
@@ -81,7 +81,7 @@ namespace XT {
 namespace Grid {
 
 
-template <class M, class L, bool anything = true>
+template <class M, class L>
 struct ExpectedResults
 {
   static_assert(AlwaysFalse<M>::value, "Please add me for this grid!");
diff --git a/dune/xt/test/grid/dd_glued_2d.cc b/dune/xt/test/grid/dd_glued_2d.cc
index 81ed00ad9e2a7cf8a60aabf97abd9006d3f91eb3..39139797090ea9561f17ad8015154110e7792715 100644
--- a/dune/xt/test/grid/dd_glued_2d.cc
+++ b/dune/xt/test/grid/dd_glued_2d.cc
@@ -20,10 +20,9 @@ namespace XT {
 namespace Grid {
 
 
-template <bool anything>
+template <>
 struct ExpectedResults<YaspGrid<2, EquidistantOffsetCoordinates<double, 2>>,
-                       YaspGrid<2, EquidistantOffsetCoordinates<double, 2>>,
-                       anything>
+                       YaspGrid<2, EquidistantOffsetCoordinates<double, 2>>>
 {
   static int num_coarse_refinements()
   {
@@ -49,8 +48,8 @@ struct ExpectedResults<YaspGrid<2, EquidistantOffsetCoordinates<double, 2>>,
 
 #  if HAVE_DUNE_UGGRID || HAVE_UG
 
-template <bool anything>
-struct ExpectedResults<YaspGrid<2, EquidistantOffsetCoordinates<double, 2>>, UGGrid<2>, anything>
+template <>
+struct ExpectedResults<YaspGrid<2, EquidistantOffsetCoordinates<double, 2>>, UGGrid<2>>
 {
   static int num_coarse_refinements()
   {
@@ -76,8 +75,8 @@ struct ExpectedResults<YaspGrid<2, EquidistantOffsetCoordinates<double, 2>>, UGG
 #  endif // HAVE_DUNE_UGGRID || HAVE_UG
 #  if HAVE_ALBERTA
 
-template <bool anything>
-struct ExpectedResults<YaspGrid<2, EquidistantOffsetCoordinates<double, 2>>, AlbertaGrid<2, 2>, anything>
+template <>
+struct ExpectedResults<YaspGrid<2, EquidistantOffsetCoordinates<double, 2>>, AlbertaGrid<2, 2>>
 {
   static int num_coarse_refinements()
   {
diff --git a/dune/xt/test/grid/dd_glued_3d.cc b/dune/xt/test/grid/dd_glued_3d.cc
index eda7d9e528b2bb73ef86268a2457372cf0308a3c..d8af68590cffb81e722d46f8d647f4d8a5b952fe 100644
--- a/dune/xt/test/grid/dd_glued_3d.cc
+++ b/dune/xt/test/grid/dd_glued_3d.cc
@@ -20,10 +20,9 @@ namespace XT {
 namespace Grid {
 
 
-template <bool anything>
+template <>
 struct ExpectedResults<YaspGrid<3, EquidistantOffsetCoordinates<double, 3>>,
-                       YaspGrid<3, EquidistantOffsetCoordinates<double, 3>>,
-                       anything>
+                       YaspGrid<3, EquidistantOffsetCoordinates<double, 3>>>
 {
   static int num_coarse_refinements()
   {
@@ -50,10 +49,8 @@ struct ExpectedResults<YaspGrid<3, EquidistantOffsetCoordinates<double, 3>>,
 
 #  if HAVE_DUNE_ALUGRID
 
-template <class Comm, bool anything>
-struct ExpectedResults<ALUGrid<3, 3, cube, nonconforming, Comm>,
-                       YaspGrid<3, EquidistantOffsetCoordinates<double, 3>>,
-                       anything>
+template <class Comm>
+struct ExpectedResults<ALUGrid<3, 3, cube, nonconforming, Comm>, YaspGrid<3, EquidistantOffsetCoordinates<double, 3>>>
 {
   static int num_coarse_refinements()
   {
@@ -77,10 +74,8 @@ struct ExpectedResults<ALUGrid<3, 3, cube, nonconforming, Comm>,
 }; // struct ExpectedResults<YaspGrid<3, EquidistantOffsetCoordinates<double, 3>>, ALUGrid<3, 3, simplex, nonconforming,
 // Comm>, anything>
 
-template <class Comm, bool anything>
-struct ExpectedResults<YaspGrid<3, EquidistantOffsetCoordinates<double, 3>>,
-                       ALUGrid<3, 3, cube, nonconforming, Comm>,
-                       anything>
+template <class Comm>
+struct ExpectedResults<YaspGrid<3, EquidistantOffsetCoordinates<double, 3>>, ALUGrid<3, 3, cube, nonconforming, Comm>>
 {
   static int num_coarse_refinements()
   {
@@ -107,8 +102,8 @@ struct ExpectedResults<YaspGrid<3, EquidistantOffsetCoordinates<double, 3>>,
 #  endif // HAVE_DUNE_ALUGRID
 #  if HAVE_DUNE_UGGRID || HAVE_UG
 
-template <bool anything>
-struct ExpectedResults<YaspGrid<3, EquidistantOffsetCoordinates<double, 3>>, UGGrid<3>, anything>
+template <>
+struct ExpectedResults<YaspGrid<3, EquidistantOffsetCoordinates<double, 3>>, UGGrid<3>>
 {
   static int num_coarse_refinements()
   {
@@ -135,8 +130,8 @@ struct ExpectedResults<YaspGrid<3, EquidistantOffsetCoordinates<double, 3>>, UGG
 #  endif // HAVE_DUNE_UGGRID || HAVE_UG
 #  if HAVE_ALBERTA
 
-template <bool anything>
-struct ExpectedResults<YaspGrid<3, EquidistantOffsetCoordinates<double, 3>>, AlbertaGrid<3, 3>, anything>
+template <>
+struct ExpectedResults<YaspGrid<3, EquidistantOffsetCoordinates<double, 3>>, AlbertaGrid<3, 3>>
 {
   static int num_coarse_refinements()
   {
diff --git a/pybind11/.travis.yml b/pybind11/.travis.yml
index 4cc5cf07c04abdd3bc2944f88113295392ed4219..d81cd8c7b812387ede562636269ce6d07e25d795 100644
--- a/pybind11/.travis.yml
+++ b/pybind11/.travis.yml
@@ -16,7 +16,8 @@ matrix:
     - PY_CMD=python3
     - $PY_CMD -m pip install --user --upgrade pip wheel setuptools
     install:
-    - $PY_CMD -m pip install --user --upgrade sphinx sphinx_rtd_theme breathe flake8 pep8-naming pytest
+    # breathe 4.14 doesn't work with bit fields. See https://github.com/michaeljones/breathe/issues/462
+    - $PY_CMD -m pip install --user --upgrade sphinx sphinx_rtd_theme breathe==4.13.1 flake8 pep8-naming pytest
     - curl -fsSL https://sourceforge.net/projects/doxygen/files/rel-1.8.15/doxygen-1.8.15.linux.bin.tar.gz/download | tar xz
     - export PATH="$PWD/doxygen-1.8.15/bin:$PATH"
     script:
@@ -32,8 +33,7 @@ matrix:
     - |
       # Barebones build
       cmake -DCMAKE_BUILD_TYPE=Debug -DPYBIND11_WERROR=ON -DDOWNLOAD_CATCH=ON -DPYTHON_EXECUTABLE=$(which $PY_CMD) .
-      make pytest -j 2
-      make cpptest -j 2
+      make pytest -j 2 && make cpptest -j 2
   # The following are regular test configurations, including optional dependencies.
   # With regard to each other they differ in Python version, C++ standard and compiler.
   - os: linux
@@ -61,7 +61,7 @@ matrix:
   - os: linux
     dist: trusty
     env: PYTHON=2.7 CPP=14 GCC=6 CMAKE=1
-    name: Python 2.7, c++14, gcc 4.8, CMake test
+    name: Python 2.7, c++14, gcc 6, CMake test
     addons:
       apt:
         sources:
@@ -106,13 +106,39 @@ matrix:
           - lld-7
           - libc++-7-dev
           - libc++abi-7-dev  # Why is this necessary???
+  - os: linux
+    dist: xenial
+    env: PYTHON=3.8 CPP=17 GCC=7
+    name: Python 3.8, c++17, gcc 7 (w/o numpy/scipy) # TODO: update build name when the numpy/scipy wheels become available
+    addons:
+      apt:
+        sources:
+          - deadsnakes
+          - ubuntu-toolchain-r-test
+        packages:
+          - g++-7
+          - python3.8-dev
+          - python3.8-venv
+    # Currently there are no numpy/scipy wheels available for python3.8
+    # TODO: remove the following before_install, install and script clauses when the wheels become available
+    before_install:
+      - pyenv global $(pyenv whence 2to3)  # activate all python versions
+      - PY_CMD=python3
+      - $PY_CMD -m pip install --user --upgrade pip wheel setuptools
+    install:
+      - $PY_CMD -m pip install --user --upgrade pytest
+    script:
+      - |
+        # Barebones build
+        cmake -DCMAKE_BUILD_TYPE=Debug -DPYBIND11_WERROR=ON -DDOWNLOAD_CATCH=ON -DPYTHON_EXECUTABLE=$(which $PY_CMD) .
+        make pytest -j 2 && make cpptest -j 2
   - os: osx
     name: Python 2.7, c++14, AppleClang 7.3, CMake test
     osx_image: xcode7.3
     env: PYTHON=2.7 CPP=14 CLANG CMAKE=1
   - os: osx
     name: Python 3.7, c++14, AppleClang 9, Debug build
-    osx_image: xcode9
+    osx_image: xcode9.4
     env: PYTHON=3.7 CPP=14 CLANG DEBUG=1
   # Test a PyPy 2.7 build
   - os: linux
@@ -130,7 +156,7 @@ matrix:
     dist: trusty
     services: docker
     env: DOCKER=i386/debian:stretch PYTHON=3.5 CPP=14 GCC=6 INSTALL=1
-    name: Python 3.4, c++14, gcc 6, 32-bit
+    name: Python 3.5, c++14, gcc 6, 32-bit
     script:
       - |
         # Consolidated 32-bit Docker Build + Install
@@ -193,7 +219,7 @@ before_install:
       PY_CMD=python$PYTHON
       if [ "$TRAVIS_OS_NAME" = "osx" ]; then
         if [ "$PY" = "3" ]; then
-          brew update && brew upgrade python
+          brew update && brew unlink python@2 && brew upgrade python
         else
           curl -fsSL https://bootstrap.pypa.io/get-pip.py | $PY_CMD - --user
         fi
diff --git a/pybind11/CMakeLists.txt b/pybind11/CMakeLists.txt
index 493724613188e7456f15eb9b319fe3a172475986..2ad229fb419708f9f38f3d3da691d73337803352 100644
--- a/pybind11/CMakeLists.txt
+++ b/pybind11/CMakeLists.txt
@@ -107,7 +107,7 @@ if(NOT (CMAKE_VERSION VERSION_LESS 3.0)) # CMake >= 3.0 Build an interface libra
   add_library(module INTERFACE)
   add_library(pybind11::module ALIAS module)
   if(NOT MSVC)
-    target_compile_options(module PRIVATE -fvisibility=hidden)
+    target_compile_options(module INTERFACE -fvisibility=hidden)
   endif()
   target_link_libraries(module INTERFACE pybind11::pybind11)
   if(WIN32 OR CYGWIN)
diff --git a/pybind11/docs/advanced/cast/chrono.rst b/pybind11/docs/advanced/cast/chrono.rst
index 8c6b3d7e59098724533402847e902aa9317fa8ee..fbd46057aa392c86ae3747c2b21768367205ea49 100644
--- a/pybind11/docs/advanced/cast/chrono.rst
+++ b/pybind11/docs/advanced/cast/chrono.rst
@@ -59,7 +59,7 @@ Provided conversions
 
 .. rubric:: Python to C++
 
-- ``datetime.datetime`` → ``std::chrono::system_clock::time_point``
+- ``datetime.datetime`` or ``datetime.date`` or ``datetime.time`` → ``std::chrono::system_clock::time_point``
     Date/time objects are converted into system clock timepoints. Any
     timezone information is ignored and the type is treated as a naive
     object.
diff --git a/pybind11/docs/advanced/classes.rst b/pybind11/docs/advanced/classes.rst
index c9a0da5a12078eed1bf3b6e67241d1dd46c23023..ae5907deed8a78d1a13c78c24959c0852a2662f2 100644
--- a/pybind11/docs/advanced/classes.rst
+++ b/pybind11/docs/advanced/classes.rst
@@ -662,6 +662,7 @@ to Python.
             .def(py::self *= float())
             .def(float() * py::self)
             .def(py::self * float())
+            .def(-py::self)
             .def("__repr__", &Vector2::toString);
     }
 
diff --git a/pybind11/docs/advanced/exceptions.rst b/pybind11/docs/advanced/exceptions.rst
index 75ac24ae9a524b67cbe36660377a4ef7ee825dc6..75ad7f7f4a527a93a41eb0785e2beb8419208014 100644
--- a/pybind11/docs/advanced/exceptions.rst
+++ b/pybind11/docs/advanced/exceptions.rst
@@ -28,6 +28,8 @@ exceptions:
 +--------------------------------------+--------------------------------------+
 | :class:`std::range_error`            | ``ValueError``                       |
 +--------------------------------------+--------------------------------------+
+| :class:`std::overflow_error`         | ``OverflowError``                    |
++--------------------------------------+--------------------------------------+
 | :class:`pybind11::stop_iteration`    | ``StopIteration`` (used to implement |
 |                                      | custom iterators)                    |
 +--------------------------------------+--------------------------------------+
diff --git a/pybind11/docs/basics.rst b/pybind11/docs/basics.rst
index 447250ed9e88dbe09cc7d06329e021761103b038..7bf4d426d39db811a10f0cdfcda950d5e5fcbbf8 100644
--- a/pybind11/docs/basics.rst
+++ b/pybind11/docs/basics.rst
@@ -164,7 +164,7 @@ load and execute the example:
 Keyword arguments
 =================
 
-With a simple modification code, it is possible to inform Python about the
+With a simple code modification, it is possible to inform Python about the
 names of the arguments ("i" and "j" in this case).
 
 .. code-block:: cpp
diff --git a/pybind11/docs/changelog.rst b/pybind11/docs/changelog.rst
index f99d3516a90122e5e70242e156b9ad82ffd30461..2def2b071933d7abe76d2154cf60c15c1df4fd9d 100644
--- a/pybind11/docs/changelog.rst
+++ b/pybind11/docs/changelog.rst
@@ -6,11 +6,131 @@ Changelog
 Starting with version 1.8.0, pybind11 releases use a `semantic versioning
 <http://semver.org>`_ policy.
 
+v2.5.0 (Mar 31, 2020)
+-----------------------------------------------------
+
+* Use C++17 fold expressions in type casters, if available. This can
+  improve performance during overload resolution when functions have
+  multiple arguments.
+  `#2043 <https://github.com/pybind/pybind11/pull/2043>`_.
+
+* Changed include directory resolution in ``pybind11/__init__.py``
+  and installation in ``setup.py``. This fixes a number of open issues
+  where pybind11 headers could not be found in certain environments.
+  `#1995 <https://github.com/pybind/pybind11/pull/1995>`_.
+
+* C++20 ``char8_t`` and ``u8string`` support. `#2026
+  <https://github.com/pybind/pybind11/pull/2026>`_.
+
+* CMake: search for Python 3.9. `bb9c91
+  <https://github.com/pybind/pybind11/commit/bb9c91>`_.
+
+* Fixes for MSYS-based build environments.
+  `#2087 <https://github.com/pybind/pybind11/pull/2087>`_,
+  `#2053 <https://github.com/pybind/pybind11/pull/2053>`_.
+
+* STL bindings for ``std::vector<...>::clear``. `#2074
+  <https://github.com/pybind/pybind11/pull/2074>`_.
+
+* Read-only flag for ``py::buffer``. `#1466
+  <https://github.com/pybind/pybind11/pull/1466>`_.
+
+* Exception handling during module initialization.
+  `bf2b031 <https://github.com/pybind/pybind11/commit/bf2b031>`_.
+
+* Support linking against a CPython debug build.
+  `#2025 <https://github.com/pybind/pybind11/pull/2025>`_.
+
+* Fixed issues involving the availability and use of aligned ``new`` and
+  ``delete``. `#1988 <https://github.com/pybind/pybind11/pull/1988>`_,
+  `759221 <https://github.com/pybind/pybind11/commit/759221>`_.
+
+* Fixed a resource leak upon interpreter shutdown.
+  `#2020 <https://github.com/pybind/pybind11/pull/2020>`_.
 
-v2.3.1 (Not yet released)
+* Fixed error handling in the boolean caster.
+  `#1976 <https://github.com/pybind/pybind11/pull/1976>`_.
+
+v2.4.3 (Oct 15, 2019)
+-----------------------------------------------------
+
+* Adapt pybind11 to a C API convention change in Python 3.8. `#1950
+  <https://github.com/pybind/pybind11/pull/1950>`_.
+
+v2.4.2 (Sep 21, 2019)
+-----------------------------------------------------
+
+* Replaced usage of a C++14 only construct. `#1929
+  <https://github.com/pybind/pybind11/pull/1929>`_.
+
+* Made an ifdef future-proof for Python >= 4. `f3109d
+  <https://github.com/pybind/pybind11/commit/f3109d>`_.
+
+v2.4.1 (Sep 20, 2019)
+-----------------------------------------------------
+
+* Fixed a problem involving implicit conversion from enumerations to integers
+  on Python 3.8. `#1780 <https://github.com/pybind/pybind11/pull/1780>`_.
+
+v2.4.0 (Sep 19, 2019)
 -----------------------------------------------------
 
-* TBA
+* Try harder to keep pybind11-internal data structures separate when there
+  are potential ABI incompatibilities. Fixes crashes that occurred when loading
+  multiple pybind11 extensions that were e.g. compiled by GCC (libstdc++)
+  and Clang (libc++).
+  `#1588 <https://github.com/pybind/pybind11/pull/1588>`_ and
+  `c9f5a <https://github.com/pybind/pybind11/commit/c9f5a>`_.
+
+* Added support for ``__await__``, ``__aiter__``, and ``__anext__`` protocols.
+  `#1842 <https://github.com/pybind/pybind11/pull/1842>`_.
+
+* ``pybind11_add_module()``: don't strip symbols when compiling in
+  ``RelWithDebInfo`` mode. `#1980
+  <https://github.com/pybind/pybind11/pull/1980>`_.
+
+* ``enum_``: Reproduce Python behavior when comparing against invalid values
+  (e.g. ``None``, strings, etc.). Add back support for ``__invert__()``.
+  `#1912 <https://github.com/pybind/pybind11/pull/1912>`_,
+  `#1907 <https://github.com/pybind/pybind11/pull/1907>`_.
+
+* List insertion operation for ``py::list``.
+  Added ``.empty()`` to all collection types.
+  Added ``py::set::contains()`` and ``py::dict::contains()``.
+  `#1887 <https://github.com/pybind/pybind11/pull/1887>`_,
+  `#1884 <https://github.com/pybind/pybind11/pull/1884>`_,
+  `#1888 <https://github.com/pybind/pybind11/pull/1888>`_.
+
+* ``py::detail::overload_cast_impl`` is available in C++11 mode, can be used
+  like ``overload_cast`` with an additional set of parentheses.
+  `#1581 <https://github.com/pybind/pybind11/pull/1581>`_.
+
+* Fixed ``get_include()`` on Conda.
+  `#1877 <https://github.com/pybind/pybind11/pull/1877>`_.
+
+* ``stl_bind.h``: negative indexing support.
+  `#1882 <https://github.com/pybind/pybind11/pull/1882>`_.
+
+* Minor CMake fix to add MinGW compatibility.
+  `#1851 <https://github.com/pybind/pybind11/pull/1851>`_.
+
+* GIL-related fixes.
+  `#1836 <https://github.com/pybind/pybind11/pull/1836>`_,
+  `8b90b <https://github.com/pybind/pybind11/commit/8b90b>`_.
+
+* Other very minor/subtle fixes and improvements.
+  `#1329 <https://github.com/pybind/pybind11/pull/1329>`_,
+  `#1910 <https://github.com/pybind/pybind11/pull/1910>`_,
+  `#1863 <https://github.com/pybind/pybind11/pull/1863>`_,
+  `#1847 <https://github.com/pybind/pybind11/pull/1847>`_,
+  `#1890 <https://github.com/pybind/pybind11/pull/1890>`_,
+  `#1860 <https://github.com/pybind/pybind11/pull/1860>`_,
+  `#1848 <https://github.com/pybind/pybind11/pull/1848>`_,
+  `#1821 <https://github.com/pybind/pybind11/pull/1821>`_,
+  `#1837 <https://github.com/pybind/pybind11/pull/1837>`_,
+  `#1833 <https://github.com/pybind/pybind11/pull/1833>`_,
+  `#1748 <https://github.com/pybind/pybind11/pull/1748>`_,
+  `#1852 <https://github.com/pybind/pybind11/pull/1852>`_.
 
 v2.3.0 (June 11, 2019)
 -----------------------------------------------------
@@ -105,7 +225,6 @@ v2.3.0 (June 11, 2019)
   `#1744 <https://github.com/pybind/pybind11/pull/1744>`_,
   `#1670 <https://github.com/pybind/pybind11/pull/1670>`_.
 
-
 v2.2.4 (September 11, 2018)
 -----------------------------------------------------
 
diff --git a/pybind11/docs/classes.rst b/pybind11/docs/classes.rst
index 1deec9b537fbae32145e5366b4b92d71389b054d..a63f6a1969222d0348282bcea86c9c71ce92d5b7 100644
--- a/pybind11/docs/classes.rst
+++ b/pybind11/docs/classes.rst
@@ -422,6 +422,17 @@ on constness, the ``py::const_`` tag should be used:
        .def("foo_mutable", py::overload_cast<int, float>(&Widget::foo))
        .def("foo_const",   py::overload_cast<int, float>(&Widget::foo, py::const_));
 
+If you prefer the ``py::overload_cast`` syntax but have a C++11 compatible compiler only,
+you can use ``py::detail::overload_cast_impl`` with an additional set of parentheses:
+
+.. code-block:: cpp
+
+    template <typename... Args>
+    using overload_cast_ = pybind11::detail::overload_cast_impl<Args...>;
+
+    py::class_<Pet>(m, "Pet")
+        .def("set", overload_cast_<int>()(&Pet::set), "Set the pet's age")
+        .def("set", overload_cast_<const std::string &>()(&Pet::set), "Set the pet's name");
 
 .. [#cpp14] A compiler which supports the ``-std=c++14`` flag
             or Visual Studio 2015 Update 2 and newer.
diff --git a/pybind11/docs/conf.py b/pybind11/docs/conf.py
index 54879dd357e27ec3f96ce6ba7d4051db4dd3169c..0be00505822464454335cc1185d4d4d2a3f8ffb6 100644
--- a/pybind11/docs/conf.py
+++ b/pybind11/docs/conf.py
@@ -61,9 +61,9 @@ author = 'Wenzel Jakob'
 # built documents.
 #
 # The short X.Y version.
-version = '2.3'
+version = '2.5'
 # The full version, including alpha/beta/rc tags.
-release = '2.3.dev1'
+release = '2.5.0'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
diff --git a/pybind11/docs/faq.rst b/pybind11/docs/faq.rst
index 93ccf10e57c16d07e585331d9839d43b4b5ddc39..4d491fb87f7f94b807a5f87cee625c2be3801062 100644
--- a/pybind11/docs/faq.rst
+++ b/pybind11/docs/faq.rst
@@ -248,6 +248,33 @@ that that were ``malloc()``-ed in another shared library, using data
 structures with incompatible ABIs, and so on. pybind11 is very careful not
 to make these types of mistakes.
 
+How can I properly handle Ctrl-C in long-running functions?
+===========================================================
+
+Ctrl-C is received by the Python interpreter, which holds the signal until the
+GIL is released, so a long-running function won't be interrupted.
+
+To interrupt from inside your function, you can use the ``PyErr_CheckSignals()``
+function, which reports whether a signal has been raised on the Python side.  This
+function merely checks a flag, so its impact is negligible. When a signal has
+been received, you must either explicitly interrupt execution by throwing
+``py::error_already_set`` (which will propagate the existing
+``KeyboardInterrupt``), or clear the error (which you usually will not want):
+
+.. code-block:: cpp
+
+    PYBIND11_MODULE(example, m)
+    {
+        m.def("long_running_func", []()
+        {
+            for (;;) {
+                if (PyErr_CheckSignals() != 0)
+                    throw py::error_already_set();
+                // Long running iteration
+            }
+        });
+    }
+
 Inconsistent detection of Python version in CMake and pybind11
 ==============================================================
 
diff --git a/pybind11/docs/release.rst b/pybind11/docs/release.rst
index b31bbe97ebbcb72b327ff14b2fce818494e6331e..9846f971a6ff88e40ceeaf16e14227ba3b6ae63c 100644
--- a/pybind11/docs/release.rst
+++ b/pybind11/docs/release.rst
@@ -13,10 +13,6 @@ To release a new version of pybind11:
     - ``git push --tags``.
     - ``python setup.py sdist upload``.
     - ``python setup.py bdist_wheel upload``.
-- Update conda-forge (https://github.com/conda-forge/pybind11-feedstock) via PR
-    - download release package from Github: ``wget https://github.com/pybind/pybind11/archive/vX.Y.Z.tar.gz``
-    - compute checksum: ``shasum -a 256  vX.Y.Z.tar.gz``
-    - change version number and checksum in ``recipe/meta.yml``
 - Get back to work
     - Update ``_version.py`` (add 'dev' and increment minor).
     - Update version in ``docs/conf.py``
diff --git a/pybind11/include/pybind11/buffer_info.h b/pybind11/include/pybind11/buffer_info.h
index ef29e97227748db277269df3da66e990f653805b..cd436083c3c8eb66dbff3e2979ad7eae75e194f5 100644
--- a/pybind11/include/pybind11/buffer_info.h
+++ b/pybind11/include/pybind11/buffer_info.h
@@ -22,7 +22,8 @@ struct buffer_info
   std::string format; // For homogeneous buffers, this should be set to format_descriptor<T>::format()
   ssize_t ndim = 0; // Number of dimensions
   std::vector<ssize_t> shape; // Shape of the tensor (1 entry per dimension)
-  std::vector<ssize_t> strides; // Number of entries between adjacent entries (for each per dimension)
+  std::vector<ssize_t> strides; // Number of bytes between adjacent entries (1 entry per dimension)
+  bool readonly = false; // flag to indicate if the underlying storage may be written to
 
   buffer_info() {}
 
@@ -31,7 +32,8 @@ struct buffer_info
               const std::string& format,
               ssize_t ndim,
               detail::any_container<ssize_t> shape_in,
-              detail::any_container<ssize_t> strides_in)
+              detail::any_container<ssize_t> strides_in,
+              bool readonly = false)
     : ptr(ptr)
     , itemsize(itemsize)
     , size(1)
@@ -39,6 +41,7 @@ struct buffer_info
     , ndim(ndim)
     , shape(std::move(shape_in))
     , strides(std::move(strides_in))
+    , readonly(readonly)
   {
     if (ndim != (ssize_t)shape.size() || ndim != (ssize_t)strides.size())
       pybind11_fail("buffer_info: ndim doesn't match shape and/or strides length");
@@ -47,23 +50,32 @@ struct buffer_info
   }
 
   template <typename T>
-  buffer_info(T* ptr, detail::any_container<ssize_t> shape_in, detail::any_container<ssize_t> strides_in)
+  buffer_info(T* ptr,
+              detail::any_container<ssize_t> shape_in,
+              detail::any_container<ssize_t> strides_in,
+              bool readonly = false)
     : buffer_info(private_ctr_tag(),
                   ptr,
                   sizeof(T),
                   format_descriptor<T>::format(),
                   static_cast<ssize_t>(shape_in->size()),
                   std::move(shape_in),
-                  std::move(strides_in))
+                  std::move(strides_in),
+                  readonly)
+  {}
+
+  buffer_info(void* ptr, ssize_t itemsize, const std::string& format, ssize_t size, bool readonly = false)
+    : buffer_info(ptr, itemsize, format, 1, {size}, {itemsize}, readonly)
   {}
 
-  buffer_info(void* ptr, ssize_t itemsize, const std::string& format, ssize_t size)
-    : buffer_info(ptr, itemsize, format, 1, {size}, {itemsize})
+  template <typename T>
+  buffer_info(T* ptr, ssize_t size, bool readonly = false)
+    : buffer_info(ptr, sizeof(T), format_descriptor<T>::format(), size, readonly)
   {}
 
   template <typename T>
-  buffer_info(T* ptr, ssize_t size)
-    : buffer_info(ptr, sizeof(T), format_descriptor<T>::format(), size)
+  buffer_info(const T* ptr, ssize_t size, bool readonly = true)
+    : buffer_info(const_cast<T*>(ptr), sizeof(T), format_descriptor<T>::format(), size, readonly)
   {}
 
   explicit buffer_info(Py_buffer* view, bool ownview = true)
@@ -72,7 +84,8 @@ struct buffer_info
                   view->format,
                   view->ndim,
                   {view->shape, view->shape + view->ndim},
-                  {view->strides, view->strides + view->ndim})
+                  {view->strides, view->strides + view->ndim},
+                  view->readonly)
   {
     this->view = view;
     this->ownview = ownview;
@@ -97,6 +110,7 @@ struct buffer_info
     strides = std::move(rhs.strides);
     std::swap(view, rhs.view);
     std::swap(ownview, rhs.ownview);
+    readonly = rhs.readonly;
     return *this;
   }
 
@@ -118,8 +132,9 @@ private:
               const std::string& format,
               ssize_t ndim,
               detail::any_container<ssize_t>&& shape_in,
-              detail::any_container<ssize_t>&& strides_in)
-    : buffer_info(ptr, itemsize, format, ndim, std::move(shape_in), std::move(strides_in))
+              detail::any_container<ssize_t>&& strides_in,
+              bool readonly)
+    : buffer_info(ptr, itemsize, format, ndim, std::move(shape_in), std::move(strides_in), readonly)
   {}
 
   Py_buffer* view = nullptr;
diff --git a/pybind11/include/pybind11/cast.h b/pybind11/include/pybind11/cast.h
index 23e69f1a880c937d95b46cf6e416bf58b032c93d..641ed21068138b04a32b125ad3cb46f859b93a11 100644
--- a/pybind11/include/pybind11/cast.h
+++ b/pybind11/include/pybind11/cast.h
@@ -32,6 +32,10 @@
 #  include <string_view>
 #endif
 
+#if defined(__cpp_lib_char8_t) && __cpp_lib_char8_t >= 201811L
+#  define PYBIND11_HAS_U8STRING
+#endif
+
 NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
 NAMESPACE_BEGIN(detail)
 
@@ -609,9 +613,16 @@ public:
       case return_value_policy::copy:
         if (copy_constructor)
           valueptr = copy_constructor(src);
-        else
-          throw cast_error("return_value_policy = copy, but the "
-                           "object is non-copyable!");
+        else {
+#if defined(NDEBUG)
+          throw cast_error("return_value_policy = copy, but type is "
+                           "non-copyable! (compile in debug mode for details)");
+#else
+          std::string type_name(tinfo->cpptype->name());
+          detail::clean_type_id(type_name);
+          throw cast_error("return_value_policy = copy, but type " + type_name + " is non-copyable!");
+#endif
+        }
         wrapper->owned = true;
         break;
 
@@ -620,9 +631,17 @@ public:
           valueptr = move_constructor(src);
         else if (copy_constructor)
           valueptr = copy_constructor(src);
-        else
-          throw cast_error("return_value_policy = move, but the "
-                           "object is neither movable nor copyable!");
+        else {
+#if defined(NDEBUG)
+          throw cast_error("return_value_policy = move, but type is neither "
+                           "movable nor copyable! "
+                           "(compile in debug mode for details)");
+#else
+          std::string type_name(tinfo->cpptype->name());
+          detail::clean_type_id(type_name);
+          throw cast_error("return_value_policy = move, but type " + type_name + " is neither movable nor copyable!");
+#endif
+        }
         wrapper->owned = true;
         break;
 
@@ -651,9 +670,9 @@ public:
       if (type->operator_new) {
         vptr = type->operator_new(type->type_size);
       } else {
-#if defined(PYBIND11_CPP17)
+#if defined(__cpp_aligned_new) && (!defined(_MSC_VER) || _MSC_VER >= 1912)
         if (type->type_align > __STDCPP_DEFAULT_NEW_ALIGNMENT__)
-          vptr = ::operator new(type->type_size, (std::align_val_t)type->type_align);
+          vptr = ::operator new(type->type_size, std::align_val_t(type->type_align));
         else
 #endif
           vptr = ::operator new(type->type_size);
@@ -862,17 +881,33 @@ template <typename Container>
 struct is_copy_constructible<
     Container,
     enable_if_t<all_of<std::is_copy_constructible<Container>,
-                       std::is_same<typename Container::value_type&, typename Container::reference>>::value>>
+                       std::is_same<typename Container::value_type&, typename Container::reference>,
+                       // Avoid infinite recursion
+                       negation<std::is_same<Container, typename Container::value_type>>>::value>>
   : is_copy_constructible<typename Container::value_type>
 {};
 
-#if !defined(PYBIND11_CPP17)
-// Likewise for std::pair before C++17 (which mandates that the copy constructor not exist when the
-// two types aren't themselves copy constructible).
+// Likewise for std::pair
+// (after C++17 it is mandatory that the copy constructor not exist when the two types aren't themselves
+// copy constructible, but this can not be relied upon when T1 or T2 are themselves containers).
 template <typename T1, typename T2>
 struct is_copy_constructible<std::pair<T1, T2>> : all_of<is_copy_constructible<T1>, is_copy_constructible<T2>>
 {};
-#endif
+
+// The same problems arise with std::is_copy_assignable, so we use the same workaround.
+template <typename T, typename SFINAE = void>
+struct is_copy_assignable : std::is_copy_assignable<T>
+{};
+template <typename Container>
+struct is_copy_assignable<
+    Container,
+    enable_if_t<all_of<std::is_copy_assignable<Container>,
+                       std::is_same<typename Container::value_type&, typename Container::reference>>::value>>
+  : is_copy_assignable<typename Container::value_type>
+{};
+template <typename T1, typename T2>
+struct is_copy_assignable<std::pair<T1, T2>> : all_of<is_copy_assignable<T1>, is_copy_assignable<T2>>
+{};
 
 NAMESPACE_END(detail)
 
@@ -1109,6 +1144,9 @@ public:
 
 template <typename CharT>
 using is_std_char_type = any_of<std::is_same<CharT, char>, /* std::string */
+#if defined(PYBIND11_HAS_U8STRING)
+                                std::is_same<CharT, char8_t>, /* std::u8string */
+#endif
                                 std::is_same<CharT, char16_t>, /* std::u16string */
                                 std::is_same<CharT, char32_t>, /* std::u32string */
                                 std::is_same<CharT, wchar_t> /* std::wstring */
@@ -1144,10 +1182,12 @@ public:
     }
 
     bool py_err = py_value == (py_type)-1 && PyErr_Occurred();
+
+    // Protect std::numeric_limits::min/max with parentheses
     if (py_err
         || (std::is_integral<T>::value && sizeof(py_type) != sizeof(T)
-            && (py_value < (py_type)std::numeric_limits<T>::min()
-                || py_value > (py_type)std::numeric_limits<T>::max()))) {
+            && (py_value < (py_type)(std::numeric_limits<T>::min)()
+                || py_value > (py_type)(std::numeric_limits<T>::max)()))) {
       bool type_error = py_err
                         && PyErr_ExceptionMatches(
 #if PY_VERSION_HEX < 0x03000000 && !defined(PYPY_VERSION)
@@ -1331,6 +1371,8 @@ public:
       if (res == 0 || res == 1) {
         value = (bool)res;
         return true;
+      } else {
+        PyErr_Clear();
       }
     }
     return false;
@@ -1351,6 +1393,9 @@ struct string_caster
   // Simplify life by being able to assume standard char sizes (the standard only guarantees
   // minimums, but Python requires exact sizes)
   static_assert(!std::is_same<CharT, char>::value || sizeof(CharT) == 1, "Unsupported char size != 1");
+#if defined(PYBIND11_HAS_U8STRING)
+  static_assert(!std::is_same<CharT, char8_t>::value || sizeof(CharT) == 1, "Unsupported char8_t size != 1");
+#endif
   static_assert(!std::is_same<CharT, char16_t>::value || sizeof(CharT) == 2, "Unsupported char16_t size != 2");
   static_assert(!std::is_same<CharT, char32_t>::value || sizeof(CharT) == 4, "Unsupported char32_t size != 4");
   // wchar_t can be either 16 bits (Windows) or 32 (everywhere else)
@@ -1370,7 +1415,7 @@ struct string_caster
 #if PY_MAJOR_VERSION >= 3
       return load_bytes(load_src);
 #else
-      if (sizeof(CharT) == 1) {
+      if (std::is_same<CharT, char>::value) {
         return load_bytes(load_src);
       }
 
@@ -1441,7 +1486,7 @@ private:
   // without any encoding/decoding attempt).  For other C++ char sizes this is a no-op.
   // which supports loading a unicode from a str, doesn't take this path.
   template <typename C = CharT>
-  bool load_bytes(enable_if_t<sizeof(C) == 1, handle> src)
+  bool load_bytes(enable_if_t<std::is_same<C, char>::value, handle> src)
   {
     if (PYBIND11_BYTES_CHECK(src.ptr())) {
       // We were passed a Python 3 raw bytes; accept it into a std::string or char*
@@ -1457,7 +1502,7 @@ private:
   }
 
   template <typename C = CharT>
-  bool load_bytes(enable_if_t<sizeof(C) != 1, handle>)
+  bool load_bytes(enable_if_t<!std::is_same<C, char>::value, handle>)
   {
     return false;
   }
@@ -1636,9 +1681,14 @@ protected:
   template <size_t... Is>
   bool load_impl(const sequence& seq, bool convert, index_sequence<Is...>)
   {
+#ifdef __cpp_fold_expressions
+    if ((... || !std::get<Is>(subcasters).load(seq[Is], convert)))
+      return false;
+#else
     for (bool r : {std::get<Is>(subcasters).load(seq[Is], convert)...})
       if (!r)
         return false;
+#endif
     return true;
   }
 
@@ -2351,14 +2401,19 @@ private:
   template <size_t... Is>
   bool load_impl_sequence(function_call& call, index_sequence<Is...>)
   {
+#ifdef __cpp_fold_expressions
+    if ((... || !std::get<Is>(argcasters).load(call.args[Is], call.args_convert[Is])))
+      return false;
+#else
     for (bool r : {std::get<Is>(argcasters).load(call.args[Is], call.args_convert[Is])...})
       if (!r)
         return false;
+#endif
     return true;
   }
 
   template <typename Return, typename Func, size_t... Is, typename Guard>
-  Return call_impl(Func&& f, index_sequence<Is...>, Guard&&)
+  Return call_impl(Func&& f, index_sequence<Is...>, Guard&&) &&
   {
     return std::forward<Func>(f)(cast_op<Args>(std::move(std::get<Is>(argcasters)))...);
   }
diff --git a/pybind11/include/pybind11/chrono.h b/pybind11/include/pybind11/chrono.h
index 81d9b55504f6d4f0f88b954e36f74a48e7f7a717..9744d171cd1f25bb63c983b4faf43a81d15aa458 100644
--- a/pybind11/include/pybind11/chrono.h
+++ b/pybind11/include/pybind11/chrono.h
@@ -124,8 +124,11 @@ public:
 
     if (!src)
       return false;
+
+    std::tm cal;
+    microseconds msecs;
+
     if (PyDateTime_Check(src.ptr())) {
-      std::tm cal;
       cal.tm_sec = PyDateTime_DATE_GET_SECOND(src.ptr());
       cal.tm_min = PyDateTime_DATE_GET_MINUTE(src.ptr());
       cal.tm_hour = PyDateTime_DATE_GET_HOUR(src.ptr());
@@ -133,11 +136,30 @@ public:
       cal.tm_mon = PyDateTime_GET_MONTH(src.ptr()) - 1;
       cal.tm_year = PyDateTime_GET_YEAR(src.ptr()) - 1900;
       cal.tm_isdst = -1;
-
-      value = system_clock::from_time_t(std::mktime(&cal)) + microseconds(PyDateTime_DATE_GET_MICROSECOND(src.ptr()));
-      return true;
+      msecs = microseconds(PyDateTime_DATE_GET_MICROSECOND(src.ptr()));
+    } else if (PyDate_Check(src.ptr())) {
+      cal.tm_sec = 0;
+      cal.tm_min = 0;
+      cal.tm_hour = 0;
+      cal.tm_mday = PyDateTime_GET_DAY(src.ptr());
+      cal.tm_mon = PyDateTime_GET_MONTH(src.ptr()) - 1;
+      cal.tm_year = PyDateTime_GET_YEAR(src.ptr()) - 1900;
+      cal.tm_isdst = -1;
+      msecs = microseconds(0);
+    } else if (PyTime_Check(src.ptr())) {
+      cal.tm_sec = PyDateTime_TIME_GET_SECOND(src.ptr());
+      cal.tm_min = PyDateTime_TIME_GET_MINUTE(src.ptr());
+      cal.tm_hour = PyDateTime_TIME_GET_HOUR(src.ptr());
+      cal.tm_mday = 1; // This date (day, month, year) = (1, 0, 70)
+      cal.tm_mon = 0; // represents 1-Jan-1970, which is the
+      cal.tm_year = 70; // earliest available date for Python's datetime
+      cal.tm_isdst = -1;
+      msecs = microseconds(PyDateTime_TIME_GET_MICROSECOND(src.ptr()));
     } else
       return false;
+
+    value = system_clock::from_time_t(std::mktime(&cal)) + msecs;
+    return true;
   }
 
   static handle cast(const std::chrono::time_point<std::chrono::system_clock, Duration>& src,
diff --git a/pybind11/include/pybind11/detail/class.h b/pybind11/include/pybind11/detail/class.h
index c1005d5bce918990a3eda7f1302b2f9c633e908e..49c0921753d9c9bc2c52554b3d6537adaa54055a 100644
--- a/pybind11/include/pybind11/detail/class.h
+++ b/pybind11/include/pybind11/detail/class.h
@@ -373,6 +373,7 @@ extern "C" inline void pybind11_object_dealloc(PyObject* self)
   auto type = Py_TYPE(self);
   type->tp_free(self);
 
+#if PY_VERSION_HEX < 0x03080000
   // `type->tp_dealloc != pybind11_object_dealloc` means that we're being called
   // as part of a derived type's dealloc, in which case we're not allowed to decref
   // the type here. For cross-module compatibility, we shouldn't compare directly
@@ -380,6 +381,11 @@ extern "C" inline void pybind11_object_dealloc(PyObject* self)
   auto pybind11_object_type = (PyTypeObject*)get_internals().instance_base;
   if (type->tp_dealloc == pybind11_object_type->tp_dealloc)
     Py_DECREF(type);
+#else
+  // This was not needed before Python 3.8 (Python issue 35810)
+  // https://github.com/pybind/pybind11/issues/1946
+  Py_DECREF(type);
+#endif
 }
 
 /** Create the type which can be used as a common base for all classes.  This is
@@ -514,6 +520,13 @@ extern "C" inline int pybind11_getbuffer(PyObject* obj, Py_buffer* view, int fla
   view->len = view->itemsize;
   for (auto s : info->shape)
     view->len *= s;
+  view->readonly = info->readonly;
+  if ((flags & PyBUF_WRITABLE) == PyBUF_WRITABLE && info->readonly) {
+    if (view)
+      view->obj = nullptr;
+    PyErr_SetString(PyExc_BufferError, "Writable buffer requested for readonly storage");
+    return -1;
+  }
   if ((flags & PyBUF_FORMAT) == PyBUF_FORMAT)
     view->format = const_cast<char*>(info->format.c_str());
   if ((flags & PyBUF_STRIDES) == PyBUF_STRIDES) {
@@ -616,6 +629,9 @@ inline PyObject* make_new_python_type(const type_record& rec)
   type->tp_as_number = &heap_type->as_number;
   type->tp_as_sequence = &heap_type->as_sequence;
   type->tp_as_mapping = &heap_type->as_mapping;
+#if PY_VERSION_HEX >= 0x03050000
+  type->tp_as_async = &heap_type->as_async;
+#endif
 
   /* Flags */
   type->tp_flags |= Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HEAPTYPE;
diff --git a/pybind11/include/pybind11/detail/common.h b/pybind11/include/pybind11/detail/common.h
index bf15f79e0e9b65844985a9ed03971da17f2ac79a..5a4c8514ef0eb42ace0b1ce8ef2559f8705123d0 100644
--- a/pybind11/include/pybind11/detail/common.h
+++ b/pybind11/include/pybind11/detail/common.h
@@ -93,8 +93,8 @@
 #endif
 
 #define PYBIND11_VERSION_MAJOR 2
-#define PYBIND11_VERSION_MINOR 3
-#define PYBIND11_VERSION_PATCH dev1
+#define PYBIND11_VERSION_MINOR 5
+#define PYBIND11_VERSION_PATCH 0
 
 /// Include Python header, disable linking to pythonX_d.lib on Windows in debug mode
 #if defined(_MSC_VER)
@@ -103,7 +103,7 @@
 #  endif
 #  pragma warning(push)
 #  pragma warning(disable : 4510 4610 4512 4005)
-#  if defined(_DEBUG)
+#  if defined(_DEBUG) && !defined(Py_DEBUG)
 #    define PYBIND11_DEBUG_MARKER
 #    undef _DEBUG
 #  endif
@@ -113,10 +113,9 @@
 #include <frameobject.h>
 #include <pythread.h>
 
-#if defined(_WIN32) && (defined(min) || defined(max))
-#  error Macro clash with min and max -- define NOMINMAX when compiling your program on Windows
-#endif
-
+/* Python #defines overrides on all sorts of core functions, which
+   tend to wreak havoc in C++ codebases that expect these to work
+   like regular functions (potentially with several overloads) */
 #if defined(isalnum)
 #  undef isalnum
 #  undef isalpha
@@ -127,6 +126,10 @@
 #  undef toupper
 #endif
 
+#if defined(copysign)
+#  undef copysign
+#endif
+
 #if defined(_MSC_VER)
 #  if defined(PYBIND11_DEBUG_MARKER)
 #    define _DEBUG
@@ -168,7 +171,10 @@
 #  define PYBIND11_STR_TYPE ::pybind11::str
 #  define PYBIND11_BOOL_ATTR "__bool__"
 #  define PYBIND11_NB_BOOL(ptr) ((ptr)->nb_bool)
-#  define PYBIND11_PLUGIN_IMPL(name) extern "C" PYBIND11_EXPORT PyObject* PyInit_##name()
+// Providing a separate declaration to make Clang's -Wmissing-prototypes happy
+#  define PYBIND11_PLUGIN_IMPL(name)                                                                                   \
+    extern "C" PYBIND11_EXPORT PyObject* PyInit_##name();                                                              \
+    extern "C" PYBIND11_EXPORT PyObject* PyInit_##name()
 
 #else
 #  define PYBIND11_INSTANCE_METHOD_NEW(ptr, class_) PyMethod_New(ptr, nullptr, class_)
@@ -191,8 +197,10 @@
 #  define PYBIND11_STR_TYPE ::pybind11::bytes
 #  define PYBIND11_BOOL_ATTR "__nonzero__"
 #  define PYBIND11_NB_BOOL(ptr) ((ptr)->nb_nonzero)
+// Providing a separate PyInit decl to make Clang's -Wmissing-prototypes happy
 #  define PYBIND11_PLUGIN_IMPL(name)                                                                                   \
     static PyObject* pybind11_init_wrapper();                                                                          \
+    extern "C" PYBIND11_EXPORT void init##name();                                                                      \
     extern "C" PYBIND11_EXPORT void init##name()                                                                       \
     {                                                                                                                  \
       (void)pybind11_init_wrapper();                                                                                   \
@@ -215,6 +223,7 @@ extern "C"
 #define PYBIND11_STRINGIFY(x) #x
 #define PYBIND11_TOSTRING(x) PYBIND11_STRINGIFY(x)
 #define PYBIND11_CONCAT(first, second) first##second
+#define PYBIND11_ENSURE_INTERNALS_READY pybind11::detail::get_internals();
 
 #define PYBIND11_CHECK_PYTHON_VERSION                                                                                  \
   {                                                                                                                    \
@@ -264,6 +273,7 @@ extern "C"
   PYBIND11_PLUGIN_IMPL(name)                                                                                           \
   {                                                                                                                    \
     PYBIND11_CHECK_PYTHON_VERSION                                                                                      \
+    PYBIND11_ENSURE_INTERNALS_READY                                                                                    \
     try {                                                                                                              \
       return pybind11_init();                                                                                          \
     }                                                                                                                  \
@@ -293,6 +303,7 @@ extern "C"
   PYBIND11_PLUGIN_IMPL(name)                                                                                           \
   {                                                                                                                    \
     PYBIND11_CHECK_PYTHON_VERSION                                                                                      \
+    PYBIND11_ENSURE_INTERNALS_READY                                                                                    \
     auto m = pybind11::module(PYBIND11_TOSTRING(name));                                                                \
     try {                                                                                                              \
       PYBIND11_CONCAT(pybind11_init_, name)(m);                                                                        \
@@ -839,6 +850,8 @@ PYBIND11_RUNTIME_EXCEPTION(index_error, PyExc_IndexError)
 PYBIND11_RUNTIME_EXCEPTION(key_error, PyExc_KeyError)
 PYBIND11_RUNTIME_EXCEPTION(value_error, PyExc_ValueError)
 PYBIND11_RUNTIME_EXCEPTION(type_error, PyExc_TypeError)
+PYBIND11_RUNTIME_EXCEPTION(buffer_error, PyExc_BufferError)
+PYBIND11_RUNTIME_EXCEPTION(import_error, PyExc_ImportError)
 PYBIND11_RUNTIME_EXCEPTION(cast_error, PyExc_RuntimeError) /// Thrown when pybind11::cast or handle::call fail due to a
                                                            /// type casting error
 PYBIND11_RUNTIME_EXCEPTION(reference_cast_error, PyExc_RuntimeError) /// Used internally
@@ -921,10 +934,6 @@ struct nodelete
   {}
 };
 
-// overload_cast requires variable templates: C++14
-#if defined(PYBIND11_CPP14)
-#  define PYBIND11_OVERLOAD_CAST 1
-
 NAMESPACE_BEGIN(detail)
 template <typename... Args>
 struct overload_cast_impl
@@ -951,19 +960,23 @@ struct overload_cast_impl
 };
 NAMESPACE_END(detail)
 
+// overload_cast requires variable templates: C++14
+#if defined(PYBIND11_CPP14)
+#  define PYBIND11_OVERLOAD_CAST 1
 /// Syntax sugar for resolving overloaded function pointers:
 ///  - regular: static_cast<Return (Class::*)(Arg0, Arg1, Arg2)>(&Class::func)
 ///  - sweet:   overload_cast<Arg0, Arg1, Arg2>(&Class::func)
 template <typename... Args>
 static constexpr detail::overload_cast_impl<Args...> overload_cast = {};
 // MSVC 2015 only accepts this particular initialization syntax for this variable template.
+#endif
 
 /// Const member function selector for overload_cast
 ///  - regular: static_cast<Return (Class::*)(Arg) const>(&Class::func)
 ///  - sweet:   overload_cast<Arg>(&Class::func, const_)
 static constexpr auto const_ = std::true_type{};
 
-#else // no overload_cast: providing something that static_assert-fails:
+#if !defined(PYBIND11_CPP14) // no overload_cast: providing something that static_assert-fails:
 template <typename... Args>
 struct overload_cast
 {
diff --git a/pybind11/include/pybind11/detail/internals.h b/pybind11/include/pybind11/detail/internals.h
index 808ac979b9bcafcc80d1aa6e7c7ab425e308cc4b..3712da057723bb07358234f5a5e2b8f0a3404d09 100644
--- a/pybind11/include/pybind11/detail/internals.h
+++ b/pybind11/include/pybind11/detail/internals.h
@@ -25,6 +25,7 @@ inline PyObject* make_object_base_type(PyTypeObject* metaclass);
 #  define PYBIND11_TLS_GET_VALUE(key) PyThread_tss_get((key))
 #  define PYBIND11_TLS_REPLACE_VALUE(key, value) PyThread_tss_set((key), (value))
 #  define PYBIND11_TLS_DELETE_VALUE(key) PyThread_tss_set((key), nullptr)
+#  define PYBIND11_TLS_FREE(key) PyThread_tss_free(key)
 #else
 // Usually an int but a long on Cygwin64 with Python 3.x
 #  define PYBIND11_TLS_KEY_INIT(var) decltype(PyThread_create_key()) var = 0
@@ -40,6 +41,7 @@ inline PyObject* make_object_base_type(PyTypeObject* metaclass);
 #    define PYBIND11_TLS_DELETE_VALUE(key) PyThread_set_key_value((key), nullptr)
 #    define PYBIND11_TLS_REPLACE_VALUE(key, value) PyThread_set_key_value((key), (value))
 #  endif
+#  define PYBIND11_TLS_FREE(key) (void)key
 #endif
 
 // Python loads modules by default with dlopen with the RTLD_LOCAL flag; under libc++ and possibly
@@ -116,6 +118,17 @@ struct internals
 #if defined(WITH_THREAD)
   PYBIND11_TLS_KEY_INIT(tstate);
   PyInterpreterState* istate = nullptr;
+  ~internals()
+  {
+    // This destructor is called *after* Py_Finalize() in finalize_interpreter().
+    // That *SHOULD BE* fine. The following details what happens when PyThread_tss_free is called.
+    // PYBIND11_TLS_FREE is PyThread_tss_free on python 3.7+. On older python, it does nothing.
+    // PyThread_tss_free calls PyThread_tss_delete and PyMem_RawFree.
+    // PyThread_tss_delete just calls TlsFree (on Windows) or pthread_key_delete (on *NIX). Neither
+    // of those have anything to do with CPython internals.
+    // PyMem_RawFree *requires* that the `tstate` be allocated with the CPython allocator.
+    PYBIND11_TLS_FREE(tstate);
+  }
 #endif
 };
 
@@ -147,14 +160,49 @@ struct type_info
 };
 
 /// Tracks the `internals` and `type_info` ABI version independent of the main library version
-#define PYBIND11_INTERNALS_VERSION 3
+#define PYBIND11_INTERNALS_VERSION 4
 
-#if defined(_DEBUG)
+/// On MSVC, debug and release builds are not ABI-compatible!
+#if defined(_MSC_VER) && defined(_DEBUG)
 #  define PYBIND11_BUILD_TYPE "_debug"
 #else
 #  define PYBIND11_BUILD_TYPE ""
 #endif
 
+/// Let's assume that different compilers are ABI-incompatible.
+#if defined(_MSC_VER)
+#  define PYBIND11_COMPILER_TYPE "_msvc"
+#elif defined(__INTEL_COMPILER)
+#  define PYBIND11_COMPILER_TYPE "_icc"
+#elif defined(__clang__)
+#  define PYBIND11_COMPILER_TYPE "_clang"
+#elif defined(__PGI)
+#  define PYBIND11_COMPILER_TYPE "_pgi"
+#elif defined(__MINGW32__)
+#  define PYBIND11_COMPILER_TYPE "_mingw"
+#elif defined(__CYGWIN__)
+#  define PYBIND11_COMPILER_TYPE "_gcc_cygwin"
+#elif defined(__GNUC__)
+#  define PYBIND11_COMPILER_TYPE "_gcc"
+#else
+#  define PYBIND11_COMPILER_TYPE "_unknown"
+#endif
+
+#if defined(_LIBCPP_VERSION)
+#  define PYBIND11_STDLIB "_libcpp"
+#elif defined(__GLIBCXX__) || defined(__GLIBCPP__)
+#  define PYBIND11_STDLIB "_libstdcpp"
+#else
+#  define PYBIND11_STDLIB ""
+#endif
+
+/// On Linux/OSX, changes in __GXX_ABI_VERSION indicate ABI incompatibility.
+#if defined(__GXX_ABI_VERSION)
+#  define PYBIND11_BUILD_ABI "_cxxabi" PYBIND11_TOSTRING(__GXX_ABI_VERSION)
+#else
+#  define PYBIND11_BUILD_ABI ""
+#endif
+
 #if defined(WITH_THREAD)
 #  define PYBIND11_INTERNALS_KIND ""
 #else
@@ -162,12 +210,12 @@ struct type_info
 #endif
 
 #define PYBIND11_INTERNALS_ID                                                                                          \
-  "__pybind11_internals_v" PYBIND11_TOSTRING(PYBIND11_INTERNALS_VERSION) PYBIND11_INTERNALS_KIND PYBIND11_BUILD_TYPE   \
-      "__"
+  "__pybind11_internals_v" PYBIND11_TOSTRING(PYBIND11_INTERNALS_VERSION)                                               \
+      PYBIND11_INTERNALS_KIND PYBIND11_COMPILER_TYPE PYBIND11_STDLIB PYBIND11_BUILD_ABI PYBIND11_BUILD_TYPE "__"
 
 #define PYBIND11_MODULE_LOCAL_ID                                                                                       \
   "__pybind11_module_local_v" PYBIND11_TOSTRING(PYBIND11_INTERNALS_VERSION)                                            \
-      PYBIND11_INTERNALS_KIND PYBIND11_BUILD_TYPE "__"
+      PYBIND11_INTERNALS_KIND PYBIND11_COMPILER_TYPE PYBIND11_STDLIB PYBIND11_BUILD_ABI PYBIND11_BUILD_TYPE "__"
 
 /// Each module locally stores a pointer to the `internals` data. The data
 /// itself is shared among modules with the same `PYBIND11_INTERNALS_ID`.
@@ -177,6 +225,63 @@ inline internals**& get_internals_pp()
   return internals_pp;
 }
 
+inline void translate_exception(std::exception_ptr p)
+{
+  try {
+    if (p)
+      std::rethrow_exception(p);
+  } catch (error_already_set& e) {
+    e.restore();
+    return;
+  } catch (const builtin_exception& e) {
+    e.set_error();
+    return;
+  } catch (const std::bad_alloc& e) {
+    PyErr_SetString(PyExc_MemoryError, e.what());
+    return;
+  } catch (const std::domain_error& e) {
+    PyErr_SetString(PyExc_ValueError, e.what());
+    return;
+  } catch (const std::invalid_argument& e) {
+    PyErr_SetString(PyExc_ValueError, e.what());
+    return;
+  } catch (const std::length_error& e) {
+    PyErr_SetString(PyExc_ValueError, e.what());
+    return;
+  } catch (const std::out_of_range& e) {
+    PyErr_SetString(PyExc_IndexError, e.what());
+    return;
+  } catch (const std::range_error& e) {
+    PyErr_SetString(PyExc_ValueError, e.what());
+    return;
+  } catch (const std::overflow_error& e) {
+    PyErr_SetString(PyExc_OverflowError, e.what());
+    return;
+  } catch (const std::exception& e) {
+    PyErr_SetString(PyExc_RuntimeError, e.what());
+    return;
+  } catch (...) {
+    PyErr_SetString(PyExc_RuntimeError, "Caught an unknown exception!");
+    return;
+  }
+}
+
+#if !defined(__GLIBCXX__)
+inline void translate_local_exception(std::exception_ptr p)
+{
+  try {
+    if (p)
+      std::rethrow_exception(p);
+  } catch (error_already_set& e) {
+    e.restore();
+    return;
+  } catch (const builtin_exception& e) {
+    e.set_error();
+    return;
+  }
+}
+#endif
+
 /// Return a reference to the current `internals` data
 PYBIND11_NOINLINE inline internals& get_internals()
 {
@@ -184,6 +289,20 @@ PYBIND11_NOINLINE inline internals& get_internals()
   if (internals_pp && *internals_pp)
     return **internals_pp;
 
+  // Ensure that the GIL is held since we will need to make Python calls.
+  // Cannot use py::gil_scoped_acquire here since that constructor calls get_internals.
+  struct gil_scoped_acquire_local
+  {
+    gil_scoped_acquire_local()
+      : state(PyGILState_Ensure())
+    {}
+    ~gil_scoped_acquire_local()
+    {
+      PyGILState_Release(state);
+    }
+    const PyGILState_STATE state;
+  } gil;
+
   constexpr auto* id = PYBIND11_INTERNALS_ID;
   auto builtins = handle(PyEval_GetBuiltins());
   if (builtins.contains(id) && isinstance<capsule>(builtins[id])) {
@@ -195,18 +314,7 @@ PYBIND11_NOINLINE inline internals& get_internals()
     //
     // libstdc++ doesn't require this (types there are identified only by name)
 #if !defined(__GLIBCXX__)
-    (*internals_pp)->registered_exception_translators.push_front([](std::exception_ptr p) -> void {
-      try {
-        if (p)
-          std::rethrow_exception(p);
-      } catch (error_already_set& e) {
-        e.restore();
-        return;
-      } catch (const builtin_exception& e) {
-        e.set_error();
-        return;
-      }
-    });
+    (*internals_pp)->registered_exception_translators.push_front(&translate_local_exception);
 #endif
   } else {
     if (!internals_pp)
@@ -230,42 +338,7 @@ PYBIND11_NOINLINE inline internals& get_internals()
     internals_ptr->istate = tstate->interp;
 #endif
     builtins[id] = capsule(internals_pp);
-    internals_ptr->registered_exception_translators.push_front([](std::exception_ptr p) -> void {
-      try {
-        if (p)
-          std::rethrow_exception(p);
-      } catch (error_already_set& e) {
-        e.restore();
-        return;
-      } catch (const builtin_exception& e) {
-        e.set_error();
-        return;
-      } catch (const std::bad_alloc& e) {
-        PyErr_SetString(PyExc_MemoryError, e.what());
-        return;
-      } catch (const std::domain_error& e) {
-        PyErr_SetString(PyExc_ValueError, e.what());
-        return;
-      } catch (const std::invalid_argument& e) {
-        PyErr_SetString(PyExc_ValueError, e.what());
-        return;
-      } catch (const std::length_error& e) {
-        PyErr_SetString(PyExc_ValueError, e.what());
-        return;
-      } catch (const std::out_of_range& e) {
-        PyErr_SetString(PyExc_IndexError, e.what());
-        return;
-      } catch (const std::range_error& e) {
-        PyErr_SetString(PyExc_ValueError, e.what());
-        return;
-      } catch (const std::exception& e) {
-        PyErr_SetString(PyExc_RuntimeError, e.what());
-        return;
-      } catch (...) {
-        PyErr_SetString(PyExc_RuntimeError, "Caught an unknown exception!");
-        return;
-      }
-    });
+    internals_ptr->registered_exception_translators.push_front(&translate_exception);
     internals_ptr->static_property_type = make_static_property_type();
     internals_ptr->default_metaclass = make_default_metaclass();
     internals_ptr->instance_base = make_object_base_type(internals_ptr->default_metaclass);
diff --git a/pybind11/include/pybind11/embed.h b/pybind11/include/pybind11/embed.h
index 5e47f76dea252abd24436ef1a40695c7cc0f4c94..5a84e0da213d443d4d62a758775c3b7c119415d8 100644
--- a/pybind11/include/pybind11/embed.h
+++ b/pybind11/include/pybind11/embed.h
@@ -18,12 +18,14 @@
 
 #if PY_MAJOR_VERSION >= 3
 #  define PYBIND11_EMBEDDED_MODULE_IMPL(name)                                                                          \
+    extern "C" PyObject* pybind11_init_impl_##name();                                                                  \
     extern "C" PyObject* pybind11_init_impl_##name()                                                                   \
     {                                                                                                                  \
       return pybind11_init_wrapper_##name();                                                                           \
     }
 #else
 #  define PYBIND11_EMBEDDED_MODULE_IMPL(name)                                                                          \
+    extern "C" void pybind11_init_impl_##name();                                                                       \
     extern "C" void pybind11_init_impl_##name()                                                                        \
     {                                                                                                                  \
       pybind11_init_wrapper_##name();                                                                                  \
diff --git a/pybind11/include/pybind11/functional.h b/pybind11/include/pybind11/functional.h
index d2a61cf67f1353fa5e8678159c6fd076211df69d..54e8199e5938882d089d3eda41ce2b5c2b00af6a 100644
--- a/pybind11/include/pybind11/functional.h
+++ b/pybind11/include/pybind11/functional.h
@@ -75,12 +75,23 @@ public:
       }
     };
 
-    value = [hfunc = func_handle(std::move(func))](Args... args) -> Return {
-      gil_scoped_acquire acq;
-      object retval(hfunc.f(std::forward<Args>(args)...));
-      /* Visual studio 2015 parser issue: need parentheses around this expression */
-      return (retval.template cast<Return>());
+    // to emulate 'move initialization capture' in C++11
+    struct func_wrapper
+    {
+      func_handle hfunc;
+      func_wrapper(func_handle&& hf)
+        : hfunc(std::move(hf))
+      {}
+      Return operator()(Args... args) const
+      {
+        gil_scoped_acquire acq;
+        object retval(hfunc.f(std::forward<Args>(args)...));
+        /* Visual studio 2015 parser issue: need parentheses around this expression */
+        return (retval.template cast<Return>());
+      }
     };
+
+    value = func_wrapper(func_handle(std::move(func)));
     return true;
   }
 
diff --git a/pybind11/include/pybind11/iostream.h b/pybind11/include/pybind11/iostream.h
index 3a975a61bbe8d2a274308c0d8ec64873af775c7d..f393629b912633bc434e1fc51965068f5514452e 100644
--- a/pybind11/include/pybind11/iostream.h
+++ b/pybind11/include/pybind11/iostream.h
@@ -67,6 +67,8 @@ public:
     setp(d_buffer.get(), d_buffer.get() + buf_size - 1);
   }
 
+  pythonbuf(pythonbuf&&) = default;
+
   /// Sync before destroy
   ~pythonbuf()
   {
diff --git a/pybind11/include/pybind11/numpy.h b/pybind11/include/pybind11/numpy.h
index 22adb739c699f0bf6f368888f1e2a8afcd899eea..284abbbbe169dc2fc881b2dcf3956f520aa8a082 100644
--- a/pybind11/include/pybind11/numpy.h
+++ b/pybind11/include/pybind11/numpy.h
@@ -14,6 +14,7 @@
 #include <numeric>
 #include <algorithm>
 #include <array>
+#include <cstdint>
 #include <cstdlib>
 #include <cstring>
 #include <sstream>
@@ -116,6 +117,26 @@ inline numpy_internals& get_numpy_internals()
   return *ptr;
 }
 
+template <typename T>
+struct same_size
+{
+  template <typename U>
+  using as = bool_constant<sizeof(T) == sizeof(U)>;
+};
+
+template <typename Concrete>
+constexpr int platform_lookup()
+{
+  return -1;
+}
+
+// Lookup a type according to its size, and return a value corresponding to the NumPy typenum.
+template <typename Concrete, typename T, typename... Ts, typename... Ints>
+constexpr int platform_lookup(int I, Ints... Is)
+{
+  return sizeof(Concrete) == sizeof(T) ? I : platform_lookup<Concrete, Ts...>(Is...);
+}
+
 struct npy_api
 {
   enum constants
@@ -147,7 +168,21 @@ struct npy_api
     NPY_OBJECT_ = 17,
     NPY_STRING_,
     NPY_UNICODE_,
-    NPY_VOID_
+    NPY_VOID_,
+    // Platform-dependent normalization
+    NPY_INT8_ = NPY_BYTE_,
+    NPY_UINT8_ = NPY_UBYTE_,
+    NPY_INT16_ = NPY_SHORT_,
+    NPY_UINT16_ = NPY_USHORT_,
+    // `npy_common.h` defines the integer aliases. In order, it checks:
+    // NPY_BITSOF_LONG, NPY_BITSOF_LONGLONG, NPY_BITSOF_INT, NPY_BITSOF_SHORT, NPY_BITSOF_CHAR
+    // and assigns the alias to the first matching size, so we should check in this order.
+    NPY_INT32_ = platform_lookup<std::int32_t, long, int, short>(NPY_LONG_, NPY_INT_, NPY_SHORT_),
+    NPY_UINT32_ =
+        platform_lookup<std::uint32_t, unsigned long, unsigned int, unsigned short>(NPY_ULONG_, NPY_UINT_, NPY_USHORT_),
+    NPY_INT64_ = platform_lookup<std::int64_t, long, long long, int>(NPY_LONG_, NPY_LONGLONG_, NPY_INT_),
+    NPY_UINT64_ = platform_lookup<std::uint64_t, unsigned long, unsigned long long, unsigned int>(
+        NPY_ULONG_, NPY_ULONGLONG_, NPY_UINT_),
   };
 
   typedef struct
@@ -1240,12 +1275,12 @@ private:
   constexpr static const int values[15] = {npy_api::NPY_BOOL_,
                                            npy_api::NPY_BYTE_,
                                            npy_api::NPY_UBYTE_,
-                                           npy_api::NPY_SHORT_,
-                                           npy_api::NPY_USHORT_,
-                                           npy_api::NPY_INT_,
-                                           npy_api::NPY_UINT_,
-                                           npy_api::NPY_LONGLONG_,
-                                           npy_api::NPY_ULONGLONG_,
+                                           npy_api::NPY_INT16_,
+                                           npy_api::NPY_UINT16_,
+                                           npy_api::NPY_INT32_,
+                                           npy_api::NPY_UINT32_,
+                                           npy_api::NPY_INT64_,
+                                           npy_api::NPY_UINT64_,
                                            npy_api::NPY_FLOAT_,
                                            npy_api::NPY_DOUBLE_,
                                            npy_api::NPY_LONGDOUBLE_,
@@ -1259,7 +1294,7 @@ public:
   static pybind11::dtype dtype()
   {
     if (auto ptr = npy_api::get().PyArray_DescrFromType_(value))
-      return reinterpret_borrow<pybind11::dtype>(ptr);
+      return reinterpret_steal<pybind11::dtype>(ptr);
     pybind11_fail("Unsupported buffer format!");
   }
 };
@@ -1333,8 +1368,15 @@ inline PYBIND11_NOINLINE void register_structured_dtype(any_container<field_desc
   if (numpy_internals.get_type_info(tinfo, false))
     pybind11_fail("NumPy: dtype is already registered");
 
+  // Use ordered fields because order matters as of NumPy 1.14:
+  // https://docs.scipy.org/doc/numpy/release.html#multiple-field-indexing-assignment-of-structured-arrays
+  std::vector<field_descriptor> ordered_fields(std::move(fields));
+  std::sort(ordered_fields.begin(), ordered_fields.end(), [](const field_descriptor& a, const field_descriptor& b) {
+    return a.offset < b.offset;
+  });
+
   list names, formats, offsets;
-  for (auto field : *fields) {
+  for (auto& field : ordered_fields) {
     if (!field.descr)
       pybind11_fail(std::string("NumPy: unsupported field dtype: `") + field.name + "` @ " + tinfo.name());
     names.append(PYBIND11_STR_TYPE(field.name));
@@ -1350,10 +1392,6 @@ inline PYBIND11_NOINLINE void register_structured_dtype(any_container<field_desc
   // - https://github.com/numpy/numpy/pull/7798
   // Because of this, we won't use numpy's logic to generate buffer format
   // strings and will just do it ourselves.
-  std::vector<field_descriptor> ordered_fields(std::move(fields));
-  std::sort(ordered_fields.begin(), ordered_fields.end(), [](const field_descriptor& a, const field_descriptor& b) {
-    return a.offset < b.offset;
-  });
   ssize_t offset = 0;
   std::ostringstream oss;
   // mark the structure as unaligned with '^', because numpy and C++ don't
diff --git a/pybind11/include/pybind11/pybind11.h b/pybind11/include/pybind11/pybind11.h
index c66456e3668ca158429791f813acfdf6863cc438..820490c8274edc7b82c031b42b7dde4a3b2eabbc 100644
--- a/pybind11/include/pybind11/pybind11.h
+++ b/pybind11/include/pybind11/pybind11.h
@@ -520,7 +520,7 @@ protected:
 
         function_call call(func, parent);
 
-        size_t args_to_copy = std::min(pos_args, n_args_in);
+        size_t args_to_copy = (std::min)(pos_args, n_args_in); // Protect std::min with parentheses
         size_t args_copied = 0;
 
         // 0. Inject new-style `self` argument
@@ -1060,11 +1060,18 @@ inline void call_operator_delete(void* p, size_t s, size_t a)
 {
   (void)s;
   (void)a;
-#if defined(PYBIND11_CPP17)
-  if (a > __STDCPP_DEFAULT_NEW_ALIGNMENT__)
+#if defined(__cpp_aligned_new) && (!defined(_MSC_VER) || _MSC_VER >= 1912)
+  if (a > __STDCPP_DEFAULT_NEW_ALIGNMENT__) {
+#  ifdef __cpp_sized_deallocation
     ::operator delete(p, s, std::align_val_t(a));
-  else
-    ::operator delete(p, s);
+#  else
+    ::operator delete(p, std::align_val_t(a));
+#  endif
+    return;
+  }
+#endif
+#ifdef __cpp_sized_deallocation
+  ::operator delete(p, s);
 #else
   ::operator delete(p);
 #endif
@@ -1601,9 +1608,17 @@ struct enum_base
       },                                                                                                               \
       is_method(m_base))
 
+#define PYBIND11_ENUM_OP_CONV_LHS(op, expr)                                                                            \
+  m_base.attr(op) = cpp_function(                                                                                      \
+      [](object a_, object b) {                                                                                        \
+        int_ a(a_);                                                                                                    \
+        return expr;                                                                                                   \
+      },                                                                                                               \
+      is_method(m_base))
+
     if (is_convertible) {
-      PYBIND11_ENUM_OP_CONV("__eq__", !b.is_none() && a.equal(b));
-      PYBIND11_ENUM_OP_CONV("__ne__", b.is_none() || !a.equal(b));
+      PYBIND11_ENUM_OP_CONV_LHS("__eq__", !b.is_none() && a.equal(b));
+      PYBIND11_ENUM_OP_CONV_LHS("__ne__", b.is_none() || !a.equal(b));
 
       if (is_arithmetic) {
         PYBIND11_ENUM_OP_CONV("__lt__", a < b);
@@ -1616,6 +1631,7 @@ struct enum_base
         PYBIND11_ENUM_OP_CONV("__ror__", a | b);
         PYBIND11_ENUM_OP_CONV("__xor__", a ^ b);
         PYBIND11_ENUM_OP_CONV("__rxor__", a ^ b);
+        m_base.attr("__invert__") = cpp_function([](object arg) { return ~(int_(arg)); }, is_method(m_base));
       }
     } else {
       PYBIND11_ENUM_OP_STRICT("__eq__", int_(a).equal(int_(b)), return false);
@@ -1631,6 +1647,7 @@ struct enum_base
       }
     }
 
+#undef PYBIND11_ENUM_OP_CONV_LHS
 #undef PYBIND11_ENUM_OP_CONV
 #undef PYBIND11_ENUM_OP_STRICT
 
@@ -1692,6 +1709,10 @@ public:
 #if PY_MAJOR_VERSION < 3
     def("__long__", [](Type value) { return (Scalar)value; });
 #endif
+#if PY_MAJOR_VERSION > 3 || (PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION >= 8)
+    def("__index__", [](Type value) { return (Scalar)value; });
+#endif
+
     cpp_function setstate([](Type& value, Scalar arg) { value = static_cast<Type>(arg); }, is_method(*this));
     attr("__setstate__") = setstate;
   }
@@ -2204,8 +2225,8 @@ class gil_scoped_release
 error_already_set::~error_already_set()
 {
   if (m_type) {
-    error_scope scope;
     gil_scoped_acquire gil;
+    error_scope scope;
     m_type.release().dec_ref();
     m_value.release().dec_ref();
     m_trace.release().dec_ref();
diff --git a/pybind11/include/pybind11/pytypes.h b/pybind11/include/pybind11/pytypes.h
index f90bc11d42178be0f38de54562c25334efcbe82f..9d52eff494a360459af077bd9d80abd8c38e81a6 100644
--- a/pybind11/include/pybind11/pytypes.h
+++ b/pybind11/include/pybind11/pytypes.h
@@ -1732,6 +1732,10 @@ public:
   {
     return (size_t)PyTuple_Size(m_ptr);
   }
+  bool empty() const
+  {
+    return size() == 0;
+  }
   detail::tuple_accessor operator[](size_t index) const
   {
     return {*this, index};
@@ -1772,6 +1776,10 @@ public:
   {
     return (size_t)PyDict_Size(m_ptr);
   }
+  bool empty() const
+  {
+    return size() == 0;
+  }
   detail::dict_iterator begin() const
   {
     return {*this, 0};
@@ -1784,13 +1792,10 @@ public:
   {
     PyDict_Clear(ptr());
   }
-  bool contains(handle key) const
-  {
-    return PyDict_Contains(ptr(), key.ptr()) == 1;
-  }
-  bool contains(const char* key) const
+  template <typename T>
+  bool contains(T&& key) const
   {
-    return PyDict_Contains(ptr(), pybind11::str(key).ptr()) == 1;
+    return PyDict_Contains(m_ptr, detail::object_or_cast(std::forward<T>(key)).ptr()) == 1;
   }
 
 private:
@@ -1811,6 +1816,10 @@ public:
   {
     return (size_t)PySequence_Size(m_ptr);
   }
+  bool empty() const
+  {
+    return size() == 0;
+  }
   detail::sequence_accessor operator[](size_t index) const
   {
     return {*this, index};
@@ -1843,6 +1852,10 @@ public:
   {
     return (size_t)PyList_Size(m_ptr);
   }
+  bool empty() const
+  {
+    return size() == 0;
+  }
   detail::list_accessor operator[](size_t index) const
   {
     return {*this, index};
@@ -1864,6 +1877,11 @@ public:
   {
     PyList_Append(m_ptr, detail::object_or_cast(std::forward<T>(val)).ptr());
   }
+  template <typename T>
+  void insert(size_t index, T&& val) const
+  {
+    PyList_Insert(m_ptr, static_cast<ssize_t>(index), detail::object_or_cast(std::forward<T>(val)).ptr());
+  }
 };
 
 class args : public tuple
@@ -1889,6 +1907,10 @@ public:
   {
     return (size_t)PySet_Size(m_ptr);
   }
+  bool empty() const
+  {
+    return size() == 0;
+  }
   template <typename T>
   bool add(T&& val) const
   {
@@ -1898,6 +1920,11 @@ public:
   {
     PySet_Clear(m_ptr);
   }
+  template <typename T>
+  bool contains(T&& val) const
+  {
+    return PySet_Contains(m_ptr, detail::object_or_cast(std::forward<T>(val)).ptr()) == 1;
+  }
 };
 
 class function : public object
@@ -1928,7 +1955,7 @@ class buffer : public object
 public:
   PYBIND11_OBJECT_DEFAULT(buffer, object, PyObject_CheckBuffer)
 
-  buffer_info request(bool writable = false)
+  buffer_info request(bool writable = false) const
   {
     int flags = PyBUF_STRIDES | PyBUF_FORMAT;
     if (writable)
@@ -1965,7 +1992,7 @@ public:
     buf.strides = py_strides.data();
     buf.shape = py_shape.data();
     buf.suboffsets = nullptr;
-    buf.readonly = false;
+    buf.readonly = info.readonly;
     buf.internal = nullptr;
 
     m_ptr = PyMemoryView_FromBuffer(&buf);
diff --git a/pybind11/include/pybind11/stl_bind.h b/pybind11/include/pybind11/stl_bind.h
index 84f5bb3976d1f3c6f37fed7b50952403f360d2f7..f200de62e6bafcee1de7eee761a162b1d220085f 100644
--- a/pybind11/include/pybind11/stl_bind.h
+++ b/pybind11/include/pybind11/stl_bind.h
@@ -132,6 +132,14 @@ void vector_modifiers(enable_if_t<is_copy_constructible<typename Vector::value_t
   using SizeType = typename Vector::size_type;
   using DiffType = typename Vector::difference_type;
 
+  auto wrap_i = [](DiffType i, SizeType n) {
+    if (i < 0)
+      i += n;
+    if (i < 0 || (SizeType)i >= n)
+      throw index_error();
+    return i;
+  };
+
   cl.def(
       "append", [](Vector& v, const T& value) { v.push_back(value); }, arg("x"), "Add an item to the end of the list");
 
@@ -143,6 +151,9 @@ void vector_modifiers(enable_if_t<is_copy_constructible<typename Vector::value_t
     return v.release();
   }));
 
+  cl.def(
+      "clear", [](Vector& v) { v.clear(); }, "Clear the contents");
+
   cl.def(
       "extend",
       [](Vector& v, const Vector& src) { v.insert(v.end(), src.begin(), src.end()); },
@@ -173,10 +184,13 @@ void vector_modifiers(enable_if_t<is_copy_constructible<typename Vector::value_t
 
   cl.def(
       "insert",
-      [](Vector& v, SizeType i, const T& x) {
-        if (i > v.size())
+      [](Vector& v, DiffType i, const T& x) {
+        // Can't use wrap_i; i == v.size() is OK
+        if (i < 0)
+          i += v.size();
+        if (i < 0 || (SizeType)i > v.size())
           throw index_error();
-        v.insert(v.begin() + (DiffType)i, x);
+        v.insert(v.begin() + i, x);
       },
       arg("i"),
       arg("x"),
@@ -195,20 +209,18 @@ void vector_modifiers(enable_if_t<is_copy_constructible<typename Vector::value_t
 
   cl.def(
       "pop",
-      [](Vector& v, SizeType i) {
-        if (i >= v.size())
-          throw index_error();
-        T t = v[i];
-        v.erase(v.begin() + (DiffType)i);
+      [wrap_i](Vector& v, DiffType i) {
+        i = wrap_i(i, v.size());
+        T t = v[(SizeType)i];
+        v.erase(v.begin() + i);
         return t;
       },
       arg("i"),
       "Remove and return the item at index ``i``");
 
-  cl.def("__setitem__", [](Vector& v, SizeType i, const T& t) {
-    if (i >= v.size())
-      throw index_error();
-    v[i] = t;
+  cl.def("__setitem__", [wrap_i](Vector& v, DiffType i, const T& t) {
+    i = wrap_i(i, v.size());
+    v[(SizeType)i] = t;
   });
 
   /// Slicing protocol
@@ -251,10 +263,9 @@ void vector_modifiers(enable_if_t<is_copy_constructible<typename Vector::value_t
 
   cl.def(
       "__delitem__",
-      [](Vector& v, SizeType i) {
-        if (i >= v.size())
-          throw index_error();
-        v.erase(v.begin() + DiffType(i));
+      [wrap_i](Vector& v, DiffType i) {
+        i = wrap_i(i, v.size());
+        v.erase(v.begin() + i);
       },
       "Delete the list elements at index ``i``");
 
@@ -290,14 +301,22 @@ void vector_accessor(enable_if_t<!vector_needs_copy<Vector>::value, Class_>& cl)
 {
   using T = typename Vector::value_type;
   using SizeType = typename Vector::size_type;
+  using DiffType = typename Vector::difference_type;
   using ItType = typename Vector::iterator;
 
+  auto wrap_i = [](DiffType i, SizeType n) {
+    if (i < 0)
+      i += n;
+    if (i < 0 || (SizeType)i >= n)
+      throw index_error();
+    return i;
+  };
+
   cl.def(
       "__getitem__",
-      [](Vector& v, SizeType i) -> T& {
-        if (i >= v.size())
-          throw index_error();
-        return v[i];
+      [wrap_i](Vector& v, DiffType i) -> T& {
+        i = wrap_i(i, v.size());
+        return v[(SizeType)i];
       },
       return_value_policy::reference_internal // ref + keepalive
   );
@@ -317,11 +336,14 @@ void vector_accessor(enable_if_t<vector_needs_copy<Vector>::value, Class_>& cl)
 {
   using T = typename Vector::value_type;
   using SizeType = typename Vector::size_type;
+  using DiffType = typename Vector::difference_type;
   using ItType = typename Vector::iterator;
-  cl.def("__getitem__", [](const Vector& v, SizeType i) -> T {
-    if (i >= v.size())
+  cl.def("__getitem__", [](const Vector& v, DiffType i) -> T {
+    if (i < 0 && (i += v.size()) < 0)
+      throw index_error();
+    if ((SizeType)i >= v.size())
       throw index_error();
-    return v[i];
+    return v[(SizeType)i];
   });
 
   cl.def(
@@ -513,7 +535,7 @@ void map_assignment(const Args&...)
 
 // Map assignment when copy-assignable: just copy the value
 template <typename Map, typename Class_>
-void map_assignment(enable_if_t<std::is_copy_assignable<typename Map::mapped_type>::value, Class_>& cl)
+void map_assignment(enable_if_t<is_copy_assignable<typename Map::mapped_type>::value, Class_>& cl)
 {
   using KeyType = typename Map::key_type;
   using MappedType = typename Map::mapped_type;
@@ -529,7 +551,7 @@ void map_assignment(enable_if_t<std::is_copy_assignable<typename Map::mapped_typ
 
 // Not copy-assignable, but still copy-constructible: we can update the value by erasing and reinserting
 template <typename Map, typename Class_>
-void map_assignment(enable_if_t<!std::is_copy_assignable<typename Map::mapped_type>::value
+void map_assignment(enable_if_t<!is_copy_assignable<typename Map::mapped_type>::value
                                     && is_copy_constructible<typename Map::mapped_type>::value,
                                 Class_>& cl)
 {
diff --git a/pybind11/pybind11/__init__.py b/pybind11/pybind11/__init__.py
index 4859689de2911f4563a37bc188f7910b55fa13d9..9adbbc5d07c01585bb0187bdb975ff311d642a79 100644
--- a/pybind11/pybind11/__init__.py
+++ b/pybind11/pybind11/__init__.py
@@ -2,26 +2,11 @@ from ._version import version_info, __version__     # noqa: F401 imported but un
 
 
 def get_include(user=False):
-    from distutils.dist import Distribution
     import os
-    import sys
-
-    # Are we running in a virtual environment?
-    virtualenv = hasattr(sys, 'real_prefix') or \
-        sys.prefix != getattr(sys, "base_prefix", sys.prefix)
-
-    if virtualenv:
-        return os.path.join(sys.prefix, 'include', 'site', 'python' + sys.version[:3])
+    d = os.path.dirname(__file__)
+    if os.path.exists(os.path.join(d, "include")):
+        # Package is installed
+        return os.path.join(d, "include")
     else:
-        dist = Distribution({'name': 'pybind11'})
-        dist.parse_config_files()
-
-        dist_cobj = dist.get_command_obj('install', create=True)
-
-        # Search for packages in user's home directory?
-        if user:
-            dist_cobj.user = user
-            dist_cobj.prefix = ""
-        dist_cobj.finalize_options()
-
-        return os.path.dirname(dist_cobj.install_headers)
+        # Package is from a source directory
+        return os.path.join(os.path.dirname(d), "include")
diff --git a/pybind11/pybind11/__main__.py b/pybind11/pybind11/__main__.py
index e73ed781530db8c2fa5a7ce9c8c030bcdbcb500e..91e6e400008df7d7c8d5989b5c4b526fe8b6d481 100644
--- a/pybind11/pybind11/__main__.py
+++ b/pybind11/pybind11/__main__.py
@@ -8,7 +8,7 @@ from . import get_include
 
 
 def print_includes():
-    dirs = [sysconfig.get_path('include'), sysconfig.get_path('platinclude'), get_include(), get_include(True)]
+    dirs = [sysconfig.get_path('include'), sysconfig.get_path('platinclude'), get_include()]
 
     # Make unique but preserve order
     unique_dirs = []
diff --git a/pybind11/pybind11/_version.py b/pybind11/pybind11/_version.py
index fef541bdbc7d5d4f8f417290ed564486005eb6e9..8d5aa5c7675609643f294bd56fc77598e93b9bcf 100644
--- a/pybind11/pybind11/_version.py
+++ b/pybind11/pybind11/_version.py
@@ -1,2 +1,2 @@
-version_info = (2, 3, 'dev1')
+version_info = (2, 5, 0)
 __version__ = '.'.join(map(str, version_info))
diff --git a/pybind11/setup.py b/pybind11/setup.py
index 1ac6931d87fdb96a14e58036f86683225b0fb836..3f1755a29292949c75db14e398516e404091f462 100644
--- a/pybind11/setup.py
+++ b/pybind11/setup.py
@@ -4,40 +4,43 @@
 
 from setuptools import setup
 from distutils.command.install_headers import install_headers
+from distutils.command.build_py import build_py
 from pybind11 import __version__
 import os
 
+package_data = [
+    'include/pybind11/detail/class.h',
+    'include/pybind11/detail/common.h',
+    'include/pybind11/detail/descr.h',
+    'include/pybind11/detail/init.h',
+    'include/pybind11/detail/internals.h',
+    'include/pybind11/detail/typeid.h',
+    'include/pybind11/attr.h',
+    'include/pybind11/buffer_info.h',
+    'include/pybind11/cast.h',
+    'include/pybind11/chrono.h',
+    'include/pybind11/common.h',
+    'include/pybind11/complex.h',
+    'include/pybind11/eigen.h',
+    'include/pybind11/embed.h',
+    'include/pybind11/eval.h',
+    'include/pybind11/functional.h',
+    'include/pybind11/iostream.h',
+    'include/pybind11/numpy.h',
+    'include/pybind11/operators.h',
+    'include/pybind11/options.h',
+    'include/pybind11/pybind11.h',
+    'include/pybind11/pytypes.h',
+    'include/pybind11/stl.h',
+    'include/pybind11/stl_bind.h',
+]
+
 # Prevent installation of pybind11 headers by setting
 # PYBIND11_USE_CMAKE.
 if os.environ.get('PYBIND11_USE_CMAKE'):
     headers = []
 else:
-    headers = [
-        'include/pybind11/detail/class.h',
-        'include/pybind11/detail/common.h',
-        'include/pybind11/detail/descr.h',
-        'include/pybind11/detail/init.h',
-        'include/pybind11/detail/internals.h',
-        'include/pybind11/detail/typeid.h',
-        'include/pybind11/attr.h',
-        'include/pybind11/buffer_info.h',
-        'include/pybind11/cast.h',
-        'include/pybind11/chrono.h',
-        'include/pybind11/common.h',
-        'include/pybind11/complex.h',
-        'include/pybind11/eigen.h',
-        'include/pybind11/embed.h',
-        'include/pybind11/eval.h',
-        'include/pybind11/functional.h',
-        'include/pybind11/iostream.h',
-        'include/pybind11/numpy.h',
-        'include/pybind11/operators.h',
-        'include/pybind11/options.h',
-        'include/pybind11/pybind11.h',
-        'include/pybind11/pytypes.h',
-        'include/pybind11/stl.h',
-        'include/pybind11/stl_bind.h',
-    ]
+    headers = package_data
 
 
 class InstallHeaders(install_headers):
@@ -56,6 +59,17 @@ class InstallHeaders(install_headers):
             self.outfiles.append(out)
 
 
+# Install the headers inside the package as well
+class BuildPy(build_py):
+
+    def build_package_data(self):
+        build_py.build_package_data(self)  # run the stock build_py step first
+        for header in package_data:  # then copy every listed header as well
+            target = os.path.join(self.build_lib, 'pybind11', header)
+            self.mkpath(os.path.dirname(target))
+            self.copy_file(header, target, preserve_mode=False)
+
+
 setup(
     name='pybind11',
     version=__version__,
@@ -67,7 +81,8 @@ setup(
     packages=['pybind11'],
     license='BSD',
     headers=headers,
-    cmdclass=dict(install_headers=InstallHeaders),
+    zip_safe=False,
+    cmdclass=dict(install_headers=InstallHeaders, build_py=BuildPy),
     classifiers=[
         'Development Status :: 5 - Production/Stable', 'Intended Audience :: Developers',
         'Topic :: Software Development :: Libraries :: Python Modules', 'Topic :: Utilities',
diff --git a/pybind11/tests/CMakeLists.txt b/pybind11/tests/CMakeLists.txt
index bc72a4bdb7bc778d7255d96e5865947f0d263313..d31f25012e6f5b1876d7b34793a979811df09c59 100644
--- a/pybind11/tests/CMakeLists.txt
+++ b/pybind11/tests/CMakeLists.txt
@@ -30,6 +30,7 @@ endif()
 
 # Full set of test files (you can override these; see below)
 set(PYBIND11_TEST_FILES
+    test_async.cpp
     test_buffers.cpp
     test_builtin_casters.cpp
     test_call_policies.cpp
@@ -72,6 +73,13 @@ if(PYBIND11_TEST_OVERRIDE)
   set(PYBIND11_TEST_FILES ${PYBIND11_TEST_OVERRIDE})
 endif()
 
+# Skip test_async for Python < 3.5
+list(FIND PYBIND11_TEST_FILES test_async.cpp PYBIND11_TEST_FILES_ASYNC_I)
+if((PYBIND11_TEST_FILES_ASYNC_I GREATER -1) AND ("${PYTHON_VERSION_MAJOR}.${PYTHON_VERSION_MINOR}" VERSION_LESS 3.5))
+  message(STATUS "Skipping test_async because Python version ${PYTHON_VERSION_MAJOR}.${PYTHON_VERSION_MINOR} < 3.5")
+  list(REMOVE_AT PYBIND11_TEST_FILES ${PYBIND11_TEST_FILES_ASYNC_I})
+endif()
+
 string(REPLACE ".cpp"
                ".py"
                PYBIND11_PYTEST_FILES
@@ -81,6 +89,8 @@ string(REPLACE ".cpp"
 # because TEST_OVERRIDE is used and doesn't include them) the second module doesn't get built.
 set(PYBIND11_CROSS_MODULE_TESTS test_exceptions.py test_local_bindings.py test_stl.py test_stl_binders.py)
 
+set(PYBIND11_CROSS_MODULE_GIL_TESTS test_gil_scoped.py)
+
 # Check if Eigen is available; if not, remove from PYBIND11_TEST_FILES (but keep it in PYBIND11_PYTEST_FILES, so that
 # we get the "eigen is not installed" skip message).
 list(FIND PYBIND11_TEST_FILES test_eigen.cpp PYBIND11_TEST_FILES_EIGEN_I)
@@ -149,6 +159,14 @@ foreach(t ${PYBIND11_CROSS_MODULE_TESTS})
   endif()
 endforeach()
 
+foreach(t ${PYBIND11_CROSS_MODULE_GIL_TESTS})
+  list(FIND PYBIND11_PYTEST_FILES ${t} i)
+  if(i GREATER -1)
+    list(APPEND test_targets cross_module_gil_utils)
+    break()
+  endif()
+endforeach()
+
 set(testdir ${CMAKE_CURRENT_SOURCE_DIR})
 foreach(target ${test_targets})
   set(test_files ${PYBIND11_TEST_FILES})
diff --git a/pybind11/tests/conftest.py b/pybind11/tests/conftest.py
index 57f7f9aca919dfa03bb0dd751c8f4a5e8015f7ee..d362e19a30bc461c178630120f6c5c62270d6143 100644
--- a/pybind11/tests/conftest.py
+++ b/pybind11/tests/conftest.py
@@ -17,6 +17,11 @@ _unicode_marker = re.compile(r'u(\'[^\']*\')')
 _long_marker = re.compile(r'([0-9])L')
 _hexadecimal = re.compile(r'0x[0-9a-fA-F]+')
 
+# test_async.py requires support for async and await
+collect_ignore = []
+if sys.version_info[:2] < (3, 5):
+    collect_ignore.append("test_async.py")
+
 
 def _strip_and_dedent(s):
     """For triple-quote strings"""
diff --git a/pybind11/tests/cross_module_gil_utils.cpp b/pybind11/tests/cross_module_gil_utils.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..1e1a44fae5dc88ff433ec60916f222db6fd43d81
--- /dev/null
+++ b/pybind11/tests/cross_module_gil_utils.cpp
@@ -0,0 +1,63 @@
+/*
+    tests/cross_module_gil_utils.cpp -- tools for acquiring GIL from a different module
+
+    Copyright (c) 2019 Google LLC
+
+    All rights reserved. Use of this source code is governed by a
+    BSD-style license that can be found in the LICENSE file.
+*/
+#include <pybind11/pybind11.h>
+#include <cstdint>
+
+// This file mimics a DSO that makes pybind11 calls but does not define a
+// PYBIND11_MODULE. The purpose is to test that such a DSO can create a
+// py::gil_scoped_acquire when the running thread is in a GIL-released state.
+//
+// Note that we define a Python module here for convenience, but in general
+// this need not be the case. The typical scenario would be a DSO that implements
+// shared logic used internally by multiple pybind11 modules.
+
+namespace {
+
+namespace py = pybind11;
+void gil_acquire() // its address is exported as "gil_acquire_funcaddr" by the module init function
+{
+  py::gil_scoped_acquire gil; // constructed and destroyed immediately; the function does nothing else
+}
+
+constexpr char kModuleName[] = "cross_module_gil_utils";
+
+#if PY_MAJOR_VERSION >= 3
+struct PyModuleDef moduledef = {PyModuleDef_HEAD_INIT, kModuleName, NULL, 0, NULL, NULL, NULL, NULL, NULL};
+#else
+PyMethodDef module_methods[] = {{NULL, NULL, 0, NULL}};
+#endif
+
+} // namespace
+
+extern "C" PYBIND11_EXPORT
+#if PY_MAJOR_VERSION >= 3
+    PyObject*
+    PyInit_cross_module_gil_utils()
+#else
+    void
+    initcross_module_gil_utils()
+#endif
+{
+  // Create an empty module object; the C API call differs between Python 2 and 3.
+  PyObject* m =
+#if PY_MAJOR_VERSION >= 3
+      PyModule_Create(&moduledef);
+#else
+      Py_InitModule(kModuleName, module_methods);
+#endif
+  // On success, publish the address of gil_acquire as an integer module attribute.
+  if (m != NULL) {
+    static_assert(sizeof(&gil_acquire) == sizeof(void*), "Function pointer must have the same size as void*");
+    PyModule_AddObject(m, "gil_acquire_funcaddr", PyLong_FromVoidPtr(reinterpret_cast<void*>(&gil_acquire)));
+  }
+  // Only the Python 3 entry point returns the module object.
+#if PY_MAJOR_VERSION >= 3
+  return m;
+#endif
+}
diff --git a/pybind11/tests/test_async.cpp b/pybind11/tests/test_async.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..2502e6ce33bab1bad123e8ec999744919753aee5
--- /dev/null
+++ b/pybind11/tests/test_async.cpp
@@ -0,0 +1,28 @@
+/*
+    tests/test_async.cpp -- __await__ support
+
+    Copyright (c) 2019 Google Inc.
+
+    All rights reserved. Use of this source code is governed by a
+    BSD-style license that can be found in the LICENSE file.
+*/
+
+#include "pybind11_tests.h"
+
+TEST_SUBMODULE(async_module, m)
+{
+  struct DoesNotSupportAsync // bound without __await__
+  {};
+  py::class_<DoesNotSupportAsync>(m, "DoesNotSupportAsync").def(py::init<>());
+  struct SupportsAsync // bound with an __await__ that resolves to 5
+  {};
+  py::class_<SupportsAsync>(m, "SupportsAsync")
+      .def(py::init<>())
+      .def("__await__", [](const SupportsAsync& self) -> py::object {
+        static_cast<void>(self); // self is not otherwise used
+        py::object loop = py::module::import("asyncio.events").attr("get_event_loop")();
+        py::object f = loop.attr("create_future")();
+        f.attr("set_result")(5); // result is set before the awaitable is handed back
+        return f.attr("__await__")();
+      });
+}
diff --git a/pybind11/tests/test_async.py b/pybind11/tests/test_async.py
new file mode 100644
index 0000000000000000000000000000000000000000..e1c959d60260986195fb17fcc3bf580612503f8b
--- /dev/null
+++ b/pybind11/tests/test_async.py
@@ -0,0 +1,23 @@
+import asyncio
+import pytest
+from pybind11_tests import async_module as m
+
+
+@pytest.fixture
+def event_loop():
+    loop = asyncio.new_event_loop()
+    yield loop  # hand the fresh loop to the test; it is closed once the test is done
+    loop.close()
+
+
+async def get_await_result(x):
+    return await x
+
+
+def test_await(event_loop):
+    assert 5 == event_loop.run_until_complete(get_await_result(m.SupportsAsync()))
+
+
+def test_await_missing(event_loop):
+    with pytest.raises(TypeError):
+        event_loop.run_until_complete(get_await_result(m.DoesNotSupportAsync()))
diff --git a/pybind11/tests/test_buffers.cpp b/pybind11/tests/test_buffers.cpp
index c129cc8c7e7c360cd2fc0fdde597f0b64b64ad49..2dd26cd3cbbf24b46cfd7de62d6322438fbb7270 100644
--- a/pybind11/tests/test_buffers.cpp
+++ b/pybind11/tests/test_buffers.cpp
@@ -109,7 +109,7 @@ TEST_SUBMODULE(buffers, m)
   py::class_<Matrix>(m, "Matrix", py::buffer_protocol())
       .def(py::init<ssize_t, ssize_t>())
       /// Construct from a buffer
-      .def(py::init([](py::buffer b) {
+      .def(py::init([](py::buffer const b) {
         py::buffer_info info = b.request();
         if (info.format != py::format_descriptor<float>::format() || info.ndim != 2)
           throw std::runtime_error("Incompatible buffer format!");
@@ -207,4 +207,36 @@ TEST_SUBMODULE(buffers, m)
       .def(py::init<>())
       .def_readwrite("value", (int32_t DerivedBuffer::*)&DerivedBuffer::value)
       .def_buffer(&DerivedBuffer::get_buffer_info);
+
+  struct BufferReadOnly
+  {
+    const uint8_t value = 0;
+    BufferReadOnly(uint8_t value)
+      : value(value)
+    {}
+
+    py::buffer_info get_buffer_info()
+    {
+      return py::buffer_info(&value, 1);
+    }
+  };
+  py::class_<BufferReadOnly>(m, "BufferReadOnly", py::buffer_protocol())
+      .def(py::init<uint8_t>())
+      .def_buffer(&BufferReadOnly::get_buffer_info);
+
+  struct BufferReadOnlySelect
+  {
+    uint8_t value = 0;
+    bool readonly = false;
+
+    py::buffer_info get_buffer_info()
+    {
+      return py::buffer_info(&value, 1, readonly);
+    }
+  };
+  py::class_<BufferReadOnlySelect>(m, "BufferReadOnlySelect", py::buffer_protocol())
+      .def(py::init<>())
+      .def_readwrite("value", &BufferReadOnlySelect::value)
+      .def_readwrite("readonly", &BufferReadOnlySelect::readonly)
+      .def_buffer(&BufferReadOnlySelect::get_buffer_info);
 }
diff --git a/pybind11/tests/test_buffers.py b/pybind11/tests/test_buffers.py
index 9352ecf526a5201082203b630b68bf76168c4d3f..6d68c4092dbacbdc8282a9a00f22aaa616075b2b 100644
--- a/pybind11/tests/test_buffers.py
+++ b/pybind11/tests/test_buffers.py
@@ -1,8 +1,14 @@
+import io
 import struct
+import sys
+
 import pytest
+
 from pybind11_tests import buffers as m
 from pybind11_tests import ConstructorStats
 
+PY3 = sys.version_info[0] >= 3
+
 pytestmark = pytest.requires_numpy
 
 with pytest.suppress(ImportError):
@@ -85,3 +91,28 @@ def test_pointer_to_member_fn():
         buf.value = 0x12345678
         value = struct.unpack('i', bytearray(buf))[0]
         assert value == 0x12345678
+
+
+@pytest.unsupported_on_pypy
+def test_readonly_buffer():
+    buf = m.BufferReadOnly(0x64)
+    view = memoryview(buf)
+    # Parenthesized: otherwise Python 2 evaluates `assert b'd'`, which always passes
+    assert view[0] == (0x64 if PY3 else b'd')
+    assert view.readonly
+
+
+@pytest.unsupported_on_pypy
+def test_selective_readonly_buffer():
+    buf = m.BufferReadOnlySelect()
+
+    memoryview(buf)[0] = 0x64 if PY3 else b'd'
+    assert buf.value == 0x64
+
+    io.BytesIO(b'A').readinto(buf)
+    assert buf.value == ord(b'A')
+
+    buf.readonly = True
+    with pytest.raises(TypeError):
+        memoryview(buf)[0] = 0 if PY3 else b'\0'
+    with pytest.raises(TypeError):
+        io.BytesIO(b'1').readinto(buf)
diff --git a/pybind11/tests/test_builtin_casters.cpp b/pybind11/tests/test_builtin_casters.cpp
index 6523ac0bc5c258094d3b1fc9a37d81d6ebc252b1..306ab3482a3e335d0ac6dc65aaa6dae6103276bf 100644
--- a/pybind11/tests/test_builtin_casters.cpp
+++ b/pybind11/tests/test_builtin_casters.cpp
@@ -37,7 +37,8 @@ TEST_SUBMODULE(builtin_casters, m)
   } // 𝐀, utf32
   wstr.push_back(0x7a); // z
 
-  m.def("good_utf8_string", []() { return std::string(u8"Say utf8\u203d \U0001f382 \U0001d400"); }); // Say utf8‽ 🎂 𝐀
+  m.def("good_utf8_string",
+        []() { return std::string((const char*)u8"Say utf8\u203d \U0001f382 \U0001d400"); }); // Say utf8‽ 🎂 𝐀
   m.def("good_utf16_string", [=]() {
     return std::u16string({b16, ib16, cake16_1, cake16_2, mathbfA16_1, mathbfA16_2, z16});
   }); // b‽🎂𝐀z
@@ -72,6 +73,22 @@ TEST_SUBMODULE(builtin_casters, m)
   m.def("strlen", [](char* s) { return strlen(s); });
   m.def("string_length", [](std::string s) { return s.length(); });
 
+#ifdef PYBIND11_HAS_U8STRING
+  m.attr("has_u8string") = true;
+  m.def("good_utf8_u8string",
+        []() { return std::u8string(u8"Say utf8\u203d \U0001f382 \U0001d400"); }); // Say utf8‽ 🎂 𝐀
+  m.def("bad_utf8_u8string", []() {
+    return std::u8string((const char8_t*)"abc\xd0"
+                                         "def");
+  });
+
+  m.def("u8_char8_Z", []() -> char8_t { return u8'Z'; });
+
+  // test_single_char_arguments
+  m.def("ord_char8", [](char8_t c) -> int { return static_cast<unsigned char>(c); });
+  m.def("ord_char8_lv", [](char8_t& c) -> int { return static_cast<unsigned char>(c); });
+#endif
+
   // test_string_view
 #ifdef PYBIND11_HAS_STRING_VIEW
   m.attr("has_string_view") = true;
@@ -96,9 +113,20 @@ TEST_SUBMODULE(builtin_casters, m)
       l.append((int)c);
     return l;
   });
-  m.def("string_view_return", []() { return std::string_view(u8"utf8 secret \U0001f382"); });
+  m.def("string_view_return", []() { return std::string_view((const char*)u8"utf8 secret \U0001f382"); });
   m.def("string_view16_return", []() { return std::u16string_view(u"utf16 secret \U0001f382"); });
   m.def("string_view32_return", []() { return std::u32string_view(U"utf32 secret \U0001f382"); });
+
+#  ifdef PYBIND11_HAS_U8STRING
+  m.def("string_view8_print", [](std::u8string_view s) { py::print(s, s.size()); });
+  m.def("string_view8_chars", [](std::u8string_view s) {
+    py::list l;
+    for (auto c : s)
+      l.append((std::uint8_t)c);
+    return l;
+  });
+  m.def("string_view8_return", []() { return std::u8string_view(u8"utf8 secret \U0001f382"); });
+#  endif
 #endif
 
   // test_integer_casting
diff --git a/pybind11/tests/test_builtin_casters.py b/pybind11/tests/test_builtin_casters.py
index 91725256979c7e84c6d3d53ce774d734f26f6a3b..9b960b9881e7a8a623f2dfed49f959b5aab60419 100644
--- a/pybind11/tests/test_builtin_casters.py
+++ b/pybind11/tests/test_builtin_casters.py
@@ -15,6 +15,8 @@ def test_unicode_conversion():
     assert m.good_utf16_string() == u"b‽🎂𝐀z"
     assert m.good_utf32_string() == u"a𝐀🎂‽z"
     assert m.good_wchar_string() == u"a⸘𝐀z"
+    if hasattr(m, "has_u8string"):
+        assert m.good_utf8_u8string() == u"Say utf8‽ 🎂 𝐀"
 
     with pytest.raises(UnicodeDecodeError):
         m.bad_utf8_string()
@@ -29,12 +31,17 @@ def test_unicode_conversion():
     if hasattr(m, "bad_wchar_string"):
         with pytest.raises(UnicodeDecodeError):
             m.bad_wchar_string()
+    if hasattr(m, "has_u8string"):
+        with pytest.raises(UnicodeDecodeError):
+            m.bad_utf8_u8string()
 
     assert m.u8_Z() == 'Z'
     assert m.u8_eacute() == u'é'
     assert m.u16_ibang() == u'‽'
     assert m.u32_mathbfA() == u'𝐀'
     assert m.wchar_heart() == u'♥'
+    if hasattr(m, "has_u8string"):
+        assert m.u8_char8_Z() == 'Z'
 
 
 def test_single_char_arguments():
@@ -94,6 +101,17 @@ def test_single_char_arguments():
         assert m.ord_wchar(u'aa')
     assert str(excinfo.value) == toolong_message
 
+    if hasattr(m, "has_u8string"):
+        assert m.ord_char8(u'a') == 0x61     # simple ASCII
+        assert m.ord_char8_lv(u'b') == 0x62
+        assert m.ord_char8(u'é') == 0xE9     # requires 2 bytes in utf-8, but can be stuffed in a char
+        with pytest.raises(ValueError) as excinfo:
+            assert m.ord_char8(u'Ā') == 0x100     # single code point U+0100: doesn't fit in a char
+        assert str(excinfo.value) == toobig_message(0x100)
+        with pytest.raises(ValueError) as excinfo:
+            assert m.ord_char8(u'ab')
+        assert str(excinfo.value) == toolong_message
+
 
 def test_bytes_to_string():
     """Tests the ability to pass bytes to C++ string-accepting functions.  Note that this is
@@ -118,10 +136,15 @@ def test_string_view(capture):
     assert m.string_view_chars("Hi 🎂") == [72, 105, 32, 0xf0, 0x9f, 0x8e, 0x82]
     assert m.string_view16_chars("Hi 🎂") == [72, 105, 32, 0xd83c, 0xdf82]
     assert m.string_view32_chars("Hi 🎂") == [72, 105, 32, 127874]
+    if hasattr(m, "has_u8string"):
+        assert m.string_view8_chars("Hi") == [72, 105]
+        assert m.string_view8_chars("Hi 🎂") == [72, 105, 32, 0xf0, 0x9f, 0x8e, 0x82]
 
     assert m.string_view_return() == "utf8 secret 🎂"
     assert m.string_view16_return() == "utf16 secret 🎂"
     assert m.string_view32_return() == "utf32 secret 🎂"
+    if hasattr(m, "has_u8string"):
+        assert m.string_view8_return() == "utf8 secret 🎂"
 
     with capture:
         m.string_view_print("Hi")
@@ -134,6 +157,14 @@ def test_string_view(capture):
         utf16 🎂 8
         utf32 🎂 7
     """
+    if hasattr(m, "has_u8string"):
+        with capture:
+            m.string_view8_print("Hi")
+            m.string_view8_print("utf8 🎂")
+        assert capture == """
+            Hi 2
+            utf8 🎂 9
+        """
 
     with capture:
         m.string_view_print("Hi, ascii")
@@ -146,6 +177,14 @@ def test_string_view(capture):
         Hi, utf16 🎂 12
         Hi, utf32 🎂 11
     """
+    if hasattr(m, "has_u8string"):
+        with capture:
+            m.string_view8_print("Hi, ascii")
+            m.string_view8_print("Hi, utf8 🎂")
+        assert capture == """
+            Hi, ascii 9
+            Hi, utf8 🎂 13
+        """
 
 
 def test_integer_casting():
@@ -321,11 +360,15 @@ def test_numpy_bool():
     import numpy as np
     convert, noconvert = m.bool_passthrough, m.bool_passthrough_noconvert
 
+    def cant_convert(v):
+        pytest.raises(TypeError, convert, v)
+
     # np.bool_ is not considered implicit
     assert convert(np.bool_(True)) is True
     assert convert(np.bool_(False)) is False
     assert noconvert(np.bool_(True)) is True
     assert noconvert(np.bool_(False)) is False
+    cant_convert(np.zeros(2, dtype='int'))
 
 
 def test_int_long():
diff --git a/pybind11/tests/test_chrono.py b/pybind11/tests/test_chrono.py
index 3a629c1af4d7fb73cac7a7f7cef139c179d7c223..7b7920317841cc9a5336a33a99db985a97747341 100644
--- a/pybind11/tests/test_chrono.py
+++ b/pybind11/tests/test_chrono.py
@@ -40,6 +40,62 @@ def test_chrono_system_clock_roundtrip():
     assert diff.microseconds == 0
 
 
+def test_chrono_system_clock_roundtrip_date():
+    date1 = datetime.date.today()
+
+    # Roundtrip the date through m.test_chrono2
+    datetime2 = m.test_chrono2(date1)
+    date2 = datetime2.date()
+    time2 = datetime2.time()
+
+    # The returned value should be a datetime
+    assert isinstance(datetime2, datetime.datetime)
+    assert isinstance(date2, datetime.date)
+    assert isinstance(time2, datetime.time)
+
+    # The dates should be identical (no information lost on roundtrip)
+    diff = abs(date1 - date2)
+    assert diff.days == 0
+    assert diff.seconds == 0
+    assert diff.microseconds == 0
+
+    # Year, Month & Day should be the same after the round trip
+    assert date1.year == date2.year
+    assert date1.month == date2.month
+    assert date1.day == date2.day
+
+    # There should be no time information
+    assert time2.hour == 0
+    assert time2.minute == 0
+    assert time2.second == 0
+    assert time2.microsecond == 0
+
+
+def test_chrono_system_clock_roundtrip_time():
+    time1 = datetime.datetime.today().time()
+
+    # Roundtrip the time
+    datetime2 = m.test_chrono2(time1)
+    date2 = datetime2.date()
+    time2 = datetime2.time()
+
+    # The returned value should be a datetime
+    assert isinstance(datetime2, datetime.datetime)
+    assert isinstance(date2, datetime.date)
+    assert isinstance(time2, datetime.time)
+
+    # Hour, Minute, Second & Microsecond should be the same after the round trip
+    assert time1.hour == time2.hour
+    assert time1.minute == time2.minute
+    assert time1.second == time2.second
+    assert time1.microsecond == time2.microsecond
+
+    # There should be no date information (i.e. date = python base date)
+    assert date2.year == 1970
+    assert date2.month == 1
+    assert date2.day == 1
+
+
 def test_chrono_duration_roundtrip():
 
     # Get the difference between two times (a timedelta)
@@ -70,6 +126,19 @@ def test_chrono_duration_subtraction_equivalence():
     assert cpp_diff.microseconds == diff.microseconds
 
 
+def test_chrono_duration_subtraction_equivalence_date():
+
+    date1 = datetime.date.today()
+    date2 = datetime.date.today()
+
+    diff = date2 - date1
+    cpp_diff = m.test_chrono4(date2, date1)
+
+    assert cpp_diff.days == diff.days
+    assert cpp_diff.seconds == diff.seconds
+    assert cpp_diff.microseconds == diff.microseconds
+
+
 def test_chrono_steady_clock():
     time1 = m.test_chrono5()
     assert isinstance(time1, datetime.timedelta)
diff --git a/pybind11/tests/test_copy_move.py b/pybind11/tests/test_copy_move.py
index 1cd1066c410a6bdfdb07402d3eec3636f020df82..1e13f3fb16aec5551fe53ffccb211f43dbf4840d 100644
--- a/pybind11/tests/test_copy_move.py
+++ b/pybind11/tests/test_copy_move.py
@@ -5,13 +5,13 @@ from pybind11_tests import copy_move_policies as m
 def test_lacking_copy_ctor():
     with pytest.raises(RuntimeError) as excinfo:
         m.lacking_copy_ctor.get_one()
-    assert "the object is non-copyable!" in str(excinfo.value)
+    assert "is non-copyable!" in str(excinfo.value)
 
 
 def test_lacking_move_ctor():
     with pytest.raises(RuntimeError) as excinfo:
         m.lacking_move_ctor.get_one()
-    assert "the object is neither movable nor copyable!" in str(excinfo.value)
+    assert "is neither movable nor copyable!" in str(excinfo.value)
 
 
 def test_move_and_copy_casts():
@@ -98,7 +98,7 @@ def test_private_op_new():
 
     with pytest.raises(RuntimeError) as excinfo:
         m.private_op_new_value()
-    assert "the object is neither movable nor copyable" in str(excinfo.value)
+    assert "is neither movable nor copyable" in str(excinfo.value)
 
     assert m.private_op_new_reference().value == 1
 
diff --git a/pybind11/tests/test_eigen.py b/pybind11/tests/test_eigen.py
index 286c5f711d81b3d746997e65c4cae0380b2a2952..6729052e25e68e2c589729aa88efdfc62350f6b0 100644
--- a/pybind11/tests/test_eigen.py
+++ b/pybind11/tests/test_eigen.py
@@ -669,10 +669,10 @@ def test_issue1105():
     # These should still fail (incompatible dimensions):
     with pytest.raises(TypeError) as excinfo:
         m.iss1105_row(np.ones((7, 1)))
-    assert "incompatible function arguments" in str(excinfo)
+    assert "incompatible function arguments" in str(excinfo.value)
     with pytest.raises(TypeError) as excinfo:
         m.iss1105_col(np.ones((1, 7)))
-    assert "incompatible function arguments" in str(excinfo)
+    assert "incompatible function arguments" in str(excinfo.value)
 
 
 def test_custom_operator_new():
diff --git a/pybind11/tests/test_enum.cpp b/pybind11/tests/test_enum.cpp
index f93114beeb767711ba72ab0290ef55507191111f..9a64571ee164353de39c2704f3df4ae6b6a10c0d 100644
--- a/pybind11/tests/test_enum.cpp
+++ b/pybind11/tests/test_enum.cpp
@@ -15,11 +15,13 @@ TEST_SUBMODULE(enums, m)
   enum UnscopedEnum
   {
     EOne = 1,
-    ETwo
+    ETwo,
+    EThree
   };
   py::enum_<UnscopedEnum>(m, "UnscopedEnum", py::arithmetic(), "An unscoped enumeration")
       .value("EOne", EOne, "Docstring for EOne")
       .value("ETwo", ETwo, "Docstring for ETwo")
+      .value("EThree", EThree, "Docstring for EThree")
       .export_values();
 
   // test_scoped_enum
diff --git a/pybind11/tests/test_enum.py b/pybind11/tests/test_enum.py
index d0989adcdbcc04f626babd9c9d96636099404b06..f6dc0f8599d28e3f63e501679e23a86db860f1bb 100644
--- a/pybind11/tests/test_enum.py
+++ b/pybind11/tests/test_enum.py
@@ -21,7 +21,7 @@ def test_unscoped_enum():
 
     # __members__ property
     assert m.UnscopedEnum.__members__ == \
-        {"EOne": m.UnscopedEnum.EOne, "ETwo": m.UnscopedEnum.ETwo}
+        {"EOne": m.UnscopedEnum.EOne, "ETwo": m.UnscopedEnum.ETwo, "EThree": m.UnscopedEnum.EThree}
     # __members__ readonly
     with pytest.raises(AttributeError):
         m.UnscopedEnum.__members__ = {}
@@ -29,23 +29,18 @@ def test_unscoped_enum():
     foo = m.UnscopedEnum.__members__
     foo["bar"] = "baz"
     assert m.UnscopedEnum.__members__ == \
-        {"EOne": m.UnscopedEnum.EOne, "ETwo": m.UnscopedEnum.ETwo}
+        {"EOne": m.UnscopedEnum.EOne, "ETwo": m.UnscopedEnum.ETwo, "EThree": m.UnscopedEnum.EThree}
 
-    assert m.UnscopedEnum.__doc__ == \
-        '''An unscoped enumeration
+    for docstring_line in '''An unscoped enumeration
 
 Members:
 
   EOne : Docstring for EOne
 
-  ETwo : Docstring for ETwo''' or m.UnscopedEnum.__doc__ == \
-        '''An unscoped enumeration
-
-Members:
-
   ETwo : Docstring for ETwo
 
-  EOne : Docstring for EOne'''
+  EThree : Docstring for EThree'''.split('\n'):
+        assert docstring_line in m.UnscopedEnum.__doc__
 
     # Unscoped enums will accept ==/!= int comparisons
     y = m.UnscopedEnum.ETwo
@@ -53,6 +48,38 @@ Members:
     assert 2 == y
     assert y != 3
     assert 3 != y
+    # Compare with None
+    assert (y != None)     # noqa: E711
+    assert not (y == None)     # noqa: E711
+    # Compare with an object
+    assert (y != object())
+    assert not (y == object())
+    # Compare with string
+    assert y != "2"
+    assert "2" != y
+    assert not ("2" == y)
+    assert not (y == "2")
+
+    with pytest.raises(TypeError):
+        y < object()
+
+    with pytest.raises(TypeError):
+        y <= object()
+
+    with pytest.raises(TypeError):
+        y > object()
+
+    with pytest.raises(TypeError):
+        y >= object()
+
+    with pytest.raises(TypeError):
+        y | object()
+
+    with pytest.raises(TypeError):
+        y & object()
+
+    with pytest.raises(TypeError):
+        y ^ object()
 
     assert int(m.UnscopedEnum.ETwo) == 2
     assert str(m.UnscopedEnum(2)) == "UnscopedEnum.ETwo"
@@ -71,6 +98,11 @@ Members:
     assert not (m.UnscopedEnum.ETwo < m.UnscopedEnum.EOne)
     assert not (2 < m.UnscopedEnum.EOne)
 
+    # arithmetic
+    assert m.UnscopedEnum.EOne & m.UnscopedEnum.EThree == m.UnscopedEnum.EOne
+    assert m.UnscopedEnum.EOne | m.UnscopedEnum.ETwo == m.UnscopedEnum.EThree
+    assert m.UnscopedEnum.EOne ^ m.UnscopedEnum.EThree == m.UnscopedEnum.ETwo
+
 
 def test_scoped_enum():
     assert m.test_scoped_enum(m.ScopedEnum.Three) == "ScopedEnum::Three"
@@ -82,6 +114,12 @@ def test_scoped_enum():
     assert not 3 == z
     assert z != 3
     assert 3 != z
+    # Compare with None
+    assert (z != None)     # noqa: E711
+    assert not (z == None)     # noqa: E711
+    # Compare with an object
+    assert (z != object())
+    assert not (z == object())
     # Scoped enums will *NOT* accept >, <, >= and <= int comparisons (Will throw exceptions)
     with pytest.raises(TypeError):
         z > 3
@@ -140,6 +178,7 @@ def test_binary_operators():
     assert int(m.Flags.Read | m.Flags.Execute) == 5
     assert int(m.Flags.Write | m.Flags.Execute) == 3
     assert int(m.Flags.Write | 1) == 3
+    assert ~m.Flags.Write == -3
 
     state = m.Flags.Read | m.Flags.Write
     assert (state & m.Flags.Read) != 0
diff --git a/pybind11/tests/test_exceptions.cpp b/pybind11/tests/test_exceptions.cpp
index 690fcf66706150605fbf6ff3626de75c5aadbf4b..a0516d1c548bab57558b229d3925bdf2469b78aa 100644
--- a/pybind11/tests/test_exceptions.cpp
+++ b/pybind11/tests/test_exceptions.cpp
@@ -157,6 +157,7 @@ TEST_SUBMODULE(exceptions, m)
   m.def("throws5_1", []() { throw MyException5_1("MyException5 subclass"); });
   m.def("throws_logic_error",
         []() { throw std::logic_error("this error should fall through to the standard handler"); });
+  m.def("throws_overflow_error", []() { throw std::overflow_error(""); });
   m.def("exception_matches", []() {
     py::dict foo;
     try {
diff --git a/pybind11/tests/test_exceptions.py b/pybind11/tests/test_exceptions.py
index e61cab7bea3bb7cec4801515e1f4eb96ef7ed5d3..7b01ef05f2cdb9a167cc7946d0a526f66bcc03d1 100644
--- a/pybind11/tests/test_exceptions.py
+++ b/pybind11/tests/test_exceptions.py
@@ -79,6 +79,10 @@ def test_custom(msg):
         m.throws_logic_error()
     assert msg(excinfo.value) == "this error should fall through to the standard handler"
 
+    # OverFlow error translation.
+    with pytest.raises(OverflowError) as excinfo:
+        m.throws_overflow_error()
+
     # Can we handle a helper-declared exception?
     with pytest.raises(m.MyException5) as excinfo:
         m.throws5()
diff --git a/pybind11/tests/test_gil_scoped.cpp b/pybind11/tests/test_gil_scoped.cpp
index 21824ac8532fc260a50e9900bd3e843862bf7c1b..cb4b21c1278b15c8c4fadb0445d5519a9a6830d2 100644
--- a/pybind11/tests/test_gil_scoped.cpp
+++ b/pybind11/tests/test_gil_scoped.cpp
@@ -42,4 +42,10 @@ TEST_SUBMODULE(gil_scoped, m)
   m.def("test_callback_std_func", [](const std::function<void()>& func) { func(); });
   m.def("test_callback_virtual_func", [](VirtClass& virt) { virt.virtual_func(); });
   m.def("test_callback_pure_virtual_func", [](VirtClass& virt) { virt.pure_virtual_func(); });
+  m.def("test_cross_module_gil", []() {
+    auto cm = py::module::import("cross_module_gil_utils");
+    auto gil_acquire = reinterpret_cast<void (*)()>(PyLong_AsVoidPtr(cm.attr("gil_acquire_funcaddr").ptr()));
+    py::gil_scoped_release gil_release;
+    gil_acquire();
+  });
 }
diff --git a/pybind11/tests/test_gil_scoped.py b/pybind11/tests/test_gil_scoped.py
index da6c4a0aa1035a978b8abafe28cc62dd27e962a5..ed27139ed39a4d66ffbe053af13e169bfb1a3d1f 100644
--- a/pybind11/tests/test_gil_scoped.py
+++ b/pybind11/tests/test_gil_scoped.py
@@ -80,3 +80,8 @@ def test_python_to_cpp_to_python_from_process():
     This test is for completion, but it was never an issue.
     """
     assert _run_in_process(_python_to_cpp_to_python) == 0
+
+
+def test_cross_module_gil():
+    """Makes sure that the GIL can be acquired by another module from a GIL-released state."""
+    m.test_cross_module_gil()     # Should not raise a SIGSEGV
diff --git a/pybind11/tests/test_local_bindings.py b/pybind11/tests/test_local_bindings.py
index c48b6dea399d5711a10dd56ab0cd3f67ce1900f8..a69b45294bf44101ca1204e0e2f8809c901c970e 100644
--- a/pybind11/tests/test_local_bindings.py
+++ b/pybind11/tests/test_local_bindings.py
@@ -219,7 +219,7 @@ def test_cross_module_calls():
     c, d = m.MixGL2(3), cm.MixGL2(4)
     with pytest.raises(TypeError) as excinfo:
         m.get_gl_value(c)
-    assert "incompatible function arguments" in str(excinfo)
+    assert "incompatible function arguments" in str(excinfo.value)
     with pytest.raises(TypeError) as excinfo:
         m.get_gl_value(d)
-    assert "incompatible function arguments" in str(excinfo)
+    assert "incompatible function arguments" in str(excinfo.value)
diff --git a/pybind11/tests/test_methods_and_attributes.cpp b/pybind11/tests/test_methods_and_attributes.cpp
index 0638ecbd324c9b08eb5c7a29b3bb9d519b4af187..526ab742970555de8e2c173fa0192e0257a8615d 100644
--- a/pybind11/tests/test_methods_and_attributes.cpp
+++ b/pybind11/tests/test_methods_and_attributes.cpp
@@ -11,6 +11,11 @@
 #include "pybind11_tests.h"
 #include "constructor_stats.h"
 
+#if !defined(PYBIND11_OVERLOAD_CAST)
+template <typename... Args>
+using overload_cast_ = pybind11::detail::overload_cast_impl<Args...>;
+#endif
+
 class ExampleMandA
 {
 public:
@@ -491,18 +496,19 @@ TEST_SUBMODULE(methods_and_attributes, m)
       .def("overloaded_const", py::overload_cast<int, int>(&ExampleMandA::overloaded, py::const_))
       .def("overloaded_const", py::overload_cast<float, float>(&ExampleMandA::overloaded, py::const_))
 #else
-      .def("overloaded", static_cast<py::str (ExampleMandA::*)()>(&ExampleMandA::overloaded))
-      .def("overloaded", static_cast<py::str (ExampleMandA::*)(int)>(&ExampleMandA::overloaded))
-      .def("overloaded", static_cast<py::str (ExampleMandA::*)(int, float)>(&ExampleMandA::overloaded))
-      .def("overloaded", static_cast<py::str (ExampleMandA::*)(float, int)>(&ExampleMandA::overloaded))
-      .def("overloaded", static_cast<py::str (ExampleMandA::*)(int, int)>(&ExampleMandA::overloaded))
-      .def("overloaded", static_cast<py::str (ExampleMandA::*)(float, float)>(&ExampleMandA::overloaded))
-      .def("overloaded_float", static_cast<py::str (ExampleMandA::*)(float, float)>(&ExampleMandA::overloaded))
-      .def("overloaded_const", static_cast<py::str (ExampleMandA::*)(int) const>(&ExampleMandA::overloaded))
-      .def("overloaded_const", static_cast<py::str (ExampleMandA::*)(int, float) const>(&ExampleMandA::overloaded))
-      .def("overloaded_const", static_cast<py::str (ExampleMandA::*)(float, int) const>(&ExampleMandA::overloaded))
-      .def("overloaded_const", static_cast<py::str (ExampleMandA::*)(int, int) const>(&ExampleMandA::overloaded))
-      .def("overloaded_const", static_cast<py::str (ExampleMandA::*)(float, float) const>(&ExampleMandA::overloaded))
+        // Use both the traditional static_cast method and the C++11 compatible overload_cast_
+        .def("overloaded", overload_cast_<>()(&ExampleMandA::overloaded))
+        .def("overloaded", overload_cast_<int>()(&ExampleMandA::overloaded))
+        .def("overloaded", overload_cast_<int,   float>()(&ExampleMandA::overloaded))
+        .def("overloaded", static_cast<py::str (ExampleMandA::*)(float,   int)>(&ExampleMandA::overloaded))
+        .def("overloaded", static_cast<py::str (ExampleMandA::*)(int,     int)>(&ExampleMandA::overloaded))
+        .def("overloaded", static_cast<py::str (ExampleMandA::*)(float, float)>(&ExampleMandA::overloaded))
+        .def("overloaded_float", overload_cast_<float, float>()(&ExampleMandA::overloaded))
+        .def("overloaded_const", overload_cast_<int         >()(&ExampleMandA::overloaded, py::const_))
+        .def("overloaded_const", overload_cast_<int,   float>()(&ExampleMandA::overloaded, py::const_))
+        .def("overloaded_const", static_cast<py::str (ExampleMandA::*)(float,   int) const>(&ExampleMandA::overloaded))
+        .def("overloaded_const", static_cast<py::str (ExampleMandA::*)(int,     int) const>(&ExampleMandA::overloaded))
+        .def("overloaded_const", static_cast<py::str (ExampleMandA::*)(float, float) const>(&ExampleMandA::overloaded))
 #endif
       // test_no_mixed_overloads
       // Raise error if trying to mix static/non-static overloads on the same name:
diff --git a/pybind11/tests/test_methods_and_attributes.py b/pybind11/tests/test_methods_and_attributes.py
index 60977b7fc266c2b4affbabba9c2d28211d462d97..2a2ef3defdb65e7a2c11fe22dba3ef4b64685ce6 100644
--- a/pybind11/tests/test_methods_and_attributes.py
+++ b/pybind11/tests/test_methods_and_attributes.py
@@ -100,32 +100,32 @@ def test_properties():
 
     with pytest.raises(AttributeError) as excinfo:
         dummy = instance.def_property_writeonly     # noqa: F841 unused var
-    assert "unreadable attribute" in str(excinfo)
+    assert "unreadable attribute" in str(excinfo.value)
 
     instance.def_property_writeonly = 4
     assert instance.def_property_readonly == 4
 
     with pytest.raises(AttributeError) as excinfo:
         dummy = instance.def_property_impossible     # noqa: F841 unused var
-    assert "unreadable attribute" in str(excinfo)
+    assert "unreadable attribute" in str(excinfo.value)
 
     with pytest.raises(AttributeError) as excinfo:
         instance.def_property_impossible = 5
-    assert "can't set attribute" in str(excinfo)
+    assert "can't set attribute" in str(excinfo.value)
 
 
 def test_static_properties():
     assert m.TestProperties.def_readonly_static == 1
     with pytest.raises(AttributeError) as excinfo:
         m.TestProperties.def_readonly_static = 2
-    assert "can't set attribute" in str(excinfo)
+    assert "can't set attribute" in str(excinfo.value)
 
     m.TestProperties.def_readwrite_static = 2
     assert m.TestProperties.def_readwrite_static == 2
 
     with pytest.raises(AttributeError) as excinfo:
         dummy = m.TestProperties.def_writeonly_static     # noqa: F841 unused var
-    assert "unreadable attribute" in str(excinfo)
+    assert "unreadable attribute" in str(excinfo.value)
 
     m.TestProperties.def_writeonly_static = 3
     assert m.TestProperties.def_readonly_static == 3
@@ -133,14 +133,14 @@ def test_static_properties():
     assert m.TestProperties.def_property_readonly_static == 3
     with pytest.raises(AttributeError) as excinfo:
         m.TestProperties.def_property_readonly_static = 99
-    assert "can't set attribute" in str(excinfo)
+    assert "can't set attribute" in str(excinfo.value)
 
     m.TestProperties.def_property_static = 4
     assert m.TestProperties.def_property_static == 4
 
     with pytest.raises(AttributeError) as excinfo:
         dummy = m.TestProperties.def_property_writeonly_static
-    assert "unreadable attribute" in str(excinfo)
+    assert "unreadable attribute" in str(excinfo.value)
 
     m.TestProperties.def_property_writeonly_static = 5
     assert m.TestProperties.def_property_static == 5
@@ -158,7 +158,7 @@ def test_static_properties():
 
     with pytest.raises(AttributeError) as excinfo:
         dummy = instance.def_property_writeonly_static     # noqa: F841 unused var
-    assert "unreadable attribute" in str(excinfo)
+    assert "unreadable attribute" in str(excinfo.value)
 
     instance.def_property_writeonly_static = 4
     assert instance.def_property_static == 4
diff --git a/pybind11/tests/test_numpy_array.cpp b/pybind11/tests/test_numpy_array.cpp
index a3ec1b9478ef00fd291e4374e24c835f29a64b9f..4351d4aee5158a56beb30d4da200270f36db802f 100644
--- a/pybind11/tests/test_numpy_array.cpp
+++ b/pybind11/tests/test_numpy_array.cpp
@@ -14,6 +14,71 @@
 
 #include <cstdint>
 
+// Size / dtype checks.
+struct DtypeCheck
+{
+  py::dtype numpy{};
+  py::dtype pybind11{};
+};
+
+template <typename T>
+DtypeCheck get_dtype_check(const char* name)
+{
+  py::module np = py::module::import("numpy");
+  DtypeCheck check{};
+  check.numpy = np.attr("dtype")(np.attr(name));
+  check.pybind11 = py::dtype::of<T>();
+  return check;
+}
+
+std::vector<DtypeCheck> get_concrete_dtype_checks()
+{
+  return {// Normalization
+          get_dtype_check<std::int8_t>("int8"),
+          get_dtype_check<std::uint8_t>("uint8"),
+          get_dtype_check<std::int16_t>("int16"),
+          get_dtype_check<std::uint16_t>("uint16"),
+          get_dtype_check<std::int32_t>("int32"),
+          get_dtype_check<std::uint32_t>("uint32"),
+          get_dtype_check<std::int64_t>("int64"),
+          get_dtype_check<std::uint64_t>("uint64")};
+}
+
+struct DtypeSizeCheck
+{
+  std::string name{};
+  int size_cpp{};
+  int size_numpy{};
+  // For debugging.
+  py::dtype dtype{};
+};
+
+template <typename T>
+DtypeSizeCheck get_dtype_size_check()
+{
+  DtypeSizeCheck check{};
+  check.name = py::type_id<T>();
+  check.size_cpp = sizeof(T);
+  check.dtype = py::dtype::of<T>();
+  check.size_numpy = check.dtype.attr("itemsize").template cast<int>();
+  return check;
+}
+
+std::vector<DtypeSizeCheck> get_platform_dtype_size_checks()
+{
+  return {
+      get_dtype_size_check<short>(),
+      get_dtype_size_check<unsigned short>(),
+      get_dtype_size_check<int>(),
+      get_dtype_size_check<unsigned int>(),
+      get_dtype_size_check<long>(),
+      get_dtype_size_check<unsigned long>(),
+      get_dtype_size_check<long long>(),
+      get_dtype_size_check<unsigned long long>(),
+  };
+}
+
+// Arrays.
 using arr = py::array;
 using arr_t = py::array_t<uint16_t, 0>;
 static_assert(std::is_same<arr_t::value_type, uint16_t>::value, "");
@@ -115,6 +180,25 @@ TEST_SUBMODULE(numpy_array, sm)
     return;
   }
 
+  // test_dtypes
+  py::class_<DtypeCheck>(sm, "DtypeCheck")
+      .def_readonly("numpy", &DtypeCheck::numpy)
+      .def_readonly("pybind11", &DtypeCheck::pybind11)
+      .def("__repr__", [](const DtypeCheck& self) {
+        return py::str("<DtypeCheck numpy={} pybind11={}>").format(self.numpy, self.pybind11);
+      });
+  sm.def("get_concrete_dtype_checks", &get_concrete_dtype_checks);
+
+  py::class_<DtypeSizeCheck>(sm, "DtypeSizeCheck")
+      .def_readonly("name", &DtypeSizeCheck::name)
+      .def_readonly("size_cpp", &DtypeSizeCheck::size_cpp)
+      .def_readonly("size_numpy", &DtypeSizeCheck::size_numpy)
+      .def("__repr__", [](const DtypeSizeCheck& self) {
+        return py::str("<DtypeSizeCheck name='{}' size_cpp={} size_numpy={} dtype={}>")
+            .format(self.name, self.size_cpp, self.size_numpy, self.dtype);
+      });
+  sm.def("get_platform_dtype_size_checks", &get_platform_dtype_size_checks);
+
   // test_array_attributes
   sm.def("ndim", [](const arr& a) { return a.ndim(); });
   sm.def("shape", [](const arr& a) { return arr(a.ndim(), a.shape()); });
diff --git a/pybind11/tests/test_numpy_array.py b/pybind11/tests/test_numpy_array.py
index 544ede2f1b619bbcb76e13d9e2d46655214ccf52..b893bfd4ea018b65dae452da59106ad98ee64ad3 100644
--- a/pybind11/tests/test_numpy_array.py
+++ b/pybind11/tests/test_numpy_array.py
@@ -7,6 +7,20 @@ with pytest.suppress(ImportError):
     import numpy as np
 
 
+def test_dtypes():
+    # See issue #1328.
+    # - Platform-dependent sizes.
+    for size_check in m.get_platform_dtype_size_checks():
+        print(size_check)
+        assert size_check.size_cpp == size_check.size_numpy, size_check
+    # - Concrete sizes.
+    for check in m.get_concrete_dtype_checks():
+        print(check)
+        assert check.numpy == check.pybind11, check
+        if check.numpy.num != check.pybind11.num:
+            print("NOTE: typenum mismatch for {}: {} != {}".format(check, check.numpy.num, check.pybind11.num))
+
+
 @pytest.fixture(scope='function')
 def arr():
     return np.array([[1, 2, 3], [4, 5, 6]], '=u2')
@@ -416,3 +430,14 @@ def test_array_create_and_resize(msg):
 def test_index_using_ellipsis():
     a = m.index_using_ellipsis(np.zeros((5, 6, 7)))
     assert a.shape == (6,)
+
+
+@pytest.unsupported_on_pypy
+def test_dtype_refcount_leak():
+    from sys import getrefcount
+    dtype = np.dtype(np.float_)
+    a = np.array([1], dtype=dtype)
+    before = getrefcount(dtype)
+    m.ndim(a)
+    after = getrefcount(dtype)
+    assert after == before
diff --git a/pybind11/tests/test_numpy_dtypes.cpp b/pybind11/tests/test_numpy_dtypes.cpp
index 2c7ed541a3b56bb4527976952c891a054f70db76..b3b0053abc77d04dd67f62b142159082fc4ba35e 100644
--- a/pybind11/tests/test_numpy_dtypes.cpp
+++ b/pybind11/tests/test_numpy_dtypes.cpp
@@ -31,6 +31,14 @@ std::ostream& operator<<(std::ostream& os, const SimpleStruct& v)
   return os << "s:" << v.bool_ << "," << v.uint_ << "," << v.float_ << "," << v.ldbl_;
 }
 
+struct SimpleStructReordered
+{
+  bool bool_;
+  float float_;
+  uint32_t uint_;
+  long double ldbl_;
+};
+
 PYBIND11_PACKED(struct PackedStruct {
   bool bool_;
   uint32_t uint_;
@@ -323,6 +331,7 @@ TEST_SUBMODULE(numpy_dtypes, m)
   py::class_<SimpleStruct>(m, "SimpleStruct");
 
   PYBIND11_NUMPY_DTYPE(SimpleStruct, bool_, uint_, float_, ldbl_);
+  PYBIND11_NUMPY_DTYPE(SimpleStructReordered, bool_, uint_, float_, ldbl_);
   PYBIND11_NUMPY_DTYPE(PackedStruct, bool_, uint_, float_, ldbl_);
   PYBIND11_NUMPY_DTYPE(NestedStruct, a, b);
   PYBIND11_NUMPY_DTYPE(PartialStruct, bool_, uint_, float_, ldbl_);
diff --git a/pybind11/tests/test_operator_overloading.cpp b/pybind11/tests/test_operator_overloading.cpp
index a57e8dd1a84ef1a54189c31f1914fc9ab47eda2b..a839bc3dac428caea327a46b5347f57e58174494 100644
--- a/pybind11/tests/test_operator_overloading.cpp
+++ b/pybind11/tests/test_operator_overloading.cpp
@@ -59,6 +59,10 @@ public:
     return "[" + std::to_string(x) + ", " + std::to_string(y) + "]";
   }
 
+  Vector2 operator-() const
+  {
+    return Vector2(-x, -y);
+  }
   Vector2 operator+(const Vector2& v) const
   {
     return Vector2(x + v.x, y + v.y);
@@ -226,6 +230,7 @@ TEST_SUBMODULE(operators, m)
       .def(float() - py::self)
       .def(float() * py::self)
       .def(float() / py::self)
+      .def(-py::self)
       .def("__str__", &Vector2::toString)
       .def(hash(py::self));
 
diff --git a/pybind11/tests/test_operator_overloading.py b/pybind11/tests/test_operator_overloading.py
index 9726b92dc27450cba0e4a86b0d81b649963ec9a8..b303ca411bb2bc5164a3455b54e048ad08ffcd59 100644
--- a/pybind11/tests/test_operator_overloading.py
+++ b/pybind11/tests/test_operator_overloading.py
@@ -9,6 +9,8 @@ def test_operator_overloading():
     assert str(v1) == "[1.000000, 2.000000]"
     assert str(v2) == "[3.000000, -1.000000]"
 
+    assert str(-v2) == "[-3.000000, 1.000000]"
+
     assert str(v1 + v2) == "[4.000000, 1.000000]"
     assert str(v1 - v2) == "[-2.000000, 3.000000]"
     assert str(v1 - 8) == "[-7.000000, -6.000000]"
@@ -44,10 +46,10 @@ def test_operator_overloading():
     del v2
     assert cstats.alive() == 0
     assert cstats.values() == [
-        '[1.000000, 2.000000]', '[3.000000, -1.000000]', '[4.000000, 1.000000]', '[-2.000000, 3.000000]',
-        '[-7.000000, -6.000000]', '[9.000000, 10.000000]', '[8.000000, 16.000000]', '[0.125000, 0.250000]',
-        '[7.000000, 6.000000]', '[9.000000, 10.000000]', '[8.000000, 16.000000]', '[8.000000, 4.000000]',
-        '[3.000000, -2.000000]', '[3.000000, -0.500000]', '[6.000000, -2.000000]'
+        '[1.000000, 2.000000]', '[3.000000, -1.000000]', '[-3.000000, 1.000000]', '[4.000000, 1.000000]',
+        '[-2.000000, 3.000000]', '[-7.000000, -6.000000]', '[9.000000, 10.000000]', '[8.000000, 16.000000]',
+        '[0.125000, 0.250000]', '[7.000000, 6.000000]', '[9.000000, 10.000000]', '[8.000000, 16.000000]',
+        '[8.000000, 4.000000]', '[3.000000, -2.000000]', '[3.000000, -0.500000]', '[6.000000, -2.000000]'
     ]
     assert cstats.default_constructions == 0
     assert cstats.copy_constructions == 0
diff --git a/pybind11/tests/test_pytypes.cpp b/pybind11/tests/test_pytypes.cpp
index 890f8e74146659c31dedd7e0bee2a6ded30caf61..e71cd432e48295802772ce759599253cc73ba093 100644
--- a/pybind11/tests/test_pytypes.cpp
+++ b/pybind11/tests/test_pytypes.cpp
@@ -18,6 +18,8 @@ TEST_SUBMODULE(pytypes, m)
     list.append("value");
     py::print("Entry at position 0:", list[0]);
     list[0] = py::str("overwritten");
+    list.insert(0, "inserted-0");
+    list.insert(2, "inserted-2");
     return list;
   });
   m.def("print_list", [](py::list list) {
@@ -38,6 +40,8 @@ TEST_SUBMODULE(pytypes, m)
     for (auto item : set)
       py::print("key:", item);
   });
+  m.def("set_contains", [](py::set set, py::object key) { return set.contains(key); });
+  m.def("set_contains", [](py::set set, const char* key) { return set.contains(key); });
 
   // test_dict
   m.def("get_dict", []() { return py::dict("key"_a = "value"); });
@@ -50,6 +54,8 @@ TEST_SUBMODULE(pytypes, m)
     auto d2 = py::dict("z"_a = 3, **d1);
     return d2;
   });
+  m.def("dict_contains", [](py::dict dict, py::object val) { return dict.contains(val); });
+  m.def("dict_contains", [](py::dict dict, const char* val) { return dict.contains(val); });
 
   // test_str
   m.def("str_from_string", []() { return py::str(std::string("baz")); });
diff --git a/pybind11/tests/test_pytypes.py b/pybind11/tests/test_pytypes.py
index ed94167dddce6ddaddc42dcdbf7889f55232d351..537099871bbdbf8dce20f961625635f2b1ba6573 100644
--- a/pybind11/tests/test_pytypes.py
+++ b/pybind11/tests/test_pytypes.py
@@ -9,14 +9,16 @@ from pybind11_tests import debug_enabled
 def test_list(capture, doc):
     with capture:
         lst = m.get_list()
-        assert lst == ["overwritten"]
+        assert lst == ["inserted-0", "overwritten", "inserted-2"]
 
         lst.append("value2")
         m.print_list(lst)
     assert capture.unordered == """
         Entry at position 0: value
-        list item 0: overwritten
-        list item 1: value2
+        list item 0: inserted-0
+        list item 1: overwritten
+        list item 2: inserted-2
+        list item 3: value2
     """
 
     assert doc(m.get_list) == "get_list() -> list"
@@ -37,6 +39,10 @@ def test_set(capture, doc):
         key: key4
     """
 
+    assert not m.set_contains(set([]), 42)
+    assert m.set_contains({42}, 42)
+    assert m.set_contains({"foo"}, "foo")
+
     assert doc(m.get_list) == "get_list() -> list"
     assert doc(m.print_list) == "print_list(arg0: list) -> None"
 
@@ -53,6 +59,10 @@ def test_dict(capture, doc):
         key: key2, value=value2
     """
 
+    assert not m.dict_contains({}, 42)
+    assert m.dict_contains({42: None}, 42)
+    assert m.dict_contains({"foo": None}, "foo")
+
     assert doc(m.get_dict) == "get_dict() -> dict"
     assert doc(m.print_dict) == "print_dict(arg0: dict) -> None"
 
diff --git a/pybind11/tests/test_smart_ptr.py b/pybind11/tests/test_smart_ptr.py
index 346835b9bd2325efe548daae089f820fe317c358..05523d583ad95b980f1210f6d69067da76ed2106 100644
--- a/pybind11/tests/test_smart_ptr.py
+++ b/pybind11/tests/test_smart_ptr.py
@@ -271,7 +271,8 @@ def test_smart_ptr_from_default():
     instance = m.HeldByDefaultHolder()
     with pytest.raises(RuntimeError) as excinfo:
         m.HeldByDefaultHolder.load_shared_ptr(instance)
-    assert "Unable to load a custom holder type from a default-holder instance" in str(excinfo)
+    assert "Unable to load a custom holder type from a " \
+           "default-holder instance" in str(excinfo.value)
 
 
 def test_shared_ptr_gc():
diff --git a/pybind11/tests/test_stl_binders.cpp b/pybind11/tests/test_stl_binders.cpp
index 69620d1bf5a3560a9e0f497bfb6eb81fa3b7082c..47f2735b570255a1ade70cec03ec516811d1ca13 100644
--- a/pybind11/tests/test_stl_binders.cpp
+++ b/pybind11/tests/test_stl_binders.cpp
@@ -65,6 +65,16 @@ Map* times_ten(int n)
   return m;
 }
 
+template <class NestMap>
+NestMap* times_hundred(int n)
+{
+  auto m = new NestMap();
+  for (int i = 1; i <= n; i++)
+    for (int j = 1; j <= n; j++)
+      (*m)[i].emplace(int(j * 10), E_nc(100 * j));
+  return m;
+}
+
 TEST_SUBMODULE(stl_binders, m)
 {
   // test_vector_int
@@ -94,6 +104,24 @@ TEST_SUBMODULE(stl_binders, m)
   m.def("get_mnc", &times_ten<std::map<int, E_nc>>, py::return_value_policy::reference);
   py::bind_map<std::unordered_map<int, E_nc>>(m, "UmapENC");
   m.def("get_umnc", &times_ten<std::unordered_map<int, E_nc>>, py::return_value_policy::reference);
+  // Issue #1885: binding nested std::map<X, Container<E>> with E non-copyable
+  py::bind_map<std::map<int, std::vector<E_nc>>>(m, "MapVecENC");
+  m.def(
+      "get_nvnc",
+      [](int n) {
+        auto m = new std::map<int, std::vector<E_nc>>();
+        for (int i = 1; i <= n; i++)
+          for (int j = 1; j <= n; j++)
+            (*m)[i].emplace_back(j);
+        return m;
+      },
+      py::return_value_policy::reference);
+  py::bind_map<std::map<int, std::map<int, E_nc>>>(m, "MapMapENC");
+  m.def("get_nmnc", &times_hundred<std::map<int, std::map<int, E_nc>>>, py::return_value_policy::reference);
+  py::bind_map<std::unordered_map<int, std::unordered_map<int, E_nc>>>(m, "UmapUmapENC");
+  m.def("get_numnc",
+        &times_hundred<std::unordered_map<int, std::unordered_map<int, E_nc>>>,
+        py::return_value_policy::reference);
 
   // test_vector_buffer
   py::bind_vector<std::vector<unsigned char>>(m, "VectorUChar", py::buffer_protocol());
diff --git a/pybind11/tests/test_stl_binders.py b/pybind11/tests/test_stl_binders.py
index e368e668608df21a332ad423a6f178bbbb4084df..3b0a7a0b516061d3fad4297fb84ae971406b396c 100644
--- a/pybind11/tests/test_stl_binders.py
+++ b/pybind11/tests/test_stl_binders.py
@@ -53,6 +53,20 @@ def test_vector_int():
     v_int2.extend(x for x in range(5))
     assert v_int2 == m.VectorInt([0, 99, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4])
 
+    # test negative indexing
+    assert v_int2[-1] == 4
+
+    # insert with negative index
+    v_int2.insert(-1, 88)
+    assert v_int2 == m.VectorInt([0, 99, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 88, 4])
+
+    # delete negative index
+    del v_int2[-1]
+    assert v_int2 == m.VectorInt([0, 99, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 88])
+
+    v_int2.clear()
+    assert len(v_int2) == 0
+
 
 # related to the PyPy's buffer protocol.
 @pytest.unsupported_on_pypy
@@ -202,6 +216,44 @@ def test_noncopyable_containers():
 
     assert vsum == 150
 
+    # nested std::map<std::vector>
+    nvnc = m.get_nvnc(5)
+    for i in range(1, 6):
+        for j in range(0, 5):
+            assert nvnc[i][j].value == j + 1
+
+    for k, v in nvnc.items():
+        for i, j in enumerate(v, start=1):
+            assert j.value == i
+
+    # nested std::map<std::map>
+    nmnc = m.get_nmnc(5)
+    for i in range(1, 6):
+        for j in range(10, 60, 10):
+            assert nmnc[i][j].value == 10 * j
+
+    vsum = 0
+    for k_o, v_o in nmnc.items():
+        for k_i, v_i in v_o.items():
+            assert v_i.value == 10 * k_i
+            vsum += v_i.value
+
+    assert vsum == 7500
+
+    # nested std::unordered_map<std::unordered_map>
+    numnc = m.get_numnc(5)
+    for i in range(1, 6):
+        for j in range(10, 60, 10):
+            assert numnc[i][j].value == 10 * j
+
+    vsum = 0
+    for k_o, v_o in numnc.items():
+        for k_i, v_i in v_o.items():
+            assert v_i.value == 10 * k_i
+            vsum += v_i.value
+
+    assert vsum == 7500
+
 
 def test_map_delitem():
     mm = m.MapStringDouble()
diff --git a/pybind11/tools/FindPythonLibsNew.cmake b/pybind11/tools/FindPythonLibsNew.cmake
index bb542b7e26c99f604aa7d0e5a3e51aff67b78026..17c999726044bc9598b23ce417025981d9525cda 100644
--- a/pybind11/tools/FindPythonLibsNew.cmake
+++ b/pybind11/tools/FindPythonLibsNew.cmake
@@ -137,19 +137,19 @@ string(REGEX
        REPLACE "\\\\"
                "/"
                PYTHON_PREFIX
-               ${PYTHON_PREFIX})
+               "${PYTHON_PREFIX}")
 string(REGEX
        REPLACE "\\\\"
                "/"
                PYTHON_INCLUDE_DIR
-               ${PYTHON_INCLUDE_DIR})
+               "${PYTHON_INCLUDE_DIR}")
 string(REGEX
        REPLACE "\\\\"
                "/"
                PYTHON_SITE_PACKAGES
-               ${PYTHON_SITE_PACKAGES})
+               "${PYTHON_SITE_PACKAGES}")
 
-if(CMAKE_HOST_WIN32)
+if(CMAKE_HOST_WIN32 AND NOT (MINGW AND DEFINED ENV{MSYSTEM}))
   set(PYTHON_LIBRARY "${PYTHON_PREFIX}/libs/Python${PYTHON_LIBRARY_SUFFIX}.lib")
 
   # when run in a venv, PYTHON_PREFIX points to it. But the libraries remain in the original python installation. They
diff --git a/pybind11/tools/pybind11Tools.cmake b/pybind11/tools/pybind11Tools.cmake
index bfba23bc2917128fb0c8f31ffaf48286574d2d88..4b6571b8989592dbff0fc1e90831749f20ed97ca 100644
--- a/pybind11/tools/pybind11Tools.cmake
+++ b/pybind11/tools/pybind11Tools.cmake
@@ -12,7 +12,7 @@ if(NOT PYBIND11_PYTHON_VERSION)
   set(PYBIND11_PYTHON_VERSION "" CACHE STRING "Python version to use for compiling modules")
 endif()
 
-set(Python_ADDITIONAL_VERSIONS 3.7 3.6 3.5 3.4)
+set(Python_ADDITIONAL_VERSIONS 3.9 3.8 3.7 3.6 3.5 3.4)
 find_package(PythonLibsNew ${PYBIND11_PYTHON_VERSION} REQUIRED)
 
 include(CheckCXXCompilerFlag)
@@ -201,7 +201,7 @@ function(pybind11_add_module target_name)
 
   _pybind11_add_lto_flags(${target_name} ${ARG_THIN_LTO})
 
-  if(NOT MSVC AND NOT ${CMAKE_BUILD_TYPE} MATCHES Debug)
+  if(NOT MSVC AND NOT ${CMAKE_BUILD_TYPE} MATCHES Debug|RelWithDebInfo)
     # Strip unnecessary sections of the binary on Linux/Mac OS
     if(CMAKE_STRIP)
       if(APPLE)
diff --git a/python/dune/xt/functions/function-as-grid-function.hh b/python/dune/xt/functions/function-as-grid-function.hh
index 452234e4c81e0b1a5747f42ab4717efa1356fcf0..62c12a304801c203842e8525c4ce6337d9b006cf 100644
--- a/python/dune/xt/functions/function-as-grid-function.hh
+++ b/python/dune/xt/functions/function-as-grid-function.hh
@@ -27,12 +27,9 @@ namespace bindings {
 
 
 template <class G, size_t d, size_t r, size_t rC>
-typename std::enable_if<
-    Grid::is_grid<G>::value,
-    pybind11::class_<FunctionAsGridFunctionWrapper<typename G::template Codim<0>::Entity, r, rC, double>,
-                     GridFunctionInterface<typename G::template Codim<0>::Entity, r, rC, double>>>::type
-bind_FunctionAsGridFunctionWrapper(pybind11::module& m, const std::string& grid_id)
+auto bind_FunctionAsGridFunctionWrapper(pybind11::module& m, const std::string& grid_id)
 {
+  static_assert(Grid::is_grid<G>::value);
   namespace py = pybind11;
   using namespace pybind11::literals;
 
diff --git a/python/dune/xt/functions/indicator.hh b/python/dune/xt/functions/indicator.hh
index 797c962c7960642fda45b0cd339f4a630bf05846..e48553964c45a7bf4cdb21a744f337f442fe3c55 100644
--- a/python/dune/xt/functions/indicator.hh
+++ b/python/dune/xt/functions/indicator.hh
@@ -30,12 +30,9 @@ namespace Functions {
 
 
 template <class G, size_t d, size_t r, size_t rC>
-typename std::enable_if<
-    Grid::is_grid<G>::value,
-    pybind11::class_<IndicatorGridFunction<typename G::template Codim<0>::Entity, r, rC, double>,
-                     GridFunctionInterface<typename G::template Codim<0>::Entity, r, rC, double>>>::type
-bind_IndicatorGridFunction(pybind11::module& m, const std::string& grid_id)
+auto bind_IndicatorGridFunction(pybind11::module& m, const std::string& grid_id)
 {
+  static_assert(Grid::is_grid<G>::value);
   namespace py = pybind11;
   using namespace pybind11::literals;
 
diff --git a/python/dune/xt/functions/spe10.hh b/python/dune/xt/functions/spe10.hh
index abf948699459b65e403e1c4a01e24d6a681818c2..2096229ff6e5f071e9540bfd8daab9d10b055a2b 100644
--- a/python/dune/xt/functions/spe10.hh
+++ b/python/dune/xt/functions/spe10.hh
@@ -29,11 +29,6 @@ namespace XT {
 namespace Functions {
 
 
-template <class G, size_t d, size_t r, size_t rC>
-typename std::enable_if<Grid::is_grid<G>::value && d != 2, void>::type
-bind_Spe10Model1Function(pybind11::module& /*m*/, const std::string& /*grid_id*/)
-{}
-
 /**
  * \note We would like to drop the d template parameter and use either of
 \code
@@ -44,11 +39,7 @@ static const constexpr size_t d = G::dimension;
  *       everywhere: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59937
  */
 template <class G, size_t d, size_t r, size_t rC>
-typename std::enable_if<
-    Grid::is_grid<G>::value && d == 2,
-    pybind11::class_<Spe10::Model1Function<typename G::template Codim<0>::Entity, r, rC, double>,
-                     GridFunctionInterface<typename G::template Codim<0>::Entity, r, rC, double>>>::type
-bind_Spe10Model1Function(pybind11::module& m, const std::string& grid_id)
+auto bind_Spe10Model1Function_2D(pybind11::module& m, const std::string& grid_id)
 {
   namespace py = pybind11;
   using namespace pybind11::literals;
@@ -117,6 +108,20 @@ bind_Spe10Model1Function(pybind11::module& m, const std::string& grid_id)
   return c;
-} // ... bind_Spe10Model1Function(...)
+} // ... bind_Spe10Model1Function_2D(...)
 
+/**
+ * \brief Dimension-independent entry point: forwards to bind_Spe10Model1Function_2D if d == 2 and does nothing
+ *        otherwise (the class bound by bind_Spe10Model1Function_2D is not returned).
+ *
+ * \note G is required to be a grid at compile time, independent of d.
+ */
+template <class G, size_t d, size_t r, size_t rC>
+void bind_Spe10Model1Function([[maybe_unused]] pybind11::module& m, [[maybe_unused]] const std::string& grid_id)
+{
+  static_assert(Grid::is_grid<G>::value, "G has to be a grid!");
+  if constexpr (d == 2) {
+    bind_Spe10Model1Function_2D<G, d, r, rC>(m, grid_id);
+  }
+}
 
 } // namespace Functions
 } // namespace XT
diff --git a/python/dune/xt/grid/walker/apply-on.bindings.hh b/python/dune/xt/grid/walker/apply-on.bindings.hh
index 1b4f726cb1c86cd4af4914534d684479fdc3eb6f..c067b7a5d146ffd8253e6160ac1d71114cd0c70c 100644
--- a/python/dune/xt/grid/walker/apply-on.bindings.hh
+++ b/python/dune/xt/grid/walker/apply-on.bindings.hh
@@ -51,27 +51,11 @@ private:
            + XT::Grid::bindings::grid_name<G>::value();
   }
 
-  template <bool with_bi = ctor_expects_boundary_info, bool anything = true>
-  struct addbind // with_bi = false
+  static void addbind(pybind11::module& m, bound_type& c, const std::string& class_name, const std::string& layer_name)
   {
-    void operator()(pybind11::module& m, bound_type& c, const std::string& class_name, const std::string& layer_name)
-    {
-      c.def(pybind11::init<>());
-
-
-      m.def(makename(class_name, layer_name).c_str(), []() { return new type(); });
-    }
-  };
-
-  template <bool anything>
-  struct addbind<true, anything>
-  {
-    void operator()(pybind11::module& m, bound_type& c, const std::string& class_name, const std::string& layer_name)
-    {
+    if constexpr (ctor_expects_boundary_info) {
       using namespace pybind11::literals;
-
       c.def(pybind11::init<const BoundaryInfoType&, XT::Grid::BoundaryType*&&>());
-
       m.def(
           makename(class_name, layer_name).c_str(),
           [](const BoundaryInfoType& boundary_info, XT::Grid::BoundaryType*&& boundary_type) {
@@ -79,8 +63,11 @@ private:
           },
           "boundary_info"_a,
           "boundary_type"_a);
+    } else {
+      c.def(pybind11::init<>());
+      m.def(makename(class_name, layer_name).c_str(), []() { return new type(); });
     }
-  };
+  }
 
 public:
   static bound_type bind(pybind11::module& m, const std::string& class_name, const std::string& layer_name)
@@ -99,7 +86,7 @@ public:
     // bind class
     const auto ClassName = Common::to_camel_case("apply_on_" + class_name + "_" + grid_name + "_" + layer_name);
     bound_type c(m, ClassName.c_str(), ClassName.c_str());
-    addbind<>()(m, c, class_name, layer_name);
+    addbind(m, c, class_name, layer_name);
 
     return c;
   } // ... bind(...)
diff --git a/python/dune/xt/la/container/container-interface.hh b/python/dune/xt/la/container/container-interface.hh
index 101923a7f7502bdf3c7b1c877f939f8ec80654bc..939508999be818ebe0891709a01ef38acc978bbb 100644
--- a/python/dune/xt/la/container/container-interface.hh
+++ b/python/dune/xt/la/container/container-interface.hh
@@ -45,8 +45,9 @@ pybind11::enum_<Backends> bind_Backends(pybind11::module& m)
 
 
 template <class C>
-typename std::enable_if<is_container<C>::value, void>::type addbind_ContainerInterface(pybind11::class_<C>& c)
+void addbind_ContainerInterface(pybind11::class_<C>& c)
 {
+  static_assert(is_container<C>::value);
   namespace py = pybind11;
   using namespace pybind11::literals;
 
@@ -76,70 +77,50 @@ typename std::enable_if<is_container<C>::value, void>::type addbind_ContainerInt
 
 } // ... addbind_ContainerInterface(...)
 
-
-template <class C>
-typename std::enable_if<provides_backend<C>::value, void>::type addbind_ProvidesBackend(pybind11::class_<C>& c)
-{
-  namespace py = pybind11;
-
-  c.def_property_readonly_static("backend_type", [](py::object /*self*/) { return C::backend_type; });
-}
-
-template <class C>
-typename std::enable_if<!provides_backend<C>::value, void>::type addbind_ProvidesBackend(pybind11::class_<C>& /*c*/)
-{}
-
-
-/**
- * \brief Allows the resulting container to be convertible into a NumPy array as in `np.array(c, copy = False)`.
- */
-template <class C>
-typename std::enable_if<provides_data_access<C>::value && is_vector<C>::value, pybind11::class_<C>>::type
-bind_ProvidesDataAccess(pybind11::module& m, const std::string& class_id, const std::string& help_id)
-{
-  namespace py = pybind11;
-  typedef typename C::DataType D;
-
-  py::class_<C> c(m, class_id.c_str(), help_id.c_str(), py::buffer_protocol());
-
-  c.def_buffer([](C& vec) -> py::buffer_info {
-    return py::buffer_info(
-        vec.data(), sizeof(D), py::format_descriptor<D>::format(), 1, {vec.data_size()}, {sizeof(D)});
-  });
-
-  return c;
-}
-
-template <class C>
-typename std::enable_if<provides_data_access<C>::value && is_matrix<C>::value, pybind11::class_<C>>::type
-bind_ProvidesDataAccess(pybind11::module& m, const std::string& class_id, const std::string& help_id)
-{
-  namespace py = pybind11;
-  typedef typename C::DataType D;
-
-  py::class_<C> c(m, class_id.c_str(), help_id.c_str(), py::buffer_protocol());
-
-  c.def_buffer([](C& mat) -> py::buffer_info {
-    return py::buffer_info(mat.data(), /* Pointer to buffer */
-                           sizeof(D), /* Size of one scalar */
-                           py::format_descriptor<D>::format(), /* Python struct-style format descriptor */
-                           2, /* Number of dimensions */
-                           {mat.rows(), mat.cols()}, /* Buffer dimensions */
-                           {sizeof(D) * mat.cols(), /* Strides (in bytes) for each index */
-                            sizeof(D)});
-  });
-  return c;
-}
-
+/**
+ * \brief Creates the pybind11 class for the container C, with buffer protocol support if C provides data access.
+ *
+ * If C provides data access, a buffer is defined (one-dimensional of length data_size() for vectors,
+ * two-dimensional of shape rows() x cols() with row-major strides for matrices), which allows the resulting
+ * container to be convertible into a NumPy array as in `np.array(c, copy = False)`. Otherwise, a class without
+ * buffer support is created.
+ *
+ * \param m        Module to create the class in.
+ * \param class_id Passed to pybind11::class_ as the name of the class.
+ * \param help_id  Passed to pybind11::class_ as additional (docstring) argument.
+ * \return The created pybind11::class_<C>.
+ */
 template <class C>
-typename std::enable_if<!provides_data_access<C>::value, pybind11::class_<C>>::type
-bind_ProvidesDataAccess(pybind11::module& m, const std::string& class_id, const std::string& help_id)
+auto bind_ProvidesDataAccess(pybind11::module& m, const std::string& class_id, const std::string& help_id)
 {
   namespace py = pybind11;
-  return py::class_<C>(m, class_id.c_str(), help_id.c_str());
+  if constexpr (!provides_data_access<C>::value) {
+    return py::class_<C>(m, class_id.c_str(), help_id.c_str());
+  } else {
+    // Reject anything else at compile time, buffer_protocol() without def_buffer() yields no usable buffer.
+    static_assert(is_vector<C>::value || is_matrix<C>::value, "C has to be a vector or a matrix!");
+    using D = typename C::DataType;
+    py::class_<C> c(m, class_id.c_str(), help_id.c_str(), py::buffer_protocol());
+    if constexpr (is_vector<C>::value) {
+      c.def_buffer([](C& vec) -> py::buffer_info {
+        return py::buffer_info(
+            vec.data(), sizeof(D), py::format_descriptor<D>::format(), 1, {vec.data_size()}, {sizeof(D)});
+      });
+    } else if constexpr (is_matrix<C>::value) {
+      c.def_buffer([](C& mat) -> py::buffer_info {
+        return py::buffer_info(mat.data(), /* Pointer to buffer */
+                               sizeof(D), /* Size of one scalar */
+                               py::format_descriptor<D>::format(), /* Python struct-style format descriptor */
+                               2, /* Number of dimensions */
+                               {mat.rows(), mat.cols()}, /* Buffer dimensions */
+                               {sizeof(D) * mat.cols(), /* Strides (in bytes) for each index */
+                                sizeof(D)});
+      });
+    }
+    return c;
+  }
 }
 
-
 } // namespace LA
 } // namespace XT
 } // namespace Dune
diff --git a/python/dune/xt/la/container/matrix-interface.hh b/python/dune/xt/la/container/matrix-interface.hh
index 2d46027c381e5c1d607160d4a536755a8b537ae9..a4350f5b4f84b1f916d628f73cd8c28acad55393 100644
--- a/python/dune/xt/la/container/matrix-interface.hh
+++ b/python/dune/xt/la/container/matrix-interface.hh
@@ -85,8 +85,9 @@ void print_row_sparsely(const M& self, const size_t row, std::stringstream& ss)
 
 
 template <class C, bool sparse>
-typename std::enable_if<is_matrix<C>::value, pybind11::class_<C>>::type bind_Matrix(pybind11::module& m)
+auto bind_Matrix(pybind11::module& m)
 {
+  static_assert(is_matrix<C>::value);
   namespace py = pybind11;
   using namespace pybind11::literals;
 
diff --git a/python/dune/xt/la/container/vector-interface.hh b/python/dune/xt/la/container/vector-interface.hh
index 144544439b69e61fc079d4e519c5d5ec0fb5a7c2..31515dc6b52f59004d4dee4c076be52097e30515 100644
--- a/python/dune/xt/la/container/vector-interface.hh
+++ b/python/dune/xt/la/container/vector-interface.hh
@@ -34,8 +34,9 @@ namespace LA {
 
 
 template <class C>
-typename std::enable_if<is_vector<C>::value, pybind11::class_<C>>::type bind_Vector(pybind11::module& m)
+auto bind_Vector(pybind11::module& m)
 {
+  static_assert(is_vector<C>::value);
   namespace py = pybind11;
   using namespace pybind11::literals;
 
diff --git a/python/dune/xt/la/solver.hh b/python/dune/xt/la/solver.hh
index dd46e1a4d39a346f0b59519a5371e2c45ac10699..1cdbf7a340da8c1aee4afa016221a9bc975caef9 100644
--- a/python/dune/xt/la/solver.hh
+++ b/python/dune/xt/la/solver.hh
@@ -28,8 +28,9 @@ namespace LA {
 
 
 template <class M, class V = typename Container<typename M::ScalarType, M::vector_type>::VectorType>
-typename std::enable_if<is_matrix<M>::value, pybind11::class_<Solver<M>>>::type bind_Solver(pybind11::module& m)
+auto bind_Solver(pybind11::module& m)
 {
+  static_assert(is_matrix<M>::value);
   typedef Solver<M> C;
 
   namespace py = pybind11;