From f46998907aa5948b334da7e5f136374f3f47899a Mon Sep 17 00:00:00 2001
From: Felix Schindler <felix.schindler@wwu.de>
Date: Thu, 12 Jan 2017 16:02:18 +0100
Subject: [PATCH] [pybind11] add state of
 https://github.com/pybind/pybind11.git v2.0.1

---
 pybind11/CMakeLists.txt                       | 169 ++---
 pybind11/MANIFEST.in                          |   1 +
 pybind11/README.md                            |  31 +-
 pybind11/docs/advanced/cast/chrono.rst        |   2 +-
 pybind11/docs/advanced/cast/custom.rst        |   6 +
 pybind11/docs/advanced/cast/index.rst         | 151 +----
 pybind11/docs/advanced/cast/overview.rst      | 146 +++++
 pybind11/docs/advanced/classes.rst            |  21 +-
 pybind11/docs/advanced/functions.rst          | 118 +++-
 pybind11/docs/advanced/misc.rst               |  42 ++
 pybind11/docs/advanced/pycpp/numpy.rst        |  13 +-
 pybind11/docs/advanced/smart_ptrs.rst         |  17 +-
 pybind11/docs/basics.rst                      |  14 +-
 pybind11/docs/changelog.rst                   | 354 ++++++++--
 pybind11/docs/classes.rst                     |  45 +-
 pybind11/docs/compiling.rst                   | 146 ++++-
 pybind11/docs/conf.py                         |   6 +-
 pybind11/docs/faq.rst                         |  11 +
 pybind11/docs/intro.rst                       |  18 +-
 pybind11/docs/release.rst                     |   6 +-
 pybind11/include/pybind11/attr.h              |  58 +-
 pybind11/include/pybind11/cast.h              | 538 ++++++++++------
 pybind11/include/pybind11/common.h            | 191 ++++--
 pybind11/include/pybind11/descr.h             |  30 +-
 pybind11/include/pybind11/eigen.h             |  36 +-
 pybind11/include/pybind11/eval.h              |  27 +-
 pybind11/include/pybind11/functional.h        |  50 +-
 pybind11/include/pybind11/numpy.h             | 464 +++++++++-----
 pybind11/include/pybind11/options.h           |  65 ++
 pybind11/include/pybind11/pybind11.h          | 603 ++++++++++++------
 pybind11/include/pybind11/pytypes.h           | 323 ++++++----
 pybind11/include/pybind11/stl.h               | 141 +++-
 pybind11/include/pybind11/stl_bind.h          | 282 ++++----
 pybind11/pybind11/__init__.py                 |   2 +-
 pybind11/pybind11/_version.py                 |   2 +-
 pybind11/setup.cfg                            |   8 +
 pybind11/setup.py                             |  34 +-
 pybind11/tests/CMakeLists.txt                 |  75 ++-
 pybind11/tests/conftest.py                    |  19 +-
 pybind11/tests/constructor_stats.h            |  37 +-
 pybind11/tests/pybind11_tests.cpp             |   2 +-
 pybind11/tests/test_alias_initialization.py   |  34 +-
 pybind11/tests/test_buffers.cpp               |   2 +-
 pybind11/tests/test_buffers.py                |  53 +-
 pybind11/tests/test_chrono.py                 |   2 +-
 pybind11/tests/test_class_args.py             |   5 +-
 .../installed_function/CMakeLists.txt         |  12 +
 .../installed_target/CMakeLists.txt           |  18 +
 pybind11/tests/test_cmake_build/main.cpp      |  10 +
 .../subdirectory_function/CMakeLists.txt      |   8 +
 .../subdirectory_target/CMakeLists.txt        |  15 +
 pybind11/tests/test_cmake_build/test.py       |   5 +
 .../tests/test_constants_and_functions.cpp    |  49 ++
 .../tests/test_constants_and_functions.py     |  22 +
 pybind11/tests/test_copy_move_policies.cpp    |   4 +-
 pybind11/tests/test_copy_move_policies.py     |   2 -
 pybind11/tests/test_docstring_options.cpp     |  53 ++
 pybind11/tests/test_docstring_options.py      |  32 +
 pybind11/tests/test_eigen.cpp                 |   2 +-
 pybind11/tests/test_eigen.py                  |  20 +-
 pybind11/tests/test_enum.cpp                  |  19 +-
 pybind11/tests/test_enum.py                   |  45 ++
 pybind11/tests/test_eval_call.py              |   2 +-
 pybind11/tests/test_exceptions.cpp            |   2 +-
 pybind11/tests/test_exceptions.py             |   2 +-
 pybind11/tests/test_inheritance.cpp           |  14 +
 pybind11/tests/test_inheritance.py            |   8 +
 pybind11/tests/test_issues.cpp                | 160 ++++-
 pybind11/tests/test_issues.py                 | 107 +++-
 pybind11/tests/test_keep_alive.py             |  27 +-
 .../tests/test_methods_and_attributes.cpp     |  67 +-
 pybind11/tests/test_methods_and_attributes.py |  55 ++
 pybind11/tests/test_multiple_inheritance.cpp  |   1 -
 pybind11/tests/test_multiple_inheritance.py   |  11 +-
 pybind11/tests/test_numpy_array.cpp           |  46 +-
 pybind11/tests/test_numpy_array.py            | 132 ++--
 pybind11/tests/test_numpy_dtypes.cpp          |  52 +-
 pybind11/tests/test_numpy_dtypes.py           |  89 ++-
 pybind11/tests/test_numpy_vectorize.py        |   4 +-
 pybind11/tests/test_opaque_types.py           |   2 +-
 pybind11/tests/test_operator_overloading.py   |   1 -
 pybind11/tests/test_pickling.cpp              |   4 +-
 pybind11/tests/test_pickling.py               |   3 +
 pybind11/tests/test_python_types.cpp          | 163 ++++-
 pybind11/tests/test_python_types.py           | 123 +++-
 .../tests/test_sequences_and_iterators.cpp    |   3 +-
 .../tests/test_sequences_and_iterators.py     |   2 +-
 pybind11/tests/test_smart_ptr.cpp             |  75 ++-
 pybind11/tests/test_smart_ptr.py              |  82 ++-
 pybind11/tests/test_stl_binders.cpp           |  53 ++
 pybind11/tests/test_stl_binders.py            |  77 ++-
 pybind11/tests/test_virtual_functions.py      |  70 +-
 pybind11/tools/FindPythonLibsNew.cmake        |  24 +-
 pybind11/tools/check-style.sh                 |  35 +-
 pybind11/tools/libsize.py                     |  38 ++
 pybind11/tools/pybind11Config.cmake.in        |  92 +++
 pybind11/tools/pybind11Tools.cmake            | 163 +++++
 97 files changed, 4888 insertions(+), 1617 deletions(-)
 create mode 100644 pybind11/docs/advanced/cast/overview.rst
 create mode 100644 pybind11/include/pybind11/options.h
 create mode 100644 pybind11/tests/test_cmake_build/installed_function/CMakeLists.txt
 create mode 100644 pybind11/tests/test_cmake_build/installed_target/CMakeLists.txt
 create mode 100644 pybind11/tests/test_cmake_build/main.cpp
 create mode 100644 pybind11/tests/test_cmake_build/subdirectory_function/CMakeLists.txt
 create mode 100644 pybind11/tests/test_cmake_build/subdirectory_target/CMakeLists.txt
 create mode 100644 pybind11/tests/test_cmake_build/test.py
 create mode 100644 pybind11/tests/test_docstring_options.cpp
 create mode 100644 pybind11/tests/test_docstring_options.py
 create mode 100644 pybind11/tools/libsize.py
 create mode 100644 pybind11/tools/pybind11Config.cmake.in
 create mode 100644 pybind11/tools/pybind11Tools.cmake

diff --git a/pybind11/CMakeLists.txt b/pybind11/CMakeLists.txt
index 48d3cc7e8..341f845e4 100644
--- a/pybind11/CMakeLists.txt
+++ b/pybind11/CMakeLists.txt
@@ -7,6 +7,11 @@
 
 cmake_minimum_required(VERSION 2.8.12)
 
+if (POLICY CMP0048)
+  # cmake warns if loaded from a min-3.0-required parent dir, so silence the warning:
+  cmake_policy(SET CMP0048 NEW)
+endif()
+
 project(pybind11)
 
 # Check if pybind11 is being used directly or via add_subdirectory
@@ -19,30 +24,9 @@ option(PYBIND11_INSTALL "Install pybind11 header files?" ${PYBIND11_MASTER_PROJE
 option(PYBIND11_TEST    "Build pybind11 test suite?"     ${PYBIND11_MASTER_PROJECT})
 option(PYBIND11_WERROR  "Report all warnings as errors"  OFF)
 
-# Add a CMake parameter for choosing a desired Python version
-set(PYBIND11_PYTHON_VERSION "" CACHE STRING "Python version to use for compiling modules")
-
 list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/tools")
-set(Python_ADDITIONAL_VERSIONS 3.4 3.5 3.6 3.7)
-find_package(PythonLibsNew ${PYBIND11_PYTHON_VERSION} REQUIRED)
-
-include(CheckCXXCompilerFlag)
 
-if(NOT MSVC AND NOT PYBIND11_CPP_STANDARD)
-  check_cxx_compiler_flag("-std=c++14" HAS_CPP14_FLAG)
-  check_cxx_compiler_flag("-std=c++11" HAS_CPP11_FLAG)
-
-  if (HAS_CPP14_FLAG)
-    set(PYBIND11_CPP_STANDARD -std=c++14)
-  elseif (HAS_CPP11_FLAG)
-    set(PYBIND11_CPP_STANDARD -std=c++11)
-  else()
-    message(FATAL_ERROR "Unsupported compiler -- pybind11 requires C++11 support!")
-  endif()
-
-  set(PYBIND11_CPP_STANDARD ${PYBIND11_CPP_STANDARD} CACHE STRING
-      "C++ standard flag, e.g. -std=c++11 or -std=c++14. Defaults to latest available." FORCE)
-endif()
+include(pybind11Tools)
 
 # Cache variables so pybind11_add_module can be used in parent projects
 set(PYBIND11_INCLUDE_DIR "${CMAKE_CURRENT_LIST_DIR}/include" CACHE INTERNAL "")
@@ -51,91 +35,6 @@ set(PYTHON_LIBRARIES ${PYTHON_LIBRARIES} CACHE INTERNAL "")
 set(PYTHON_MODULE_PREFIX ${PYTHON_MODULE_PREFIX} CACHE INTERNAL "")
 set(PYTHON_MODULE_EXTENSION ${PYTHON_MODULE_EXTENSION} CACHE INTERNAL "")
 
-# Build a Python extension module:
-# pybind11_add_module(<name> source1 [source2 ...])
-#
-function(pybind11_add_module target_name)
-  add_library(${target_name} MODULE ${ARGN})
-  target_include_directories(${target_name}
-    PRIVATE ${PYBIND11_INCLUDE_DIR}
-    PRIVATE ${PYTHON_INCLUDE_DIRS})
-
-  # The prefix and extension are provided by FindPythonLibsNew.cmake
-  set_target_properties(${target_name} PROPERTIES PREFIX "${PYTHON_MODULE_PREFIX}")
-  set_target_properties(${target_name} PROPERTIES SUFFIX "${PYTHON_MODULE_EXTENSION}")
-
-  if(WIN32 OR CYGWIN)
-    # Link against the Python shared library on Windows
-    target_link_libraries(${target_name} PRIVATE ${PYTHON_LIBRARIES})
-  elseif(APPLE)
-    # It's quite common to have multiple copies of the same Python version
-    # installed on one's system. E.g.: one copy from the OS and another copy
-    # that's statically linked into an application like Blender or Maya.
-    # If we link our plugin library against the OS Python here and import it
-    # into Blender or Maya later on, this will cause segfaults when multiple
-    # conflicting Python instances are active at the same time (even when they
-    # are of the same version).
-
-    # Windows is not affected by this issue since it handles DLL imports
-    # differently. The solution for Linux and Mac OS is simple: we just don't
-    # link against the Python library. The resulting shared library will have
-    # missing symbols, but that's perfectly fine -- they will be resolved at
-    # import time.
-
-    target_link_libraries(${target_name} PRIVATE "-undefined dynamic_lookup")
-  endif()
-
-  if(NOT MSVC)
-    # Make sure C++11/14 are enabled
-    target_compile_options(${target_name} PUBLIC ${PYBIND11_CPP_STANDARD})
-
-    # Enable link time optimization and set the default symbol
-    # visibility to hidden (very important to obtain small binaries)
-    string(TOUPPER "${CMAKE_BUILD_TYPE}" U_CMAKE_BUILD_TYPE)
-    if (NOT ${U_CMAKE_BUILD_TYPE} MATCHES DEBUG)
-      # Check for Link Time Optimization support (GCC/Clang)
-      check_cxx_compiler_flag("-flto" HAS_LTO_FLAG)
-      if(HAS_LTO_FLAG AND NOT CYGWIN)
-        target_compile_options(${target_name} PRIVATE -flto)
-      endif()
-
-      # Intel equivalent to LTO is called IPO
-      if(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
-        check_cxx_compiler_flag("-ipo" HAS_IPO_FLAG)
-        if(HAS_IPO_FLAG)
-          target_compile_options(${target_name} PRIVATE -ipo)
-        endif()
-      endif()
-
-      # Default symbol visibility
-      target_compile_options(${target_name} PRIVATE "-fvisibility=hidden")
-
-      # Strip unnecessary sections of the binary on Linux/Mac OS
-      if(CMAKE_STRIP)
-        if(APPLE)
-          add_custom_command(TARGET ${target_name} POST_BUILD
-                             COMMAND ${CMAKE_STRIP} -u -r $<TARGET_FILE:${target_name}>)
-        else()
-          add_custom_command(TARGET ${target_name} POST_BUILD
-                             COMMAND ${CMAKE_STRIP} $<TARGET_FILE:${target_name}>)
-        endif()
-      endif()
-    endif()
-  elseif(MSVC)
-    # /MP enables multithreaded builds (relevant when there are many files), /bigobj is
-    # needed for bigger binding projects due to the limit to 64k addressable sections
-    target_compile_options(${target_name} PRIVATE /MP /bigobj)
-
-    # Enforce link time code generation on MSVC, except in debug mode
-    target_compile_options(${target_name} PRIVATE $<$<NOT:$<CONFIG:Debug>>:/GL>)
-
-    # Fancy generator expressions don't work with linker flags, for reasons unknown
-    set_property(TARGET ${target_name} APPEND_STRING PROPERTY LINK_FLAGS_RELEASE /LTCG)
-    set_property(TARGET ${target_name} APPEND_STRING PROPERTY LINK_FLAGS_MINSIZEREL /LTCG)
-    set_property(TARGET ${target_name} APPEND_STRING PROPERTY LINK_FLAGS_RELWITHDEBINFO /LTCG)
-  endif()
-endfunction()
-
 # Compile with compiler warnings turned on
 function(pybind11_enable_warnings target_name)
   if(MSVC)
@@ -160,6 +59,7 @@ set(PYBIND11_HEADERS
   include/pybind11/common.h
   include/pybind11/complex.h
   include/pybind11/descr.h
+  include/pybind11/options.h
   include/pybind11/eigen.h
   include/pybind11/eval.h
   include/pybind11/functional.h
@@ -178,6 +78,59 @@ if (PYBIND11_TEST)
   add_subdirectory(tests)
 endif()
 
+include(GNUInstallDirs)
+include(CMakePackageConfigHelpers)
+
+# extract project version from source
+file(STRINGS "${PYBIND11_INCLUDE_DIR}/pybind11/common.h" pybind11_version_defines
+     REGEX "#define PYBIND11_VERSION_(MAJOR|MINOR|PATCH) ")
+foreach(ver ${pybind11_version_defines})
+  if (ver MATCHES "#define PYBIND11_VERSION_(MAJOR|MINOR|PATCH) +([^ ]+)$")
+    set(PYBIND11_VERSION_${CMAKE_MATCH_1} "${CMAKE_MATCH_2}" CACHE INTERNAL "")
+  endif()
+endforeach()
+set(${PROJECT_NAME}_VERSION ${PYBIND11_VERSION_MAJOR}.${PYBIND11_VERSION_MINOR}.${PYBIND11_VERSION_PATCH})
+message(STATUS "pybind11 v${${PROJECT_NAME}_VERSION}")
+
+if(NOT (CMAKE_VERSION VERSION_LESS 3.0))  # CMake >= 3.0
+  # Build an interface library target:
+  add_library(module INTERFACE)
+  target_include_directories(module INTERFACE $<BUILD_INTERFACE:${PYBIND11_INCLUDE_DIR}>
+                                              $<BUILD_INTERFACE:${PYTHON_INCLUDE_DIRS}>
+                                              $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>)
+  if(WIN32 OR CYGWIN)
+    target_link_libraries(module INTERFACE $<BUILD_INTERFACE:${PYTHON_LIBRARIES}>)
+  elseif(APPLE)
+    target_link_libraries(module INTERFACE "-undefined dynamic_lookup")
+  endif()
+  target_compile_options(module INTERFACE $<BUILD_INTERFACE:${PYBIND11_CPP_STANDARD}>)
+
+  add_library(pybind11::module ALIAS module)  # to match exported target
+endif()
+
 if (PYBIND11_INSTALL)
-  install(FILES ${PYBIND11_HEADERS} DESTINATION include/pybind11)
+  install(FILES ${PYBIND11_HEADERS}
+          DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/pybind11)
+  # GNUInstallDirs "DATADIR" wrong here; CMake search path wants "share".
+  set(PYBIND11_CMAKECONFIG_INSTALL_DIR "share/cmake/${PROJECT_NAME}" CACHE STRING "install path for pybind11Config.cmake")
+
+  configure_package_config_file(tools/${PROJECT_NAME}Config.cmake.in
+                                "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake"
+                                INSTALL_DESTINATION ${PYBIND11_CMAKECONFIG_INSTALL_DIR})
+  write_basic_package_version_file(${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake
+                                   VERSION ${${PROJECT_NAME}_VERSION}
+                                   COMPATIBILITY AnyNewerVersion)
+  install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake
+                ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake
+                tools/FindPythonLibsNew.cmake
+                tools/pybind11Tools.cmake
+          DESTINATION ${PYBIND11_CMAKECONFIG_INSTALL_DIR})
+
+  if(NOT (CMAKE_VERSION VERSION_LESS 3.0))
+    install(TARGETS module
+            EXPORT "${PROJECT_NAME}Targets")
+    install(EXPORT "${PROJECT_NAME}Targets"
+            NAMESPACE "${PROJECT_NAME}::"
+            DESTINATION ${PYBIND11_CMAKECONFIG_INSTALL_DIR})
+  endif()
 endif()
diff --git a/pybind11/MANIFEST.in b/pybind11/MANIFEST.in
index 33c22c84c..aa51d0110 100644
--- a/pybind11/MANIFEST.in
+++ b/pybind11/MANIFEST.in
@@ -1 +1,2 @@
 include include/pybind11/*.h
+include LICENSE README.md CONTRIBUTING.md
diff --git a/pybind11/README.md b/pybind11/README.md
index 9d4827a3e..60ac871d0 100644
--- a/pybind11/README.md
+++ b/pybind11/README.md
@@ -2,7 +2,9 @@
 
 # pybind11 — Seamless operability between C++11 and Python
 
-[![Documentation Status](https://readthedocs.org/projects/pybind11/badge/?version=latest)](http://pybind11.readthedocs.org/en/latest/?badge=latest)
+[![Documentation Status](https://readthedocs.org/projects/pybind11/badge/?version=master)](http://pybind11.readthedocs.org/en/master/?badge=master)
+[![Documentation Status](https://readthedocs.org/projects/pybind11/badge/?version=stable)](http://pybind11.readthedocs.org/en/stable/?badge=stable)
+[![Gitter chat](https://img.shields.io/gitter/room/gitterHQ/gitter.svg)](https://gitter.im/pybind/Lobby)
 [![Build Status](https://travis-ci.org/pybind/pybind11.svg?branch=master)](https://travis-ci.org/pybind/pybind11)
 [![Build status](https://ci.appveyor.com/api/projects/status/riaj54pn4h08xy40?svg=true)](https://ci.appveyor.com/project/wjakob/pybind11)
 
@@ -23,17 +25,17 @@ become an excessively large and unnecessary dependency.
 
 Think of this library as a tiny self-contained version of Boost.Python with
 everything stripped away that isn't relevant for binding generation. Without
-comments, the core header files only require ~2.5K lines of code and depend on
-Python (2.7 or 3.x) and the C++ standard library. This compact implementation
-was possible thanks to some of the new C++11 language features (specifically:
-tuples, lambda functions and variadic templates). Since its creation, this
-library has grown beyond Boost.Python in many ways, leading to dramatically
-simpler binding code in many common situations.
+comments, the core header files only require ~4K lines of code and depend on
+Python (2.7 or 3.x, or PyPy2.7 >= 5.7) and the C++ standard library. This
+compact implementation was possible thanks to some of the new C++11 language
+features (specifically: tuples, lambda functions and variadic templates). Since
+its creation, this library has grown beyond Boost.Python in many ways, leading
+to dramatically simpler binding code in many common situations.
 
 Tutorial and reference documentation is provided at
-[http://pybind11.readthedocs.org/en/latest](http://pybind11.readthedocs.org/en/latest).
+[http://pybind11.readthedocs.org/en/master](http://pybind11.readthedocs.org/en/master).
 A PDF version of the manual is available
-[here](https://media.readthedocs.org/pdf/pybind11/latest/pybind11.pdf).
+[here](https://media.readthedocs.org/pdf/pybind11/master/pybind11.pdf).
 
 ## Core features
 pybind11 can map the following core C++ features to Python
@@ -57,12 +59,15 @@ pybind11 can map the following core C++ features to Python
 ## Goodies
 In addition to the core functionality, pybind11 provides some extra goodies:
 
-- pybind11 uses C++11 move constructors and move assignment operators whenever
-  possible to efficiently transfer custom data types.
+- Python 2.7, 3.x, and PyPy (PyPy2.7 >= 5.7) are supported with an
+  implementation-agnostic interface.
 
 - It is possible to bind C++11 lambda functions with captured variables. The
   lambda capture data is stored inside the resulting Python function object.
 
+- pybind11 uses C++11 move constructors and move assignment operators whenever
+  possible to efficiently transfer custom data types.
+
 - It's easy to expose the internal storage of custom data types through
   Pythons' buffer protocols. This is handy e.g. for fast conversion between
   C++ matrix classes like Eigen and NumPy without expensive copy operations.
@@ -92,14 +97,14 @@ In addition to the core functionality, pybind11 provides some extra goodies:
 ## Supported compilers
 
 1. Clang/LLVM (any non-ancient version with C++11 support)
-2. GCC (any non-ancient version with C++11 support)
+2. GCC 4.8 or newer
 3. Microsoft Visual Studio 2015 or newer
 4. Intel C++ compiler 16 or newer (15 with a [workaround](https://github.com/pybind/pybind11/issues/276))
 5. Cygwin/GCC (tested on 2.5.1)
 
 ## About
 
-This project was created by [Wenzel Jakob](https://www.mitsuba-renderer.org/~wenzel/).
+This project was created by [Wenzel Jakob](http://rgl.epfl.ch/people/wjakob).
 Significant features and/or improvements to the code were contributed by
 Jonas Adler,
 Sylvain Corlay,
diff --git a/pybind11/docs/advanced/cast/chrono.rst b/pybind11/docs/advanced/cast/chrono.rst
index 1213e2cf0..6d4a5ee55 100644
--- a/pybind11/docs/advanced/cast/chrono.rst
+++ b/pybind11/docs/advanced/cast/chrono.rst
@@ -39,7 +39,7 @@ depending on the system.
 If it is a typedef of the system clock, python will get datetime objects, but if
 it is a different clock they will be timedelta objects.
 
-Conversions Provided
+Provided conversions
 --------------------
 
 .. rubric:: C++ to Python
diff --git a/pybind11/docs/advanced/cast/custom.rst b/pybind11/docs/advanced/cast/custom.rst
index 50b07db51..c854e7fcd 100644
--- a/pybind11/docs/advanced/cast/custom.rst
+++ b/pybind11/docs/advanced/cast/custom.rst
@@ -77,3 +77,9 @@ type is explicitly allowed.
             }
         };
     }} // namespace pybind11::detail
+
+.. warning::
+
+    When using custom type casters, it's important to declare them consistently
+    in every compilation unit of the Python extension module. Otherwise,
+    undefined behavior can ensue.
diff --git a/pybind11/docs/advanced/cast/index.rst b/pybind11/docs/advanced/cast/index.rst
index a0e4d5666..36586af5c 100644
--- a/pybind11/docs/advanced/cast/index.rst
+++ b/pybind11/docs/advanced/cast/index.rst
@@ -1,148 +1,41 @@
 Type conversions
 ################
 
-There are 3 mechanisms that pybind11 uses to move data between C++ and Python.
-We'll take a quick look at each one to get an overview of what's happening.
+Apart from enabling cross-language function calls, a fundamental problem
+that a binding tool like pybind11 must address is to provide access to
+native Python types in C++ and vice versa. There are three fundamentally
+different ways to do this—which approach is preferable for a particular type
+depends on the situation at hand.
 
-.. rubric:: 1. Native type in C++, wrapper in Python
+1. Use a native C++ type everywhere. In this case, the type must be wrapped
+   using pybind11-generated bindings so that Python can interact with it.
 
-Exposing a custom C++ type using :class:`py::class_` was covered in detail in
-the :doc:`/classes` section. There, the underlying data structure is always the
-original C++ class while the :class:`py::class_` wrapper provides a Python
-interface. Internally, when an object like this is sent from C++ to Python,
-pybind11 will just add the outer wrapper layer over the native C++ object.
-Getting it back from Python is just a matter of peeling off the wrapper.
+2. Use a native Python type everywhere. It will need to be wrapped so that
+   C++ functions can interact with it.
 
-.. rubric:: 2. Wrapper in C++, native type in Python
+3. Use a native C++ type on the C++ side and a native Python type on the
+   Python side. pybind11 refers to this as a *type conversion*.
 
-This is the exact opposite situation. Now, we have a type which is native to
-Python, like a ``tuple`` or a ``list``. One way to get this data into C++ is
-with the :class:`py::object` family of wrappers. These are explained in more
-detail in the :doc:`/advanced/pycpp/object` section. We'll just give a quick
-example here:
+   Type conversions are the most "natural" option in the sense that native
+   (non-wrapped) types are used everywhere. The main downside is that a copy
+   of the data must be made on every Python ↔ C++ transition: this is
+   needed since the C++ and Python versions of the same type generally won't
+   have the same memory layout.
 
-.. code-block:: cpp
+   pybind11 can perform many kinds of conversions automatically. An overview
+   is provided in the table ":ref:`conversion_table`".
 
-    void print_list(py::list my_list) {
-        for (auto item : my_list)
-            std::cout << item << " ";
-    }
-
-.. code-block:: pycon
-
-    >>> print_list([1, 2, 3])
-    1 2 3
-
-The Python ``list`` is not converted in any way -- it's just wrapped in a C++
-:class:`py::list` class. At its core it's still a Python object. Copying a
-:class:`py::list` will do the usual reference-counting like in Python.
-Returning the object to Python will just remove the thin wrapper.
-
-.. rubric:: 3. Converting between native C++ and Python types
-
-In the previous two cases we had a native type in one language and a wrapper in
-the other. Now, we have native types on both sides and we convert between them.
-
-.. code-block:: cpp
-
-    void print_vector(const std::vector<int> &v) {
-        for (auto item : v)
-            std::cout << item << "\n";
-    }
-
-.. code-block:: pycon
-
-    >>> print_vector([1, 2, 3])
-    1 2 3
-
-In this case, pybind11 will construct a new ``std::vector<int>`` and copy each
-element from the Python ``list``. The newly constructed object will be passed
-to ``print_vector``. The same thing happens in the other direction: a new
-``list`` is made to match the value returned from C++.
-
-Lots of these conversions are supported out of the box, as shown in the table
-below. They are very convenient, but keep in mind that these conversions are
-fundamentally based on copying data. This is perfectly fine for small immutable
-types but it may become quite expensive for large data structures. This can be
-avoided by overriding the automatic conversion with a custom wrapper (i.e. the
-above-mentioned approach 1). This requires some manual effort and more details
-are available in the :ref:`opaque` section.
-
-.. rubric:: Supported automatic conversions
+The following subsections discuss the differences between these options in more
+detail. The main focus in this section is on type conversions, which represent
+the last case of the above list.
 
 .. toctree::
    :maxdepth: 1
 
+   overview
    stl
    functional
    chrono
    eigen
    custom
 
-The following basic data types are supported out of the box (some may require
-an additional extension header to be included). To pass other data structures
-as arguments and return values, refer to the section on binding :ref:`classes`.
-
-+---------------------------------+--------------------------+-------------------------------+
-|  Data type                      |  Description             | Header file                   |
-+=================================+==========================+===============================+
-| ``int8_t``, ``uint8_t``         | 8-bit integers           | :file:`pybind11/pybind11.h`   |
-+---------------------------------+--------------------------+-------------------------------+
-| ``int16_t``, ``uint16_t``       | 16-bit integers          | :file:`pybind11/pybind11.h`   |
-+---------------------------------+--------------------------+-------------------------------+
-| ``int32_t``, ``uint32_t``       | 32-bit integers          | :file:`pybind11/pybind11.h`   |
-+---------------------------------+--------------------------+-------------------------------+
-| ``int64_t``, ``uint64_t``       | 64-bit integers          | :file:`pybind11/pybind11.h`   |
-+---------------------------------+--------------------------+-------------------------------+
-| ``ssize_t``, ``size_t``         | Platform-dependent size  | :file:`pybind11/pybind11.h`   |
-+---------------------------------+--------------------------+-------------------------------+
-| ``float``, ``double``           | Floating point types     | :file:`pybind11/pybind11.h`   |
-+---------------------------------+--------------------------+-------------------------------+
-| ``bool``                        | Two-state Boolean type   | :file:`pybind11/pybind11.h`   |
-+---------------------------------+--------------------------+-------------------------------+
-| ``char``                        | Character literal        | :file:`pybind11/pybind11.h`   |
-+---------------------------------+--------------------------+-------------------------------+
-| ``wchar_t``                     | Wide character literal   | :file:`pybind11/pybind11.h`   |
-+---------------------------------+--------------------------+-------------------------------+
-| ``const char *``                | UTF-8 string literal     | :file:`pybind11/pybind11.h`   |
-+---------------------------------+--------------------------+-------------------------------+
-| ``const wchar_t *``             | Wide string literal      | :file:`pybind11/pybind11.h`   |
-+---------------------------------+--------------------------+-------------------------------+
-| ``std::string``                 | STL dynamic UTF-8 string | :file:`pybind11/pybind11.h`   |
-+---------------------------------+--------------------------+-------------------------------+
-| ``std::wstring``                | STL dynamic wide string  | :file:`pybind11/pybind11.h`   |
-+---------------------------------+--------------------------+-------------------------------+
-| ``std::pair<T1, T2>``           | Pair of two custom types | :file:`pybind11/pybind11.h`   |
-+---------------------------------+--------------------------+-------------------------------+
-| ``std::tuple<...>``             | Arbitrary tuple of types | :file:`pybind11/pybind11.h`   |
-+---------------------------------+--------------------------+-------------------------------+
-| ``std::reference_wrapper<...>`` | Reference type wrapper   | :file:`pybind11/pybind11.h`   |
-+---------------------------------+--------------------------+-------------------------------+
-| ``std::complex<T>``             | Complex numbers          | :file:`pybind11/complex.h`    |
-+---------------------------------+--------------------------+-------------------------------+
-| ``std::array<T, Size>``         | STL static array         | :file:`pybind11/stl.h`        |
-+---------------------------------+--------------------------+-------------------------------+
-| ``std::vector<T>``              | STL dynamic array        | :file:`pybind11/stl.h`        |
-+---------------------------------+--------------------------+-------------------------------+
-| ``std::list<T>``                | STL linked list          | :file:`pybind11/stl.h`        |
-+---------------------------------+--------------------------+-------------------------------+
-| ``std::map<T1, T2>``            | STL ordered map          | :file:`pybind11/stl.h`        |
-+---------------------------------+--------------------------+-------------------------------+
-| ``std::unordered_map<T1, T2>``  | STL unordered map        | :file:`pybind11/stl.h`        |
-+---------------------------------+--------------------------+-------------------------------+
-| ``std::set<T>``                 | STL ordered set          | :file:`pybind11/stl.h`        |
-+---------------------------------+--------------------------+-------------------------------+
-| ``std::unordered_set<T>``       | STL unordered set        | :file:`pybind11/stl.h`        |
-+---------------------------------+--------------------------+-------------------------------+
-| ``std::function<...>``          | STL polymorphic function | :file:`pybind11/functional.h` |
-+---------------------------------+--------------------------+-------------------------------+
-| ``std::chrono::duration<...>``  | STL time duration        | :file:`pybind11/chrono.h`     |
-+---------------------------------+--------------------------+-------------------------------+
-| ``std::chrono::time_point<...>``| STL date/time            | :file:`pybind11/chrono.h`     |
-+---------------------------------+--------------------------+-------------------------------+
-| ``Eigen::Matrix<...>``          | Eigen: dense matrix      | :file:`pybind11/eigen.h`      |
-+---------------------------------+--------------------------+-------------------------------+
-| ``Eigen::Map<...>``             | Eigen: mapped memory     | :file:`pybind11/eigen.h`      |
-+---------------------------------+--------------------------+-------------------------------+
-| ``Eigen::SparseMatrix<...>``    | Eigen: sparse matrix     | :file:`pybind11/eigen.h`      |
-+---------------------------------+--------------------------+-------------------------------+
diff --git a/pybind11/docs/advanced/cast/overview.rst b/pybind11/docs/advanced/cast/overview.rst
new file mode 100644
index 000000000..ab37b90be
--- /dev/null
+++ b/pybind11/docs/advanced/cast/overview.rst
@@ -0,0 +1,146 @@
+Overview
+########
+
+.. rubric:: 1. Native type in C++, wrapper in Python
+
+Exposing a custom C++ type using :class:`py::class_` was covered in detail
+in the :doc:`/classes` section. There, the underlying data structure is
+always the original C++ class while the :class:`py::class_` wrapper provides
+a Python interface. Internally, when an object like this is sent from C++ to
+Python, pybind11 will just add the outer wrapper layer over the native C++
+object. Getting it back from Python is just a matter of peeling off the
+wrapper.
+
+.. rubric:: 2. Wrapper in C++, native type in Python
+
+This is the exact opposite situation. Now, we have a type which is native to
+Python, like a ``tuple`` or a ``list``. One way to get this data into C++ is
+with the :class:`py::object` family of wrappers. These are explained in more
+detail in the :doc:`/advanced/pycpp/object` section. We'll just give a quick
+example here:
+
+.. code-block:: cpp
+
+    void print_list(py::list my_list) {
+        for (auto item : my_list)
+            std::cout << item << " ";
+    }
+
+.. code-block:: pycon
+
+    >>> print_list([1, 2, 3])
+    1 2 3
+
+The Python ``list`` is not converted in any way -- it's just wrapped in a C++
+:class:`py::list` class. At its core it's still a Python object. Copying a
+:class:`py::list` will do the usual reference-counting like in Python.
+Returning the object to Python will just remove the thin wrapper.
+
+.. rubric:: 3. Converting between native C++ and Python types
+
+In the previous two cases we had a native type in one language and a wrapper in
+the other. Now, we have native types on both sides and we convert between them.
+
+.. code-block:: cpp
+
+    void print_vector(const std::vector<int> &v) {
+        for (auto item : v)
+            std::cout << item << "\n";
+    }
+
+.. code-block:: pycon
+
+    >>> print_vector([1, 2, 3])
+    1 2 3
+
+In this case, pybind11 will construct a new ``std::vector<int>`` and copy each
+element from the Python ``list``. The newly constructed object will be passed
+to ``print_vector``. The same thing happens in the other direction: a new
+``list`` is made to match the value returned from C++.
+
+Lots of these conversions are supported out of the box, as shown in the table
+below. They are very convenient, but keep in mind that these conversions are
+fundamentally based on copying data. This is perfectly fine for small immutable
+types but it may become quite expensive for large data structures. This can be
+avoided by overriding the automatic conversion with a custom wrapper (i.e. the
+above-mentioned approach 1). This requires some manual effort and more details
+are available in the :ref:`opaque` section.
+
+.. _conversion_table:
+
+List of all builtin conversions
+-------------------------------
+
+The following basic data types are supported out of the box (some may require
+an additional extension header to be included). To pass other data structures
+as arguments and return values, refer to the section on binding :ref:`classes`.
+
++------------------------------------+---------------------------+-------------------------------+
+|  Data type                         |  Description              | Header file                   |
++====================================+===========================+===============================+
+| ``int8_t``, ``uint8_t``            | 8-bit integers            | :file:`pybind11/pybind11.h`   |
++------------------------------------+---------------------------+-------------------------------+
+| ``int16_t``, ``uint16_t``          | 16-bit integers           | :file:`pybind11/pybind11.h`   |
++------------------------------------+---------------------------+-------------------------------+
+| ``int32_t``, ``uint32_t``          | 32-bit integers           | :file:`pybind11/pybind11.h`   |
++------------------------------------+---------------------------+-------------------------------+
+| ``int64_t``, ``uint64_t``          | 64-bit integers           | :file:`pybind11/pybind11.h`   |
++------------------------------------+---------------------------+-------------------------------+
+| ``ssize_t``, ``size_t``            | Platform-dependent size   | :file:`pybind11/pybind11.h`   |
++------------------------------------+---------------------------+-------------------------------+
+| ``float``, ``double``              | Floating point types      | :file:`pybind11/pybind11.h`   |
++------------------------------------+---------------------------+-------------------------------+
+| ``bool``                           | Two-state Boolean type    | :file:`pybind11/pybind11.h`   |
++------------------------------------+---------------------------+-------------------------------+
+| ``char``                           | Character literal         | :file:`pybind11/pybind11.h`   |
++------------------------------------+---------------------------+-------------------------------+
+| ``wchar_t``                        | Wide character literal    | :file:`pybind11/pybind11.h`   |
++------------------------------------+---------------------------+-------------------------------+
+| ``const char *``                   | UTF-8 string literal      | :file:`pybind11/pybind11.h`   |
++------------------------------------+---------------------------+-------------------------------+
+| ``const wchar_t *``                | Wide string literal       | :file:`pybind11/pybind11.h`   |
++------------------------------------+---------------------------+-------------------------------+
+| ``std::string``                    | STL dynamic UTF-8 string  | :file:`pybind11/pybind11.h`   |
++------------------------------------+---------------------------+-------------------------------+
+| ``std::wstring``                   | STL dynamic wide string   | :file:`pybind11/pybind11.h`   |
++------------------------------------+---------------------------+-------------------------------+
+| ``std::pair<T1, T2>``              | Pair of two custom types  | :file:`pybind11/pybind11.h`   |
++------------------------------------+---------------------------+-------------------------------+
+| ``std::tuple<...>``                | Arbitrary tuple of types  | :file:`pybind11/pybind11.h`   |
++------------------------------------+---------------------------+-------------------------------+
+| ``std::reference_wrapper<...>``    | Reference type wrapper    | :file:`pybind11/pybind11.h`   |
++------------------------------------+---------------------------+-------------------------------+
+| ``std::complex<T>``                | Complex numbers           | :file:`pybind11/complex.h`    |
++------------------------------------+---------------------------+-------------------------------+
+| ``std::array<T, Size>``            | STL static array          | :file:`pybind11/stl.h`        |
++------------------------------------+---------------------------+-------------------------------+
+| ``std::vector<T>``                 | STL dynamic array         | :file:`pybind11/stl.h`        |
++------------------------------------+---------------------------+-------------------------------+
+| ``std::valarray<T>``               | STL value array           | :file:`pybind11/stl.h`        |
++------------------------------------+---------------------------+-------------------------------+
+| ``std::list<T>``                   | STL linked list           | :file:`pybind11/stl.h`        |
++------------------------------------+---------------------------+-------------------------------+
+| ``std::map<T1, T2>``               | STL ordered map           | :file:`pybind11/stl.h`        |
++------------------------------------+---------------------------+-------------------------------+
+| ``std::unordered_map<T1, T2>``     | STL unordered map         | :file:`pybind11/stl.h`        |
++------------------------------------+---------------------------+-------------------------------+
+| ``std::set<T>``                    | STL ordered set           | :file:`pybind11/stl.h`        |
++------------------------------------+---------------------------+-------------------------------+
+| ``std::unordered_set<T>``          | STL unordered set         | :file:`pybind11/stl.h`        |
++------------------------------------+---------------------------+-------------------------------+
+| ``std::optional<T>``               | STL optional type (C++17) | :file:`pybind11/stl.h`        |
++------------------------------------+---------------------------+-------------------------------+
+| ``std::experimental::optional<T>`` | STL optional type (exp.)  | :file:`pybind11/stl.h`        |
++------------------------------------+---------------------------+-------------------------------+
+| ``std::function<...>``             | STL polymorphic function  | :file:`pybind11/functional.h` |
++------------------------------------+---------------------------+-------------------------------+
+| ``std::chrono::duration<...>``     | STL time duration         | :file:`pybind11/chrono.h`     |
++------------------------------------+---------------------------+-------------------------------+
+| ``std::chrono::time_point<...>``   | STL date/time             | :file:`pybind11/chrono.h`     |
++------------------------------------+---------------------------+-------------------------------+
+| ``Eigen::Matrix<...>``             | Eigen: dense matrix       | :file:`pybind11/eigen.h`      |
++------------------------------------+---------------------------+-------------------------------+
+| ``Eigen::Map<...>``                | Eigen: mapped memory      | :file:`pybind11/eigen.h`      |
++------------------------------------+---------------------------+-------------------------------+
+| ``Eigen::SparseMatrix<...>``       | Eigen: sparse matrix      | :file:`pybind11/eigen.h`      |
++------------------------------------+---------------------------+-------------------------------+
diff --git a/pybind11/docs/advanced/classes.rst b/pybind11/docs/advanced/classes.rst
index 4a423b578..e20895e6d 100644
--- a/pybind11/docs/advanced/classes.rst
+++ b/pybind11/docs/advanced/classes.rst
@@ -422,15 +422,24 @@ The section on :ref:`properties` discussed the creation of instance properties
 that are implemented in terms of C++ getters and setters.
 
 Static properties can also be created in a similar way to expose getters and
-setters of static class attributes. It is important to note that the implicit
-``self`` argument also exists in this case and is used to pass the Python
-``type`` subclass instance. This parameter will often not be needed by the C++
-side, and the following example illustrates how to instantiate a lambda getter
-function that ignores it:
+setters of static class attributes. Two things are important to note:
+
+1. Static properties are implemented by instrumenting the *metaclass* of the
+   class in question -- however, this requires the class to have a modifiable
+   metaclass in the first place. pybind11 provides a ``py::metaclass()``
+   annotation that must be specified in the ``class_`` constructor, or any
+   later method calls to ``def_{property_,∅}_{readwrite,readonly}_static`` will
+   fail (see the example below).
+
+2. For static properties defined in terms of setter and getter functions, note
+   that the implicit ``self`` argument also exists in this case and is used to
+   pass the Python ``type`` subclass instance. This parameter will often not be
+   needed by the C++ side, and the following example illustrates how to
+   instantiate a lambda getter function that ignores it:
 
 .. code-block:: cpp
 
-    py::class_<Foo>(m, "Foo")
+    py::class_<Foo>(m, "Foo", py::metaclass())
         .def_property_readonly_static("foo", [](py::object /* self */) { return Foo(); });
 
 Operator overloading
diff --git a/pybind11/docs/advanced/functions.rst b/pybind11/docs/advanced/functions.rst
index 5c697b12c..f291e8222 100644
--- a/pybind11/docs/advanced/functions.rst
+++ b/pybind11/docs/advanced/functions.rst
@@ -9,48 +9,62 @@ functions, i.e. *methods* in Python.
 Return value policies
 =====================
 
-Python and C++ use wildly different ways of managing the memory and lifetime of
-objects managed by them. This can lead to issues when creating bindings for
-functions that return a non-trivial type. Just by looking at the type
-information, it is not clear whether Python should take charge of the returned
-value and eventually free its resources, or if this is handled on the C++ side.
-For this reason, pybind11 provides a several `return value policy` annotations
-that can be passed to the :func:`module::def` and :func:`class_::def`
-functions. The default policy is :enum:`return_value_policy::automatic`.
-
-Return value policies can also be applied to properties, in which case the
-arguments must be passed through the :class:`cpp_function` constructor:
+Python and C++ use fundamentally different ways of managing the memory and
+lifetime of objects managed by them. This can lead to issues when creating
+bindings for functions that return a non-trivial type. Just by looking at the
+type information, it is not clear whether Python should take charge of the
+returned value and eventually free its resources, or if this is handled on the
+C++ side. For this reason, pybind11 provides a several `return value policy`
+annotations that can be passed to the :func:`module::def` and
+:func:`class_::def` functions. The default policy is
+:enum:`return_value_policy::automatic`.
+
+Return value policies are tricky, and it's very important to get them right.
+Just to illustrate what can go wrong, consider the following simple example:
 
 .. code-block:: cpp
 
-    class_<MyClass>(m, "MyClass")
-        def_property("data"
-            py::cpp_function(&MyClass::getData, py::return_value_policy::copy),
-            py::cpp_function(&MyClass::setData)
-        );
+    /* Function declaration */ 
+    Data *get_data() { return _data; /* (pointer to a static data structure) */ }
+    ...
+
+    /* Binding code */ 
+    m.def("get_data", &get_data); // <-- KABOOM, will cause crash when called from Python
+
+What's going on here? When ``get_data()`` is called from Python, the return
+value (a native C++ type) must be wrapped to turn it into a usable Python type.
+In this case, the default return value policy (:enum:`return_value_policy::automatic`)
+causes pybind11 to assume ownership of the static ``_data`` instance.
+
+When Python's garbage collector eventually deletes the Python
+wrapper, pybind11 will also attempt to delete the C++ instance (via ``operator
+delete()``) due to the implied ownership. At this point, the entire application
+will come crashing down, though errors could also be more subtle and involve
+silent data corruption.
+
+In the above example, the policy :enum:`return_value_policy::reference` should have
+been specified so that the global data instance is only *referenced* without any
+implied transfer of ownership, i.e.: 
+
+.. code-block:: cpp
+
+    m.def("get_data", &get_data, return_value_policy::reference);
 
-The following table provides an overview of the available return value policies:
+On the other hand, this is not the right policy for many other situations,
+where ignoring ownership could lead to resource leaks.
+As a developer using pybind11, it's important to be familiar with the different
+return value policies, including which situation calls for which one of them.
+The following table provides an overview of available policies:
 
 .. tabularcolumns:: |p{0.5\textwidth}|p{0.45\textwidth}|
 
 +--------------------------------------------------+----------------------------------------------------------------------------+
 | Return value policy                              | Description                                                                |
 +==================================================+============================================================================+
-| :enum:`return_value_policy::automatic`           | This is the default return value policy, which falls back to the policy    |
-|                                                  | :enum:`return_value_policy::take_ownership` when the return value is a     |
-|                                                  | pointer. Otherwise, it uses :enum:`return_value::move` or                  |
-|                                                  | :enum:`return_value::copy` for rvalue and lvalue references, respectively. |
-|                                                  | See below for a description of what all of these different policies do.    |
-+--------------------------------------------------+----------------------------------------------------------------------------+
-| :enum:`return_value_policy::automatic_reference` | As above, but use policy :enum:`return_value_policy::reference` when the   |
-|                                                  | return value is a pointer. This is the default conversion policy for       |
-|                                                  | function arguments when calling Python functions manually from C++ code    |
-|                                                  | (i.e. via handle::operator()). You probably won't need to use this.        |
-+--------------------------------------------------+----------------------------------------------------------------------------+
 | :enum:`return_value_policy::take_ownership`      | Reference an existing object (i.e. do not create a new copy) and take      |
 |                                                  | ownership. Python will call the destructor and delete operator when the    |
 |                                                  | object's reference count reaches zero. Undefined behavior ensues when the  |
-|                                                  | C++ side does the same.                                                    |
+|                                                  | C++ side does the same, or when the data was not dynamically allocated.    |
 +--------------------------------------------------+----------------------------------------------------------------------------+
 | :enum:`return_value_policy::copy`                | Create a new copy of the returned object, which will be owned by Python.   |
 |                                                  | This policy is comparably safe because the lifetimes of the two instances  |
@@ -74,6 +88,38 @@ The following table provides an overview of the available return value policies:
 |                                                  | return value is referenced by Python. This is the default policy for       |
 |                                                  | property getters created via ``def_property``, ``def_readwrite``, etc.     |
 +--------------------------------------------------+----------------------------------------------------------------------------+
+| :enum:`return_value_policy::automatic`           | This is the default return value policy, which falls back to the policy    |
+|                                                  | :enum:`return_value_policy::take_ownership` when the return value is a     |
+|                                                  | pointer. Otherwise, it uses :enum:`return_value::move` or                  |
+|                                                  | :enum:`return_value::copy` for rvalue and lvalue references, respectively. |
+|                                                  | See above for a description of what all of these different policies do.    |
++--------------------------------------------------+----------------------------------------------------------------------------+
+| :enum:`return_value_policy::automatic_reference` | As above, but use policy :enum:`return_value_policy::reference` when the   |
+|                                                  | return value is a pointer. This is the default conversion policy for       |
+|                                                  | function arguments when calling Python functions manually from C++ code    |
+|                                                  | (i.e. via handle::operator()). You probably won't need to use this.        |
++--------------------------------------------------+----------------------------------------------------------------------------+
+
+Return value policies can also be applied to properties:
+
+.. code-block:: cpp
+
+    class_<MyClass>(m, "MyClass")
+        .def_property("data", &MyClass::getData, &MyClass::setData,
+                      py::return_value_policy::copy);
+
+Technically, the code above applies the policy to both the getter and the
+setter function, however, the setter doesn't really care about *return*
+value policies which makes this a convenient terse syntax. Alternatively,
+targeted arguments can be passed through the :class:`cpp_function` constructor:
+
+.. code-block:: cpp
+
+    class_<MyClass>(m, "MyClass")
+        .def_property("data"
+            py::cpp_function(&MyClass::getData, py::return_value_policy::copy),
+            py::cpp_function(&MyClass::setData)
+        );
 
 .. warning::
 
@@ -82,12 +128,14 @@ The following table provides an overview of the available return value policies:
     non-determinism and segmentation faults, hence it is worth spending the
     time to understand all the different options in the table above.
 
-One important aspect of the above policies is that they only apply to instances
-which pybind11 has *not* seen before, in which case the policy clarifies
-essential questions about the return value's lifetime and ownership.  When
-pybind11 knows the instance already (as identified by its type and address in
-memory), it will return the existing Python object wrapper rather than creating
-a new copy.
+.. note::
+
+    One important aspect of the above policies is that they only apply to
+    instances which pybind11 has *not* seen before, in which case the policy
+    clarifies essential questions about the return value's lifetime and
+    ownership.  When pybind11 knows the instance already (as identified by its
+    type and address in memory), it will return the existing Python object
+    wrapper rather than creating a new copy.
 
 .. note::
 
diff --git a/pybind11/docs/advanced/misc.rst b/pybind11/docs/advanced/misc.rst
index 2968f8ac1..c13df7bf8 100644
--- a/pybind11/docs/advanced/misc.rst
+++ b/pybind11/docs/advanced/misc.rst
@@ -149,6 +149,25 @@ accessed by multiple extension modules:
         ...
     };
 
+Note also that it is possible (although would rarely be required) to share arbitrary
+C++ objects between extension modules at runtime. Internal library data is shared
+between modules using capsule machinery [#f6]_ which can be also utilized for
+storing, modifying and accessing user-defined data. Note that an extension module
+will "see" other extensions' data if and only if they were built with the same
+pybind11 version. Consider the following example:
+
+.. code-block:: cpp
+
+    auto data = (MyData *) py::get_shared_data("mydata");
+    if (!data)
+        data = (MyData *) py::set_shared_data("mydata", new MyData(42));
+
+If the above snippet was used in several separately compiled extension modules,
+the first one to be imported would create a ``MyData`` instance and associate
+a ``"mydata"`` key with a pointer to it. Extensions that are imported later
+would be then able to access the data behind the same pointer.
+
+.. [#f6] https://docs.python.org/3/extending/extending.html#using-capsules
 
 
 Generating documentation using Sphinx
@@ -183,5 +202,28 @@ work, it is important that all lines are indented consistently, i.e.:
         ----------
     )mydelimiter");
 
+By default, pybind11 automatically generates and prepends a signature to the docstring of a function 
+registered with ``module::def()`` and ``class_::def()``. Sometimes this
+behavior is not desirable, because you want to provide your own signature or remove 
+the docstring completely to exclude the function from the Sphinx documentation.
+The class ``options`` allows you to selectively suppress auto-generated signatures:
+
+.. code-block:: cpp
+
+    PYBIND11_PLUGIN(example) {
+        py::module m("example", "pybind11 example plugin");
+
+        py::options options;
+        options.disable_function_signatures();
+        
+        m.def("add", [](int a, int b) { return a + b; }, "A function which adds two numbers");
+        
+        return m.ptr();
+    }
+
+Note that changes to the settings affect only function bindings created during the 
+lifetime of the ``options`` instance. When it goes out of scope at the end of the module's init function, 
+the default settings are restored to prevent unwanted side effects.
+
 .. [#f4] http://www.sphinx-doc.org
 .. [#f5] http://github.com/pybind/python_example
diff --git a/pybind11/docs/advanced/pycpp/numpy.rst b/pybind11/docs/advanced/pycpp/numpy.rst
index 8b46b7c83..111ff0e3c 100644
--- a/pybind11/docs/advanced/pycpp/numpy.rst
+++ b/pybind11/docs/advanced/pycpp/numpy.rst
@@ -33,7 +33,7 @@ completely avoid copy operations with Python expressions like
 
 .. code-block:: cpp
 
-    py::class_<Matrix>(m, "Matrix")
+    py::class_<Matrix>(m, "Matrix", py::buffer_protocol())
        .def_buffer([](Matrix &m) -> py::buffer_info {
             return py::buffer_info(
                 m.data(),                               /* Pointer to buffer */
@@ -46,9 +46,12 @@ completely avoid copy operations with Python expressions like
             );
         });
 
-The snippet above binds a lambda function, which can create ``py::buffer_info``
-description records on demand describing a given matrix. The contents of
-``py::buffer_info`` mirror the Python buffer protocol specification.
+Supporting the buffer protocol in a new type involves specifying the special
+``py::buffer_protocol()`` tag in the ``py::class_`` constructor and calling the
+``def_buffer()`` method with a lambda function that creates a
+``py::buffer_info`` description record on demand describing a given matrix
+instance. The contents of ``py::buffer_info`` mirror the Python buffer protocol
+specification.
 
 .. code-block:: cpp
 
@@ -77,7 +80,7 @@ buffer objects (e.g. a NumPy matrix).
     typedef Matrix::Scalar Scalar;
     constexpr bool rowMajor = Matrix::Flags & Eigen::RowMajorBit;
 
-    py::class_<Matrix>(m, "Matrix")
+    py::class_<Matrix>(m, "Matrix", py::buffer_protocol())
         .def("__init__", [](Matrix &m, py::buffer b) {
             typedef Eigen::Stride<Eigen::Dynamic, Eigen::Dynamic> Strides;
 
diff --git a/pybind11/docs/advanced/smart_ptrs.rst b/pybind11/docs/advanced/smart_ptrs.rst
index 6e8c9de36..3c982136c 100644
--- a/pybind11/docs/advanced/smart_ptrs.rst
+++ b/pybind11/docs/advanced/smart_ptrs.rst
@@ -53,8 +53,6 @@ code?
 
 .. code-block:: cpp
 
-    PYBIND11_DECLARE_HOLDER_TYPE(T, std::shared_ptr<T>);
-
     class Child { };
 
     class Parent {
@@ -125,7 +123,7 @@ Custom smart pointers
 pybind11 supports ``std::unique_ptr`` and ``std::shared_ptr`` right out of the
 box. For any other custom smart pointer, transparent conversions can be enabled
 using a macro invocation similar to the following. It must be declared at the
-level before any binding code:
+top namespace level before any binding code:
 
 .. code-block:: cpp
 
@@ -136,6 +134,19 @@ placeholder name that is used as a template parameter of the second argument.
 Thus, feel free to use any identifier, but use it consistently on both sides;
 also, don't use the name of a type that already exists in your codebase.
 
+The macro also accepts a third optional boolean parameter that is set to false
+by default. Specify
+
+.. code-block:: cpp
+
+    PYBIND11_DECLARE_HOLDER_TYPE(T, SmartPtr<T>, true);
+
+if ``SmartPtr<T>`` can always be initialized from a ``T*`` pointer without the
+risk of inconsistencies (such as multiple independent ``SmartPtr`` instances
+believing that they are the sole owner of the ``T*`` pointer). A common
+situation where ``true`` should be passed is when the ``T`` instances use
+*intrusive* reference counting.
+
 Please take a look at the :ref:`macro_notes` before using this feature.
 
 .. seealso::
diff --git a/pybind11/docs/basics.rst b/pybind11/docs/basics.rst
index 45272b7ed..33c60049d 100644
--- a/pybind11/docs/basics.rst
+++ b/pybind11/docs/basics.rst
@@ -25,7 +25,7 @@ After installing the prerequisites, run
    mkdir build
    cd build
    cmake ..
-   make pytest -j 4
+   make check -j 4
 
 The last line will both compile and run the tests.
 
@@ -42,7 +42,7 @@ To compile and run the tests:
    mkdir build
    cd build
    cmake ..
-   cmake --build . --config Release --target pytest
+   cmake --build . --config Release --target check
 
 This will create a Visual Studio project, compile and run the target, all from the
 command line.
@@ -254,16 +254,18 @@ The shorthand notation is also available for default arguments:
 Exporting variables
 ===================
 
-To expose a value from C++, use the ``attr`` function to register it in a module
-as shown below. Built-in types and general objects (more on that later) can be
+To expose a value from C++, use the ``attr`` function to register it in a
+module as shown below. Built-in types and general objects (more on that later)
+are automatically converted when assigned as attributes, and can be explicitly
 converted using the function ``py::cast``.
 
 .. code-block:: cpp
 
     PYBIND11_PLUGIN(example) {
         py::module m("example", "pybind11 example plugin");
-        m.attr("the_answer") = py::cast(42);
-        m.attr("what") = py::cast("World");
+        m.attr("the_answer") = 42;
+        py::object world = py::cast("World");
+        m.attr("what") = world;
         return m.ptr();
     }
 
diff --git a/pybind11/docs/changelog.rst b/pybind11/docs/changelog.rst
index 0c3ec1bcb..74bedde8b 100644
--- a/pybind11/docs/changelog.rst
+++ b/pybind11/docs/changelog.rst
@@ -3,61 +3,315 @@
 Changelog
 #########
 
-Starting with version 1.8, pybind11 releases use a
-[semantic versioning](http://semver.org) policy.
+Starting with version 1.8.0, pybind11 releases use a `semantic versioning
+<http://semver.org>`_ policy.
 
-Breaking changes queued for v2.0.0 (Not yet released)
+v2.0.1 (Jan 4, 2017)
 -----------------------------------------------------
-* Redesigned virtual call mechanism and user-facing syntax (see
-  https://github.com/pybind/pybind11/commit/86d825f3302701d81414ddd3d38bcd09433076bc)
 
-* Remove ``handle.call()`` method
+* Fix pointer to reference error in type_caster on MSVC
+  `#583 <https://github.com/pybind/pybind11/pull/583>`_.
+
+* Fixed a segmentation in the test suite due to a typo
+  `cd7eac <https://github.com/pybind/pybind11/commit/cd7eac>`_.
+
+v2.0.0 (Jan 1, 2017)
+-----------------------------------------------------
+
+* Fixed a reference counting regression affecting types with custom metaclasses
+  (introduced in v2.0.0-rc1).
+  `#571 <https://github.com/pybind/pybind11/pull/571>`_.
+
+* Quenched a CMake policy warning.
+  `#570 <https://github.com/pybind/pybind11/pull/570>`_.
+
+v2.0.0-rc1 (Dec 23, 2016)
+-----------------------------------------------------
+
+The pybind11 developers are excited to issue a release candidate of pybind11
+with a subsequent v2.0.0 release planned in early January next year.
+
+An incredible amount of effort by went into pybind11 over the last ~5 months,
+leading to a release that is jam-packed with exciting new features and numerous
+usability improvements. The following list links PRs or individual commits
+whenever applicable.
+
+Happy Christmas!
+
+* Support for binding C++ class hierarchies that make use of multiple
+  inheritance. `#410 <https://github.com/pybind/pybind11/pull/410>`_.
+
+* PyPy support: pybind11 now supports nightly builds of PyPy and will
+  interoperate with the future 5.7 release. No code changes are necessary,
+  everything "just" works as usual. Note that we only target the Python 2.7
+  branch for now; support for 3.x will be added once its ``cpyext`` extension
+  support catches up. A few minor features remain unsupported for the time
+  being (notably dynamic attributes in custom types).
+  `#527 <https://github.com/pybind/pybind11/pull/527>`_.
+
+* Significant work on the documentation -- in particular, the monolitic
+  ``advanced.rst`` file was restructured into a easier to read hierarchical
+  organization. `#448 <https://github.com/pybind/pybind11/pull/448>`_.
+
+* Many NumPy-related improvements:
+
+  1. Object-oriented API to access and modify NumPy ``ndarray`` instances,
+     replicating much of the corresponding NumPy C API functionality.
+     `#402 <https://github.com/pybind/pybind11/pull/402>`_.
+
+  2. NumPy array ``dtype`` array descriptors are now first-class citizens and
+     are exposed via a new class ``py::dtype``.
+
+  3. Structured dtypes can be registered using the ``PYBIND11_NUMPY_DTYPE()``
+     macro. Special ``array`` constructors accepting dtype objects were also
+     added.
+
+     One potential caveat involving this change: format descriptor strings
+     should now be accessed via ``format_descriptor::format()`` (however, for
+     compatibility purposes, the old syntax ``format_descriptor::value`` will
+     still work for non-structured data types). `#308
+     <https://github.com/pybind/pybind11/pull/308>`_.
+
+  4. Further improvements to support structured dtypes throughout the system.
+     `#472 <https://github.com/pybind/pybind11/pull/472>`_,
+     `#474 <https://github.com/pybind/pybind11/pull/474>`_,
+     `#459 <https://github.com/pybind/pybind11/pull/459>`_,
+     `#453 <https://github.com/pybind/pybind11/pull/453>`_,
+     `#452 <https://github.com/pybind/pybind11/pull/452>`_, and
+     `#505 <https://github.com/pybind/pybind11/pull/505>`_.
+
+  5. Fast access operators. `#497 <https://github.com/pybind/pybind11/pull/497>`_.
+
+  6. Constructors for arrays whose storage is owned by another object.
+     `#440 <https://github.com/pybind/pybind11/pull/440>`_.
+
+  7. Added constructors for ``array`` and ``array_t`` explicitly accepting shape
+     and strides; if strides are not provided, they are deduced assuming
+     C-contiguity. Also added simplified constructors for 1-dimensional case.
+
+  8. Added buffer/NumPy support for ``char[N]`` and ``std::array<char, N>`` types.
+
+  9. Added ``memoryview`` wrapper type which is constructible from ``buffer_info``.
+
+* Eigen: many additional conversions and support for non-contiguous
+  arrays/slices.
+  `#427 <https://github.com/pybind/pybind11/pull/427>`_,
+  `#315 <https://github.com/pybind/pybind11/pull/315>`_,
+  `#316 <https://github.com/pybind/pybind11/pull/316>`_,
+  `#312 <https://github.com/pybind/pybind11/pull/312>`_, and
+  `#267 <https://github.com/pybind/pybind11/pull/267>`_
+
+* Incompatible changes in ``class_<...>::class_()``:
+
+    1. Declarations of types that provide access via the buffer protocol must
+       now include the ``py::buffer_protocol()`` annotation as an argument to
+       the ``class_`` constructor.
+
+    2. Declarations of types that require a custom metaclass (i.e. all classes
+       which include static properties via commands such as
+       ``def_readwrite_static()``) must now include the ``py::metaclass()``
+       annotation as an argument to the ``class_`` constructor.
+
+       These two changes were necessary to make type definitions in pybind11
+       future-proof, and to support PyPy via its cpyext mechanism. `#527
+       <https://github.com/pybind/pybind11/pull/527>`_.
+
+
+    3. This version of pybind11 uses a redesigned mechnism for instantiating
+       trempoline classes that are used to override virtual methods from within
+       Python. This led to the following user-visible syntax change: instead of
+
+       .. code-block:: cpp
+
+           py::class_<TrampolineClass>("MyClass")
+             .alias<MyClass>()
+             ....
+
+       write
+
+       .. code-block:: cpp
+
+           py::class_<MyClass, TrampolineClass>("MyClass")
+             ....
+
+       Importantly, both the original and the trampoline class are now
+       specified as an arguments (in arbitrary order) to the ``py::class_``
+       template, and the ``alias<..>()`` call is gone. The new scheme has zero
+       overhead in cases when Python doesn't override any functions of the
+       underlying C++ class. `rev. 86d825
+       <https://github.com/pybind/pybind11/commit/86d825>`_.
 
-1.9.0 (Not yet released)
-------------------------
-* Queued changes: map indexing suite, documentation for indexing suites.
-* Mapping a stateless C++ function to Python and back is now "for free" (i.e. no call overheads)
-* Support for translation of arbitrary C++ exceptions to Python counterparts
 * Added ``eval`` and ``eval_file`` functions for evaluating expressions and
-  statements from a string or file
-* eigen.h type converter fixed for non-contiguous arrays (e.g. slices)
-* Print more informative error messages when ``make_tuple()`` or ``cast()`` fail
-* ``std::enable_shared_from_this<>`` now also works for ``const`` values
-* A return value policy can now be passed to ``handle::operator()``
-* ``make_iterator()`` improvements for better compatibility with various types
-  (now uses prefix increment operator); it now also accepts iterators with
-  different begin/end types as long as they are equality comparable.
-* ``arg()`` now accepts a wider range of argument types for default values
-* Added ``repr()`` method to the ``handle`` class.
-* Added support for registering structured dtypes via ``PYBIND11_NUMPY_DTYPE()`` macro.
-* Added ``PYBIND11_STR_TYPE`` macro which maps to the ``builtins.str`` type.
-* Added a simplified ``buffer_info`` constructor for 1-dimensional buffers.
-* Format descriptor strings should now be accessed via ``format_descriptor::format()``
-  (for compatibility purposes, the old syntax ``format_descriptor::value`` will still
-  work for non-structured data types).
-* Added a class wrapping NumPy array descriptors: ``dtype``.
-* Added buffer/NumPy support for ``char[N]`` and ``std::array<char, N>`` types.
-* ``array`` gained new constructors accepting dtype objects.
-* Added constructors for ``array`` and ``array_t`` explicitly accepting shape and
-  strides; if strides are not provided, they are deduced assuming C-contiguity.
-  Also added simplified constructors for 1-dimensional case.
-* Added constructors for ``str`` from ``bytes`` and for ``bytes`` from ``str``.
-  This will do the UTF-8 decoding/encoding as required.
-* Added constructors for ``str`` and ``bytes`` from zero-terminated char pointers,
-  and from char pointers and length.
-* Added ``memoryview`` wrapper type which is constructible from ``buffer_info``.
-* New syntax to call a Python function from C++ using keyword arguments and unpacking,
-  e.g. ``foo(1, 2, "z"_a=3)`` or ``bar(1, *args, "z"_a=3, **kwargs)``.
-* Added ``py::print()`` function which replicates Python's API and writes to Python's
-  ``sys.stdout`` by default (as opposed to C's ``stdout`` like ``std::cout``).
-* Added ``py::dict`` keyword constructor:``auto d = dict("number"_a=42, "name"_a="World");``
-* Added ``py::str::format()`` method and ``_s`` literal:
-  ``py::str s = "1 + 2 = {}"_s.format(3);``
-* Attribute and item accessors now have a more complete interface which makes it possible
-  to chain attributes ``obj.attr("a")[key].attr("b").attr("method")(1, 2, 3)```.
-* Added built-in support for ``std::shared_ptr`` holder type. There is no more need
-  to do it manually via ``PYBIND11_DECLARE_HOLDER_TYPE(T, std::shared_ptr<T>)``.
-* Various minor improvements of library internals (no user-visible changes)
+  statements from a string or file. `rev. 0d3fc3
+  <https://github.com/pybind/pybind11/commit/0d3fc3>`_.
+
+* pybind11 can now create types with a modifiable dictionary.
+  `#437 <https://github.com/pybind/pybind11/pull/437>`_ and
+  `#444 <https://github.com/pybind/pybind11/pull/444>`_.
+
+* Support for translation of arbitrary C++ exceptions to Python counterparts.
+  `#296 <https://github.com/pybind/pybind11/pull/296>`_ and
+  `#273 <https://github.com/pybind/pybind11/pull/273>`_.
+
+* Report full backtraces through mixed C++/Python code, better reporting for
+  import errors, fixed GIL management in exception processing.
+  `#537 <https://github.com/pybind/pybind11/pull/537>`_,
+  `#494 <https://github.com/pybind/pybind11/pull/494>`_,
+  `rev. e72d95 <https://github.com/pybind/pybind11/commit/e72d95>`_, and
+  `rev. 099d6e <https://github.com/pybind/pybind11/commit/099d6e>`_.
+
+* Support for bit-level operations, comparisons, and serialization of C++
+  enumerations. `#503 <https://github.com/pybind/pybind11/pull/503>`_,
+  `#508 <https://github.com/pybind/pybind11/pull/508>`_,
+  `#380 <https://github.com/pybind/pybind11/pull/380>`_,
+  `#309 <https://github.com/pybind/pybind11/pull/309>`_.
+  `#311 <https://github.com/pybind/pybind11/pull/311>`_.
+
+* The ``class_`` constructor now accepts its template arguments in any order.
+  `#385 <https://github.com/pybind/pybind11/pull/385>`_.
+
+* Attribute and item accessors now have a more complete interface which makes
+  it possible to chain attributes as in
+  ``obj.attr("a")[key].attr("b").attr("method")(1, 2, 3)``. `#425
+  <https://github.com/pybind/pybind11/pull/425>`_.
+
+* Major redesign of the default and conversion constructors in ``pytypes.h``.
+  `#464 <https://github.com/pybind/pybind11/pull/464>`_.
+
+* Added built-in support for ``std::shared_ptr`` holder type. It is no longer
+  necessary to to include a declaration of the form
+  ``PYBIND11_DECLARE_HOLDER_TYPE(T, std::shared_ptr<T>)`` (though continuing to
+  do so won't cause an error).
+  `#454 <https://github.com/pybind/pybind11/pull/454>`_.
+
+* New ``py::overload_cast`` casting operator to select among multiple possible
+  overloads of a function. An example:
+
+    .. code-block:: cpp
+
+        py::class_<Pet>(m, "Pet")
+            .def("set", py::overload_cast<int>(&Pet::set), "Set the pet's age")
+            .def("set", py::overload_cast<const std::string &>(&Pet::set), "Set the pet's name");
+
+  This feature only works on C++14-capable compilers.
+  `#541 <https://github.com/pybind/pybind11/pull/541>`_.
+
+* C++ types are automatically cast to Python types, e.g. when assigning
+  them as an attribute. For instance, the following is now legal:
+
+    .. code-block:: cpp
+
+        py::module m = /* ... */
+        m.attr("constant") = 123;
+
+  (Previously, a ``py::cast`` call was necessary to avoid a compilation error.)
+  `#551 <https://github.com/pybind/pybind11/pull/551>`_.
+
+* Redesigned ``pytest``-based test suite. `#321 <https://github.com/pybind/pybind11/pull/321>`_.
+
+* Instance tracking to detect reference leaks in test suite. `#324 <https://github.com/pybind/pybind11/pull/324>`_
+
+* pybind11 can now distinguish between multiple different instances that are
+  located at the same memory address, but which have different types.
+  `#329 <https://github.com/pybind/pybind11/pull/329>`_.
+
+* Improved logic in ``move`` return value policy.
+  `#510 <https://github.com/pybind/pybind11/pull/510>`_,
+  `#297 <https://github.com/pybind/pybind11/pull/297>`_.
+
+* Generalized unpacking API to permit calling Python functions from C++ using
+  notation such as ``foo(a1, a2, *args, "ka"_a=1, "kb"_a=2, **kwargs)``. `#372 <https://github.com/pybind/pybind11/pull/372>`_.
+
+* ``py::print()`` function whose behavior matches that of the native Python
+  ``print()`` function. `#372 <https://github.com/pybind/pybind11/pull/372>`_.
+
+* Added ``py::dict`` keyword constructor:``auto d = dict("number"_a=42,
+  "name"_a="World");``. `#372 <https://github.com/pybind/pybind11/pull/372>`_.
+
+* Added ``py::str::format()`` method and ``_s`` literal: ``py::str s = "1 + 2
+  = {}"_s.format(3);``. `#372 <https://github.com/pybind/pybind11/pull/372>`_.
+
+* Added ``py::repr()`` function which is equivalent to Python's builtin
+  ``repr()``. `#333 <https://github.com/pybind/pybind11/pull/333>`_.
+
+* Improved construction and destruction logic for holder types. It is now
+  possible to reference instances with smart pointer holder types without
+  constructing the holder if desired. The ``PYBIND11_DECLARE_HOLDER_TYPE``
+  macro now accepts an optional second parameter to indicate whether the holder
+  type uses intrusive reference counting.
+  `#533 <https://github.com/pybind/pybind11/pull/533>`_ and
+  `#561 <https://github.com/pybind/pybind11/pull/561>`_.
+
+* Mapping a stateless C++ function to Python and back is now "for free" (i.e.
+  no extra indirections or argument conversion overheads). `rev. 954b79
+  <https://github.com/pybind/pybind11/commit/954b79>`_.
+
+* Bindings for ``std::valarray<T>``.
+  `#545 <https://github.com/pybind/pybind11/pull/545>`_.
+
+* Improved support for C++17 capable compilers.
+  `#562 <https://github.com/pybind/pybind11/pull/562>`_.
+
+* Bindings for ``std::optional<t>``.
+  `#475 <https://github.com/pybind/pybind11/pull/475>`_,
+  `#476 <https://github.com/pybind/pybind11/pull/476>`_,
+  `#479 <https://github.com/pybind/pybind11/pull/479>`_,
+  `#499 <https://github.com/pybind/pybind11/pull/499>`_, and
+  `#501 <https://github.com/pybind/pybind11/pull/501>`_.
+
+* ``stl_bind.h``: general improvements and support for ``std::map`` and
+  ``std::unordered_map``.
+  `#490 <https://github.com/pybind/pybind11/pull/490>`_,
+  `#282 <https://github.com/pybind/pybind11/pull/282>`_,
+  `#235 <https://github.com/pybind/pybind11/pull/235>`_.
+
+* The ``std::tuple``, ``std::pair``, ``std::list``, and ``std::vector`` type
+  casters now accept any Python sequence type as input. `rev. 107285
+  <https://github.com/pybind/pybind11/commit/107285>`_.
+
+* Improved CMake Python detection on multi-architecture Linux.
+  `#532 <https://github.com/pybind/pybind11/pull/532>`_.
+
+* Infrastructure to selectively disable or enable parts of the automatically
+  generated docstrings. `#486 <https://github.com/pybind/pybind11/pull/486>`_.
+
+* ``reference`` and ``reference_internal`` are now the default return value
+  properties for static and non-static properties, respectively. `#473
+  <https://github.com/pybind/pybind11/pull/473>`_. (the previous defaults
+  were ``automatic``). `#473 <https://github.com/pybind/pybind11/pull/473>`_.
+
+* Support for ``std::unique_ptr`` with non-default deleters or no deleter at
+  all (``py::nodelete``). `#384 <https://github.com/pybind/pybind11/pull/384>`_.
+
+* Deprecated ``handle::call()`` method. The new syntax to call Python
+  functions is simply ``handle()``. It can also be invoked explicitly via
+  ``handle::operator<X>()``, where ``X`` is an optional return value policy.
+
+* Print more informative error messages when ``make_tuple()`` or ``cast()``
+  fail. `#262 <https://github.com/pybind/pybind11/pull/262>`_.
+
+* Creation of holder types for classes deriving from
+  ``std::enable_shared_from_this<>`` now also works for ``const`` values.
+  `#260 <https://github.com/pybind/pybind11/pull/260>`_.
+
+* ``make_iterator()`` improvements for better compatibility with various
+  types (now uses prefix increment operator); it now also accepts iterators
+  with different begin/end types as long as they are equality comparable.
+  `#247 <https://github.com/pybind/pybind11/pull/247>`_.
+
+* ``arg()`` now accepts a wider range of argument types for default values.
+  `#244 <https://github.com/pybind/pybind11/pull/244>`_.
+
+* Support ``keep_alive`` where the nurse object may be ``None``. `#341
+  <https://github.com/pybind/pybind11/pull/341>`_.
+
+* Added constructors for ``str`` and ``bytes`` from zero-terminated char
+  pointers, and from char pointers and length. Added constructors for ``str``
+  from ``bytes`` and for ``bytes`` from ``str``, which will perform UTF-8
+  decoding/encoding as required.
+
+* Many other improvements of library internals without user-visible changes
+
 
 1.8.1 (July 12, 2016)
 ----------------------
diff --git a/pybind11/docs/classes.rst b/pybind11/docs/classes.rst
index 300816d41..872977684 100644
--- a/pybind11/docs/classes.rst
+++ b/pybind11/docs/classes.rst
@@ -104,6 +104,8 @@ With the above change, the same Python code now produces the following output:
     >>> print(p)
     <example.Pet named 'Molly'>
 
+.. [#f1] Stateless closures are those with an empty pair of brackets ``[]`` as the capture object.
+
 .. _properties:
 
 Instance and static fields
@@ -337,6 +339,35 @@ The overload signatures are also visible in the method's docstring:
      |
      |      Set the pet's name
 
+If you have a C++14 compatible compiler [#cpp14]_, you can use an alternative
+syntax to cast the overloaded function:
+
+.. code-block:: cpp
+
+    py::class_<Pet>(m, "Pet")
+        .def("set", py::overload_cast<int>(&Pet::set), "Set the pet's age")
+        .def("set", py::overload_cast<const std::string &>(&Pet::set), "Set the pet's name");
+
+Here, ``py::overload_cast`` only requires the parameter types to be specified.
+The return type and class are deduced. This avoids the additional noise of
+``void (Pet::*)()`` as seen in the raw cast. If a function is overloaded based
+on constness, the ``py::const_`` tag should be used:
+
+.. code-block:: cpp
+
+    struct Widget {
+        int foo(int x, float y);
+        int foo(int x, float y) const;
+    };
+
+    py::class_<Widget>(m, "Widget")
+       .def("foo_mutable", py::overload_cast<int, float>(&Widget::foo))
+       .def("foo_const",   py::overload_cast<int, float>(&Widget::foo, py::const_));
+
+
+.. [#cpp14] A compiler which supports the ``-std=c++14`` flag
+            or Visual Studio 2015 Update 2 and newer.
+
 .. note::
 
     To define multiple overloaded constructors, simply declare one after the
@@ -393,4 +424,16 @@ typed enums.
     1L
 
 
-.. [#f1] Stateless closures are those with an empty pair of brackets ``[]`` as the capture object.
+.. note::
+
+    When the special tag ``py::arithmetic()`` is specified to the ``enum_``
+    constructor, pybind11 creates an enumeration that also supports rudimentary
+    arithmetic and bit-level operations like comparisons, and, or, xor, negation,
+    etc.
+
+    .. code-block:: cpp
+
+        py::enum_<Pet::Kind>(pet, "Kind", py::arithmetic())
+           ...
+
+    By default, these are omitted to conserve space.
diff --git a/pybind11/docs/compiling.rst b/pybind11/docs/compiling.rst
index 30cd83b79..c7053dbf9 100644
--- a/pybind11/docs/compiling.rst
+++ b/pybind11/docs/compiling.rst
@@ -39,15 +39,147 @@ extension module can be created with just a few lines of code:
 
 This assumes that the pybind11 repository is located in a subdirectory named
 :file:`pybind11` and that the code is located in a file named :file:`example.cpp`.
-The CMake command ``add_subdirectory`` will import a function with the signature
-``pybind11_add_module(<name> source1 [source2 ...])``. It will take care of all
-the details needed to build a Python extension module on any platform.
-
-The target Python version can be selected by setting the ``PYBIND11_PYTHON_VERSION``
-variable before adding the pybind11 subdirectory. Alternatively, an exact Python
-installation can be specified by setting ``PYTHON_EXECUTABLE``.
+The CMake command ``add_subdirectory`` will import the pybind11 project which
+provides the ``pybind11_add_module`` function. It will take care of all the
+details needed to build a Python extension module on any platform.
 
 A working sample project, including a way to invoke CMake from :file:`setup.py` for
 PyPI integration, can be found in the [cmake_example]_  repository.
 
 .. [cmake_example] https://github.com/pybind/cmake_example
+
+pybind11_add_module
+-------------------
+
+To ease the creation of Python extension modules, pybind11 provides a CMake
+function with the following signature:
+
+.. code-block:: cmake
+
+    pybind11_add_module(<name> [MODULE | SHARED] [EXCLUDE_FROM_ALL]
+                        [NO_EXTRAS] [THIN_LTO] source1 [source2 ...])
+
+This function behaves very much like CMake's builtin ``add_library`` (in fact,
+it's a wrapper function around that command). It will add a library target
+called ``<name>`` to be built from the listed source files. In addition, it
+will take care of all the Python-specific compiler and linker flags as well
+as the OS- and Python-version-specific file extension. The produced target
+``<name>`` can be further manipulated with regular CMake commands.
+
+``MODULE`` or ``SHARED`` may be given to specify the type of library. If no
+type is given, ``MODULE`` is used by default which ensures the creation of a
+Python-exclusive module. Specifying ``SHARED`` will create a more traditional
+dynamic library which can also be linked from elsewhere. ``EXCLUDE_FROM_ALL``
+removes this target from the default build (see CMake docs for details).
+
+Since pybind11 is a template library, ``pybind11_add_module`` adds compiler
+flags to ensure high quality code generation without bloat arising from long
+symbol names and duplication of code in different translation units. The
+additional flags enable LTO (Link Time Optimization), set default visibility
+to *hidden* and strip unneeded symbols. See the :ref:`FAQ entry <faq:symhidden>`
+for a more detailed explanation. These optimizations are never applied in
+``Debug`` mode. If ``NO_EXTRAS`` is given, they will always be disabled, even
+in ``Release`` mode. However, this will result in code bloat and is generally
+not recommended.
+
+As stated above, LTO is enabled by default. Some newer compilers also support
+different flavors of LTO such as `ThinLTO`_. Setting ``THIN_LTO`` will cause
+the function to prefer this flavor if available. The function falls back to
+regular LTO if ``-flto=thin`` is not available.
+
+.. _ThinLTO: http://clang.llvm.org/docs/ThinLTO.html
+
+Configuration variables
+-----------------------
+
+By default, pybind11 will compile modules with the latest C++ standard
+available on the target compiler. To override this, the standard flag can
+be given explicitly in ``PYBIND11_CPP_STANDARD``:
+
+.. code-block:: cmake
+
+    set(PYBIND11_CPP_STANDARD -std=c++11)
+    add_subdirectory(pybind11)  # or find_package(pybind11)
+
+Note that this and all other configuration variables must be set **before** the
+call to ``add_subdiretory`` or ``find_package``. The variables can also be set
+when calling CMake from the command line using the ``-D<variable>=<value>`` flag.
+
+The target Python version can be selected by setting ``PYBIND11_PYTHON_VERSION``
+or an exact Python installation can be specified with ``PYTHON_EXECUTABLE``.
+For example:
+
+.. code-block:: bash
+
+    cmake -DPYBIND11_PYTHON_VERSION=3.6 ..
+    # or
+    cmake -DPYTHON_EXECUTABLE=path/to/python ..
+
+find_package vs. add_subdirectory
+---------------------------------
+
+For CMake-based projects that don't include the pybind11 repository internally,
+an external installation can be detected through ``find_package(pybind11)``.
+See the `Config file`_ docstring for details of relevant CMake variables.
+
+.. code-block:: cmake
+
+    cmake_minimum_required(VERSION 2.8.12)
+    project(example)
+
+    find_package(pybind11 REQUIRED)
+    pybind11_add_module(example example.cpp)
+
+Once detected, the aforementioned ``pybind11_add_module`` can be employed as
+before. The function usage and configuration variables are identical no matter
+if pybind11 is added as a subdirectory or found as an installed package. You
+can refer to the same [cmake_example]_ repository for a full sample project
+-- just swap out ``add_subdirectory`` for ``find_package``.
+
+.. _Config file: https://github.com/pybind/pybind11/blob/master/tools/pybind11Config.cmake.in
+
+Advanced: interface library target
+----------------------------------
+
+When using a version of CMake greater than 3.0, pybind11 can additionally
+be used as a special *interface library* . The target ``pybind11::module``
+is available with pybind11 headers, Python headers and libraries as needed,
+and C++ compile definitions attached. This target is suitable for linking
+to an independently constructed (through ``add_library``, not
+``pybind11_add_module``) target in the consuming project.
+
+.. code-block:: cmake
+
+    cmake_minimum_required(VERSION 3.0)
+    project(example)
+
+    find_package(pybind11 REQUIRED)  # or add_subdirectory(pybind11)
+
+    add_library(example MODULE main.cpp)
+    target_link_libraries(example PRIVATE pybind11::module)
+    set_target_properties(example PROPERTIES PREFIX "${PYTHON_MODULE_PREFIX}"
+                                             SUFFIX "${PYTHON_MODULE_EXTENSION}")
+
+.. warning::
+
+    Since pybind11 is a metatemplate library, it is crucial that certain
+    compiler flags are provided to ensure high quality code generation. In
+    contrast to the ``pybind11_add_module()`` command, the CMake interface
+    library only provides the *minimal* set of parameters to ensure that the
+    code using pybind11 compiles, but it does **not** pass these extra compiler
+    flags (i.e. this is up to you).
+
+    These include Link Time Optimization (``-flto`` on GCC/Clang/ICPC, ``/GL``
+    and ``/LTCG`` on Visual Studio). Default-hidden symbols on GCC/Clang/ICPC
+    (``-fvisibility=hidden``) and .OBJ files with many sections on Visual Studio
+    (``/bigobj``). The :ref:`FAQ <faq:symhidden>` contains an
+    explanation on why these are needed.
+
+Generating binding code automatically
+=====================================
+
+The ``Binder`` project is a tool for automatic generation of pybind11 binding
+code by introspecting existing C++ codebases using LLVM/Clang. See the
+[binder]_ documentation for details.
+
+.. [binder] http://cppbinder.readthedocs.io/en/latest/about.html
diff --git a/pybind11/docs/conf.py b/pybind11/docs/conf.py
index 69f0cb337..0769f20be 100644
--- a/pybind11/docs/conf.py
+++ b/pybind11/docs/conf.py
@@ -48,7 +48,7 @@ master_doc = 'index'
 
 # General information about the project.
 project = 'pybind11'
-copyright = '2015, Wenzel Jakob'
+copyright = '2016, Wenzel Jakob'
 author = 'Wenzel Jakob'
 
 # The version info for the project you're documenting, acts as replacement for
@@ -56,9 +56,9 @@ author = 'Wenzel Jakob'
 # built documents.
 #
 # The short X.Y version.
-version = '1.9'
+version = '2.0'
 # The full version, including alpha/beta/rc tags.
-release = '1.9.dev0'
+release = '2.0.1'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
diff --git a/pybind11/docs/faq.rst b/pybind11/docs/faq.rst
index cf7a650b7..34002b42d 100644
--- a/pybind11/docs/faq.rst
+++ b/pybind11/docs/faq.rst
@@ -153,6 +153,17 @@ will:
    definition is changed, only a subset of the binding code will generally need
    to be recompiled.
 
+"recursive template instantiation exceeded maximum depth of 256"
+================================================================
+
+If you receive an error about excessive recursive template evaluation, try
+specifying a larger value, e.g. ``-ftemplate-depth=1024`` on GCC/Clang. The
+culprit is generally the generation of function signatures at compile time
+using C++14 template metaprogramming.
+
+
+.. _`faq:symhidden`:
+
 How can I create smaller binaries?
 ==================================
 
diff --git a/pybind11/docs/intro.rst b/pybind11/docs/intro.rst
index 429a01cdd..2149c18db 100644
--- a/pybind11/docs/intro.rst
+++ b/pybind11/docs/intro.rst
@@ -17,15 +17,14 @@ compatibility has its cost: arcane template tricks and workarounds are
 necessary to support the oldest and buggiest of compiler specimens. Now that
 C++11-compatible compilers are widely available, this heavy machinery has
 become an excessively large and unnecessary dependency.
-
 Think of this library as a tiny self-contained version of Boost.Python with
 everything stripped away that isn't relevant for binding generation. Without
-comments, the core header files only require ~2.5K lines of code and depend on
-Python (2.7 or 3.x) and the C++ standard library. This compact implementation
-was possible thanks to some of the new C++11 language features (specifically:
-tuples, lambda functions and variadic templates). Since its creation, this
-library has grown beyond Boost.Python in many ways, leading to dramatically
-simpler binding code in many common situations.
+comments, the core header files only require ~4K lines of code and depend on
+Python (2.7 or 3.x, or PyPy2.7 >= 5.7) and the C++ standard library. This
+compact implementation was possible thanks to some of the new C++11 language
+features (specifically: tuples, lambda functions and variadic templates). Since
+its creation, this library has grown beyond Boost.Python in many ways, leading
+to dramatically simpler binding code in many common situations.
 
 Core features
 *************
@@ -51,6 +50,9 @@ Goodies
 *******
 In addition to the core functionality, pybind11 provides some extra goodies:
 
+- Python 2.7, 3.x, and PyPy (PyPy2.7 >= 5.7) are supported with an
+  implementation-agnostic interface.
+
 - It is possible to bind C++11 lambda functions with captured variables. The
   lambda capture data is stored inside the resulting Python function object.
 
@@ -88,6 +90,6 @@ Supported compilers
 *******************
 
 1. Clang/LLVM (any non-ancient version with C++11 support)
-2. GCC (any non-ancient version with C++11 support)
+2. GCC 4.8 or newer
 3. Microsoft Visual Studio 2015 or newer
 4. Intel C++ compiler v15 or newer
diff --git a/pybind11/docs/release.rst b/pybind11/docs/release.rst
index a15504ea8..30d159a6f 100644
--- a/pybind11/docs/release.rst
+++ b/pybind11/docs/release.rst
@@ -1,8 +1,10 @@
 To release a new version of pybind11:
 
 - Update the version number and push to pypi
-    - Update ``pybind11/_version.py`` (set release version, remove 'dev')
-    - Update version in ``docs/conf.py``
+    - Update ``pybind11/_version.py`` (set release version, remove 'dev').
+    - Update ``PYBIND11_VERSION_MAJOR`` etc. in ``include/pybind11/common.h``.
+    - Ensure that all the information in ``setup.py`` is up-to-date.
+    - Update version in ``docs/conf.py``.
     - Tag release date in ``docs/changelog.rst``.
     - ``git add`` and ``git commit``.
     - if new minor version: ``git checkout -b vX.Y``, ``git push -u origin vX.Y``
diff --git a/pybind11/include/pybind11/attr.h b/pybind11/include/pybind11/attr.h
index 1ea925c18..0676d5da6 100644
--- a/pybind11/include/pybind11/attr.h
+++ b/pybind11/include/pybind11/attr.h
@@ -47,6 +47,15 @@ struct multiple_inheritance { };
 /// Annotation which enables dynamic attributes, i.e. adds `__dict__` to a class
 struct dynamic_attr { };
 
+/// Annotation which enables the buffer protocol for a type
+struct buffer_protocol { };
+
+/// Annotation which requests that a special metaclass is created for a type
+struct metaclass { };
+
+/// Annotation to mark enums as an arithmetic type
+struct arithmetic { };
+
 NAMESPACE_BEGIN(detail)
 /* Forward declarations */
 enum op_id : int;
@@ -71,7 +80,7 @@ struct argument_record {
 struct function_record {
     function_record()
         : is_constructor(false), is_stateless(false), is_operator(false),
-          has_args(false), has_kwargs(false) { }
+          has_args(false), has_kwargs(false), is_method(false) { }
 
     /// Function name
     char *name = nullptr; /* why no C++ strings? They generate heavier code.. */
@@ -112,15 +121,15 @@ struct function_record {
     /// True if the function has a '**kwargs' argument
     bool has_kwargs : 1;
 
+    /// True if this is a method
+    bool is_method : 1;
+
     /// Number of arguments
     uint16_t nargs;
 
     /// Python method object
     PyMethodDef *def = nullptr;
 
-    /// Python handle to the associated class (if this is method)
-    handle class_;
-
     /// Python handle to the parent scope (a class or a module)
     handle scope;
 
@@ -133,7 +142,9 @@ struct function_record {
 
 /// Special data structure which (temporarily) holds metadata about a bound class
 struct type_record {
-    PYBIND11_NOINLINE type_record() { }
+    PYBIND11_NOINLINE type_record()
+        : multiple_inheritance(false), dynamic_attr(false),
+          buffer_protocol(false), metaclass(false) { }
 
     /// Handle to the parent scope
     handle scope;
@@ -163,10 +174,16 @@ struct type_record {
     const char *doc = nullptr;
 
     /// Multiple inheritance marker
-    bool multiple_inheritance = false;
+    bool multiple_inheritance : 1;
 
     /// Does the class manage a __dict__?
-    bool dynamic_attr = false;
+    bool dynamic_attr : 1;
+
+    /// Does the class implement the buffer protocol?
+    bool buffer_protocol : 1;
+
+    /// Does the class require its own metaclass?
+    bool metaclass : 1;
 
     PYBIND11_NOINLINE void add_base(const std::type_info *base, void *(*caster)(void *)) {
         auto base_info = detail::get_type_info(*base, false);
@@ -232,7 +249,7 @@ template <> struct process_attribute<sibling> : process_attribute_default<siblin
 
 /// Process an attribute which indicates that this function is a method
 template <> struct process_attribute<is_method> : process_attribute_default<is_method> {
-    static void init(const is_method &s, function_record *r) { r->class_ = s.class_; r->scope = s.class_; }
+    static void init(const is_method &s, function_record *r) { r->is_method = true; r->scope = s.class_; }
 };
 
 /// Process an attribute which indicates the parent scope of a method
@@ -248,7 +265,7 @@ template <> struct process_attribute<is_operator> : process_attribute_default<is
 /// Process a keyword argument attribute (*without* a default value)
 template <> struct process_attribute<arg> : process_attribute_default<arg> {
     static void init(const arg &a, function_record *r) {
-        if (r->class_ && r->args.empty())
+        if (r->is_method && r->args.empty())
             r->args.emplace_back("self", nullptr, handle());
         r->args.emplace_back(a.name, nullptr, handle());
     }
@@ -257,17 +274,17 @@ template <> struct process_attribute<arg> : process_attribute_default<arg> {
 /// Process a keyword argument attribute (*with* a default value)
 template <> struct process_attribute<arg_v> : process_attribute_default<arg_v> {
     static void init(const arg_v &a, function_record *r) {
-        if (r->class_ && r->args.empty())
+        if (r->is_method && r->args.empty())
             r->args.emplace_back("self", nullptr, handle());
 
         if (!a.value) {
 #if !defined(NDEBUG)
             auto descr = "'" + std::string(a.name) + ": " + a.type + "'";
-            if (r->class_) {
+            if (r->is_method) {
                 if (r->name)
-                    descr += " in method '" + (std::string) r->class_.str() + "." + (std::string) r->name + "'";
+                    descr += " in method '" + (std::string) str(r->scope) + "." + (std::string) r->name + "'";
                 else
-                    descr += " in method of '" + (std::string) r->class_.str() + "'";
+                    descr += " in method of '" + (std::string) str(r->scope) + "'";
             } else if (r->name) {
                 descr += " in function named '" + (std::string) r->name + "'";
             }
@@ -306,6 +323,21 @@ struct process_attribute<dynamic_attr> : process_attribute_default<dynamic_attr>
     static void init(const dynamic_attr &, type_record *r) { r->dynamic_attr = true; }
 };
 
+template <>
+struct process_attribute<buffer_protocol> : process_attribute_default<buffer_protocol> {
+    static void init(const buffer_protocol &, type_record *r) { r->buffer_protocol = true; }
+};
+
+template <>
+struct process_attribute<metaclass> : process_attribute_default<metaclass> {
+    static void init(const metaclass &, type_record *r) { r->metaclass = true; }
+};
+
+
+/// Process an 'arithmetic' attribute for enums (does nothing here)
+template <>
+struct process_attribute<arithmetic> : process_attribute_default<arithmetic> {};
+
 /***
  * Process a keep_alive call policy -- invokes keep_alive_impl during the
  * pre-call handler if both Nurse, Patient != 0 and use the post-call handler
diff --git a/pybind11/include/pybind11/cast.h b/pybind11/include/pybind11/cast.h
index dbbeb9225..b953cc897 100644
--- a/pybind11/include/pybind11/cast.h
+++ b/pybind11/include/pybind11/cast.h
@@ -26,6 +26,7 @@ struct type_info {
     void (*init_holder)(PyObject *, const void *);
     std::vector<PyObject *(*)(PyObject *, PyTypeObject *)> implicit_conversions;
     std::vector<std::pair<const std::type_info *, void *(*)(void *)>> implicit_casts;
+    std::vector<bool (*)(PyObject *, void *&)> *direct_conversions;
     buffer_info *(*get_buffer)(PyObject *, void *) = nullptr;
     void *get_buffer_data = nullptr;
     /** A simple type never occurs as a (direct or indirect) parent
@@ -39,12 +40,8 @@ PYBIND11_NOINLINE inline internals &get_internals() {
         return *internals_ptr;
     handle builtins(PyEval_GetBuiltins());
     const char *id = PYBIND11_INTERNALS_ID;
-    capsule caps;
-    if (builtins.contains(id)) {
-        caps = builtins[id];
-    }
-    if (caps.check()) {
-        internals_ptr = caps;
+    if (builtins.contains(id) && isinstance<capsule>(builtins[id])) {
+        internals_ptr = capsule(builtins[id]);
     } else {
         internals_ptr = new internals();
         #if defined(WITH_THREAD)
@@ -90,7 +87,8 @@ PYBIND11_NOINLINE inline detail::type_info* get_type_info(PyTypeObject *type) {
     } while (true);
 }
 
-PYBIND11_NOINLINE inline detail::type_info *get_type_info(const std::type_info &tp, bool throw_if_missing) {
+PYBIND11_NOINLINE inline detail::type_info *get_type_info(const std::type_info &tp,
+                                                          bool throw_if_missing = false) {
     auto &types = get_internals().registered_types_cpp;
 
     auto it = types.find(std::type_index(tp));
@@ -109,6 +107,13 @@ PYBIND11_NOINLINE inline handle get_type_handle(const std::type_info &tp, bool t
     return handle(type_info ? ((PyObject *) type_info->type) : nullptr);
 }
 
+PYBIND11_NOINLINE inline bool isinstance_generic(handle obj, const std::type_info &tp) {
+    handle type = detail::get_type_handle(tp, false);
+    if (!type)
+        return false;
+    return isinstance(obj, type);
+}
+
 PYBIND11_NOINLINE inline std::string error_string() {
     if (!PyErr_Occurred()) {
         PyErr_SetString(PyExc_RuntimeError, "Unknown internal error occurred");
@@ -123,7 +128,36 @@ PYBIND11_NOINLINE inline std::string error_string() {
         errorString += ": ";
     }
     if (scope.value)
-        errorString += (std::string) handle(scope.value).str();
+        errorString += (std::string) str(scope.value);
+
+    PyErr_NormalizeException(&scope.type, &scope.value, &scope.trace);
+
+#if PY_MAJOR_VERSION >= 3
+    if (scope.trace != nullptr)
+        PyException_SetTraceback(scope.value, scope.trace);
+#endif
+
+#if !defined(PYPY_VERSION)
+    if (scope.trace) {
+        PyTracebackObject *trace = (PyTracebackObject *) scope.trace;
+
+        /* Get the deepest trace possible */
+        while (trace->tb_next)
+            trace = trace->tb_next;
+
+        PyFrameObject *frame = trace->tb_frame;
+        errorString += "\n\nAt:\n";
+        while (frame) {
+            int lineno = PyFrame_GetLineNumber(frame);
+            errorString +=
+                "  " + handle(frame->f_code->co_filename).cast<std::string>() +
+                "(" + std::to_string(lineno) + "): " +
+                handle(frame->f_code->co_name).cast<std::string>() + "\n";
+            frame = frame->f_back;
+        }
+        trace = trace->tb_next;
+    }
+#endif
 
     return errorString;
 }
@@ -140,7 +174,9 @@ PYBIND11_NOINLINE inline handle get_object_handle(const void *ptr, const detail:
 }
 
 inline PyThreadState *get_thread_state_unchecked() {
-#if   PY_VERSION_HEX < 0x03000000
+#if defined(PYPY_VERSION)
+    return PyThreadState_GET();
+#elif PY_VERSION_HEX < 0x03000000
     return _PyThreadState_Current;
 #elif PY_VERSION_HEX < 0x03050000
     return (PyThreadState*) _Py_atomic_load_relaxed(&_PyThreadState_Current);
@@ -157,7 +193,7 @@ inline void keep_alive_impl(handle nurse, handle patient);
 class type_caster_generic {
 public:
     PYBIND11_NOINLINE type_caster_generic(const std::type_info &type_info)
-     : typeinfo(get_type_info(type_info, false)) { }
+     : typeinfo(get_type_info(type_info)) { }
 
     PYBIND11_NOINLINE bool load(handle src, bool convert) {
         if (!src)
@@ -188,9 +224,9 @@ public:
 
             /* If this is a python class, also check the parents recursively */
             auto const &type_dict = get_internals().registered_types_py;
-            bool new_style_class = PyType_Check(tobj);
+            bool new_style_class = PyType_Check((PyObject *) tobj);
             if (type_dict.find(tobj) == type_dict.end() && new_style_class && tobj->tp_bases) {
-                tuple parents(tobj->tp_bases, true);
+                auto parents = reinterpret_borrow<tuple>(tobj->tp_bases);
                 for (handle parent : parents) {
                     bool result = load(src, convert, (PyTypeObject *) parent.ptr());
                     if (result)
@@ -211,10 +247,14 @@ public:
         /* Perform an implicit conversion */
         if (convert) {
             for (auto &converter : typeinfo->implicit_conversions) {
-                temp = object(converter(src.ptr(), typeinfo->type), false);
+                temp = reinterpret_steal<object>(converter(src.ptr(), typeinfo->type));
                 if (load(temp, false))
                     return true;
             }
+            for (auto &converter : *typeinfo->direct_conversions) {
+                if (converter(src.ptr(), value))
+                    return true;
+            }
         }
         return false;
     }
@@ -254,7 +294,7 @@ public:
                 return handle((PyObject *) it_i->second).inc_ref();
         }
 
-        object inst(PyType_GenericAlloc(tinfo->type, 0), false);
+        auto inst = reinterpret_steal<object>(PyType_GenericAlloc(tinfo->type, 0));
 
         auto wrapper = (instance<void> *) inst.ptr();
 
@@ -262,44 +302,46 @@ public:
         wrapper->owned = false;
 
         switch (policy) {
-        case return_value_policy::automatic:
-        case return_value_policy::take_ownership:
-            wrapper->value = src;
-            wrapper->owned = true;
-            break;
-
-        case return_value_policy::automatic_reference:
-        case return_value_policy::reference:
-            wrapper->value = src;
-            wrapper->owned = false;
-            break;
-
-        case return_value_policy::copy:
-            if (copy_constructor)
-                wrapper->value = copy_constructor(src);
-            else
-                throw cast_error("return_value_policy = copy, but the object is non-copyable!");
-            wrapper->owned = true;
-            break;
-
-        case return_value_policy::move:
-            if (move_constructor)
-                wrapper->value = move_constructor(src);
-            else if (copy_constructor)
-                wrapper->value = copy_constructor(src);
-            else
-                throw cast_error("return_value_policy = move, but the object is neither movable nor copyable!");
-            wrapper->owned = true;
-            break;
-
-        case return_value_policy::reference_internal:
-            wrapper->value = src;
-            wrapper->owned = false;
-            detail::keep_alive_impl(inst, parent);
-            break;
-
-        default:
-            throw cast_error("unhandled return_value_policy: should not happen!");
+            case return_value_policy::automatic:
+            case return_value_policy::take_ownership:
+                wrapper->value = src;
+                wrapper->owned = true;
+                break;
+
+            case return_value_policy::automatic_reference:
+            case return_value_policy::reference:
+                wrapper->value = src;
+                wrapper->owned = false;
+                break;
+
+            case return_value_policy::copy:
+                if (copy_constructor)
+                    wrapper->value = copy_constructor(src);
+                else
+                    throw cast_error("return_value_policy = copy, but the "
+                                     "object is non-copyable!");
+                wrapper->owned = true;
+                break;
+
+            case return_value_policy::move:
+                if (move_constructor)
+                    wrapper->value = move_constructor(src);
+                else if (copy_constructor)
+                    wrapper->value = copy_constructor(src);
+                else
+                    throw cast_error("return_value_policy = move, but the "
+                                     "object is neither movable nor copyable!");
+                wrapper->owned = true;
+                break;
+
+            case return_value_policy::reference_internal:
+                wrapper->value = src;
+                wrapper->owned = false;
+                detail::keep_alive_impl(inst, parent);
+                break;
+
+            default:
+                throw cast_error("unhandled return_value_policy: should not happen!");
         }
 
         tinfo->init_holder(inst.ptr(), existing_holder);
@@ -321,6 +363,18 @@ using cast_op_type = typename std::conditional<std::is_pointer<typename std::rem
     typename std::add_pointer<intrinsic_t<T>>::type,
     typename std::add_lvalue_reference<intrinsic_t<T>>::type>::type;
 
+// std::is_copy_constructible isn't quite enough: it lets std::vector<T> (and similar) through when
+// T is non-copyable, but code containing such a copy constructor fails to actually compile.
+template <typename T, typename SFINAE = void> struct is_copy_constructible : std::is_copy_constructible<T> {};
+
+// Specialization for types that appear to be copy constructible but also look like stl containers
+// (we specifically check for: has `value_type` and `reference` with `reference = value_type&`): if
+// so, copy constructability depends on whether the value_type is copy constructible.
+template <typename Container> struct is_copy_constructible<Container, enable_if_t<
+        std::is_copy_constructible<Container>::value &&
+        std::is_same<typename Container::value_type &, typename Container::reference>::value
+    >> : std::is_copy_constructible<typename Container::value_type> {};
+
 /// Generic type caster for objects stored on the heap
 template <typename type> class type_caster_base : public type_caster_generic {
     using itype = intrinsic_t<type>;
@@ -336,10 +390,8 @@ public:
         return cast(&src, policy, parent);
     }
 
-    static handle cast(itype &&src, return_value_policy policy, handle parent) {
-        if (policy == return_value_policy::automatic || policy == return_value_policy::automatic_reference)
-            policy = return_value_policy::move;
-        return cast(&src, policy, parent);
+    static handle cast(itype &&src, return_value_policy, handle parent) {
+        return cast(&src, return_value_policy::move, parent);
     }
 
     static handle cast(const itype *src, return_value_policy policy, handle parent) {
@@ -358,20 +410,21 @@ protected:
 #if !defined(_MSC_VER)
     /* Only enabled when the types are {copy,move}-constructible *and* when the type
        does not have a private operator new implementaton. */
-    template <typename T = type> static auto make_copy_constructor(const T *value) -> decltype(new T(*value), Constructor(nullptr)) {
+    template <typename T = type, typename = enable_if_t<is_copy_constructible<T>::value>> static auto make_copy_constructor(const T *value) -> decltype(new T(*value), Constructor(nullptr)) {
         return [](const void *arg) -> void * { return new T(*((const T *) arg)); }; }
     template <typename T = type> static auto make_move_constructor(const T *value) -> decltype(new T(std::move(*((T *) value))), Constructor(nullptr)) {
         return [](const void *arg) -> void * { return (void *) new T(std::move(*((T *) arg))); }; }
 #else
     /* Visual Studio 2015's SFINAE implementation doesn't yet handle the above robustly in all situations.
        Use a workaround that only tests for constructibility for now. */
-    template <typename T = type, typename = enable_if_t<std::is_copy_constructible<T>::value>>
+    template <typename T = type, typename = enable_if_t<is_copy_constructible<T>::value>>
     static Constructor make_copy_constructor(const T *value) {
         return [](const void *arg) -> void * { return new T(*((const T *)arg)); }; }
     template <typename T = type, typename = enable_if_t<std::is_move_constructible<T>::value>>
     static Constructor make_move_constructor(const T *value) {
         return [](const void *arg) -> void * { return (void *) new T(std::move(*((T *)arg))); }; }
 #endif
+
     static Constructor make_copy_constructor(...) { return nullptr; }
     static Constructor make_move_constructor(...) { return nullptr; }
 };
@@ -379,6 +432,14 @@ protected:
 template <typename type, typename SFINAE = void> class type_caster : public type_caster_base<type> { };
 template <typename type> using make_caster = type_caster<intrinsic_t<type>>;
 
+// Shortcut for calling a caster's `cast_op_type` cast operator for casting a type_caster to a T
+template <typename T> typename make_caster<T>::template cast_op_type<T> cast_op(make_caster<T> &caster) {
+    return caster.operator typename make_caster<T>::template cast_op_type<T>();
+}
+template <typename T> typename make_caster<T>::template cast_op_type<T> cast_op(make_caster<T> &&caster) {
+    return cast_op<T>(caster);
+}
+
 template <typename type> class type_caster<std::reference_wrapper<type>> : public type_caster_base<type> {
 public:
     static handle cast(const std::reference_wrapper<type> &src, return_value_policy policy, handle parent) {
@@ -435,7 +496,19 @@ public:
             (std::is_integral<T>::value && sizeof(py_type) != sizeof(T) &&
                (py_value < (py_type) std::numeric_limits<T>::min() ||
                 py_value > (py_type) std::numeric_limits<T>::max()))) {
+#if PY_VERSION_HEX < 0x03000000
+            bool type_error = PyErr_ExceptionMatches(PyExc_SystemError);
+#else
+            bool type_error = PyErr_ExceptionMatches(PyExc_TypeError);
+#endif
             PyErr_Clear();
+            if (type_error && PyNumber_Check(src.ptr())) {
+                auto tmp = reinterpret_borrow<object>(std::is_floating_point<T>::value
+                                                      ? PyNumber_Float(src.ptr())
+                                                      : PyNumber_Long(src.ptr()));
+                PyErr_Clear();
+                return load(tmp, false);
+            }
             return false;
         }
 
@@ -462,15 +535,17 @@ public:
     PYBIND11_TYPE_CASTER(T, _<std::is_integral<T>::value>("int", "float"));
 };
 
-template <> class type_caster<void_type> {
+template<typename T> struct void_caster {
 public:
     bool load(handle, bool) { return false; }
-    static handle cast(void_type, return_value_policy /* policy */, handle /* parent */) {
+    static handle cast(T, return_value_policy /* policy */, handle /* parent */) {
         return none().inc_ref();
     }
-    PYBIND11_TYPE_CASTER(void_type, _("None"));
+    PYBIND11_TYPE_CASTER(T, _("None"));
 };
 
+template <> class type_caster<void_type> : public void_caster<void_type> {};
+
 template <> class type_caster<void> : public type_caster<void_type> {
 public:
     using type_caster<void_type>::cast;
@@ -484,9 +559,8 @@ public:
         }
 
         /* Check if this is a capsule */
-        capsule c(h, true);
-        if (c.check()) {
-            value = (void *) c;
+        if (isinstance<capsule>(h)) {
+            value = reinterpret_borrow<capsule>(h);
             return true;
         }
 
@@ -538,7 +612,7 @@ public:
         if (!src) {
             return false;
         } else if (PyUnicode_Check(load_src.ptr())) {
-            temp = object(PyUnicode_AsUTF8String(load_src.ptr()), false);
+            temp = reinterpret_steal<object>(PyUnicode_AsUTF8String(load_src.ptr()));
             if (!temp) { PyErr_Clear(); return false; }  // UnicodeEncodeError
             load_src = temp;
         }
@@ -579,7 +653,7 @@ public:
         if (!src) {
             return false;
         } else if (!PyUnicode_Check(load_src.ptr())) {
-            temp = object(PyUnicode_FromObject(load_src.ptr()), false);
+            temp = reinterpret_steal<object>(PyUnicode_FromObject(load_src.ptr()));
             if (!temp) { PyErr_Clear(); return false; }
             load_src = temp;
         }
@@ -588,10 +662,10 @@ public:
 #if PY_MAJOR_VERSION >= 3
         buffer = PyUnicode_AsWideCharString(load_src.ptr(), &length);
 #else
-        temp = object(
-            sizeof(wchar_t) == sizeof(short)
-                ? PyUnicode_AsUTF16String(load_src.ptr())
-                : PyUnicode_AsUTF32String(load_src.ptr()), false);
+        temp = reinterpret_steal<object>(PyUnicode_AsEncodedString(
+            load_src.ptr(), sizeof(wchar_t) == sizeof(short)
+            ? "utf16" : "utf32", nullptr));
+
         if (temp) {
             int err = PYBIND11_BYTES_AS_STRING_AND_SIZE(temp.ptr(), (char **) &buffer, &length);
             if (err == -1) { buffer = nullptr; }  // TypeError
@@ -663,17 +737,17 @@ template <typename T1, typename T2> class type_caster<std::pair<T1, T2>> {
     typedef std::pair<T1, T2> type;
 public:
     bool load(handle src, bool convert) {
-        if (!src)
+        if (!isinstance<sequence>(src))
             return false;
-        else if (!PyTuple_Check(src.ptr()) || PyTuple_Size(src.ptr()) != 2)
+        const auto seq = reinterpret_borrow<sequence>(src);
+        if (seq.size() != 2)
             return false;
-        return  first.load(PyTuple_GET_ITEM(src.ptr(), 0), convert) &&
-               second.load(PyTuple_GET_ITEM(src.ptr(), 1), convert);
+        return first.load(seq[0], convert) && second.load(seq[1], convert);
     }
 
     static handle cast(const type &src, return_value_policy policy, handle parent) {
-        object o1 = object(make_caster<T1>::cast(src.first, policy, parent), false);
-        object o2 = object(make_caster<T2>::cast(src.second, policy, parent), false);
+        auto o1 = reinterpret_steal<object>(make_caster<T1>::cast(src.first, policy, parent));
+        auto o2 = reinterpret_steal<object>(make_caster<T2>::cast(src.second, policy, parent));
         if (!o1 || !o2)
             return handle();
         tuple result(2);
@@ -691,8 +765,7 @@ public:
     template <typename T> using cast_op_type = type;
 
     operator type() {
-        return type(first.operator typename make_caster<T1>::template cast_op_type<T1>(),
-                    second.operator typename make_caster<T2>::template cast_op_type<T2>());
+        return type(cast_op<T1>(first), cast_op<T2>(second));
     }
 protected:
     make_caster<T1> first;
@@ -700,95 +773,54 @@ protected:
 };
 
 template <typename... Tuple> class type_caster<std::tuple<Tuple...>> {
-    typedef std::tuple<Tuple...> type;
-    typedef std::tuple<intrinsic_t<Tuple>...> itype;
-    typedef std::tuple<args> args_type;
-    typedef std::tuple<args, kwargs> args_kwargs_type;
-public:
-    enum { size = sizeof...(Tuple) };
-
-    static constexpr const bool has_kwargs = std::is_same<itype, args_kwargs_type>::value;
-    static constexpr const bool has_args = has_kwargs || std::is_same<itype, args_type>::value;
+    using type = std::tuple<Tuple...>;
+    using indices = make_index_sequence<sizeof...(Tuple)>;
+    static constexpr auto size = sizeof...(Tuple);
 
+public:
     bool load(handle src, bool convert) {
-        if (!src || !PyTuple_Check(src.ptr()) || PyTuple_GET_SIZE(src.ptr()) != size)
+        if (!isinstance<sequence>(src))
             return false;
-        return load(src, convert, typename make_index_sequence<sizeof...(Tuple)>::type());
-    }
-
-    template <typename T = itype, enable_if_t<
-        !std::is_same<T, args_type>::value &&
-        !std::is_same<T, args_kwargs_type>::value, int> = 0>
-    bool load_args(handle args, handle, bool convert) {
-        return load(args, convert, typename make_index_sequence<sizeof...(Tuple)>::type());
-    }
-
-    template <typename T = itype, enable_if_t<std::is_same<T, args_type>::value, int> = 0>
-    bool load_args(handle args, handle, bool convert) {
-        std::get<0>(value).load(args, convert);
-        return true;
-    }
-
-    template <typename T = itype, enable_if_t<std::is_same<T, args_kwargs_type>::value, int> = 0>
-    bool load_args(handle args, handle kwargs, bool convert) {
-        std::get<0>(value).load(args, convert);
-        std::get<1>(value).load(kwargs, convert);
-        return true;
+        const auto seq = reinterpret_borrow<sequence>(src);
+        if (seq.size() != size)
+            return false;
+        return load_impl(seq, convert, indices{});
     }
 
     static handle cast(const type &src, return_value_policy policy, handle parent) {
-        return cast(src, policy, parent, typename make_index_sequence<size>::type());
-    }
-
-    static PYBIND11_DESCR element_names() {
-        return detail::concat(make_caster<Tuple>::name()...);
+        return cast_impl(src, policy, parent, indices{});
     }
 
     static PYBIND11_DESCR name() {
-        return type_descr(_("Tuple[") + element_names() + _("]"));
-    }
-
-    template <typename ReturnValue, typename Func> enable_if_t<!std::is_void<ReturnValue>::value, ReturnValue> call(Func &&f) {
-        return call<ReturnValue>(std::forward<Func>(f), typename make_index_sequence<sizeof...(Tuple)>::type());
-    }
-
-    template <typename ReturnValue, typename Func> enable_if_t<std::is_void<ReturnValue>::value, void_type> call(Func &&f) {
-        call<ReturnValue>(std::forward<Func>(f), typename make_index_sequence<sizeof...(Tuple)>::type());
-        return void_type();
+        return type_descr(_("Tuple[") + detail::concat(make_caster<Tuple>::name()...) + _("]"));
     }
 
     template <typename T> using cast_op_type = type;
 
-    operator type() {
-        return cast(typename make_index_sequence<sizeof...(Tuple)>::type());
-    }
+    operator type() { return implicit_cast(indices{}); }
 
 protected:
-    template <typename ReturnValue, typename Func, size_t ... Index> ReturnValue call(Func &&f, index_sequence<Index...>) {
-        return f(std::get<Index>(value)
-            .operator typename make_caster<Tuple>::template cast_op_type<Tuple>()...);
-    }
+    template <size_t... Is>
+    type implicit_cast(index_sequence<Is...>) { return type(cast_op<Tuple>(std::get<Is>(value))...); }
 
-    template <size_t ... Index> type cast(index_sequence<Index...>) {
-        return type(std::get<Index>(value)
-            .operator typename make_caster<Tuple>::template cast_op_type<Tuple>()...);
-    }
+    static constexpr bool load_impl(const sequence &, bool, index_sequence<>) { return true; }
 
-    template <size_t ... Indices> bool load(handle src, bool convert, index_sequence<Indices...>) {
-        std::array<bool, size> success {{
-            std::get<Indices>(value).load(PyTuple_GET_ITEM(src.ptr(), Indices), convert)...
-        }};
-        (void) convert; /* avoid a warning when the tuple is empty */
-        for (bool r : success)
+    template <size_t... Is>
+    bool load_impl(const sequence &seq, bool convert, index_sequence<Is...>) {
+        for (bool r : {std::get<Is>(value).load(seq[Is], convert)...})
             if (!r)
                 return false;
         return true;
     }
 
+    static handle cast_impl(const type &, return_value_policy, handle,
+                            index_sequence<>) { return tuple().release(); }
+
     /* Implementation: Convert a C++ tuple into a Python tuple */
-    template <size_t ... Indices> static handle cast(const type &src, return_value_policy policy, handle parent, index_sequence<Indices...>) {
+    template <size_t... Is>
+    static handle cast_impl(const type &src, return_value_policy policy, handle parent, index_sequence<Is...>) {
         std::array<object, size> entries {{
-            object(make_caster<Tuple>::cast(std::get<Indices>(src), policy, parent), false)...
+            reinterpret_steal<object>(make_caster<Tuple>::cast(std::get<Is>(src), policy, parent))...
         }};
         for (const auto &entry: entries)
             if (!entry)
@@ -800,7 +832,6 @@ protected:
         return result.release();
     }
 
-protected:
     std::tuple<make_caster<Tuple>...> value;
 };
 
@@ -828,26 +859,18 @@ public:
 
         if (typeinfo->simple_type) { /* Case 1: no multiple inheritance etc. involved */
             /* Check if we can safely perform a reinterpret-style cast */
-            if (PyType_IsSubtype(tobj, typeinfo->type)) {
-                auto inst = (instance<type, holder_type> *) src.ptr();
-                value = (void *) inst->value;
-                holder = inst->holder;
-                return true;
-            }
+            if (PyType_IsSubtype(tobj, typeinfo->type))
+                return load_value_and_holder(src);
         } else { /* Case 2: multiple inheritance */
             /* Check if we can safely perform a reinterpret-style cast */
-            if (tobj == typeinfo->type) {
-                auto inst = (instance<type, holder_type> *) src.ptr();
-                value = (void *) inst->value;
-                holder = inst->holder;
-                return true;
-            }
+            if (tobj == typeinfo->type)
+                return load_value_and_holder(src);
 
             /* If this is a python class, also check the parents recursively */
             auto const &type_dict = get_internals().registered_types_py;
-            bool new_style_class = PyType_Check(tobj);
+            bool new_style_class = PyType_Check((PyObject *) tobj);
             if (type_dict.find(tobj) == type_dict.end() && new_style_class && tobj->tp_bases) {
-                tuple parents(tobj->tp_bases, true);
+                auto parents = reinterpret_borrow<tuple>(tobj->tp_bases);
                 for (handle parent : parents) {
                     bool result = load(src, convert, (PyTypeObject *) parent.ptr());
                     if (result)
@@ -861,7 +884,7 @@ public:
 
         if (convert) {
             for (auto &converter : typeinfo->implicit_conversions) {
-                temp = object(converter(src.ptr(), typeinfo->type), false);
+                temp = reinterpret_steal<object>(converter(src.ptr(), typeinfo->type));
                 if (load(temp, false))
                     return true;
             }
@@ -870,6 +893,22 @@ public:
         return false;
     }
 
+    bool load_value_and_holder(handle src) {
+        auto inst = (instance<type, holder_type> *) src.ptr();
+        value = (void *) inst->value;
+        if (inst->holder_constructed) {
+            holder = inst->holder;
+            return true;
+        } else {
+            throw cast_error("Unable to cast from non-held to held instance (T& to Holder<T>) "
+#if defined(NDEBUG)
+                             "(compile in debug mode for type information)");
+#else
+                             "of type '" + type_id<holder_type>() + "''");
+#endif
+        }
+    }
+
     template <typename T = holder_type, detail::enable_if_t<!std::is_constructible<T, const T &, type*>::value, int> = 0>
     bool try_implicit_casts(handle, bool) { return false; }
 
@@ -913,10 +952,14 @@ protected:
 template <typename T>
 class type_caster<std::shared_ptr<T>> : public type_caster_holder<T, std::shared_ptr<T>> { };
 
+template <typename T, bool Value = false> struct always_construct_holder { static constexpr bool value = Value; };
+
 /// Create a specialization for custom holder types (silently ignores std::shared_ptr)
-#define PYBIND11_DECLARE_HOLDER_TYPE(type, holder_type) \
+#define PYBIND11_DECLARE_HOLDER_TYPE(type, holder_type, ...) \
     namespace pybind11 { namespace detail { \
     template <typename type> \
+    struct always_construct_holder<holder_type> : always_construct_holder<void, ##__VA_ARGS__>  { }; \
+    template <typename type> \
     class type_caster<holder_type, enable_if_t<!is_shared_ptr<holder_type>::value>> \
         : public type_caster_holder<type, holder_type> { }; \
     }}
@@ -934,13 +977,17 @@ template <> struct handle_type_name<args> { static PYBIND11_DESCR name() { retur
 template <> struct handle_type_name<kwargs> { static PYBIND11_DESCR name() { return _("**kwargs"); } };
 
 template <typename type>
-struct type_caster<type, enable_if_t<is_pyobject<type>::value>> {
-public:
-    template <typename T = type, enable_if_t<!std::is_base_of<object, T>::value, int> = 0>
-    bool load(handle src, bool /* convert */) { value = type(src); return value.check(); }
+struct pyobject_caster {
+    template <typename T = type, enable_if_t<std::is_same<T, handle>::value, int> = 0>
+    bool load(handle src, bool /* convert */) { value = src; return static_cast<bool>(value); }
 
     template <typename T = type, enable_if_t<std::is_base_of<object, T>::value, int> = 0>
-    bool load(handle src, bool /* convert */) { value = type(src, true); return value.check(); }
+    bool load(handle src, bool /* convert */) {
+        if (!isinstance<type>(src))
+            return false;
+        value = reinterpret_borrow<type>(src);
+        return true;
+    }
 
     static handle cast(const handle &src, return_value_policy /* policy */, handle /* parent */) {
         return src.inc_ref();
@@ -948,6 +995,9 @@ public:
     PYBIND11_TYPE_CASTER(type, handle_type_name<type>::name());
 };
 
+template <typename T>
+class type_caster<T, enable_if_t<is_pyobject<T>::value>> : public pyobject_caster<T> { };
+
 // Our conditions for enabling moving are quite restrictive:
 // At compile time:
 // - T needs to be a non-const, non-pointer, non-reference type
@@ -957,23 +1007,24 @@ public:
 // - if the type is non-copy-constructible, the object must be the sole owner of the type (i.e. it
 //   must have ref_count() == 1)h
 // If any of the above are not satisfied, we fall back to copying.
-template <typename T, typename SFINAE = void> struct move_is_plain_type : std::false_type {};
-template <typename T> struct move_is_plain_type<T, enable_if_t<
-        !std::is_void<T>::value && !std::is_pointer<T>::value && !std::is_reference<T>::value && !std::is_const<T>::value
-    >> : std::true_type { };
+template <typename T> using move_is_plain_type = none_of<
+    std::is_void<T>, std::is_pointer<T>, std::is_reference<T>, std::is_const<T>
+>;
 template <typename T, typename SFINAE = void> struct move_always : std::false_type {};
-template <typename T> struct move_always<T, enable_if_t<
-        move_is_plain_type<T>::value &&
-        !std::is_copy_constructible<T>::value && std::is_move_constructible<T>::value &&
-        std::is_same<decltype(std::declval<type_caster<T>>().operator T&()), T&>::value
-    >> : std::true_type { };
+template <typename T> struct move_always<T, enable_if_t<all_of<
+    move_is_plain_type<T>,
+    negation<std::is_copy_constructible<T>>,
+    std::is_move_constructible<T>,
+    std::is_same<decltype(std::declval<make_caster<T>>().operator T&()), T&>
+>::value>> : std::true_type {};
 template <typename T, typename SFINAE = void> struct move_if_unreferenced : std::false_type {};
-template <typename T> struct move_if_unreferenced<T, enable_if_t<
-        move_is_plain_type<T>::value &&
-        !move_always<T>::value && std::is_move_constructible<T>::value &&
-        std::is_same<decltype(std::declval<type_caster<T>>().operator T&()), T&>::value
-    >> : std::true_type { };
-template <typename T> using move_never = std::integral_constant<bool, !move_always<T>::value && !move_if_unreferenced<T>::value>;
+template <typename T> struct move_if_unreferenced<T, enable_if_t<all_of<
+    move_is_plain_type<T>,
+    negation<move_always<T>>,
+    std::is_move_constructible<T>,
+    std::is_same<decltype(std::declval<make_caster<T>>().operator T&()), T&>
+>::value>> : std::true_type {};
+template <typename T> using move_never = none_of<move_always<T>, move_if_unreferenced<T>>;
 
 // Detect whether returning a `type` from a cast on type's type_caster is going to result in a
 // reference or pointer to a local variable of the type_caster.  Basically, only
@@ -991,7 +1042,7 @@ template <typename T, typename SFINAE> type_caster<T, SFINAE> &load_type(type_ca
         throw cast_error("Unable to cast Python instance to C++ type (compile in debug mode for details)");
 #else
         throw cast_error("Unable to cast Python instance of type " +
-            (std::string) handle.get_type().str() + " to C++ type '" + type_id<T>() + "''");
+            (std::string) str(handle.get_type()) + " to C++ type '" + type_id<T>() + "''");
 #endif
     }
     return conv;
@@ -1005,34 +1056,41 @@ template <typename T> make_caster<T> load_type(const handle &handle) {
 
 NAMESPACE_END(detail)
 
-template <typename T> T cast(const handle &handle) {
-    static_assert(!detail::cast_is_temporary_value_reference<T>::value,
+// pytype -> C++ type
+template <typename T, detail::enable_if_t<!detail::is_pyobject<T>::value, int> = 0>
+T cast(const handle &handle) {
+    using namespace detail;
+    static_assert(!cast_is_temporary_value_reference<T>::value,
             "Unable to cast type to reference: value is local to type caster");
-    using type_caster = detail::make_caster<T>;
-    return detail::load_type<T>(handle).operator typename type_caster::template cast_op_type<T>();
+    return cast_op<T>(load_type<T>(handle));
 }
 
-template <typename T> object cast(const T &value,
-        return_value_policy policy = return_value_policy::automatic_reference,
-        handle parent = handle()) {
+// pytype -> pytype (calls converting constructor)
+template <typename T, detail::enable_if_t<detail::is_pyobject<T>::value, int> = 0>
+T cast(const handle &handle) { return T(reinterpret_borrow<object>(handle)); }
+
+// C++ type -> py::object
+template <typename T, detail::enable_if_t<!detail::is_pyobject<T>::value, int> = 0>
+object cast(const T &value, return_value_policy policy = return_value_policy::automatic_reference,
+            handle parent = handle()) {
     if (policy == return_value_policy::automatic)
         policy = std::is_pointer<T>::value ? return_value_policy::take_ownership : return_value_policy::copy;
     else if (policy == return_value_policy::automatic_reference)
         policy = std::is_pointer<T>::value ? return_value_policy::reference : return_value_policy::copy;
-    return object(detail::make_caster<T>::cast(value, policy, parent), false);
+    return reinterpret_steal<object>(detail::make_caster<T>::cast(value, policy, parent));
 }
 
 template <typename T> T handle::cast() const { return pybind11::cast<T>(*this); }
 template <> inline void handle::cast() const { return; }
 
 template <typename T>
-detail::enable_if_t<detail::move_always<T>::value || detail::move_if_unreferenced<T>::value, T> move(object &&obj) {
+detail::enable_if_t<!detail::move_never<T>::value, T> move(object &&obj) {
     if (obj.ref_count() > 1)
 #if defined(NDEBUG)
         throw cast_error("Unable to cast Python instance to C++ rvalue: instance has multiple references"
             " (compile in debug mode for details)");
 #else
-        throw cast_error("Unable to move from Python " + (std::string) obj.get_type().str() +
+        throw cast_error("Unable to move from Python " + (std::string) str(obj.get_type()) +
                 " instance to C++ " + type_id<T>() + " instance: instance has multiple references");
 #endif
 
@@ -1066,6 +1124,10 @@ template <> inline void object::cast() && { return; }
 
 NAMESPACE_BEGIN(detail)
 
+// Declared in pytypes.h:
+template <typename T, enable_if_t<!is_pyobject<T>::value, int>>
+object object_or_cast(T &&o) { return pybind11::cast(std::forward<T>(o)); }
+
 struct overload_unused {}; // Placeholder type for the unneeded (and dead code) static variable in the OVERLOAD_INT macro
 template <typename ret_type> using overload_caster_t = conditional_t<
     cast_is_temporary_value_reference<ret_type>::value, make_caster<ret_type>, overload_unused>;
@@ -1073,7 +1135,7 @@ template <typename ret_type> using overload_caster_t = conditional_t<
 // Trampoline use: for reference/pointer types to value-converted values, we do a value cast, then
 // store the result in the given variable.  For other types, this is a no-op.
 template <typename T> enable_if_t<cast_is_temporary_value_reference<T>::value, T> cast_ref(object &&o, make_caster<T> &caster) {
-    return load_type(caster, o).operator typename make_caster<T>::template cast_op_type<T>();
+    return cast_op<T>(load_type(caster, o));
 }
 template <typename T> enable_if_t<!cast_is_temporary_value_reference<T>::value, T> cast_ref(object &&, overload_unused &) {
     pybind11_fail("Internal error: cast_ref fallback invoked"); }
@@ -1093,8 +1155,8 @@ template <return_value_policy policy = return_value_policy::automatic_reference,
           typename... Args> tuple make_tuple(Args&&... args_) {
     const size_t size = sizeof...(Args);
     std::array<object, size> args {
-        { object(detail::make_caster<Args>::cast(
-            std::forward<Args>(args_), policy, nullptr), false)... }
+        { reinterpret_steal<object>(detail::make_caster<Args>::cast(
+            std::forward<Args>(args_), policy, nullptr))... }
     };
     for (auto &arg_value : args) {
         if (!arg_value) {
@@ -1126,7 +1188,9 @@ struct arg_v : arg {
     template <typename T>
     arg_v(const char *name, T &&x, const char *descr = nullptr)
         : arg(name),
-          value(detail::make_caster<T>::cast(x, return_value_policy::automatic, handle()), false),
+          value(reinterpret_steal<object>(
+              detail::make_caster<T>::cast(x, return_value_policy::automatic, {})
+          )),
           descr(descr)
 #if !defined(NDEBUG)
         , type(type_id<T>())
@@ -1152,6 +1216,70 @@ constexpr arg operator"" _a(const char *name, size_t) { return arg(name); }
 }
 
 NAMESPACE_BEGIN(detail)
+
+/// Helper class which loads arguments for C++ functions called from Python
+template <typename... Args>
+class argument_loader {
+    using itypes = type_list<intrinsic_t<Args>...>;
+    using indices = make_index_sequence<sizeof...(Args)>;
+
+public:
+    argument_loader() : value() {} // Helps gcc-7 properly initialize value
+
+    static constexpr auto has_kwargs = std::is_same<itypes, type_list<args, kwargs>>::value;
+    static constexpr auto has_args = has_kwargs || std::is_same<itypes, type_list<args>>::value;
+
+    static PYBIND11_DESCR arg_names() { return detail::concat(make_caster<Args>::name()...); }
+
+    bool load_args(handle args, handle kwargs) {
+        return load_impl(args, kwargs, itypes{});
+    }
+
+    template <typename Return, typename Func>
+    enable_if_t<!std::is_void<Return>::value, Return> call(Func &&f) {
+        return call_impl<Return>(std::forward<Func>(f), indices{});
+    }
+
+    template <typename Return, typename Func>
+    enable_if_t<std::is_void<Return>::value, void_type> call(Func &&f) {
+        call_impl<Return>(std::forward<Func>(f), indices{});
+        return void_type();
+    }
+
+private:
+    bool load_impl(handle args_, handle, type_list<args>) {
+        std::get<0>(value).load(args_, true);
+        return true;
+    }
+
+    bool load_impl(handle args_, handle kwargs_, type_list<args, kwargs>) {
+        std::get<0>(value).load(args_, true);
+        std::get<1>(value).load(kwargs_, true);
+        return true;
+    }
+
+    bool load_impl(handle args, handle, ... /* anything else */) {
+        return load_impl_sequence(args, indices{});
+    }
+
+    static bool load_impl_sequence(handle, index_sequence<>) { return true; }
+
+    template <size_t... Is>
+    bool load_impl_sequence(handle src, index_sequence<Is...>) {
+        for (bool r : {std::get<Is>(value).load(PyTuple_GET_ITEM(src.ptr(), Is), true)...})
+            if (!r)
+                return false;
+        return true;
+    }
+
+    template <typename Return, typename Func, size_t... Is>
+    Return call_impl(Func &&f, index_sequence<Is...>) {
+        return std::forward<Func>(f)(cast_op<Args>(std::get<Is>(value))...);
+    }
+
+    std::tuple<make_caster<Args>...> value;
+};
+
 NAMESPACE_BEGIN(constexpr_impl)
 /// Implementation details for constexpr functions
 constexpr int first(int i) { return i; }
@@ -1187,10 +1315,10 @@ public:
 
     /// Call a Python function and pass the collected arguments
     object call(PyObject *ptr) const {
-        auto result = object(PyObject_CallObject(ptr, m_args.ptr()), false);
+        PyObject *result = PyObject_CallObject(ptr, m_args.ptr());
         if (!result)
             throw error_already_set();
-        return result;
+        return reinterpret_steal<object>(result);
     }
 
 private:
@@ -1209,7 +1337,7 @@ public:
         int _[] = { 0, (process(args_list, std::forward<Ts>(values)), 0)... };
         ignore_unused(_);
 
-        m_args = object(PyList_AsTuple(args_list.ptr()), false);
+        m_args = std::move(args_list);
     }
 
     const tuple &args() const & { return m_args; }
@@ -1220,16 +1348,16 @@ public:
 
     /// Call a Python function and pass the collected arguments
     object call(PyObject *ptr) const {
-        auto result = object(PyObject_Call(ptr, m_args.ptr(), m_kwargs.ptr()), false);
+        PyObject *result = PyObject_Call(ptr, m_args.ptr(), m_kwargs.ptr());
         if (!result)
             throw error_already_set();
-        return result;
+        return reinterpret_steal<object>(result);
     }
 
 private:
     template <typename T>
     void process(list &args_list, T &&x) {
-        auto o = object(detail::make_caster<T>::cast(std::forward<T>(x), policy, nullptr), false);
+        auto o = reinterpret_steal<object>(detail::make_caster<T>::cast(std::forward<T>(x), policy, {}));
         if (!o) {
 #if defined(NDEBUG)
             argument_cast_error();
@@ -1266,12 +1394,12 @@ private:
     void process(list &/*args_list*/, detail::kwargs_proxy kp) {
         if (!kp)
             return;
-        for (const auto &k : dict(kp, true)) {
+        for (const auto &k : reinterpret_borrow<dict>(kp)) {
             if (m_kwargs.contains(k.first)) {
 #if defined(NDEBUG)
                 multiple_values_error();
 #else
-                multiple_values_error(k.first.str());
+                multiple_values_error(str(k.first));
 #endif
             }
             m_kwargs[k.first] = k.second;
@@ -1304,14 +1432,14 @@ private:
 
 /// Collect only positional arguments for a Python function call
 template <return_value_policy policy, typename... Args,
-          typename = enable_if_t<all_of_t<is_positional, Args...>::value>>
+          typename = enable_if_t<all_of<is_positional<Args>...>::value>>
 simple_collector<policy> collect_arguments(Args &&...args) {
     return simple_collector<policy>(std::forward<Args>(args)...);
 }
 
 /// Collect all arguments, including keywords and unpacking (only instantiated when needed)
 template <return_value_policy policy, typename... Args,
-          typename = enable_if_t<!all_of_t<is_positional, Args...>::value>>
+          typename = enable_if_t<!all_of<is_positional<Args>...>::value>>
 unpacking_collector<policy> collect_arguments(Args &&...args) {
     // Following argument order rules for generalized unpacking according to PEP 448
     static_assert(
diff --git a/pybind11/include/pybind11/common.h b/pybind11/include/pybind11/common.h
index 6f79f91ba..c0c71b131 100644
--- a/pybind11/include/pybind11/common.h
+++ b/pybind11/include/pybind11/common.h
@@ -16,6 +16,18 @@
 #  define NAMESPACE_END(name) }
 #endif
 
+// Neither MSVC nor Intel support enough of C++14 yet (in particular, as of MSVC 2015 and ICC 17
+// beta, neither support extended constexpr, which we rely on in descr.h), so don't enable pybind
+// CPP14 features for them.
+#if !defined(_MSC_VER) && !defined(__INTEL_COMPILER)
+#  if __cplusplus >= 201402L
+#    define PYBIND11_CPP14
+#    if __cplusplus > 201402L /* Temporary: should be updated to >= the final C++17 value once known */
+#      define PYBIND11_CPP17
+#    endif
+#  endif
+#endif
+
 #if !defined(PYBIND11_EXPORT)
 #  if defined(WIN32) || defined(_WIN32)
 #    define PYBIND11_EXPORT __declspec(dllexport)
@@ -30,7 +42,7 @@
 #  define PYBIND11_NOINLINE __attribute__ ((noinline))
 #endif
 
-#if __cplusplus > 201103L
+#if defined(PYBIND11_CPP14)
 #  define PYBIND11_DEPRECATED(reason) [[deprecated(reason)]]
 #elif defined(__clang__)
 #  define PYBIND11_DEPRECATED(reason) __attribute__((deprecated(reason)))
@@ -40,9 +52,9 @@
 #  define PYBIND11_DEPRECATED(reason) __declspec(deprecated)
 #endif
 
-#define PYBIND11_VERSION_MAJOR 1
-#define PYBIND11_VERSION_MINOR 9
-#define PYBIND11_VERSION_PATCH dev0
+#define PYBIND11_VERSION_MAJOR 2
+#define PYBIND11_VERSION_MINOR 0
+#define PYBIND11_VERSION_PATCH 1
 
 /// Include Python header, disable linking to pythonX_d.lib on Windows in debug mode
 #if defined(_MSC_VER)
@@ -81,6 +93,7 @@
 #  pragma warning(pop)
 #endif
 
+#include <cstddef>
 #include <forward_list>
 #include <vector>
 #include <string>
@@ -98,7 +111,6 @@
 #define PYBIND11_BYTES_FROM_STRING_AND_SIZE PyBytes_FromStringAndSize
 #define PYBIND11_BYTES_AS_STRING_AND_SIZE PyBytes_AsStringAndSize
 #define PYBIND11_BYTES_AS_STRING PyBytes_AsString
-#define PYBIND11_BYTES_CHECK PyBytes_Check
 #define PYBIND11_LONG_CHECK(o) PyLong_Check(o)
 #define PYBIND11_LONG_AS_LONGLONG(o) PyLong_AsLongLong(o)
 #define PYBIND11_LONG_AS_UNSIGNED_LONGLONG(o) PyLong_AsUnsignedLongLong(o)
@@ -117,7 +129,6 @@
 #define PYBIND11_BYTES_FROM_STRING_AND_SIZE PyString_FromStringAndSize
 #define PYBIND11_BYTES_AS_STRING_AND_SIZE PyString_AsStringAndSize
 #define PYBIND11_BYTES_AS_STRING PyString_AsString
-#define PYBIND11_BYTES_CHECK PyString_Check
 #define PYBIND11_LONG_CHECK(o) (PyInt_Check(o) || PyLong_Check(o))
 #define PYBIND11_LONG_AS_LONGLONG(o) (PyInt_Check(o) ? (long long) PyLong_AsLong(o) : PyLong_AsLongLong(o))
 #define PYBIND11_LONG_AS_UNSIGNED_LONGLONG(o) (PyInt_Check(o) ? (unsigned long long) PyLong_AsUnsignedLong(o) : PyLong_AsUnsignedLongLong(o))
@@ -168,9 +179,21 @@ extern "C" {
     }                                                                          \
     PyObject *pybind11_init()
 
+// Function return value and argument type deduction support.  When compiling under C++17 these
+// differ as C++17 makes the noexcept specifier part of the function type, while it is not part of
+// the type under earlier standards.
+#ifdef __cpp_noexcept_function_type
+#  define PYBIND11_NOEXCEPT_TPL_ARG , bool NoExceptions
+#  define PYBIND11_NOEXCEPT_SPECIFIER noexcept(NoExceptions)
+#else
+#  define PYBIND11_NOEXCEPT_TPL_ARG
+#  define PYBIND11_NOEXCEPT_SPECIFIER
+#endif
+
 NAMESPACE_BEGIN(pybind11)
 
-typedef Py_ssize_t ssize_t;
+using ssize_t = Py_ssize_t;
+using size_t  = std::size_t;
 
 /// Approach used to cast a previously unknown C++ instance into a Python object
 enum class return_value_policy : uint8_t {
@@ -299,7 +322,7 @@ template <typename type> struct instance_essentials {
     type *value;
     PyObject *weakrefs;
     bool owned : 1;
-    bool constructed : 1;
+    bool holder_constructed : 1;
 };
 
 /// PyObject wrapper around generic types, includes a special holder type that is responsible for lifetime management
@@ -308,7 +331,7 @@ template <typename type, typename holder_type = std::unique_ptr<type>> struct in
 };
 
 struct overload_hash {
-    inline std::size_t operator()(const std::pair<const PyObject *, const char *>& v) const {
+    inline size_t operator()(const std::pair<const PyObject *, const char *>& v) const {
         size_t value = std::hash<const void *>()(v.first);
         value ^= std::hash<const void *>()(v.second)  + 0x9e3779b9 + (value<<6) + (value>>2);
         return value;
@@ -321,7 +344,9 @@ struct internals {
     std::unordered_map<const void *, void*> registered_types_py;       // PyTypeObject* -> type_info
     std::unordered_multimap<const void *, void*> registered_instances; // void * -> PyObject*
     std::unordered_set<std::pair<const PyObject *, const char *>, overload_hash> inactive_overload_cache;
+    std::unordered_map<std::type_index, std::vector<bool (*)(PyObject *, void *&)>> direct_conversions;
     std::forward_list<void (*) (std::exception_ptr)> registered_exception_translators;
+    std::unordered_map<std::string, void *> shared_data; // Custom data to be shared across extensions
 #if defined(WITH_THREAD)
     decltype(PyThread_create_key()) tstate = 0; // Usually an int but a long on Cygwin64 with Python 3.x
     PyInterpreterState *istate = nullptr;
@@ -331,10 +356,51 @@ struct internals {
 /// Return a reference to the current 'internals' information
 inline internals &get_internals();
 
-/// Index sequence for convenient template metaprogramming involving tuples
+/// from __cpp_future__ import (convenient aliases from C++14/17)
+#ifdef PYBIND11_CPP14
+using std::enable_if_t;
+using std::conditional_t;
+#else
+template <bool B, typename T = void> using enable_if_t = typename std::enable_if<B, T>::type;
+template <bool B, typename T, typename F> using conditional_t = typename std::conditional<B, T, F>::type;
+#endif
+
+/// Index sequences
+#if defined(PYBIND11_CPP14) || defined(_MSC_VER)
+using std::index_sequence;
+using std::make_index_sequence;
+#else
 template<size_t ...> struct index_sequence  { };
-template<size_t N, size_t ...S> struct make_index_sequence : make_index_sequence <N - 1, N - 1, S...> { };
-template<size_t ...S> struct make_index_sequence <0, S...> { typedef index_sequence<S...> type; };
+template<size_t N, size_t ...S> struct make_index_sequence_impl : make_index_sequence_impl <N - 1, N - 1, S...> { };
+template<size_t ...S> struct make_index_sequence_impl <0, S...> { typedef index_sequence<S...> type; };
+template<size_t N> using make_index_sequence = typename make_index_sequence_impl<N>::type;
+#endif
+
+#if defined(PYBIND11_CPP17) || defined(_MSC_VER)
+using std::bool_constant;
+using std::negation;
+#else
+template <bool B> using bool_constant = std::integral_constant<bool, B>;
+template <class T> using negation = bool_constant<!T::value>;
+#endif
+
+/// Compile-time all/any/none of that check the ::value of all template types
+#ifdef PYBIND11_CPP17
+template <class... Ts> using all_of = bool_constant<(Ts::value && ...)>;
+template <class... Ts> using any_of = bool_constant<(Ts::value || ...)>;
+#elif !defined(_MSC_VER)
+template <bool...> struct bools {};
+template <class... Ts> using all_of = std::is_same<
+    bools<Ts::value..., true>,
+    bools<true, Ts::value...>>;
+template <class... Ts> using any_of = negation<all_of<negation<Ts>...>>;
+#else
+// MSVC has trouble with the above, but supports std::conjunction, which we can use instead (albeit
+// at a slight loss of compilation efficiency).
+template <class... Ts> using all_of = std::conjunction<Ts...>;
+template <class... Ts> using any_of = std::disjunction<Ts...>;
+#endif
+template <class... Ts> using none_of = negation<any_of<Ts...>>;
 
 /// Strip the class from a method type
 template <typename T> struct remove_class { };
@@ -354,35 +420,14 @@ template <typename T> using intrinsic_t = typename intrinsic_type<T>::type;
 /// Helper type to replace 'void' in some expressions
 struct void_type { };
 
-/// from __cpp_future__ import (convenient aliases from C++14/17)
-template <bool B> using bool_constant = std::integral_constant<bool, B>;
-template <class T> using negation = bool_constant<!T::value>;
-template <bool B, typename T = void> using enable_if_t = typename std::enable_if<B, T>::type;
-template <bool B, typename T, typename F> using conditional_t = typename std::conditional<B, T, F>::type;
+/// Helper template which holds a list of types
+template <typename...> struct type_list { };
 
 /// Compile-time integer sum
 constexpr size_t constexpr_sum() { return 0; }
 template <typename T, typename... Ts>
 constexpr size_t constexpr_sum(T n, Ts... ns) { return size_t{n} + constexpr_sum(ns...); }
 
-// Counts the number of types in the template parameter pack matching the predicate
-#if !defined(_MSC_VER)
-template <template<typename> class Predicate, typename... Ts>
-using count_t = std::integral_constant<size_t, constexpr_sum(Predicate<Ts>::value...)>;
-#else
-// MSVC workaround (2015 Update 3 has issues with some member type aliases and constexpr)
-template <template<typename> class Predicate, typename... Ts> struct count_t;
-template <template<typename> class Predicate> struct count_t<Predicate> : std::integral_constant<size_t, 0> {};
-template <template<typename> class Predicate, class T, class... Ts>
-struct count_t<Predicate, T, Ts...> : std::integral_constant<size_t, Predicate<T>::value + count_t<Predicate, Ts...>::value> {};
-#endif
-
-/// Return true if all/any Ts satify Predicate<T>
-template <template<typename> class Predicate, typename... Ts>
-using all_of_t = bool_constant<(count_t<Predicate, Ts...>::value == sizeof...(Ts))>;
-template <template<typename> class Predicate, typename... Ts>
-using any_of_t = bool_constant<(count_t<Predicate, Ts...>::value > 0)>;
-
 // Extracts the first type from the template parameter pack matching the predicate, or Default if none match.
 template <template<class> class Predicate, class Default, class... Ts> struct first_of;
 template <template<class> class Predicate, class Default> struct first_of<Predicate, Default> {
@@ -426,13 +471,51 @@ inline void ignore_unused(const int *) { }
 
 NAMESPACE_END(detail)
 
+/// Returns a named pointer that is shared among all extension modules (using the same
+/// pybind11 version) running in the current interpreter. Names starting with underscores
+/// are reserved for internal usage. Returns `nullptr` if no matching entry was found.
+inline PYBIND11_NOINLINE void* get_shared_data(const std::string& name) {
+    auto& internals = detail::get_internals();
+    auto it = internals.shared_data.find(name);
+    return it != internals.shared_data.end() ? it->second : nullptr;
+}
+
+/// Set the shared data that can be later recovered by `get_shared_data()`.
+inline PYBIND11_NOINLINE void *set_shared_data(const std::string& name, void *data) {
+    detail::get_internals().shared_data[name] = data;
+    return data;
+}
+
+/// Returns a typed reference to a shared data entry (by using `get_shared_data()`) if
+/// such entry exists. Otherwise, a new object of default-constructible type `T` is
+/// added to the shared data under the given name and a reference to it is returned.
+template<typename T> T& get_or_create_shared_data(const std::string& name) {
+    auto& internals = detail::get_internals();
+    auto it = internals.shared_data.find(name);
+    T* ptr = (T*) (it != internals.shared_data.end() ? it->second : nullptr);
+    if (!ptr) {
+        ptr = new T();
+        internals.shared_data[name] = ptr;
+    }
+    return *ptr;
+}
+
 /// Fetch and hold an error which was already set in Python
 class error_already_set : public std::runtime_error {
 public:
     error_already_set() : std::runtime_error(detail::error_string()) {
         PyErr_Fetch(&type, &value, &trace);
     }
-    ~error_already_set() { Py_XDECREF(type); Py_XDECREF(value); Py_XDECREF(trace); }
+
+    error_already_set(const error_already_set &) = delete;
+
+    error_already_set(error_already_set &&e)
+        : std::runtime_error(e.what()), type(e.type), value(e.value),
+          trace(e.trace) { e.type = e.value = e.trace = nullptr; }
+
+    inline ~error_already_set(); // implementation in pybind11.h
+
+    error_already_set& operator=(const error_already_set &) = delete;
 
     /// Give the error back to Python
     void restore() { PyErr_Restore(type, value, trace); type = value = trace = nullptr; }
@@ -459,7 +542,6 @@ PYBIND11_RUNTIME_EXCEPTION(stop_iteration, PyExc_StopIteration)
 PYBIND11_RUNTIME_EXCEPTION(index_error, PyExc_IndexError)
 PYBIND11_RUNTIME_EXCEPTION(key_error, PyExc_KeyError)
 PYBIND11_RUNTIME_EXCEPTION(value_error, PyExc_ValueError)
-PYBIND11_RUNTIME_EXCEPTION(import_error, PyExc_ImportError)
 PYBIND11_RUNTIME_EXCEPTION(type_error, PyExc_TypeError)
 PYBIND11_RUNTIME_EXCEPTION(cast_error, PyExc_RuntimeError) /// Thrown when pybind11::cast or handle::call fail due to a type casting error
 PYBIND11_RUNTIME_EXCEPTION(reference_cast_error, PyExc_RuntimeError) /// Used internally
@@ -497,4 +579,39 @@ PYBIND11_DECL_FMT(bool, "?");
 /// Dummy destructor wrapper that can be used to expose classes with a private destructor
 struct nodelete { template <typename T> void operator()(T*) { } };
 
+// overload_cast requires variable templates: C++14 or MSVC 2015 Update 2
+#if defined(PYBIND11_CPP14) || _MSC_FULL_VER >= 190023918
+#define PYBIND11_OVERLOAD_CAST 1
+
+NAMESPACE_BEGIN(detail)
+template <typename... Args>
+struct overload_cast_impl {
+    template <typename Return /*,*/ PYBIND11_NOEXCEPT_TPL_ARG>
+    constexpr auto operator()(Return (*pf)(Args...) PYBIND11_NOEXCEPT_SPECIFIER) const noexcept
+                              -> decltype(pf) { return pf; }
+
+    template <typename Return, typename Class /*,*/ PYBIND11_NOEXCEPT_TPL_ARG>
+    constexpr auto operator()(Return (Class::*pmf)(Args...) PYBIND11_NOEXCEPT_SPECIFIER, std::false_type = {}) const noexcept
+                              -> decltype(pmf) { return pmf; }
+
+    template <typename Return, typename Class /*,*/ PYBIND11_NOEXCEPT_TPL_ARG>
+    constexpr auto operator()(Return (Class::*pmf)(Args...) const PYBIND11_NOEXCEPT_SPECIFIER, std::true_type) const noexcept
+                              -> decltype(pmf) { return pmf; }
+};
+NAMESPACE_END(detail)
+
+/// Syntax sugar for resolving overloaded function pointers:
+///  - regular: static_cast<Return (Class::*)(Arg0, Arg1, Arg2)>(&Class::func)
+///  - sweet:   overload_cast<Arg0, Arg1, Arg2>(&Class::func)
+template <typename... Args>
+static constexpr detail::overload_cast_impl<Args...> overload_cast = {};
+// MSVC 2015 only accepts this particular initialization syntax for this variable template.
+
+/// Const member function selector for overload_cast
+///  - regular: static_cast<Return (Class::*)(Arg) const>(&Class::func)
+///  - sweet:   overload_cast<Arg>(&Class::func, const_)
+static constexpr auto const_ = std::true_type{};
+
+#endif // overload_cast
+
 NAMESPACE_END(pybind11)
diff --git a/pybind11/include/pybind11/descr.h b/pybind11/include/pybind11/descr.h
index f8a349b1a..2c3fb3d13 100644
--- a/pybind11/include/pybind11/descr.h
+++ b/pybind11/include/pybind11/descr.h
@@ -15,18 +15,6 @@
 NAMESPACE_BEGIN(pybind11)
 NAMESPACE_BEGIN(detail)
 
-#if defined(__INTEL_COMPILER)
-/* C++14 features not supported for now */
-#elif defined(__clang__)
-#  if __has_feature(cxx_return_type_deduction) && __has_feature(cxx_relaxed_constexpr)
-#    define PYBIND11_CPP14
-#  endif
-#elif defined(__GNUG__)
-#  if __cpp_constexpr >= 201304 && __cpp_decltype_auto >= 201304
-#    define PYBIND11_CPP14
-#  endif
-#endif
-
 #if defined(PYBIND11_CPP14) /* Concatenate type signatures at compile time using C++14 */
 
 template <size_t Size1, size_t Size2> class descr {
@@ -34,8 +22,8 @@ template <size_t Size1, size_t Size2> class descr {
 public:
     constexpr descr(char const (&text) [Size1+1], const std::type_info * const (&types)[Size2+1])
         : descr(text, types,
-                typename make_index_sequence<Size1>::type(),
-                typename make_index_sequence<Size2>::type()) { }
+                make_index_sequence<Size1>(),
+                make_index_sequence<Size2>()) { }
 
     constexpr const char *text() const { return m_text; }
     constexpr const std::type_info * const * types() const { return m_types; }
@@ -43,10 +31,10 @@ public:
     template <size_t OtherSize1, size_t OtherSize2>
     constexpr descr<Size1 + OtherSize1, Size2 + OtherSize2> operator+(const descr<OtherSize1, OtherSize2> &other) const {
         return concat(other,
-                      typename make_index_sequence<Size1>::type(),
-                      typename make_index_sequence<Size2>::type(),
-                      typename make_index_sequence<OtherSize1>::type(),
-                      typename make_index_sequence<OtherSize2>::type());
+                      make_index_sequence<Size1>(),
+                      make_index_sequence<Size2>(),
+                      make_index_sequence<OtherSize1>(),
+                      make_index_sequence<OtherSize2>());
     }
 
 protected:
@@ -93,6 +81,10 @@ template <bool B, size_t Size1, size_t Size2>
 constexpr enable_if_t<!B, descr<Size2 - 1, 0>> _(char const(&)[Size1], char const(&text2)[Size2]) {
     return _(text2);
 }
+template <bool B, size_t SizeA1, size_t SizeA2, size_t SizeB1, size_t SizeB2>
+constexpr enable_if_t<B, descr<SizeA1, SizeA2>> _(descr<SizeA1, SizeA2> d, descr<SizeB1, SizeB2>) { return d; }
+template <bool B, size_t SizeA1, size_t SizeA2, size_t SizeB1, size_t SizeB2>
+constexpr enable_if_t<!B, descr<SizeB1, SizeB2>> _(descr<SizeA1, SizeA2>, descr<SizeB1, SizeB2> d) { return d; }
 
 template <size_t Size> auto constexpr _() -> decltype(int_to_str<Size / 10, Size % 10>::digits) {
     return int_to_str<Size / 10, Size % 10>::digits;
@@ -166,6 +158,8 @@ PYBIND11_NOINLINE inline descr _(const char *text) {
 
 template <bool B> PYBIND11_NOINLINE enable_if_t<B, descr> _(const char *text1, const char *) { return _(text1); }
 template <bool B> PYBIND11_NOINLINE enable_if_t<!B, descr> _(char const *, const char *text2) { return _(text2); }
+template <bool B> PYBIND11_NOINLINE enable_if_t<B, descr> _(descr d, descr) { return d; }
+template <bool B> PYBIND11_NOINLINE enable_if_t<!B, descr> _(descr, descr d) { return d; }
 
 template <typename Type> PYBIND11_NOINLINE descr _() {
     const std::type_info *types[2] = { &typeid(Type), nullptr };
diff --git a/pybind11/include/pybind11/eigen.h b/pybind11/include/pybind11/eigen.h
index d8bf41cd0..ff720d5f6 100644
--- a/pybind11/include/pybind11/eigen.h
+++ b/pybind11/include/pybind11/eigen.h
@@ -17,18 +17,17 @@
 #  pragma GCC diagnostic push
 #  pragma GCC diagnostic ignored "-Wconversion"
 #  pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+#  if __GNUC__ >= 7
+#    pragma GCC diagnostic ignored "-Wint-in-bool-context"
+#  endif
 #endif
 
 #include <Eigen/Core>
 #include <Eigen/SparseCore>
 
-#if defined(__GNUG__) || defined(__clang__)
-#  pragma GCC diagnostic pop
-#endif
-
 #if defined(_MSC_VER)
-#pragma warning(push)
-#pragma warning(disable: 4127) // warning C4127: Conditional expression is constant
+#  pragma warning(push)
+#  pragma warning(disable: 4127) // warning C4127: Conditional expression is constant
 #endif
 
 NAMESPACE_BEGIN(pybind11)
@@ -42,9 +41,10 @@ template <typename T> using is_eigen_ref = is_template_base_of<Eigen::RefBase, T
 // basically covers anything that can be assigned to a dense matrix but that don't have a typical
 // matrix data layout that can be copied from their .data().  For example, DiagonalMatrix and
 // SelfAdjointView fall into this category.
-template <typename T> using is_eigen_base = bool_constant<
-    is_template_base_of<Eigen::EigenBase, T>::value
-    && !is_eigen_dense<T>::value && !is_eigen_sparse<T>::value
+template <typename T> using is_eigen_base = all_of<
+    is_template_base_of<Eigen::EigenBase, T>,
+    negation<is_eigen_dense<T>>,
+    negation<is_eigen_sparse<T>>
 >;
 
 template<typename Type>
@@ -54,8 +54,8 @@ struct type_caster<Type, enable_if_t<is_eigen_dense<Type>::value && !is_eigen_re
     static constexpr bool isVector = Type::IsVectorAtCompileTime;
 
     bool load(handle src, bool) {
-        array_t<Scalar> buf(src, true);
-        if (!buf.check())
+        auto buf = array_t<Scalar>::ensure(src);
+        if (!buf)
             return false;
 
         if (buf.ndim() == 1) {
@@ -137,7 +137,7 @@ struct type_caster<Eigen::Ref<CVDerived, Options, StrideType>> {
 protected:
     using Type = Eigen::Ref<CVDerived, Options, StrideType>;
     using Derived = typename std::remove_const<CVDerived>::type;
-    using DerivedCaster = type_caster<Derived>;
+    using DerivedCaster = make_caster<Derived>;
     DerivedCaster derived_caster;
     std::unique_ptr<Type> value;
 public:
@@ -158,7 +158,7 @@ template <typename Type>
 struct type_caster<Type, enable_if_t<is_eigen_base<Type>::value && !is_eigen_ref<Type>::value>> {
 protected:
     using Matrix = Eigen::Matrix<typename Type::Scalar, Eigen::Dynamic, Eigen::Dynamic>;
-    using MatrixCaster = type_caster<Matrix>;
+    using MatrixCaster = make_caster<Matrix>;
 public:
     [[noreturn]] bool load(handle, bool) { pybind11_fail("Unable to load() into specialized EigenBase object"); }
     static handle cast(const Type &src, return_value_policy policy, handle parent) { return MatrixCaster::cast(Matrix(src), policy, parent); }
@@ -182,7 +182,7 @@ struct type_caster<Type, enable_if_t<is_eigen_sparse<Type>::value>> {
         if (!src)
             return false;
 
-        object obj(src, true);
+        auto obj = reinterpret_borrow<object>(src);
         object sparse_module = module::import("scipy.sparse");
         object matrix_type = sparse_module.attr(
             rowMajor ? "csr_matrix" : "csc_matrix");
@@ -201,7 +201,7 @@ struct type_caster<Type, enable_if_t<is_eigen_sparse<Type>::value>> {
         auto shape = pybind11::tuple((pybind11::object) obj.attr("shape"));
         auto nnz = obj.attr("nnz").cast<Index>();
 
-        if (!values.check() || !innerIndices.check() || !outerIndices.check())
+        if (!values || !innerIndices || !outerIndices)
             return false;
 
         value = Eigen::MappedSparseMatrix<Scalar, Type::Flags, StorageIndex>(
@@ -234,6 +234,8 @@ struct type_caster<Type, enable_if_t<is_eigen_sparse<Type>::value>> {
 NAMESPACE_END(detail)
 NAMESPACE_END(pybind11)
 
-#if defined(_MSC_VER)
-#pragma warning(pop)
+#if defined(__GNUG__) || defined(__clang__)
+#  pragma GCC diagnostic pop
+#elif defined(_MSC_VER)
+#  pragma warning(pop)
 #endif
diff --git a/pybind11/include/pybind11/eval.h b/pybind11/include/pybind11/eval.h
index b7e7e95c9..5b2b98272 100644
--- a/pybind11/include/pybind11/eval.h
+++ b/pybind11/include/pybind11/eval.h
@@ -31,7 +31,7 @@ enum eval_mode {
 template <eval_mode mode = eval_expr>
 object eval(str expr, object global = object(), object local = object()) {
     if (!global) {
-        global = object(PyEval_GetGlobals(), true);
+        global = reinterpret_borrow<object>(PyEval_GetGlobals());
         if (!global)
             global = dict();
     }
@@ -50,17 +50,16 @@ object eval(str expr, object global = object(), object local = object()) {
         default: pybind11_fail("invalid evaluation mode");
     }
 
-    object result(PyRun_String(buffer.c_str(), start, global.ptr(), local.ptr()), false);
-
+    PyObject *result = PyRun_String(buffer.c_str(), start, global.ptr(), local.ptr());
     if (!result)
         throw error_already_set();
-    return result;
+    return reinterpret_steal<object>(result);
 }
 
 template <eval_mode mode = eval_statements>
 object eval_file(str fname, object global = object(), object local = object()) {
     if (!global) {
-        global = object(PyEval_GetGlobals(), true);
+        global = reinterpret_borrow<object>(PyEval_GetGlobals());
         if (!global)
             global = dict();
     }
@@ -83,9 +82,9 @@ object eval_file(str fname, object global = object(), object local = object()) {
     FILE *f = _Py_fopen(fname.ptr(), "r");
 #else
     /* No unicode support in open() :( */
-    object fobj(PyFile_FromString(
+    auto fobj = reinterpret_steal<object>(PyFile_FromString(
         const_cast<char *>(fname_str.c_str()),
-        const_cast<char*>("r")), false);
+        const_cast<char*>("r")));
     FILE *f = nullptr;
     if (fobj)
         f = PyFile_AsFile(fobj.ptr());
@@ -96,14 +95,18 @@ object eval_file(str fname, object global = object(), object local = object()) {
         pybind11_fail("File \"" + fname_str + "\" could not be opened!");
     }
 
-    object result(PyRun_FileEx(f, fname_str.c_str(), start, global.ptr(),
-                               local.ptr(), closeFile),
-                  false);
+#if PY_VERSION_HEX < 0x03000000 && defined(PYPY_VERSION)
+    PyObject *result = PyRun_File(f, fname_str.c_str(), start, global.ptr(),
+                                  local.ptr());
+    (void) closeFile;
+#else
+    PyObject *result = PyRun_FileEx(f, fname_str.c_str(), start, global.ptr(),
+                                    local.ptr(), closeFile);
+#endif
 
     if (!result)
         throw error_already_set();
-
-    return result;
+    return reinterpret_steal<object>(result);
 }
 
 NAMESPACE_END(pybind11)
diff --git a/pybind11/include/pybind11/functional.h b/pybind11/include/pybind11/functional.h
index ed269b9bf..8e7e75e6b 100644
--- a/pybind11/include/pybind11/functional.h
+++ b/pybind11/include/pybind11/functional.h
@@ -15,9 +15,12 @@
 NAMESPACE_BEGIN(pybind11)
 NAMESPACE_BEGIN(detail)
 
-template <typename Return, typename... Args> struct type_caster<std::function<Return(Args...)>> {
-    typedef std::function<Return(Args...)> type;
-    typedef typename std::conditional<std::is_same<Return, void>::value, void_type, Return>::type retval_type;
+template <typename Return, typename... Args /*,*/ PYBIND11_NOEXCEPT_TPL_ARG>
+struct type_caster<std::function<Return(Args...) PYBIND11_NOEXCEPT_SPECIFIER>> {
+    using type = std::function<Return(Args...) PYBIND11_NOEXCEPT_SPECIFIER>;
+    using retval_type = conditional_t<std::is_same<Return, void>::value, void_type, Return>;
+    using function_type = Return (*) (Args...) PYBIND11_NOEXCEPT_SPECIFIER;
+
 public:
     bool load(handle src_, bool) {
         if (src_.is_none())
@@ -27,29 +30,26 @@ public:
         if (!src_ || !PyCallable_Check(src_.ptr()))
             return false;
 
-        {
-            /*
-               When passing a C++ function as an argument to another C++
-               function via Python, every function call would normally involve
-               a full C++ -> Python -> C++ roundtrip, which can be prohibitive.
-               Here, we try to at least detect the case where the function is
-               stateless (i.e. function pointer or lambda function without
-               captured variables), in which case the roundtrip can be avoided.
-             */
-            if (PyCFunction_Check(src_.ptr())) {
-                capsule c(PyCFunction_GetSelf(src_.ptr()), true);
-                auto rec = (function_record *) c;
-                using FunctionType = Return (*) (Args...);
+        /*
+           When passing a C++ function as an argument to another C++
+           function via Python, every function call would normally involve
+           a full C++ -> Python -> C++ roundtrip, which can be prohibitive.
+           Here, we try to at least detect the case where the function is
+           stateless (i.e. function pointer or lambda function without
+           captured variables), in which case the roundtrip can be avoided.
+         */
+        if (PyCFunction_Check(src_.ptr())) {
+            auto c = reinterpret_borrow<capsule>(PyCFunction_GET_SELF(src_.ptr()));
+            auto rec = (function_record *) c;
 
-                if (rec && rec->is_stateless && rec->data[1] == &typeid(FunctionType)) {
-                    struct capture { FunctionType f; };
-                    value = ((capture *) &rec->data)->f;
-                    return true;
-                }
+            if (rec && rec->is_stateless && rec->data[1] == &typeid(function_type)) {
+                struct capture { function_type f; };
+                value = ((capture *) &rec->data)->f;
+                return true;
             }
         }
 
-        object src(src_, true);
+        auto src = reinterpret_borrow<object>(src_);
         value = [src](Args... args) -> Return {
             gil_scoped_acquire acq;
             object retval(src(std::move(args)...));
@@ -64,7 +64,7 @@ public:
         if (!f_)
             return none().inc_ref();
 
-        auto result = f_.template target<Return (*)(Args...)>();
+        auto result = f_.template target<function_type>();
         if (result)
             return cpp_function(*result, policy).release();
         else
@@ -72,8 +72,8 @@ public:
     }
 
     PYBIND11_TYPE_CASTER(type, _("Callable[[") +
-            type_caster<std::tuple<Args...>>::element_names() + _("], ") +
-            type_caster<retval_type>::name() +
+            argument_loader<Args...>::arg_names() + _("], ") +
+            make_caster<retval_type>::name() +
             _("]"));
 };
 
diff --git a/pybind11/include/pybind11/numpy.h b/pybind11/include/pybind11/numpy.h
index 4111ccd7b..6fecf2853 100644
--- a/pybind11/include/pybind11/numpy.h
+++ b/pybind11/include/pybind11/numpy.h
@@ -20,6 +20,8 @@
 #include <string>
 #include <initializer_list>
 #include <functional>
+#include <utility>
+#include <typeindex>
 
 #if defined(_MSC_VER)
 #  pragma warning(push)
@@ -63,6 +65,47 @@ struct PyArray_Proxy {
     int flags;
 };
 
+struct PyVoidScalarObject_Proxy {
+    PyObject_VAR_HEAD
+    char *obval;
+    PyArrayDescr_Proxy *descr;
+    int flags;
+    PyObject *base;
+};
+
+struct numpy_type_info {
+    PyObject* dtype_ptr;
+    std::string format_str;
+};
+
+struct numpy_internals {
+    std::unordered_map<std::type_index, numpy_type_info> registered_dtypes;
+
+    numpy_type_info *get_type_info(const std::type_info& tinfo, bool throw_if_missing = true) {
+        auto it = registered_dtypes.find(std::type_index(tinfo));
+        if (it != registered_dtypes.end())
+            return &(it->second);
+        if (throw_if_missing)
+            pybind11_fail(std::string("NumPy type info missing for ") + tinfo.name());
+        return nullptr;
+    }
+
+    template<typename T> numpy_type_info *get_type_info(bool throw_if_missing = true) {
+        return get_type_info(typeid(typename std::remove_cv<T>::type), throw_if_missing);
+    }
+};
+
+inline PYBIND11_NOINLINE void load_numpy_internals(numpy_internals* &ptr) {
+    ptr = &get_or_create_shared_data<numpy_internals>("_numpy_internals");
+}
+
+inline numpy_internals& get_numpy_internals() {
+    static numpy_internals* ptr = nullptr;
+    if (!ptr)
+        load_numpy_internals(ptr);
+    return *ptr;
+}
+
 struct npy_api {
     enum constants {
         NPY_C_CONTIGUOUS_ = 0x0001,
@@ -103,7 +146,9 @@ struct npy_api {
     PyObject *(*PyArray_DescrNewFromType_)(int);
     PyObject *(*PyArray_NewCopy_)(PyObject *, int);
     PyTypeObject *PyArray_Type_;
+    PyTypeObject *PyVoidArrType_Type_;
     PyTypeObject *PyArrayDescr_Type_;
+    PyObject *(*PyArray_DescrFromScalar_)(PyObject *);
     PyObject *(*PyArray_FromAny_) (PyObject *, PyObject *, int, int, int, PyObject *);
     int (*PyArray_DescrConverter_) (PyObject *, PyObject **);
     bool (*PyArray_EquivTypes_) (PyObject *, PyObject *);
@@ -114,7 +159,9 @@ private:
     enum functions {
         API_PyArray_Type = 2,
         API_PyArrayDescr_Type = 3,
+        API_PyVoidArrType_Type = 39,
         API_PyArray_DescrFromType = 45,
+        API_PyArray_DescrFromScalar = 57,
         API_PyArray_FromAny = 69,
         API_PyArray_NewCopy = 85,
         API_PyArray_NewFromDescr = 94,
@@ -136,8 +183,10 @@ private:
         npy_api api;
 #define DECL_NPY_API(Func) api.Func##_ = (decltype(api.Func##_)) api_ptr[API_##Func];
         DECL_NPY_API(PyArray_Type);
+        DECL_NPY_API(PyVoidArrType_Type);
         DECL_NPY_API(PyArrayDescr_Type);
         DECL_NPY_API(PyArray_DescrFromType);
+        DECL_NPY_API(PyArray_DescrFromScalar);
         DECL_NPY_API(PyArray_FromAny);
         DECL_NPY_API(PyArray_NewCopy);
         DECL_NPY_API(PyArray_NewFromDescr);
@@ -150,16 +199,28 @@ private:
         return api;
     }
 };
-NAMESPACE_END(detail)
 
-#define PyArray_GET_(ptr, attr) \
-    (reinterpret_cast<::pybind11::detail::PyArray_Proxy*>(ptr)->attr)
-#define PyArrayDescr_GET_(ptr, attr) \
-    (reinterpret_cast<::pybind11::detail::PyArrayDescr_Proxy*>(ptr)->attr)
-#define PyArray_FLAGS_(ptr) \
-    PyArray_GET_(ptr, flags)
-#define PyArray_CHKFLAGS_(ptr, flag) \
-    (flag == (PyArray_FLAGS_(ptr) & flag))
+inline PyArray_Proxy* array_proxy(void* ptr) {
+    return reinterpret_cast<PyArray_Proxy*>(ptr);
+}
+
+inline const PyArray_Proxy* array_proxy(const void* ptr) {
+    return reinterpret_cast<const PyArray_Proxy*>(ptr);
+}
+
+inline PyArrayDescr_Proxy* array_descriptor_proxy(PyObject* ptr) {
+   return reinterpret_cast<PyArrayDescr_Proxy*>(ptr);
+}
+
+inline const PyArrayDescr_Proxy* array_descriptor_proxy(const PyObject* ptr) {
+   return reinterpret_cast<const PyArrayDescr_Proxy*>(ptr);
+}
+
+inline bool check_flags(const void* ptr, int flag) {
+    return (flag == (array_proxy(ptr)->flags & flag));
+}
+
+NAMESPACE_END(detail)
 
 class dtype : public object {
 public:
@@ -167,7 +228,8 @@ public:
 
     explicit dtype(const buffer_info &info) {
         dtype descr(_dtype_from_pep3118()(PYBIND11_STR_TYPE(info.format)));
-        m_ptr = descr.strip_padding().release().ptr();
+        // If info.itemsize == 0, use the value calculated from the format string
+        m_ptr = descr.strip_padding(info.itemsize ? info.itemsize : descr.itemsize()).release().ptr();
     }
 
     explicit dtype(const std::string &format) {
@@ -190,7 +252,7 @@ public:
         PyObject *ptr = nullptr;
         if (!detail::npy_api::get().PyArray_DescrConverter_(args.release().ptr(), &ptr) || !ptr)
             throw error_already_set();
-        return object(ptr, false);
+        return reinterpret_steal<dtype>(ptr);
     }
 
     /// Return dtype associated with a C++ type.
@@ -200,27 +262,27 @@ public:
 
     /// Size of the data type in bytes.
     size_t itemsize() const {
-        return (size_t) PyArrayDescr_GET_(m_ptr, elsize);
+        return (size_t) detail::array_descriptor_proxy(m_ptr)->elsize;
     }
 
     /// Returns true for structured data types.
     bool has_fields() const {
-        return PyArrayDescr_GET_(m_ptr, names) != nullptr;
+        return detail::array_descriptor_proxy(m_ptr)->names != nullptr;
     }
 
     /// Single-character type code.
     char kind() const {
-        return PyArrayDescr_GET_(m_ptr, kind);
+        return detail::array_descriptor_proxy(m_ptr)->kind;
     }
 
 private:
     static object _dtype_from_pep3118() {
         static PyObject *obj = module::import("numpy.core._internal")
             .attr("_dtype_from_pep3118").cast<object>().release().ptr();
-        return object(obj, true);
+        return reinterpret_borrow<object>(obj);
     }
 
-    dtype strip_padding() {
+    dtype strip_padding(size_t itemsize) {
         // Recursively strip all void fields with empty names that are generated for
         // padding fields (as of NumPy v1.11).
         if (!has_fields())
@@ -230,13 +292,13 @@ private:
         std::vector<field_descr> field_descriptors;
 
         for (auto field : attr("fields").attr("items")()) {
-            auto spec = object(field, true).cast<tuple>();
+            auto spec = field.cast<tuple>();
             auto name = spec[0].cast<pybind11::str>();
             auto format = spec[1].cast<tuple>()[0].cast<dtype>();
             auto offset = spec[1].cast<tuple>()[1].cast<pybind11::int_>();
             if (!len(name) && format.kind() == 'V')
                 continue;
-            field_descriptors.push_back({(PYBIND11_STR_TYPE) name, format.strip_padding(), offset});
+            field_descriptors.push_back({(PYBIND11_STR_TYPE) name, format.strip_padding(format.itemsize()), offset});
         }
 
         std::sort(field_descriptors.begin(), field_descriptors.end(),
@@ -250,13 +312,13 @@ private:
             formats.append(descr.format);
             offsets.append(descr.offset);
         }
-        return dtype(names, formats, offsets, itemsize());
+        return dtype(names, formats, offsets, itemsize);
     }
 };
 
 class array : public buffer {
 public:
-    PYBIND11_OBJECT_DEFAULT(array, buffer, detail::npy_api::get().PyArray_Check_)
+    PYBIND11_OBJECT_CVT(array, buffer, detail::npy_api::get().PyArray_Check_, raw_array)
 
     enum {
         c_style = detail::npy_api::NPY_C_CONTIGUOUS_,
@@ -264,6 +326,8 @@ public:
         forcecast = detail::npy_api::NPY_ARRAY_FORCECAST_
     };
 
+    array() : array(0, static_cast<const double *>(nullptr)) {}
+
     array(const pybind11::dtype &dt, const std::vector<size_t> &shape,
           const std::vector<size_t> &strides, const void *ptr = nullptr,
           handle base = handle()) {
@@ -275,25 +339,24 @@ public:
 
         int flags = 0;
         if (base && ptr) {
-            array base_array(base, true);
-            if (base_array.check())
+            if (isinstance<array>(base))
                 /* Copy flags from base (except baseship bit) */
-                flags = base_array.flags() & ~detail::npy_api::NPY_ARRAY_OWNDATA_;
+                flags = reinterpret_borrow<array>(base).flags() & ~detail::npy_api::NPY_ARRAY_OWNDATA_;
             else
                 /* Writable by default, easy to downgrade later on if needed */
                 flags = detail::npy_api::NPY_ARRAY_WRITEABLE_;
         }
 
-        object tmp(api.PyArray_NewFromDescr_(
+        auto tmp = reinterpret_steal<object>(api.PyArray_NewFromDescr_(
             api.PyArray_Type_, descr.release().ptr(), (int) ndim, (Py_intptr_t *) shape.data(),
-            (Py_intptr_t *) strides.data(), const_cast<void *>(ptr), flags, nullptr), false);
+            (Py_intptr_t *) strides.data(), const_cast<void *>(ptr), flags, nullptr));
         if (!tmp)
             pybind11_fail("NumPy: unable to create array!");
         if (ptr) {
             if (base) {
-                PyArray_GET_(tmp.ptr(), base) = base.inc_ref().ptr();
+                detail::array_proxy(tmp.ptr())->base = base.inc_ref().ptr();
             } else {
-                tmp = object(api.PyArray_NewCopy_(tmp.ptr(), -1 /* any order */), false);
+                tmp = reinterpret_steal<object>(api.PyArray_NewCopy_(tmp.ptr(), -1 /* any order */));
             }
         }
         m_ptr = tmp.release().ptr();
@@ -326,7 +389,7 @@ public:
 
     /// Array descriptor (dtype)
     pybind11::dtype dtype() const {
-        return object(PyArray_GET_(m_ptr, descr), true);
+        return reinterpret_borrow<pybind11::dtype>(detail::array_proxy(m_ptr)->descr);
     }
 
     /// Total number of elements
@@ -336,7 +399,7 @@ public:
 
     /// Byte size of a single element
     size_t itemsize() const {
-        return (size_t) PyArrayDescr_GET_(PyArray_GET_(m_ptr, descr), elsize);
+        return (size_t) detail::array_descriptor_proxy(detail::array_proxy(m_ptr)->descr)->elsize;
     }
 
     /// Total number of bytes
@@ -346,17 +409,17 @@ public:
 
     /// Number of dimensions
     size_t ndim() const {
-        return (size_t) PyArray_GET_(m_ptr, nd);
+        return (size_t) detail::array_proxy(m_ptr)->nd;
     }
 
     /// Base object
     object base() const {
-        return object(PyArray_GET_(m_ptr, base), true);
+        return reinterpret_borrow<object>(detail::array_proxy(m_ptr)->base);
     }
 
     /// Dimensions of the array
     const size_t* shape() const {
-        return reinterpret_cast<const size_t *>(PyArray_GET_(m_ptr, dimensions));
+        return reinterpret_cast<const size_t *>(detail::array_proxy(m_ptr)->dimensions);
     }
 
     /// Dimension along a given axis
@@ -368,7 +431,7 @@ public:
 
     /// Strides of the array
     const size_t* strides() const {
-        return reinterpret_cast<const size_t *>(PyArray_GET_(m_ptr, strides));
+        return reinterpret_cast<const size_t *>(detail::array_proxy(m_ptr)->strides);
     }
 
     /// Stride along a given axis
@@ -380,60 +443,62 @@ public:
 
     /// Return the NumPy array flags
     int flags() const {
-        return PyArray_FLAGS_(m_ptr);
+        return detail::array_proxy(m_ptr)->flags;
     }
 
     /// If set, the array is writeable (otherwise the buffer is read-only)
     bool writeable() const {
-        return PyArray_CHKFLAGS_(m_ptr, detail::npy_api::NPY_ARRAY_WRITEABLE_);
+        return detail::check_flags(m_ptr, detail::npy_api::NPY_ARRAY_WRITEABLE_);
     }
 
     /// If set, the array owns the data (will be freed when the array is deleted)
     bool owndata() const {
-        return PyArray_CHKFLAGS_(m_ptr, detail::npy_api::NPY_ARRAY_OWNDATA_);
+        return detail::check_flags(m_ptr, detail::npy_api::NPY_ARRAY_OWNDATA_);
     }
 
     /// Pointer to the contained data. If index is not provided, points to the
     /// beginning of the buffer. May throw if the index would lead to out of bounds access.
-    template<typename... Ix> const void* data(Ix&&... index) const {
-        return static_cast<const void *>(PyArray_GET_(m_ptr, data) + offset_at(index...));
+    template<typename... Ix> const void* data(Ix... index) const {
+        return static_cast<const void *>(detail::array_proxy(m_ptr)->data + offset_at(index...));
     }
 
     /// Mutable pointer to the contained data. If index is not provided, points to the
     /// beginning of the buffer. May throw if the index would lead to out of bounds access.
     /// May throw if the array is not writeable.
-    template<typename... Ix> void* mutable_data(Ix&&... index) {
+    template<typename... Ix> void* mutable_data(Ix... index) {
         check_writeable();
-        return static_cast<void *>(PyArray_GET_(m_ptr, data) + offset_at(index...));
+        return static_cast<void *>(detail::array_proxy(m_ptr)->data + offset_at(index...));
     }
 
     /// Byte offset from beginning of the array to a given index (full or partial).
     /// May throw if the index would lead to out of bounds access.
-    template<typename... Ix> size_t offset_at(Ix&&... index) const {
+    template<typename... Ix> size_t offset_at(Ix... index) const {
         if (sizeof...(index) > ndim())
             fail_dim_check(sizeof...(index), "too many indices for an array");
-        return get_byte_offset(index...);
+        return byte_offset(size_t(index)...);
     }
 
     size_t offset_at() const { return 0; }
 
     /// Item count from beginning of the array to a given index (full or partial).
     /// May throw if the index would lead to out of bounds access.
-    template<typename... Ix> size_t index_at(Ix&&... index) const {
+    template<typename... Ix> size_t index_at(Ix... index) const {
         return offset_at(index...) / itemsize();
     }
 
     /// Return a new view with all of the dimensions of length 1 removed
     array squeeze() {
         auto& api = detail::npy_api::get();
-        return array(api.PyArray_Squeeze_(m_ptr), false);
+        return reinterpret_steal<array>(api.PyArray_Squeeze_(m_ptr));
     }
 
     /// Ensure that the argument is a NumPy array
-    static array ensure(object input, int ExtraFlags = 0) {
-        auto& api = detail::npy_api::get();
-        return array(api.PyArray_FromAny_(
-            input.release().ptr(), nullptr, 0, 0, detail::npy_api::NPY_ENSURE_ARRAY_ | ExtraFlags, nullptr), false);
+    /// In case of an error, nullptr is returned and the Python error is cleared.
+    static array ensure(handle h, int ExtraFlags = 0) {
+        auto result = reinterpret_steal<array>(raw_array(h.ptr(), ExtraFlags));
+        if (!result)
+            PyErr_Clear();
+        return result;
     }
 
 protected:
@@ -444,18 +509,16 @@ protected:
                           " (ndim = " + std::to_string(ndim()) + ")");
     }
 
-    template<typename... Ix> size_t get_byte_offset(Ix&&... index) const {
-        const size_t idx[] = { (size_t) index... };
-        if (!std::equal(idx + 0, idx + sizeof...(index), shape(), std::less<size_t>{})) {
-            auto mismatch = std::mismatch(idx + 0, idx + sizeof...(index), shape(), std::less<size_t>{});
-            throw index_error(std::string("index ") + std::to_string(*mismatch.first) +
-                              " is out of bounds for axis " + std::to_string(mismatch.first - idx) +
-                              " with size " + std::to_string(*mismatch.second));
-        }
-        return std::inner_product(idx + 0, idx + sizeof...(index), strides(), (size_t) 0);
+    template<typename... Ix> size_t byte_offset(Ix... index) const {
+        check_dimensions(index...);
+        return byte_offset_unsafe(index...);
     }
 
-    size_t get_byte_offset() const { return 0; }
+    template<size_t dim = 0, typename... Ix> size_t byte_offset_unsafe(size_t i, Ix... index) const {
+        return i * strides()[dim] + byte_offset_unsafe<dim + 1>(index...);
+    }
+
+    template<size_t dim = 0> size_t byte_offset_unsafe() const { return 0; }
 
     void check_writeable() const {
         if (!writeable())
@@ -473,13 +536,46 @@ protected:
         }
         return strides;
     }
+
+    template<typename... Ix> void check_dimensions(Ix... index) const {
+        check_dimensions_impl(size_t(0), shape(), size_t(index)...);
+    }
+
+    void check_dimensions_impl(size_t, const size_t*) const { }
+
+    template<typename... Ix> void check_dimensions_impl(size_t axis, const size_t* shape, size_t i, Ix... index) const {
+        if (i >= *shape) {
+            throw index_error(std::string("index ") + std::to_string(i) +
+                              " is out of bounds for axis " + std::to_string(axis) +
+                              " with size " + std::to_string(*shape));
+        }
+        check_dimensions_impl(axis + 1, shape + 1, index...);
+    }
+
+    /// Create array from any object -- always returns a new reference
+    static PyObject *raw_array(PyObject *ptr, int ExtraFlags = 0) {
+        if (ptr == nullptr)
+            return nullptr;
+        return detail::npy_api::get().PyArray_FromAny_(
+            ptr, nullptr, 0, 0, detail::npy_api::NPY_ENSURE_ARRAY_ | ExtraFlags, nullptr);
+    }
 };
 
 template <typename T, int ExtraFlags = array::forcecast> class array_t : public array {
 public:
-    PYBIND11_OBJECT_CVT(array_t, array, is_non_null, m_ptr = ensure_(m_ptr));
+    array_t() : array(0, static_cast<const T *>(nullptr)) {}
+    array_t(handle h, borrowed_t) : array(h, borrowed) { }
+    array_t(handle h, stolen_t) : array(h, stolen) { }
 
-    array_t() : array() { }
+    PYBIND11_DEPRECATED("Use array_t<T>::ensure() instead")
+    array_t(handle h, bool is_borrowed) : array(raw_array_t(h.ptr()), stolen) {
+        if (!m_ptr) PyErr_Clear();
+        if (!is_borrowed) Py_XDECREF(h.ptr());
+    }
+
+    array_t(const object &o) : array(raw_array_t(o.ptr()), stolen) {
+        if (!m_ptr) throw error_already_set();
+    }
 
     explicit array_t(const buffer_info& info) : array(info) { }
 
@@ -499,47 +595,56 @@ public:
         return sizeof(T);
     }
 
-    template<typename... Ix> size_t index_at(Ix&... index) const {
+    template<typename... Ix> size_t index_at(Ix... index) const {
         return offset_at(index...) / itemsize();
     }
 
-    template<typename... Ix> const T* data(Ix&&... index) const {
+    template<typename... Ix> const T* data(Ix... index) const {
         return static_cast<const T*>(array::data(index...));
     }
 
-    template<typename... Ix> T* mutable_data(Ix&&... index) {
+    template<typename... Ix> T* mutable_data(Ix... index) {
         return static_cast<T*>(array::mutable_data(index...));
     }
 
     // Reference to element at a given index
-    template<typename... Ix> const T& at(Ix&&... index) const {
+    template<typename... Ix> const T& at(Ix... index) const {
         if (sizeof...(index) != ndim())
             fail_dim_check(sizeof...(index), "index dimension mismatch");
-        // not using offset_at() / index_at() here so as to avoid another dimension check
-        return *(static_cast<const T*>(array::data()) + get_byte_offset(index...) / itemsize());
+        return *(static_cast<const T*>(array::data()) + byte_offset(size_t(index)...) / itemsize());
     }
 
     // Mutable reference to element at a given index
-    template<typename... Ix> T& mutable_at(Ix&&... index) {
+    template<typename... Ix> T& mutable_at(Ix... index) {
         if (sizeof...(index) != ndim())
             fail_dim_check(sizeof...(index), "index dimension mismatch");
-        // not using offset_at() / index_at() here so as to avoid another dimension check
-        return *(static_cast<T*>(array::mutable_data()) + get_byte_offset(index...) / itemsize());
+        return *(static_cast<T*>(array::mutable_data()) + byte_offset(size_t(index)...) / itemsize());
     }
 
-    static bool is_non_null(PyObject *ptr) { return ptr != nullptr; }
-
-    static PyObject *ensure_(PyObject *ptr) {
-        if (ptr == nullptr)
-            return nullptr;
-        auto& api = detail::npy_api::get();
-        PyObject *result = api.PyArray_FromAny_(ptr, pybind11::dtype::of<T>().release().ptr(), 0, 0,
-                                                detail::npy_api::NPY_ENSURE_ARRAY_ | ExtraFlags, nullptr);
+    /// Ensure that the argument is a NumPy array of the correct dtype.
+    /// In case of an error, nullptr is returned and the Python error is cleared.
+    static array_t ensure(handle h) {
+        auto result = reinterpret_steal<array_t>(raw_array_t(h.ptr()));
         if (!result)
             PyErr_Clear();
-        Py_DECREF(ptr);
         return result;
     }
+
+    static bool _check(handle h) {
+        const auto &api = detail::npy_api::get();
+        return api.PyArray_Check_(h.ptr())
+               && api.PyArray_EquivTypes_(detail::array_proxy(h.ptr())->descr, dtype::of<T>().ptr());
+    }
+
+protected:
+    /// Create array from any object -- always returns a new reference
+    static PyObject *raw_array_t(PyObject *ptr) {
+        if (ptr == nullptr)
+            return nullptr;
+        return detail::npy_api::get().PyArray_FromAny_(
+            ptr, dtype::of<T>().release().ptr(), 0, 0,
+            detail::npy_api::NPY_ENSURE_ARRAY_ | ExtraFlags, nullptr);
+    }
 };
 
 template <typename T>
@@ -565,6 +670,21 @@ struct format_descriptor<T, detail::enable_if_t<std::is_enum<T>::value>> {
 };
 
 NAMESPACE_BEGIN(detail)
+template <typename T, int ExtraFlags>
+struct pyobject_caster<array_t<T, ExtraFlags>> {
+    using type = array_t<T, ExtraFlags>;
+
+    bool load(handle src, bool /* convert */) {
+        value = type::ensure(src);
+        return static_cast<bool>(value);
+    }
+
+    static handle cast(const handle &src, return_value_policy /* policy */, handle /* parent */) {
+        return src.inc_ref();
+    }
+    PYBIND11_TYPE_CASTER(type, handle_type_name<type>::name());
+};
+
 template <typename T> struct is_std_array : std::false_type { };
 template <typename T, size_t N> struct is_std_array<std::array<T, N>> : std::true_type { };
 
@@ -592,7 +712,7 @@ public:
     enum { value = values[detail::log2(sizeof(T)) * 2 + (std::is_unsigned<T>::value ? 1 : 0)] };
     static pybind11::dtype dtype() {
         if (auto ptr = npy_api::get().PyArray_DescrFromType_(value))
-            return object(ptr, true);
+            return reinterpret_borrow<pybind11::dtype>(ptr);
         pybind11_fail("Unsupported buffer format!");
     }
     template <typename T2 = T, enable_if_t<std::is_signed<T2>::value, int> = 0>
@@ -607,7 +727,7 @@ template <typename T> constexpr const int npy_format_descriptor<
     enum { value = npy_api::NumPyName }; \
     static pybind11::dtype dtype() { \
         if (auto ptr = npy_api::get().PyArray_DescrFromType_(value)) \
-            return object(ptr, true); \
+            return reinterpret_borrow<pybind11::dtype>(ptr); \
         pybind11_fail("Unsupported buffer format!"); \
     } \
     static PYBIND11_DESCR name() { return _(Name); } }
@@ -637,88 +757,117 @@ struct field_descriptor {
     const char *name;
     size_t offset;
     size_t size;
+    size_t alignment;
     std::string format;
     dtype descr;
 };
 
+inline PYBIND11_NOINLINE void register_structured_dtype(
+    const std::initializer_list<field_descriptor>& fields,
+    const std::type_info& tinfo, size_t itemsize,
+    bool (*direct_converter)(PyObject *, void *&)) {
+
+    auto& numpy_internals = get_numpy_internals();
+    if (numpy_internals.get_type_info(tinfo, false))
+        pybind11_fail("NumPy: dtype is already registered");
+
+    list names, formats, offsets;
+    for (auto field : fields) {
+        if (!field.descr)
+            pybind11_fail(std::string("NumPy: unsupported field dtype: `") +
+                            field.name + "` @ " + tinfo.name());
+        names.append(PYBIND11_STR_TYPE(field.name));
+        formats.append(field.descr);
+        offsets.append(pybind11::int_(field.offset));
+    }
+    auto dtype_ptr = pybind11::dtype(names, formats, offsets, itemsize).release().ptr();
+
+    // There is an existing bug in NumPy (as of v1.11): trailing bytes are
+    // not encoded explicitly into the format string. This will supposedly
+    // get fixed in v1.12; for further details, see these:
+    // - https://github.com/numpy/numpy/issues/7797
+    // - https://github.com/numpy/numpy/pull/7798
+    // Because of this, we won't use numpy's logic to generate buffer format
+    // strings and will just do it ourselves.
+    std::vector<field_descriptor> ordered_fields(fields);
+    std::sort(ordered_fields.begin(), ordered_fields.end(),
+        [](const field_descriptor &a, const field_descriptor &b) { return a.offset < b.offset; });
+    size_t offset = 0;
+    std::ostringstream oss;
+    oss << "T{";
+    for (auto& field : ordered_fields) {
+        if (field.offset > offset)
+            oss << (field.offset - offset) << 'x';
+        // mark unaligned fields with '='
+        if (field.offset % field.alignment)
+            oss << '=';
+        oss << field.format << ':' << field.name << ':';
+        offset = field.offset + field.size;
+    }
+    if (itemsize > offset)
+        oss << (itemsize - offset) << 'x';
+    oss << '}';
+    auto format_str = oss.str();
+
+    // Sanity check: verify that NumPy properly parses our buffer format string
+    auto& api = npy_api::get();
+    auto arr =  array(buffer_info(nullptr, itemsize, format_str, 1));
+    if (!api.PyArray_EquivTypes_(dtype_ptr, arr.dtype().ptr()))
+        pybind11_fail("NumPy: invalid buffer descriptor!");
+
+    auto tindex = std::type_index(tinfo);
+    numpy_internals.registered_dtypes[tindex] = { dtype_ptr, format_str };
+    get_internals().direct_conversions[tindex].push_back(direct_converter);
+}
+
 template <typename T>
 struct npy_format_descriptor<T, enable_if_t<is_pod_struct<T>::value>> {
     static PYBIND11_DESCR name() { return _("struct"); }
 
     static pybind11::dtype dtype() {
-        if (!dtype_ptr)
-            pybind11_fail("NumPy: unsupported buffer format!");
-        return object(dtype_ptr, true);
+        return reinterpret_borrow<pybind11::dtype>(dtype_ptr());
     }
 
     static std::string format() {
-        if (!dtype_ptr)
-            pybind11_fail("NumPy: unsupported buffer format!");
+        static auto format_str = get_numpy_internals().get_type_info<T>(true)->format_str;
         return format_str;
     }
 
-    static void register_dtype(std::initializer_list<field_descriptor> fields) {
-        list names, formats, offsets;
-        for (auto field : fields) {
-            if (!field.descr)
-                pybind11_fail("NumPy: unsupported field dtype");
-            names.append(PYBIND11_STR_TYPE(field.name));
-            formats.append(field.descr);
-            offsets.append(pybind11::int_(field.offset));
-        }
-        dtype_ptr = pybind11::dtype(names, formats, offsets, sizeof(T)).release().ptr();
-
-        // There is an existing bug in NumPy (as of v1.11): trailing bytes are
-        // not encoded explicitly into the format string. This will supposedly
-        // get fixed in v1.12; for further details, see these:
-        // - https://github.com/numpy/numpy/issues/7797
-        // - https://github.com/numpy/numpy/pull/7798
-        // Because of this, we won't use numpy's logic to generate buffer format
-        // strings and will just do it ourselves.
-        std::vector<field_descriptor> ordered_fields(fields);
-        std::sort(ordered_fields.begin(), ordered_fields.end(),
-                  [](const field_descriptor &a, const field_descriptor &b) {
-                      return a.offset < b.offset;
-                  });
-        size_t offset = 0;
-        std::ostringstream oss;
-        oss << "T{";
-        for (auto& field : ordered_fields) {
-            if (field.offset > offset)
-                oss << (field.offset - offset) << 'x';
-            // note that '=' is required to cover the case of unaligned fields
-            oss << '=' << field.format << ':' << field.name << ':';
-            offset = field.offset + field.size;
-        }
-        if (sizeof(T) > offset)
-            oss << (sizeof(T) - offset) << 'x';
-        oss << '}';
-        format_str = oss.str();
-
-        // Sanity check: verify that NumPy properly parses our buffer format string
-        auto& api = npy_api::get();
-        auto arr =  array(buffer_info(nullptr, sizeof(T), format(), 1));
-        if (!api.PyArray_EquivTypes_(dtype_ptr, arr.dtype().ptr()))
-            pybind11_fail("NumPy: invalid buffer descriptor!");
+    static void register_dtype(const std::initializer_list<field_descriptor>& fields) {
+        register_structured_dtype(fields, typeid(typename std::remove_cv<T>::type),
+                                  sizeof(T), &direct_converter);
     }
 
 private:
-    static std::string format_str;
-    static PyObject* dtype_ptr;
+    static PyObject* dtype_ptr() {
+        static PyObject* ptr = get_numpy_internals().get_type_info<T>(true)->dtype_ptr;
+        return ptr;
+    }
+
+    static bool direct_converter(PyObject *obj, void*& value) {
+        auto& api = npy_api::get();
+        if (!PyObject_TypeCheck(obj, api.PyVoidArrType_Type_))
+            return false;
+        if (auto descr = reinterpret_steal<object>(api.PyArray_DescrFromScalar_(obj))) {
+            if (api.PyArray_EquivTypes_(dtype_ptr(), descr.ptr())) {
+                value = ((PyVoidScalarObject_Proxy *) obj)->obval;
+                return true;
+            }
+        }
+        return false;
+    }
 };
 
-template <typename T>
-std::string npy_format_descriptor<T, enable_if_t<is_pod_struct<T>::value>>::format_str;
-template <typename T>
-PyObject* npy_format_descriptor<T, enable_if_t<is_pod_struct<T>::value>>::dtype_ptr = nullptr;
+#define PYBIND11_FIELD_DESCRIPTOR_EX(T, Field, Name)                                          \
+    ::pybind11::detail::field_descriptor {                                                    \
+        Name, offsetof(T, Field), sizeof(decltype(std::declval<T>().Field)),                  \
+        alignof(decltype(std::declval<T>().Field)),                                           \
+        ::pybind11::format_descriptor<decltype(std::declval<T>().Field)>::format(),           \
+        ::pybind11::detail::npy_format_descriptor<decltype(std::declval<T>().Field)>::dtype() \
+    }
 
 // Extract name, offset and format descriptor for a struct field
-#define PYBIND11_FIELD_DESCRIPTOR(Type, Field) \
-    ::pybind11::detail::field_descriptor { \
-        #Field, offsetof(Type, Field), sizeof(decltype(static_cast<Type*>(0)->Field)), \
-        ::pybind11::format_descriptor<decltype(static_cast<Type*>(0)->Field)>::format(), \
-        ::pybind11::detail::npy_format_descriptor<decltype(static_cast<Type*>(0)->Field)>::dtype() \
-    }
+#define PYBIND11_FIELD_DESCRIPTOR(T, Field) PYBIND11_FIELD_DESCRIPTOR_EX(T, Field, #Field)
 
 // The main idea of this macro is borrowed from https://github.com/swansontec/map-macro
 // (C) William Swanson, Paul Fultz
@@ -756,6 +905,27 @@ PyObject* npy_format_descriptor<T, enable_if_t<is_pod_struct<T>::value>>::dtype_
     ::pybind11::detail::npy_format_descriptor<Type>::register_dtype \
         ({PYBIND11_MAP_LIST (PYBIND11_FIELD_DESCRIPTOR, Type, __VA_ARGS__)})
 
+#ifdef _MSC_VER
+#define PYBIND11_MAP2_LIST_NEXT1(test, next) \
+    PYBIND11_EVAL0 (PYBIND11_MAP_NEXT0 (test, PYBIND11_MAP_COMMA next, 0))
+#else
+#define PYBIND11_MAP2_LIST_NEXT1(test, next) \
+    PYBIND11_MAP_NEXT0 (test, PYBIND11_MAP_COMMA next, 0)
+#endif
+#define PYBIND11_MAP2_LIST_NEXT(test, next) \
+    PYBIND11_MAP2_LIST_NEXT1 (PYBIND11_MAP_GET_END test, next)
+#define PYBIND11_MAP2_LIST0(f, t, x1, x2, peek, ...) \
+    f(t, x1, x2) PYBIND11_MAP2_LIST_NEXT (peek, PYBIND11_MAP2_LIST1) (f, t, peek, __VA_ARGS__)
+#define PYBIND11_MAP2_LIST1(f, t, x1, x2, peek, ...) \
+    f(t, x1, x2) PYBIND11_MAP2_LIST_NEXT (peek, PYBIND11_MAP2_LIST0) (f, t, peek, __VA_ARGS__)
+// PYBIND11_MAP2_LIST(f, t, a1, a2, ...) expands to f(t, a1, a2), f(t, a3, a4), ...
+#define PYBIND11_MAP2_LIST(f, t, ...) \
+    PYBIND11_EVAL (PYBIND11_MAP2_LIST1 (f, t, __VA_ARGS__, (), 0))
+
+#define PYBIND11_NUMPY_DTYPE_EX(Type, ...) \
+    ::pybind11::detail::npy_format_descriptor<Type>::register_dtype \
+        ({PYBIND11_MAP2_LIST (PYBIND11_FIELD_DESCRIPTOR_EX, Type, __VA_ARGS__)})
+
 template  <class T>
 using array_iterator = typename std::add_pointer<T>::type;
 
@@ -907,7 +1077,7 @@ struct vectorize_helper {
     explicit vectorize_helper(T&&f) : f(std::forward<T>(f)) { }
 
     object operator()(array_t<Args, array::c_style | array::forcecast>... args) {
-        return run(args..., typename make_index_sequence<sizeof...(Args)>::type());
+        return run(args..., make_index_sequence<sizeof...(Args)>());
     }
 
     template <size_t ... Index> object run(array_t<Args, array::c_style | array::forcecast>&... args, index_sequence<Index...> index) {
@@ -970,18 +1140,20 @@ struct vectorize_helper {
 };
 
 template <typename T, int Flags> struct handle_type_name<array_t<T, Flags>> {
-    static PYBIND11_DESCR name() { return _("numpy.ndarray[") + type_caster<T>::name() + _("]"); }
+    static PYBIND11_DESCR name() { return _("numpy.ndarray[") + make_caster<T>::name() + _("]"); }
 };
 
 NAMESPACE_END(detail)
 
-template <typename Func, typename Return, typename... Args>
-detail::vectorize_helper<Func, Return, Args...> vectorize(const Func &f, Return (*) (Args ...)) {
+template <typename Func, typename Return, typename... Args /*,*/ PYBIND11_NOEXCEPT_TPL_ARG>
+detail::vectorize_helper<Func, Return, Args...>
+vectorize(const Func &f, Return (*) (Args ...) PYBIND11_NOEXCEPT_SPECIFIER) {
     return detail::vectorize_helper<Func, Return, Args...>(f);
 }
 
-template <typename Return, typename... Args>
-detail::vectorize_helper<Return (*) (Args ...), Return, Args...> vectorize(Return (*f) (Args ...)) {
+template <typename Return, typename... Args /*,*/ PYBIND11_NOEXCEPT_TPL_ARG>
+detail::vectorize_helper<Return (*) (Args ...) PYBIND11_NOEXCEPT_SPECIFIER, Return, Args...>
+vectorize(Return (*f) (Args ...) PYBIND11_NOEXCEPT_SPECIFIER) {
     return vectorize<Return (*) (Args ...), Return, Args...>(f, f);
 }
 
diff --git a/pybind11/include/pybind11/options.h b/pybind11/include/pybind11/options.h
new file mode 100644
index 000000000..3105551dd
--- /dev/null
+++ b/pybind11/include/pybind11/options.h
@@ -0,0 +1,65 @@
+/*
+    pybind11/options.h: global settings that are configurable at runtime.
+
+    Copyright (c) 2016 Wenzel Jakob <wenzel.jakob@epfl.ch>
+
+    All rights reserved. Use of this source code is governed by a
+    BSD-style license that can be found in the LICENSE file.
+*/
+
+#pragma once
+
+#include "common.h"
+
+NAMESPACE_BEGIN(pybind11)
+
+class options {
+public:
+
+    // Default RAII constructor, which leaves settings as they currently are.
+    options() : previous_state(global_state()) {}
+
+    // Class is non-copyable.
+    options(const options&) = delete;
+    options& operator=(const options&) = delete;
+
+    // Destructor, which restores settings that were in effect before.
+    ~options() {
+        global_state() = previous_state;
+    }
+
+    // Setter methods (affect the global state):
+
+    options& disable_user_defined_docstrings() & { global_state().show_user_defined_docstrings = false; return *this; }
+
+    options& enable_user_defined_docstrings() & { global_state().show_user_defined_docstrings = true; return *this; }
+
+    options& disable_function_signatures() & { global_state().show_function_signatures = false; return *this; }
+
+    options& enable_function_signatures() & { global_state().show_function_signatures = true; return *this; }
+
+    // Getter methods (return the global state):
+
+    static bool show_user_defined_docstrings() { return global_state().show_user_defined_docstrings; }
+
+    static bool show_function_signatures() { return global_state().show_function_signatures; }
+
+    // This type is not meant to be allocated on the heap.
+    void* operator new(size_t) = delete;
+
+private:
+
+    struct state {
+        bool show_user_defined_docstrings = true;  //< Include user-supplied texts in docstrings.
+        bool show_function_signatures = true;      //< Include auto-generated function signatures in docstrings.
+    };
+
+    static state &global_state() {
+        static state instance;
+        return instance;
+    }
+
+    state previous_state;
+};
+
+NAMESPACE_END(pybind11)
diff --git a/pybind11/include/pybind11/pybind11.h b/pybind11/include/pybind11/pybind11.h
index 83abe51fb..addcce74b 100644
--- a/pybind11/include/pybind11/pybind11.h
+++ b/pybind11/include/pybind11/pybind11.h
@@ -34,6 +34,7 @@
 #endif
 
 #include "attr.h"
+#include "options.h"
 
 NAMESPACE_BEGIN(pybind11)
 
@@ -43,8 +44,8 @@ public:
     cpp_function() { }
 
     /// Construct a cpp_function from a vanilla function pointer
-    template <typename Return, typename... Args, typename... Extra>
-    cpp_function(Return (*f)(Args...), const Extra&... extra) {
+    template <typename Return, typename... Args, typename... Extra /*,*/ PYBIND11_NOEXCEPT_TPL_ARG>
+    cpp_function(Return (*f)(Args...) PYBIND11_NOEXCEPT_SPECIFIER, const Extra&... extra) {
         initialize(f, f, extra...);
     }
 
@@ -56,17 +57,17 @@ public:
     }
 
     /// Construct a cpp_function from a class method (non-const)
-    template <typename Return, typename Class, typename... Arg, typename... Extra>
-    cpp_function(Return (Class::*f)(Arg...), const Extra&... extra) {
+    template <typename Return, typename Class, typename... Arg, typename... Extra /*,*/ PYBIND11_NOEXCEPT_TPL_ARG>
+    cpp_function(Return (Class::*f)(Arg...) PYBIND11_NOEXCEPT_SPECIFIER, const Extra&... extra) {
         initialize([f](Class *c, Arg... args) -> Return { return (c->*f)(args...); },
-                   (Return (*) (Class *, Arg...)) nullptr, extra...);
+                   (Return (*) (Class *, Arg...) PYBIND11_NOEXCEPT_SPECIFIER) nullptr, extra...);
     }
 
     /// Construct a cpp_function from a class method (const)
-    template <typename Return, typename Class, typename... Arg, typename... Extra>
-    cpp_function(Return (Class::*f)(Arg...) const, const Extra&... extra) {
+    template <typename Return, typename Class, typename... Arg, typename... Extra /*,*/ PYBIND11_NOEXCEPT_TPL_ARG>
+    cpp_function(Return (Class::*f)(Arg...) const PYBIND11_NOEXCEPT_SPECIFIER, const Extra&... extra) {
         initialize([f](const Class *c, Arg... args) -> Return { return (c->*f)(args...); },
-                   (Return (*)(const Class *, Arg ...)) nullptr, extra...);
+                   (Return (*)(const Class *, Arg ...) PYBIND11_NOEXCEPT_SPECIFIER) nullptr, extra...);
     }
 
     /// Return the function name
@@ -79,8 +80,8 @@ protected:
     }
 
     /// Special internal constructor for functors, lambda functions, etc.
-    template <typename Func, typename Return, typename... Args, typename... Extra>
-    void initialize(Func &&f, Return (*)(Args...), const Extra&... extra) {
+    template <typename Func, typename Return, typename... Args, typename... Extra /*,*/ PYBIND11_NOEXCEPT_TPL_ARG>
+    void initialize(Func &&f, Return (*)(Args...) PYBIND11_NOEXCEPT_SPECIFIER, const Extra&... extra) {
         static_assert(detail::expected_num_args<Extra...>(sizeof...(Args)),
                       "The number of named arguments does not match the function signature");
 
@@ -110,17 +111,17 @@ protected:
         }
 
         /* Type casters for the function arguments and return value */
-        typedef detail::type_caster<typename std::tuple<Args...>> cast_in;
-        typedef detail::type_caster<typename std::conditional<
-            std::is_void<Return>::value, detail::void_type,
-            typename detail::intrinsic_type<Return>::type>::type> cast_out;
+        using cast_in = detail::argument_loader<Args...>;
+        using cast_out = detail::make_caster<
+            detail::conditional_t<std::is_void<Return>::value, detail::void_type, Return>
+        >;
 
         /* Dispatch code which converts function arguments and performs the actual function call */
         rec->impl = [](detail::function_record *rec, handle args, handle kwargs, handle parent) -> handle {
             cast_in args_converter;
 
             /* Try to cast the function arguments into the C++ domain */
-            if (!args_converter.load_args(args, kwargs, true))
+            if (!args_converter.load_args(args, kwargs))
                 return PYBIND11_TRY_NEXT_OVERLOAD;
 
             /* Invoke call policy pre-call hook */
@@ -130,9 +131,14 @@ protected:
             capture *cap = (capture *) (sizeof(capture) <= sizeof(rec->data)
                                         ? &rec->data : rec->data[0]);
 
+            /* Override policy for rvalues -- always move */
+            constexpr auto is_rvalue = !std::is_pointer<Return>::value
+                                       && !std::is_lvalue_reference<Return>::value;
+            const auto policy = is_rvalue ? return_value_policy::move : rec->policy;
+
             /* Perform the function call */
             handle result = cast_out::cast(args_converter.template call<Return>(cap->f),
-                                           rec->policy, parent);
+                                           policy, parent);
 
             /* Invoke call policy post-call hook */
             detail::process_attributes<Extra...>::postcall(args, result);
@@ -145,7 +151,7 @@ protected:
 
         /* Generate a readable signature describing the function's arguments and return value types */
         using detail::descr; using detail::_;
-        PYBIND11_DESCR signature = _("(") + cast_in::element_names() + _(") -> ") + cast_out::name();
+        PYBIND11_DESCR signature = _("(") + cast_in::arg_names() + _(") -> ") + cast_out::name();
 
         /* Register the function with Python from generic (non-templated) code */
         initialize_generic(rec, signature.text(), signature.types(), sizeof...(Args));
@@ -154,7 +160,7 @@ protected:
         if (cast_in::has_kwargs) rec->has_kwargs = true;
 
         /* Stash some additional information used by an important optimization in 'functional.h' */
-        using FunctionType = Return (*)(Args...);
+        using FunctionType = Return (*)(Args...) PYBIND11_NOEXCEPT_SPECIFIER;
         constexpr bool is_function_ptr =
             std::is_convertible<Func, FunctionType>::value &&
             sizeof(capture) == sizeof(void *);
@@ -180,8 +186,6 @@ protected:
                 a.descr = strdup(a.value.attr("__repr__")().cast<std::string>().c_str());
         }
 
-        auto const &registered_types = detail::get_internals().registered_types_cpp;
-
         /* Generate a proper function signature */
         std::string signature;
         size_t type_depth = 0, char_index = 0, type_index = 0, arg_index = 0;
@@ -195,10 +199,10 @@ protected:
                 if (type_depth == 0 && text[char_index] != '*' && arg_index < args) {
                     if (!rec->args.empty()) {
                         signature += rec->args[arg_index].name;
-                    } else if (arg_index == 0 && rec->class_) {
+                    } else if (arg_index == 0 && rec->is_method) {
                         signature += "self";
                     } else {
-                        signature += "arg" + std::to_string(arg_index - (rec->class_ ? 1 : 0));
+                        signature += "arg" + std::to_string(arg_index - (rec->is_method ? 1 : 0));
                     }
                     signature += ": ";
                 }
@@ -216,9 +220,13 @@ protected:
                 const std::type_info *t = types[type_index++];
                 if (!t)
                     pybind11_fail("Internal error while parsing type signature (1)");
-                auto it = registered_types.find(std::type_index(*t));
-                if (it != registered_types.end()) {
-                    signature += ((const detail::type_info *) it->second)->type->tp_name;
+                if (auto tinfo = detail::get_type_info(*t)) {
+#if defined(PYPY_VERSION)
+                    signature += handle((PyObject *) tinfo->type)
+                                     .attr("__module__")
+                                     .cast<std::string>() + ".";
+#endif
+                    signature += tinfo->type->tp_name;
                 } else {
                     std::string tname(t->name());
                     detail::clean_type_id(tname);
@@ -256,13 +264,19 @@ protected:
 #endif
 
         detail::function_record *chain = nullptr, *chain_start = rec;
-        if (rec->sibling && PyCFunction_Check(rec->sibling.ptr())) {
-            capsule rec_capsule(PyCFunction_GetSelf(rec->sibling.ptr()), true);
-            chain = (detail::function_record *) rec_capsule;
-            /* Never append a method to an overload chain of a parent class;
-               instead, hide the parent's overloads in this case */
-            if (chain->class_ != rec->class_)
-                chain = nullptr;
+        if (rec->sibling) {
+            if (PyCFunction_Check(rec->sibling.ptr())) {
+                auto rec_capsule = reinterpret_borrow<capsule>(PyCFunction_GET_SELF(rec->sibling.ptr()));
+                chain = (detail::function_record *) rec_capsule;
+                /* Never append a method to an overload chain of a parent class;
+                   instead, hide the parent's overloads in this case */
+                if (chain->scope != rec->scope)
+                    chain = nullptr;
+            }
+            // Don't trigger for things like the default __init__, which are wrapper_descriptors that we are intentionally replacing
+            else if (!rec->sibling.is_none() && rec->name[0] != '_')
+                pybind11_fail("Cannot overload existing non-function object \"" + std::string(rec->name) +
+                        "\" with a function of the same name");
         }
 
         if (!chain) {
@@ -303,7 +317,7 @@ protected:
         int index = 0;
         /* Create a nice pydoc rec including all signatures and
            docstrings of the functions in the overload chain */
-        if (chain) {
+        if (chain && options::show_function_signatures()) {
             // First a generic signature
             signatures += rec->name;
             signatures += "(*args, **kwargs)\n";
@@ -311,15 +325,17 @@ protected:
         }
         // Then specific overload signatures
         for (auto it = chain_start; it != nullptr; it = it->next) {
-            if (chain)
-                signatures += std::to_string(++index) + ". ";
-            signatures += rec->name;
-            signatures += it->signature;
-            signatures += "\n";
-            if (it->doc && strlen(it->doc) > 0) {
+            if (options::show_function_signatures()) {
+                if (chain)
+                    signatures += std::to_string(++index) + ". ";
+                signatures += rec->name;
+                signatures += it->signature;
                 signatures += "\n";
+            }
+            if (it->doc && strlen(it->doc) > 0 && options::show_user_defined_docstrings()) {
+                if (options::show_function_signatures()) signatures += "\n";
                 signatures += it->doc;
-                signatures += "\n";
+                if (options::show_function_signatures()) signatures += "\n";
             }
             if (it->next)
                 signatures += "\n";
@@ -331,8 +347,8 @@ protected:
             std::free((char *) func->m_ml->ml_doc);
         func->m_ml->ml_doc = strdup(signatures.c_str());
 
-        if (rec->class_) {
-            m_ptr = PYBIND11_INSTANCE_METHOD_NEW(m_ptr, rec->class_.ptr());
+        if (rec->is_method) {
+            m_ptr = PYBIND11_INSTANCE_METHOD_NEW(m_ptr, rec->scope.ptr());
             if (!m_ptr)
                 pybind11_fail("cpp_function::cpp_function(): Could not allocate instance method object");
             Py_DECREF(func);
@@ -376,7 +392,7 @@ protected:
                result = PYBIND11_TRY_NEXT_OVERLOAD;
         try {
             for (; it != nullptr; it = it->next) {
-                tuple args_(args, true);
+                auto args_ = reinterpret_borrow<tuple>(args);
                 size_t kwargs_consumed = 0;
 
                 /* For each overload:
@@ -496,9 +512,9 @@ protected:
                 msg += "\n";
             }
             msg += "\nInvoked with: ";
-            tuple args_(args, true);
+            auto args_ = reinterpret_borrow<tuple>(args);
             for (size_t ti = overloads->is_constructor ? 1 : 0; ti < args_.size(); ++ti) {
-                msg += static_cast<std::string>(static_cast<object>(args_[ti]).str());
+                msg += pybind11::repr(args_[ti]);
                 if ((ti + 1) != args_.size() )
                     msg += ", ";
             }
@@ -529,6 +545,7 @@ public:
     PYBIND11_OBJECT_DEFAULT(module, object, PyModule_Check)
 
     explicit module(const char *name, const char *doc = nullptr) {
+        if (!options::show_user_defined_docstrings()) doc = nullptr;
 #if PY_MAJOR_VERSION >= 3
         PyModuleDef *def = new PyModuleDef();
         memset(def, 0, sizeof(PyModuleDef));
@@ -549,16 +566,17 @@ public:
     module &def(const char *name_, Func &&f, const Extra& ... extra) {
         cpp_function func(std::forward<Func>(f), name(name_), scope(*this),
                           sibling(getattr(*this, name_, none())), extra...);
-        /* PyModule_AddObject steals a reference to 'func' */
-        PyModule_AddObject(ptr(), name_, func.inc_ref().ptr());
+        // NB: allow overwriting here because cpp_function sets up a chain with the intention of
+        // overwriting (and has already checked internally that it isn't overwriting non-functions).
+        add_object(name_, func, true /* overwrite */);
         return *this;
     }
 
     module def_submodule(const char *name, const char *doc = nullptr) {
         std::string full_name = std::string(PyModule_GetName(m_ptr))
             + std::string(".") + std::string(name);
-        module result(PyImport_AddModule(full_name.c_str()), true);
-        if (doc)
+        auto result = reinterpret_borrow<module>(PyImport_AddModule(full_name.c_str()));
+        if (doc && options::show_user_defined_docstrings())
             result.attr("__doc__") = pybind11::str(doc);
         attr(name) = result;
         return result;
@@ -567,17 +585,30 @@ public:
     static module import(const char *name) {
         PyObject *obj = PyImport_ImportModule(name);
         if (!obj)
-            throw import_error("Module \"" + std::string(name) + "\" not found!");
-        return module(obj, false);
+            throw error_already_set();
+        return reinterpret_steal<module>(obj);
+    }
+
+    // Adds an object to the module using the given name.  Throws if an object with the given name
+    // already exists.
+    //
+    // overwrite should almost always be false: attempting to overwrite objects that pybind11 has
+    // established will, in most cases, break things.
+    PYBIND11_NOINLINE void add_object(const char *name, object &obj, bool overwrite = false) {
+        if (!overwrite && hasattr(*this, name))
+            pybind11_fail("Error during initialization: multiple incompatible definitions with name \"" +
+                    std::string(name) + "\"");
+
+        obj.inc_ref(); // PyModule_AddObject() steals a reference
+        PyModule_AddObject(ptr(), name, obj.ptr());
     }
 };
 
 NAMESPACE_BEGIN(detail)
 extern "C" inline PyObject *get_dict(PyObject *op, void *) {
     PyObject *&dict = *_PyObject_GetDictPtr(op);
-    if (!dict) {
+    if (!dict)
         dict = PyDict_New();
-    }
     Py_XINCREF(dict);
     return dict;
 }
@@ -610,14 +641,17 @@ protected:
         auto &internals = get_internals();
         auto tindex = std::type_index(*(rec->type));
 
-        if (internals.registered_types_cpp.find(tindex) !=
-            internals.registered_types_cpp.end())
+        if (get_type_info(*(rec->type)))
             pybind11_fail("generic_type: type \"" + std::string(rec->name) +
                           "\" is already registered!");
 
-        object name(PYBIND11_FROM_STRING(rec->name), false);
+        auto name = reinterpret_steal<object>(PYBIND11_FROM_STRING(rec->name));
         object scope_module;
         if (rec->scope) {
+            if (hasattr(rec->scope, rec->name))
+                pybind11_fail("generic_type: cannot initialize type \"" + std::string(rec->name) +
+                        "\": an object with that name is already defined");
+
             if (hasattr(rec->scope, "__module__")) {
                 scope_module = rec->scope.attr("__module__");
             } else if (hasattr(rec->scope, "__name__")) {
@@ -630,25 +664,59 @@ protected:
         object scope_qualname;
         if (rec->scope && hasattr(rec->scope, "__qualname__"))
             scope_qualname = rec->scope.attr("__qualname__");
-        object ht_qualname;
-        if (scope_qualname) {
-            ht_qualname = object(PyUnicode_FromFormat(
-                "%U.%U", scope_qualname.ptr(), name.ptr()), false);
-        } else {
+        object ht_qualname, ht_qualname_meta;
+        if (scope_qualname)
+            ht_qualname = reinterpret_steal<object>(PyUnicode_FromFormat(
+                "%U.%U", scope_qualname.ptr(), name.ptr()));
+        else
             ht_qualname = name;
-        }
+        if (rec->metaclass)
+            ht_qualname_meta = reinterpret_steal<object>(
+                PyUnicode_FromFormat("%U__Meta", ht_qualname.ptr()));
 #endif
 
-        size_t num_bases = rec->bases.size();
-        tuple bases(num_bases);
-        for (size_t i = 0; i < num_bases; ++i)
-            bases[i] = rec->bases[i];
-
-        std::string full_name = (scope_module ? ((std::string) scope_module.str() + "." + rec->name)
+#if !defined(PYPY_VERSION)
+        std::string full_name = (scope_module ? ((std::string) pybind11::str(scope_module) + "." + rec->name)
                                               : std::string(rec->name));
+#else
+        std::string full_name = std::string(rec->name);
+#endif
+
+        /* Create a custom metaclass if requested (used for static properties) */
+        object metaclass;
+        if (rec->metaclass) {
+            std::string meta_name_ = full_name + "__Meta";
+            object meta_name = reinterpret_steal<object>(PYBIND11_FROM_STRING(meta_name_.c_str()));
+            metaclass = reinterpret_steal<object>(PyType_Type.tp_alloc(&PyType_Type, 0));
+            if (!metaclass || !name)
+                pybind11_fail("generic_type::generic_type(): unable to create metaclass!");
+
+            /* Danger zone: from now (and until PyType_Ready), make sure to
+               issue no Python C API calls which could potentially invoke the
+               garbage collector (the GC will call type_traverse(), which will in
+               turn find the newly constructed type in an invalid state) */
+
+            auto type = (PyHeapTypeObject*) metaclass.ptr();
+            type->ht_name = meta_name.release().ptr();
+
+#if PY_MAJOR_VERSION >= 3 && PY_MINOR_VERSION >= 3
+            /* Qualified names for Python >= 3.3 */
+            type->ht_qualname = ht_qualname_meta.release().ptr();
+#endif
+            type->ht_type.tp_name = strdup(meta_name_.c_str());
+            type->ht_type.tp_base = &PyType_Type;
+            type->ht_type.tp_flags |= (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HEAPTYPE) &
+                                      ~Py_TPFLAGS_HAVE_GC;
+
+            if (PyType_Ready(&type->ht_type) < 0)
+                pybind11_fail("generic_type::generic_type(): failure in PyType_Ready() for metaclass!");
+        }
+
+        size_t num_bases = rec->bases.size();
+        auto bases = tuple(rec->bases);
 
         char *tp_doc = nullptr;
-        if (rec->doc) {
+        if (rec->doc && options::show_user_defined_docstrings()) {
             /* Allocate memory for docstring (using PyObject_MALLOC, since
                Python will free this later on) */
             size_t size = strlen(rec->doc) + 1;
@@ -661,7 +729,7 @@ protected:
            garbage collector (the GC will call type_traverse(), which will in
            turn find the newly constructed type in an invalid state) */
 
-        object type_holder(PyType_Type.tp_alloc(&PyType_Type, 0), false);
+        auto type_holder = reinterpret_steal<object>(PyType_Type.tp_alloc(&PyType_Type, 0));
         auto type = (PyHeapTypeObject*) type_holder.ptr();
 
         if (!type_holder || !name)
@@ -672,6 +740,7 @@ protected:
         tinfo->type = (PyTypeObject *) type;
         tinfo->type_size = rec->type_size;
         tinfo->init_holder = rec->init_holder;
+        tinfo->direct_conversions = &internals.direct_conversions[tindex];
         internals.registered_types_cpp[tindex] = tinfo;
         internals.registered_types_py[type] = tinfo;
 
@@ -691,6 +760,9 @@ protected:
         type->ht_qualname = ht_qualname.release().ptr();
 #endif
 
+        /* Metaclass */
+        PYBIND11_OB_TYPE(type->ht_type) = (PyTypeObject *) metaclass.release().ptr();
+
         /* Supported protocols */
         type->ht_type.tp_as_number = &type->as_number;
         type->ht_type.tp_as_sequence = &type->as_sequence;
@@ -713,6 +785,11 @@ protected:
 
         /* Support dynamic attributes */
         if (rec->dynamic_attr) {
+            #if defined(PYPY_VERSION)
+                pybind11_fail(std::string(rec->name) + ": dynamic attributes are "
+                                                       "currently not supported in "
+                                                       "conunction with PyPy!");
+            #endif
             type->ht_type.tp_flags |= Py_TPFLAGS_HAVE_GC;
             type->ht_type.tp_dictoffset = type->ht_type.tp_basicsize; // place the dict at the end
             type->ht_type.tp_basicsize += sizeof(PyObject *); // and allocate enough space for it
@@ -721,14 +798,23 @@ protected:
             type->ht_type.tp_clear = clear;
         }
 
+        if (rec->buffer_protocol) {
+            type->ht_type.tp_as_buffer = &type->as_buffer;
+#if PY_MAJOR_VERSION < 3
+            type->ht_type.tp_flags |= Py_TPFLAGS_HAVE_NEWBUFFER;
+#endif
+            type->as_buffer.bf_getbuffer = getbuffer;
+            type->as_buffer.bf_releasebuffer = releasebuffer;
+        }
+
         type->ht_type.tp_doc = tp_doc;
 
+        m_ptr = type_holder.ptr();
+
         if (PyType_Ready(&type->ht_type) < 0)
             pybind11_fail(std::string(rec->name) + ": PyType_Ready failed (" +
                           detail::error_string() + ")!");
 
-        m_ptr = type_holder.ptr();
-
         if (scope_module) // Needed by pydoc
             attr("__module__") = scope_module;
 
@@ -744,7 +830,7 @@ protected:
 
     /// Helper function which tags all parents of a type using mult. inheritance
     void mark_parents_nonsimple(PyTypeObject *value) {
-        tuple t(value->tp_bases, true);
+        auto t = reinterpret_borrow<tuple>(value->tp_bases);
         for (handle h : t) {
             auto tinfo2 = get_type_info((PyTypeObject *) h.ptr());
             if (tinfo2)
@@ -753,43 +839,14 @@ protected:
         }
     }
 
-    /// Allocate a metaclass on demand (for static properties)
-    handle metaclass() {
-        auto &ht_type = ((PyHeapTypeObject *) m_ptr)->ht_type;
-        auto &ob_type = PYBIND11_OB_TYPE(ht_type);
-
-        if (ob_type == &PyType_Type) {
-            std::string name_ = std::string(ht_type.tp_name) + "__Meta";
-#if PY_MAJOR_VERSION >= 3 && PY_MINOR_VERSION >= 3
-            object ht_qualname(PyUnicode_FromFormat("%U__Meta", attr("__qualname__").ptr()), false);
-#endif
-            object name(PYBIND11_FROM_STRING(name_.c_str()), false);
-            object type_holder(PyType_Type.tp_alloc(&PyType_Type, 0), false);
-            if (!type_holder || !name)
-                pybind11_fail("generic_type::metaclass(): unable to create type object!");
-
-            auto type = (PyHeapTypeObject*) type_holder.ptr();
-            type->ht_name = name.release().ptr();
-
-#if PY_MAJOR_VERSION >= 3 && PY_MINOR_VERSION >= 3
-            /* Qualified names for Python >= 3.3 */
-            type->ht_qualname = ht_qualname.release().ptr();
-#endif
-            type->ht_type.tp_name = strdup(name_.c_str());
-            type->ht_type.tp_base = ob_type;
-            type->ht_type.tp_flags |= (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HEAPTYPE) &
-                                      ~Py_TPFLAGS_HAVE_GC;
-
-            if (PyType_Ready(&type->ht_type) < 0)
-                pybind11_fail("generic_type::metaclass(): PyType_Ready failed!");
-
-            ob_type = (PyTypeObject *) type_holder.release().ptr();
-        }
-        return handle((PyObject *) ob_type);
-    }
-
     static int init(void *self, PyObject *, PyObject *) {
-        std::string msg = std::string(Py_TYPE(self)->tp_name) + ": No constructor defined!";
+        PyTypeObject *type = Py_TYPE(self);
+        std::string msg;
+#if defined(PYPY_VERSION)
+        msg += handle((PyObject *) type).attr("__module__").cast<std::string>() + ".";
+#endif
+        msg += type->tp_name;
+        msg += ": No constructor defined!";
         PyErr_SetString(PyExc_TypeError, msg.c_str());
         return -1;
     }
@@ -799,7 +856,7 @@ protected:
         auto tinfo = detail::get_type_info(type);
         self->value = ::operator new(tinfo->type_size);
         self->owned = true;
-        self->constructed = false;
+        self->holder_constructed = false;
         detail::get_internals().registered_instances.emplace(self->value, (PyObject *) self);
         return (PyObject *) self;
     }
@@ -824,9 +881,8 @@ protected:
                 PyObject_ClearWeakRefs((PyObject *) self);
 
             PyObject **dict_ptr = _PyObject_GetDictPtr((PyObject *) self);
-            if (dict_ptr) {
+            if (dict_ptr)
                 Py_CLEAR(*dict_ptr);
-            }
         }
         Py_TYPE(self)->tp_free((PyObject*) self);
     }
@@ -847,13 +903,15 @@ protected:
             buffer_info *(*get_buffer)(PyObject *, void *),
             void *get_buffer_data) {
         PyHeapTypeObject *type = (PyHeapTypeObject*) m_ptr;
-        type->ht_type.tp_as_buffer = &type->as_buffer;
-#if PY_MAJOR_VERSION < 3
-        type->ht_type.tp_flags |= Py_TPFLAGS_HAVE_NEWBUFFER;
-#endif
-        type->as_buffer.bf_getbuffer = getbuffer;
-        type->as_buffer.bf_releasebuffer = releasebuffer;
         auto tinfo = detail::get_type_info(&type->ht_type);
+
+        if (!type->ht_type.tp_as_buffer)
+            pybind11_fail(
+                "To be able to register buffer protocol support for the type '" +
+                std::string(tinfo->type->tp_name) +
+                "' the associated class<>(..) invocation must "
+                "include the pybind11::buffer_protocol() annotation!");
+
         tinfo->get_buffer = get_buffer;
         tinfo->get_buffer_data = get_buffer_data;
     }
@@ -861,6 +919,8 @@ protected:
     static int getbuffer(PyObject *obj, Py_buffer *view, int flags) {
         auto tinfo = detail::get_type_info(Py_TYPE(obj));
         if (view == nullptr || obj == nullptr || !tinfo || !tinfo->get_buffer) {
+            if (view)
+                view->obj = nullptr;
             PyErr_SetString(PyExc_BufferError, "generic_type::getbuffer(): Internal error");
             return -1;
         }
@@ -886,6 +946,31 @@ protected:
     }
 
     static void releasebuffer(PyObject *, Py_buffer *view) { delete (buffer_info *) view->internal; }
+
+    void def_property_static_impl(const char *name,
+                                  handle fget, handle fset,
+                                  detail::function_record *rec_fget) {
+        pybind11::str doc_obj = pybind11::str(
+            (rec_fget->doc && pybind11::options::show_user_defined_docstrings())
+                ? rec_fget->doc : "");
+        const auto property = reinterpret_steal<object>(
+            PyObject_CallFunctionObjArgs((PyObject *) &PyProperty_Type, fget.ptr() ? fget.ptr() : Py_None,
+                                         fset.ptr() ? fset.ptr() : Py_None, Py_None, doc_obj.ptr(), nullptr));
+        if (rec_fget->is_method && rec_fget->scope) {
+            attr(name) = property;
+        } else {
+            auto mclass = handle((PyObject *) PYBIND11_OB_TYPE(*((PyTypeObject *) m_ptr)));
+
+            if ((PyTypeObject *) mclass.ptr() == &PyType_Type)
+                pybind11_fail(
+                    "Adding static properties to the type '" +
+                    std::string(((PyTypeObject *) m_ptr)->tp_name) +
+                    "' requires the type to have a custom metaclass. Please "
+                    "ensure that one is created by supplying the pybind11::metaclass() "
+                    "annotation to the associated class_<>(..) invocation.");
+            mclass.attr(name) = property;
+        }
+    }
 };
 
 NAMESPACE_END(detail)
@@ -895,12 +980,9 @@ class class_ : public detail::generic_type {
     template <typename T> using is_holder = detail::is_holder_type<type_, T>;
     template <typename T> using is_subtype = detail::bool_constant<std::is_base_of<type_, T>::value && !std::is_same<T, type_>::value>;
     template <typename T> using is_base = detail::bool_constant<std::is_base_of<T, type_>::value && !std::is_same<T, type_>::value>;
-    template <typename T> using is_valid_class_option =
-        detail::bool_constant<
-            is_holder<T>::value ||
-            is_subtype<T>::value ||
-            is_base<T>::value
-        >;
+    // struct instead of using here to help MSVC:
+    template <typename T> struct is_valid_class_option :
+        detail::any_of<is_holder<T>, is_subtype<T>, is_base<T>> {};
 
 public:
     using type = type_;
@@ -909,10 +991,10 @@ public:
     using holder_type = detail::first_of_t<is_holder, std::unique_ptr<type>, options...>;
     using instance_type = detail::instance<type, holder_type>;
 
-    static_assert(detail::all_of_t<is_valid_class_option, options...>::value,
+    static_assert(detail::all_of<is_valid_class_option<options>...>::value,
             "Unknown/invalid class_ template parameters provided");
 
-    PYBIND11_OBJECT(class_, detail::generic_type, PyType_Check)
+    PYBIND11_OBJECT(class_, generic_type, PyType_Check)
 
     template <typename... Extra>
     class_(handle scope, const char *name, const Extra &... extra) {
@@ -994,7 +1076,7 @@ public:
         struct capture { Func func; };
         capture *ptr = new capture { std::forward<Func>(func) };
         install_buffer_funcs([](PyObject *obj, void *ptr) -> buffer_info* {
-            detail::type_caster<type> caster;
+            detail::make_caster<type> caster;
             if (!caster.load(obj, false))
                 return nullptr;
             return new buffer_info(((capture *) ptr)->func(caster));
@@ -1032,23 +1114,49 @@ public:
         return *this;
     }
 
+    /// Uses return_value_policy::reference_internal by default
+    template <typename Getter, typename... Extra>
+    class_ &def_property_readonly(const char *name, const Getter &fget, const Extra& ...extra) {
+        return def_property_readonly(name, cpp_function(fget), return_value_policy::reference_internal, extra...);
+    }
+
+    /// Uses cpp_function's return_value_policy by default
     template <typename... Extra>
     class_ &def_property_readonly(const char *name, const cpp_function &fget, const Extra& ...extra) {
-        def_property(name, fget, cpp_function(), extra...);
-        return *this;
+        return def_property(name, fget, cpp_function(), extra...);
+    }
+
+    /// Uses return_value_policy::reference by default
+    template <typename Getter, typename... Extra>
+    class_ &def_property_readonly_static(const char *name, const Getter &fget, const Extra& ...extra) {
+        return def_property_readonly_static(name, cpp_function(fget), return_value_policy::reference, extra...);
     }
 
+    /// Uses cpp_function's return_value_policy by default
     template <typename... Extra>
     class_ &def_property_readonly_static(const char *name, const cpp_function &fget, const Extra& ...extra) {
-        def_property_static(name, fget, cpp_function(), extra...);
-        return *this;
+        return def_property_static(name, fget, cpp_function(), extra...);
+    }
+
+    /// Uses return_value_policy::reference_internal by default
+    template <typename Getter, typename... Extra>
+    class_ &def_property(const char *name, const Getter &fget, const cpp_function &fset, const Extra& ...extra) {
+        return def_property(name, cpp_function(fget), fset, return_value_policy::reference_internal, extra...);
     }
 
+    /// Uses cpp_function's return_value_policy by default
     template <typename... Extra>
     class_ &def_property(const char *name, const cpp_function &fget, const cpp_function &fset, const Extra& ...extra) {
         return def_property_static(name, fget, fset, is_method(*this), extra...);
     }
 
+    /// Uses return_value_policy::reference by default
+    template <typename Getter, typename... Extra>
+    class_ &def_property_static(const char *name, const Getter &fget, const cpp_function &fset, const Extra& ...extra) {
+        return def_property_static(name, cpp_function(fget), fset, return_value_policy::reference, extra...);
+    }
+
+    /// Uses cpp_function's return_value_policy by default
     template <typename... Extra>
     class_ &def_property_static(const char *name, const cpp_function &fget, const cpp_function &fset, const Extra& ...extra) {
         auto rec_fget = get_function_record(fget), rec_fset = get_function_record(fset);
@@ -1066,14 +1174,7 @@ public:
                 rec_fset->doc = strdup(rec_fset->doc);
             }
         }
-        pybind11::str doc_obj = pybind11::str(rec_fget->doc ? rec_fget->doc : "");
-        object property(
-            PyObject_CallFunctionObjArgs((PyObject *) &PyProperty_Type, fget.ptr() ? fget.ptr() : Py_None,
-                                         fset.ptr() ? fset.ptr() : Py_None, Py_None, doc_obj.ptr(), nullptr), false);
-        if (rec_fget->class_)
-            attr(name) = property;
-        else
-            metaclass().attr(name) = property;
+        def_property_static_impl(name, fget, fset, rec_fget);
         return *this;
     }
 
@@ -1083,8 +1184,12 @@ private:
     static void init_holder_helper(instance_type *inst, const holder_type * /* unused */, const std::enable_shared_from_this<T> * /* dummy */) {
         try {
             new (&inst->holder) holder_type(std::static_pointer_cast<typename holder_type::element_type>(inst->value->shared_from_this()));
+            inst->holder_constructed = true;
         } catch (const std::bad_weak_ptr &) {
-            new (&inst->holder) holder_type(inst->value);
+            if (inst->owned) {
+                new (&inst->holder) holder_type(inst->value);
+                inst->holder_constructed = true;
+            }
         }
     }
 
@@ -1092,41 +1197,45 @@ private:
     template <typename T = holder_type,
               detail::enable_if_t<std::is_copy_constructible<T>::value, int> = 0>
     static void init_holder_helper(instance_type *inst, const holder_type *holder_ptr, const void * /* dummy */) {
-        if (holder_ptr)
+        if (holder_ptr) {
             new (&inst->holder) holder_type(*holder_ptr);
-        else
+            inst->holder_constructed = true;
+        } else if (inst->owned || detail::always_construct_holder<holder_type>::value) {
             new (&inst->holder) holder_type(inst->value);
+            inst->holder_constructed = true;
+        }
     }
 
     /// Initialize holder object, variant 3: holder is not copy constructible (e.g. unique_ptr), always initialize from raw pointer
     template <typename T = holder_type,
               detail::enable_if_t<!std::is_copy_constructible<T>::value, int> = 0>
     static void init_holder_helper(instance_type *inst, const holder_type * /* unused */, const void * /* dummy */) {
-        new (&inst->holder) holder_type(inst->value);
+        if (inst->owned || detail::always_construct_holder<holder_type>::value) {
+            new (&inst->holder) holder_type(inst->value);
+            inst->holder_constructed = true;
+        }
     }
 
     /// Initialize holder object of an instance, possibly given a pointer to an existing holder
     static void init_holder(PyObject *inst_, const void *holder_ptr) {
         auto inst = (instance_type *) inst_;
         init_holder_helper(inst, (const holder_type *) holder_ptr, inst->value);
-        inst->constructed = true;
     }
 
     static void dealloc(PyObject *inst_) {
         instance_type *inst = (instance_type *) inst_;
-        if (inst->owned) {
-            if (inst->constructed)
-                inst->holder.~holder_type();
-            else
-                ::operator delete(inst->value);
-        }
+        if (inst->holder_constructed)
+            inst->holder.~holder_type();
+        else if (inst->owned)
+            ::operator delete(inst->value);
+
         generic_type::dealloc((detail::instance<void> *) inst);
     }
 
     static detail::function_record *get_function_record(handle h) {
         h = detail::get_function(h);
-        return h ? (detail::function_record *) capsule(
-               PyCFunction_GetSelf(h.ptr()), true) : nullptr;
+        return h ? (detail::function_record *) reinterpret_borrow<capsule>(PyCFunction_GET_SELF(h.ptr()))
+                 : nullptr;
     }
 };
 
@@ -1134,54 +1243,101 @@ private:
 template <typename Type> class enum_ : public class_<Type> {
 public:
     using class_<Type>::def;
-    using UnderlyingType = typename std::underlying_type<Type>::type;
+    using Scalar = typename std::underlying_type<Type>::type;
+    template <typename T> using arithmetic_tag = std::is_same<T, arithmetic>;
+
     template <typename... Extra>
     enum_(const handle &scope, const char *name, const Extra&... extra)
       : class_<Type>(scope, name, extra...), m_parent(scope) {
-        auto entries = new std::unordered_map<UnderlyingType, const char *>();
+
+        constexpr bool is_arithmetic =
+            !std::is_same<detail::first_of_t<arithmetic_tag, void, Extra...>,
+                          void>::value;
+
+        auto entries = new std::unordered_map<Scalar, const char *>();
         def("__repr__", [name, entries](Type value) -> std::string {
-            auto it = entries->find((UnderlyingType) value);
+            auto it = entries->find((Scalar) value);
             return std::string(name) + "." +
                 ((it == entries->end()) ? std::string("???")
                                         : std::string(it->second));
         });
-        def("__init__", [](Type& value, UnderlyingType i) { value = (Type)i; });
-        def("__init__", [](Type& value, UnderlyingType i) { new (&value) Type((Type) i); });
-        def("__int__", [](Type value) { return (UnderlyingType) value; });
+        def("__init__", [](Type& value, Scalar i) { value = (Type)i; });
+        def("__init__", [](Type& value, Scalar i) { new (&value) Type((Type) i); });
+        def("__int__", [](Type value) { return (Scalar) value; });
         def("__eq__", [](const Type &value, Type *value2) { return value2 && value == *value2; });
         def("__ne__", [](const Type &value, Type *value2) { return !value2 || value != *value2; });
-        if (std::is_convertible<Type, UnderlyingType>::value) {
+        if (is_arithmetic) {
+            def("__lt__", [](const Type &value, Type *value2) { return value2 && value < *value2; });
+            def("__gt__", [](const Type &value, Type *value2) { return value2 && value > *value2; });
+            def("__le__", [](const Type &value, Type *value2) { return value2 && value <= *value2; });
+            def("__ge__", [](const Type &value, Type *value2) { return value2 && value >= *value2; });
+        }
+        if (std::is_convertible<Type, Scalar>::value) {
             // Don't provide comparison with the underlying type if the enum isn't convertible,
             // i.e. if Type is a scoped enum, mirroring the C++ behaviour.  (NB: we explicitly
-            // convert Type to UnderlyingType below anyway because this needs to compile).
-            def("__eq__", [](const Type &value, UnderlyingType value2) { return (UnderlyingType) value == value2; });
-            def("__ne__", [](const Type &value, UnderlyingType value2) { return (UnderlyingType) value != value2; });
+            // convert Type to Scalar below anyway because this needs to compile).
+            def("__eq__", [](const Type &value, Scalar value2) { return (Scalar) value == value2; });
+            def("__ne__", [](const Type &value, Scalar value2) { return (Scalar) value != value2; });
+            if (is_arithmetic) {
+                def("__lt__", [](const Type &value, Scalar value2) { return (Scalar) value < value2; });
+                def("__gt__", [](const Type &value, Scalar value2) { return (Scalar) value > value2; });
+                def("__le__", [](const Type &value, Scalar value2) { return (Scalar) value <= value2; });
+                def("__ge__", [](const Type &value, Scalar value2) { return (Scalar) value >= value2; });
+                def("__invert__", [](const Type &value) { return ~((Scalar) value); });
+                def("__and__", [](const Type &value, Scalar value2) { return (Scalar) value & value2; });
+                def("__or__", [](const Type &value, Scalar value2) { return (Scalar) value | value2; });
+                def("__xor__", [](const Type &value, Scalar value2) { return (Scalar) value ^ value2; });
+                def("__rand__", [](const Type &value, Scalar value2) { return (Scalar) value & value2; });
+                def("__ror__", [](const Type &value, Scalar value2) { return (Scalar) value | value2; });
+                def("__rxor__", [](const Type &value, Scalar value2) { return (Scalar) value ^ value2; });
+                def("__and__", [](const Type &value, const Type &value2) { return (Scalar) value & (Scalar) value2; });
+                def("__or__", [](const Type &value, const Type &value2) { return (Scalar) value | (Scalar) value2; });
+                def("__xor__", [](const Type &value, const Type &value2) { return (Scalar) value ^ (Scalar) value2; });
+            }
         }
-        def("__hash__", [](const Type &value) { return (UnderlyingType) value; });
+        def("__hash__", [](const Type &value) { return (Scalar) value; });
         // Pickling and unpickling -- needed for use with the 'multiprocessing' module
-        def("__getstate__", [](const Type &value) { return pybind11::make_tuple((UnderlyingType) value); });
-        def("__setstate__", [](Type &p, tuple t) { new (&p) Type((Type) t[0].cast<UnderlyingType>()); });
+        def("__getstate__", [](const Type &value) { return pybind11::make_tuple((Scalar) value); });
+        def("__setstate__", [](Type &p, tuple t) { new (&p) Type((Type) t[0].cast<Scalar>()); });
         m_entries = entries;
     }
 
     /// Export enumeration entries into the parent scope
-    void export_values() {
+    enum_ &export_values() {
+#if !defined(PYPY_VERSION)
         PyObject *dict = ((PyTypeObject *) this->m_ptr)->tp_dict;
         PyObject *key, *value;
         ssize_t pos = 0;
-        while (PyDict_Next(dict, &pos, &key, &value))
+
+        while (PyDict_Next(dict, &pos, &key, &value)) {
             if (PyObject_IsInstance(value, this->m_ptr))
                 m_parent.attr(key) = value;
+        }
+#else
+        /* PyPy's cpyext still has difficulties with the above
+           CPython API calls; emulate using Python code. */
+        dict d; d["t"] = *this; d["p"] = m_parent;
+        PyObject *result = PyRun_String(
+            "for k, v in t.__dict__.items():\n"
+            "    if isinstance(v, t):\n"
+            "        setattr(p, k, v)\n",
+            Py_file_input, d.ptr(), d.ptr());
+        if (result == nullptr)
+            throw error_already_set();
+        Py_DECREF(result);
+#endif
+
+        return *this;
     }
 
     /// Add an enumeration entry
     enum_& value(char const* name, Type value) {
         this->attr(name) = pybind11::cast(value, return_value_policy::copy);
-        (*m_entries)[(UnderlyingType) value] = name;
+        (*m_entries)[(Scalar) value] = name;
         return *this;
     }
 private:
-    std::unordered_map<UnderlyingType, const char *> *m_entries;
+    std::unordered_map<Scalar, const char *> *m_entries;
     handle m_parent;
 };
 
@@ -1324,7 +1480,7 @@ template <return_value_policy Policy = return_value_policy::reference_internal,
 
 template <typename InputType, typename OutputType> void implicitly_convertible() {
     auto implicit_caster = [](PyObject *obj, PyTypeObject *type) -> PyObject * {
-        if (!detail::type_caster<InputType>().load(obj, false))
+        if (!detail::make_caster<InputType>().load(obj, false))
             return nullptr;
         tuple args(1);
         args[0] = obj;
@@ -1333,11 +1489,11 @@ template <typename InputType, typename OutputType> void implicitly_convertible()
             PyErr_Clear();
         return result;
     };
-    auto &registered_types = detail::get_internals().registered_types_cpp;
-    auto it = registered_types.find(std::type_index(typeid(OutputType)));
-    if (it == registered_types.end())
+
+    if (auto tinfo = detail::get_type_info(typeid(OutputType)))
+        tinfo->implicit_conversions.push_back(implicit_caster);
+    else
         pybind11_fail("implicitly_convertible: Unable to find type " + type_id<OutputType>());
-    ((detail::type_info *) it->second)->implicit_conversions.push_back(implicit_caster);
 }
 
 template <typename ExceptionTranslator>
@@ -1355,13 +1511,14 @@ void register_exception_translator(ExceptionTranslator&& translator) {
 template <typename type>
 class exception : public object {
 public:
-    exception(module &m, const std::string &name, PyObject* base=PyExc_Exception) {
-        std::string full_name = std::string(PyModule_GetName(m.ptr()))
-                + std::string(".") + name;
-        char* exception_name = const_cast<char*>(full_name.c_str());
-        m_ptr = PyErr_NewException(exception_name, base, NULL);
-        inc_ref(); // PyModule_AddObject() steals a reference
-        PyModule_AddObject(m.ptr(), name.c_str(), m_ptr);
+    exception(handle scope, const char *name, PyObject *base = PyExc_Exception) {
+        std::string full_name = scope.attr("__name__").cast<std::string>() +
+                                std::string(".") + name;
+        m_ptr = PyErr_NewException((char *) full_name.c_str(), base, NULL);
+        if (hasattr(scope, name))
+            pybind11_fail("Error during initialization: multiple incompatible "
+                          "definitions with name \"" + std::string(name) + "\"");
+        scope.attr(name) = *this;
     }
 
     // Sets the current python exception to this exception object with the given message
@@ -1375,14 +1532,16 @@ public:
  * This is intended for simple exception translations; for more complex translation, register the
  * exception object and translator directly.
  */
-template <typename CppException> exception<CppException>& register_exception(module &m, const std::string &name, PyObject* base = PyExc_Exception) {
-    static exception<CppException> ex(m, name, base);
+template <typename CppException>
+exception<CppException> &register_exception(handle scope,
+                                            const char *name,
+                                            PyObject *base = PyExc_Exception) {
+    static exception<CppException> ex(scope, name, base);
     register_exception_translator([](std::exception_ptr p) {
         if (!p) return;
         try {
             std::rethrow_exception(p);
-        }
-        catch (const CppException &e) {
+        } catch (const CppException &e) {
             ex(e.what());
         }
     });
@@ -1393,7 +1552,7 @@ NAMESPACE_BEGIN(detail)
 PYBIND11_NOINLINE inline void print(tuple args, dict kwargs) {
     auto strings = tuple(args.size());
     for (size_t i = 0; i < args.size(); ++i) {
-        strings[i] = args[i].str();
+        strings[i] = str(args[i]);
     }
     auto sep = kwargs.contains("sep") ? kwargs["sep"] : cast(" ");
     auto line = sep.attr("join")(strings);
@@ -1404,7 +1563,7 @@ PYBIND11_NOINLINE inline void print(tuple args, dict kwargs) {
     } else {
         try {
             file = module::import("sys").attr("stdout");
-        } catch (const import_error &) {
+        } catch (const error_already_set &) {
             /* If print() is called from code that is executed as
                part of garbage collection during interpreter shutdown,
                importing 'sys' can fail. Give up rather than crashing the
@@ -1428,7 +1587,7 @@ void print(Args &&...args) {
     detail::print(c.args(), c.kwargs());
 }
 
-#if defined(WITH_THREAD)
+#if defined(WITH_THREAD) && !defined(PYPY_VERSION)
 
 /* The functions below essentially reproduce the PyGILState_* API using a RAII
  * pattern, but there are a few important differences:
@@ -1551,16 +1710,38 @@ private:
     PyThreadState *tstate;
     bool disassoc;
 };
+#elif defined(PYPY_VERSION)
+class gil_scoped_acquire {
+    PyGILState_STATE state;
+public:
+    gil_scoped_acquire() { state = PyGILState_Ensure(); }
+    ~gil_scoped_acquire() { PyGILState_Release(state); }
+};
+
+class gil_scoped_release {
+    PyThreadState *state;
+public:
+    gil_scoped_release() { state = PyEval_SaveThread(); }
+    ~gil_scoped_release() { PyEval_RestoreThread(state); }
+};
 #else
 class gil_scoped_acquire { };
 class gil_scoped_release { };
 #endif
 
+error_already_set::~error_already_set() {
+    if (value) {
+        gil_scoped_acquire gil;
+        PyErr_Restore(type, value, trace);
+        PyErr_Clear();
+    }
+}
+
 inline function get_type_overload(const void *this_ptr, const detail::type_info *this_type, const char *name)  {
-    handle py_object = detail::get_object_handle(this_ptr, this_type);
-    if (!py_object)
+    handle self = detail::get_object_handle(this_ptr, this_type);
+    if (!self)
         return function();
-    handle type = py_object.get_type();
+    handle type = self.get_type();
     auto key = std::make_pair(type.ptr(), name);
 
     /* Cache functions that aren't overloaded in Python to avoid
@@ -1569,31 +1750,53 @@ inline function get_type_overload(const void *this_ptr, const detail::type_info
     if (cache.find(key) != cache.end())
         return function();
 
-    function overload = getattr(py_object, name, function());
+    function overload = getattr(self, name, function());
     if (overload.is_cpp_function()) {
         cache.insert(key);
         return function();
     }
 
-    /* Don't call dispatch code if invoked from overridden function */
+    /* Don't call dispatch code if invoked from overridden function.
+       Unfortunately this doesn't work on PyPy. */
+#if !defined(PYPY_VERSION)
     PyFrameObject *frame = PyThreadState_Get()->frame;
-    if (frame && (std::string) pybind11::handle(frame->f_code->co_name).str() == name &&
+    if (frame && (std::string) str(frame->f_code->co_name) == name &&
         frame->f_code->co_argcount > 0) {
         PyFrame_FastToLocals(frame);
         PyObject *self_caller = PyDict_GetItem(
             frame->f_locals, PyTuple_GET_ITEM(frame->f_code->co_varnames, 0));
-        if (self_caller == py_object.ptr())
+        if (self_caller == self.ptr())
             return function();
     }
+#else
+    /* PyPy currently doesn't provide a detailed cpyext emulation of
+       frame objects, so we have to emulate this using Python. This
+       is going to be slow..*/
+    dict d; d["self"] = self; d["name"] = pybind11::str(name);
+    PyObject *result = PyRun_String(
+        "import inspect\n"
+        "frame = inspect.currentframe()\n"
+        "if frame is not None:\n"
+        "    frame = frame.f_back\n"
+        "    if frame is not None and str(frame.f_code.co_name) == name and "
+        "frame.f_code.co_argcount > 0:\n"
+        "        self_caller = frame.f_locals[frame.f_code.co_varnames[0]]\n"
+        "        if self_caller == self:\n"
+        "            self = None\n",
+        Py_file_input, d.ptr(), d.ptr());
+    if (result == nullptr)
+        throw error_already_set();
+    if ((handle) d["self"] == Py_None)
+        return function();
+    Py_DECREF(result);
+#endif
+
     return overload;
 }
 
 template <class T> function get_overload(const T *this_ptr, const char *name) {
-    auto &cpp_types = detail::get_internals().registered_types_cpp;
-    auto it = cpp_types.find(typeid(T));
-    if (it == cpp_types.end())
-        return function();
-    return get_type_overload(this_ptr, (const detail::type_info *) it->second, name);
+    auto tinfo = detail::get_type_info(typeid(T));
+    return tinfo ? get_type_overload(this_ptr, tinfo, name) : function();
 }
 
 #define PYBIND11_OVERLOAD_INT(ret_type, cname, name, ...) { \
diff --git a/pybind11/include/pybind11/pytypes.h b/pybind11/include/pybind11/pytypes.h
index bfe169a72..a89aad78d 100644
--- a/pybind11/include/pybind11/pytypes.h
+++ b/pybind11/include/pybind11/pytypes.h
@@ -22,6 +22,7 @@ struct arg; struct arg_v;
 
 NAMESPACE_BEGIN(detail)
 class args_proxy;
+inline bool isinstance_generic(handle obj, const std::type_info &tp);
 
 // Accessor forward declarations
 template <typename Policy> class accessor;
@@ -42,7 +43,7 @@ using tuple_accessor = accessor<accessor_policies::tuple_item>;
 
 /// Tag and check to identify a class which implements the Python object API
 class pyobject_tag { };
-template <typename T> using is_pyobject = std::is_base_of<pyobject_tag, T>;
+template <typename T> using is_pyobject = std::is_base_of<pyobject_tag, typename std::remove_reference<T>::type>;
 
 /// Mixin which adds common functions to handle, object and various accessors.
 /// The only requirement for `Derived` is to implement `PyObject *Derived::ptr() const`.
@@ -67,8 +68,8 @@ public:
         object call(Args&&... args) const;
 
     bool is_none() const { return derived().ptr() == Py_None; }
+    PYBIND11_DEPRECATED("Instead of obj.str(), use py::str(obj)")
     pybind11::str str() const;
-    pybind11::str repr() const;
 
     int ref_count() const { return static_cast<int>(Py_REFCNT(derived().ptr())); }
     handle get_type() const;
@@ -80,7 +81,7 @@ NAMESPACE_END(detail)
 class handle : public detail::object_api<handle> {
 public:
     handle() = default;
-    handle(PyObject *ptr) : m_ptr(ptr) { }
+    handle(PyObject *ptr) : m_ptr(ptr) { } // Allow implicit conversion from PyObject*
 
     PyObject *ptr() const { return m_ptr; }
     PyObject *&ptr() { return m_ptr; }
@@ -91,6 +92,7 @@ public:
     explicit operator bool() const { return m_ptr != nullptr; }
     bool operator==(const handle &h) const { return m_ptr == h.m_ptr; }
     bool operator!=(const handle &h) const { return m_ptr != h.m_ptr; }
+    PYBIND11_DEPRECATED("Use handle::operator bool() instead")
     bool check() const { return m_ptr != nullptr; }
 protected:
     PyObject *m_ptr = nullptr;
@@ -100,9 +102,9 @@ protected:
 class object : public handle {
 public:
     object() = default;
+    PYBIND11_DEPRECATED("Use reinterpret_borrow<object>() or reinterpret_steal<object>()")
+    object(handle h, bool is_borrowed) : handle(h) { if (is_borrowed) inc_ref(); }
     object(const object &o) : handle(o) { inc_ref(); }
-    object(const handle &h, bool borrowed) : handle(h) { if (borrowed) inc_ref(); }
-    object(PyObject *ptr, bool borrowed) : handle(ptr) { if (borrowed) inc_ref(); }
     object(object &&other) noexcept { m_ptr = other.m_ptr; other.m_ptr = nullptr; }
     ~object() { dec_ref(); }
 
@@ -133,8 +135,43 @@ public:
     template <typename T> T cast() const &;
     // Calling on an object rvalue does a move, if needed and/or possible
     template <typename T> T cast() &&;
+
+protected:
+    // Tags for choosing constructors from raw PyObject *
+    struct borrowed_t { }; static constexpr borrowed_t borrowed{};
+    struct stolen_t { }; static constexpr stolen_t stolen{};
+
+    template <typename T> friend T reinterpret_borrow(handle);
+    template <typename T> friend T reinterpret_steal(handle);
+
+public:
+    // Only accessible from derived classes and the reinterpret_* functions
+    object(handle h, borrowed_t) : handle(h) { inc_ref(); }
+    object(handle h, stolen_t) : handle(h) { }
 };
 
+/** The following functions don't do any kind of conversion, they simply declare
+    that a PyObject is a certain type and borrow or steal the reference. */
+template <typename T> T reinterpret_borrow(handle h) { return {h, object::borrowed}; }
+template <typename T> T reinterpret_steal(handle h) { return {h, object::stolen}; }
+
+/// Check if `obj` is an instance of type `T`
+template <typename T, detail::enable_if_t<std::is_base_of<object, T>::value, int> = 0>
+bool isinstance(handle obj) { return T::_check(obj); }
+
+template <typename T, detail::enable_if_t<!std::is_base_of<object, T>::value, int> = 0>
+bool isinstance(handle obj) { return detail::isinstance_generic(obj, typeid(T)); }
+
+template <> inline bool isinstance<handle>(handle obj) = delete;
+template <> inline bool isinstance<object>(handle obj) { return obj.ptr() != nullptr; }
+
+inline bool isinstance(handle obj, handle type) {
+    const auto result = PyObject_IsInstance(obj.ptr(), type.ptr());
+    if (result == -1)
+        throw error_already_set();
+    return result != 0;
+}
+
 inline bool hasattr(handle obj, handle name) {
     return PyObject_HasAttr(obj.ptr(), name.ptr()) == 1;
 }
@@ -146,30 +183,30 @@ inline bool hasattr(handle obj, const char *name) {
 inline object getattr(handle obj, handle name) {
     PyObject *result = PyObject_GetAttr(obj.ptr(), name.ptr());
     if (!result) { throw error_already_set(); }
-    return {result, false};
+    return reinterpret_steal<object>(result);
 }
 
 inline object getattr(handle obj, const char *name) {
     PyObject *result = PyObject_GetAttrString(obj.ptr(), name);
     if (!result) { throw error_already_set(); }
-    return {result, false};
+    return reinterpret_steal<object>(result);
 }
 
 inline object getattr(handle obj, handle name, handle default_) {
     if (PyObject *result = PyObject_GetAttr(obj.ptr(), name.ptr())) {
-        return {result, false};
+        return reinterpret_steal<object>(result);
     } else {
         PyErr_Clear();
-        return {default_, true};
+        return reinterpret_borrow<object>(default_);
     }
 }
 
 inline object getattr(handle obj, const char *name, handle default_) {
     if (PyObject *result = PyObject_GetAttrString(obj.ptr(), name)) {
-        return {result, false};
+        return reinterpret_steal<object>(result);
     } else {
         PyErr_Clear();
-        return {default_, true};
+        return reinterpret_borrow<object>(default_);
     }
 }
 
@@ -194,6 +231,18 @@ inline handle get_function(handle value) {
     return value;
 }
 
+// Helper aliases/functions to support implicit casting of values given to python accessors/methods.
+// When given a pyobject, this simply returns the pyobject as-is; for other C++ type, the value goes
+// through pybind11::cast(obj) to convert it to an `object`.
+template <typename T, enable_if_t<is_pyobject<T>::value, int> = 0>
+auto object_or_cast(T &&o) -> decltype(std::forward<T>(o)) { return std::forward<T>(o); }
+// The following casting version is implemented in cast.h:
+template <typename T, enable_if_t<!is_pyobject<T>::value, int> = 0>
+object object_or_cast(T &&o);
+// Match a PyObject*, which we want to convert directly to handle via its converting constructor
+inline handle object_or_cast(PyObject *ptr) { return ptr; }
+
+
 template <typename Policy>
 class accessor : public object_api<accessor<Policy>> {
     using key_type = typename Policy::key_type;
@@ -201,22 +250,27 @@ class accessor : public object_api<accessor<Policy>> {
 public:
     accessor(handle obj, key_type key) : obj(obj), key(std::move(key)) { }
 
+    // accessor overload required to override default assignment operator (templates are not allowed
+    // to replace default compiler-generated assignments).
     void operator=(const accessor &a) && { std::move(*this).operator=(handle(a)); }
     void operator=(const accessor &a) & { operator=(handle(a)); }
-    void operator=(const object &o) && { std::move(*this).operator=(handle(o)); }
-    void operator=(const object &o) & { operator=(handle(o)); }
-    void operator=(handle value) && { Policy::set(obj, key, value); }
-    void operator=(handle value) & { get_cache() = object(value, true); }
+
+    template <typename T> void operator=(T &&value) && {
+        Policy::set(obj, key, object_or_cast(std::forward<T>(value)));
+    }
+    template <typename T> void operator=(T &&value) & {
+        get_cache() = reinterpret_borrow<object>(object_or_cast(std::forward<T>(value)));
+    }
 
     template <typename T = Policy>
     PYBIND11_DEPRECATED("Use of obj.attr(...) as bool is deprecated in favor of pybind11::hasattr(obj, ...)")
-    operator enable_if_t<std::is_same<T, accessor_policies::str_attr>::value ||
+    explicit operator enable_if_t<std::is_same<T, accessor_policies::str_attr>::value ||
             std::is_same<T, accessor_policies::obj_attr>::value, bool>() const {
         return hasattr(obj, key);
     }
     template <typename T = Policy>
     PYBIND11_DEPRECATED("Use of obj[key] as bool is deprecated in favor of obj.contains(key)")
-    operator enable_if_t<std::is_same<T, accessor_policies::generic_item>::value, bool>() const {
+    explicit operator enable_if_t<std::is_same<T, accessor_policies::generic_item>::value, bool>() const {
         return obj.contains(key);
     }
 
@@ -255,7 +309,7 @@ struct generic_item {
     static object get(handle obj, handle key) {
         PyObject *result = PyObject_GetItem(obj.ptr(), key.ptr());
         if (!result) { throw error_already_set(); }
-        return {result, false};
+        return reinterpret_steal<object>(result);
     }
 
     static void set(handle obj, handle key, handle val) {
@@ -269,7 +323,7 @@ struct sequence_item {
     static object get(handle obj, size_t index) {
         PyObject *result = PySequence_GetItem(obj.ptr(), static_cast<ssize_t>(index));
         if (!result) { throw error_already_set(); }
-        return {result, true};
+        return reinterpret_borrow<object>(result);
     }
 
     static void set(handle obj, size_t index, handle val) {
@@ -286,7 +340,7 @@ struct list_item {
     static object get(handle obj, size_t index) {
         PyObject *result = PyList_GetItem(obj.ptr(), static_cast<ssize_t>(index));
         if (!result) { throw error_already_set(); }
-        return {result, true};
+        return reinterpret_borrow<object>(result);
     }
 
     static void set(handle obj, size_t index, handle val) {
@@ -303,7 +357,7 @@ struct tuple_item {
     static object get(handle obj, size_t index) {
         PyObject *result = PyTuple_GetItem(obj.ptr(), static_cast<ssize_t>(index));
         if (!result) { throw error_already_set(); }
-        return {result, true};
+        return reinterpret_borrow<object>(result);
     }
 
     static void set(handle obj, size_t index, handle val) {
@@ -363,12 +417,10 @@ public:
 template <typename T> using is_keyword = std::is_base_of<arg, T>;
 template <typename T> using is_s_unpacking = std::is_same<args_proxy, T>; // * unpacking
 template <typename T> using is_ds_unpacking = std::is_same<kwargs_proxy, T>; // ** unpacking
-template <typename T> using is_positional = bool_constant<
-    !is_keyword<T>::value && !is_s_unpacking<T>::value && !is_ds_unpacking<T>::value
->;
-template <typename T> using is_keyword_or_ds = bool_constant<
-    is_keyword<T>::value || is_ds_unpacking<T>::value
+template <typename T> using is_positional = none_of<
+    is_keyword<T>, is_s_unpacking<T>, is_ds_unpacking<T>
 >;
+template <typename T> using is_keyword_or_ds = any_of<is_keyword<T>, is_ds_unpacking<T>>;
 
 // Call argument collector forward declarations
 template <return_value_policy policy = return_value_policy::automatic_reference>
@@ -378,18 +430,30 @@ class unpacking_collector;
 
 NAMESPACE_END(detail)
 
-#define PYBIND11_OBJECT_CVT(Name, Parent, CheckFun, CvtStmt) \
+// TODO: After the deprecated constructors are removed, this macro can be simplified by
+//       inheriting ctors: `using Parent::Parent`. It's not an option right now because
+//       the `using` statement triggers the parent deprecation warning even if the ctor
+//       isn't even used.
+#define PYBIND11_OBJECT_COMMON(Name, Parent, CheckFun) \
     public: \
-        Name(const handle &h, bool borrowed) : Parent(h, borrowed) { CvtStmt; } \
-        /* These are deliberately not 'explicit' to allow implicit conversion from object: */ \
-        Name(const object& o): Parent(o) { CvtStmt; } \
-        Name(object&& o) noexcept : Parent(std::move(o)) { CvtStmt; } \
-        Name& operator=(object&& o) noexcept { (void) object::operator=(std::move(o)); CvtStmt; return *this; } \
-        Name& operator=(const object& o) { return static_cast<Name&>(object::operator=(o)); CvtStmt; } \
-        bool check() const { return m_ptr != nullptr && (bool) CheckFun(m_ptr); }
+        PYBIND11_DEPRECATED("Use reinterpret_borrow<"#Name">() or reinterpret_steal<"#Name">()") \
+        Name(handle h, bool is_borrowed) : Parent(is_borrowed ? Parent(h, borrowed) : Parent(h, stolen)) { } \
+        Name(handle h, borrowed_t) : Parent(h, borrowed) { } \
+        Name(handle h, stolen_t) : Parent(h, stolen) { } \
+        PYBIND11_DEPRECATED("Use py::isinstance<py::python_type>(obj) instead") \
+        bool check() const { return m_ptr != nullptr && (bool) CheckFun(m_ptr); } \
+        static bool _check(handle h) { return h.ptr() != nullptr && CheckFun(h.ptr()); }
+
+#define PYBIND11_OBJECT_CVT(Name, Parent, CheckFun, ConvertFun) \
+    PYBIND11_OBJECT_COMMON(Name, Parent, CheckFun) \
+    /* This is deliberately not 'explicit' to allow implicit conversion from object: */ \
+    Name(const object &o) : Parent(ConvertFun(o.ptr()), stolen) { if (!m_ptr) throw error_already_set(); }
 
 #define PYBIND11_OBJECT(Name, Parent, CheckFun) \
-    PYBIND11_OBJECT_CVT(Name, Parent, CheckFun, )
+    PYBIND11_OBJECT_COMMON(Name, Parent, CheckFun) \
+    /* This is deliberately not 'explicit' to allow implicit conversion from object: */ \
+    Name(const object &o) : Parent(o) { } \
+    Name(object &&o) : Parent(std::move(o)) { }
 
 #define PYBIND11_OBJECT_DEFAULT(Name, Parent, CheckFun) \
     PYBIND11_OBJECT(Name, Parent, CheckFun) \
@@ -397,26 +461,9 @@ NAMESPACE_END(detail)
 
 class iterator : public object {
 public:
-    PYBIND11_OBJECT_CVT(iterator, object, PyIter_Check, value = object(); ready = false)
-    iterator() : object(), value(object()), ready(false) { }
-    iterator(const iterator& it) : object(it), value(it.value), ready(it.ready) { }
-    iterator(iterator&& it) : object(std::move(it)), value(std::move(it.value)), ready(it.ready) { }
-
-    /** Caveat: this copy constructor does not (and cannot) clone the internal
+    /** Caveat: copying an iterator does not (and cannot) clone the internal
         state of the Python iterable */
-    iterator &operator=(const iterator &it) {
-        (void) object::operator=(it);
-        value = it.value;
-        ready = it.ready;
-        return *this;
-    }
-
-    iterator &operator=(iterator &&it) noexcept {
-        (void) object::operator=(std::move(it));
-        value = std::move(it.value);
-        ready = it.ready;
-        return *this;
-    }
+    PYBIND11_OBJECT_DEFAULT(iterator, object, PyIter_Check)
 
     iterator& operator++() {
         if (m_ptr)
@@ -448,11 +495,11 @@ public:
     }
 
 private:
-    void advance() { value = object(PyIter_Next(m_ptr), false); }
+    void advance() { value = reinterpret_steal<object>(PyIter_Next(m_ptr)); }
 
 private:
-    object value;
-    bool ready;
+    object value = {};
+    bool ready = false;
 };
 
 class iterable : public object {
@@ -464,16 +511,16 @@ class bytes;
 
 class str : public object {
 public:
-    PYBIND11_OBJECT_DEFAULT(str, object, detail::PyUnicode_Check_Permissive)
+    PYBIND11_OBJECT_CVT(str, object, detail::PyUnicode_Check_Permissive, raw_str)
 
     str(const char *c, size_t n)
-    : object(PyUnicode_FromStringAndSize(c, (ssize_t) n), false) {
+        : object(PyUnicode_FromStringAndSize(c, (ssize_t) n), stolen) {
         if (!m_ptr) pybind11_fail("Could not allocate string object!");
     }
 
     // 'explicit' is explicitly omitted from the following constructors to allow implicit conversion to py::str from C++ string-like objects
-    str(const char *c)
-        : object(PyUnicode_FromString(c), false) {
+    str(const char *c = "")
+        : object(PyUnicode_FromString(c), stolen) {
         if (!m_ptr) pybind11_fail("Could not allocate string object!");
     }
 
@@ -481,10 +528,12 @@ public:
 
     explicit str(const bytes &b);
 
+    explicit str(handle h) : object(raw_str(h.ptr()), stolen) { }
+
     operator std::string() const {
         object temp = *this;
         if (PyUnicode_Check(m_ptr)) {
-            temp = object(PyUnicode_AsUTF8String(m_ptr), false);
+            temp = reinterpret_steal<object>(PyUnicode_AsUTF8String(m_ptr));
             if (!temp)
                 pybind11_fail("Unable to extract string contents! (encoding issue)");
         }
@@ -499,6 +548,18 @@ public:
     str format(Args &&...args) const {
         return attr("format")(std::forward<Args>(args)...);
     }
+
+private:
+    /// Return string representation -- always returns a new reference, even if already a str
+    static PyObject *raw_str(PyObject *op) {
+        PyObject *str_value = PyObject_Str(op);
+#if PY_MAJOR_VERSION < 3
+        if (!str_value) throw error_already_set();
+        PyObject *unicode = PyUnicode_FromEncodedObject(str_value, "utf-8", nullptr);
+        Py_XDECREF(str_value); str_value = unicode;
+#endif
+        return str_value;
+    }
 };
 
 inline namespace literals {
@@ -508,16 +569,16 @@ inline str operator"" _s(const char *s, size_t size) { return {s, size}; }
 
 class bytes : public object {
 public:
-    PYBIND11_OBJECT_DEFAULT(bytes, object, PYBIND11_BYTES_CHECK)
+    PYBIND11_OBJECT(bytes, object, PYBIND11_BYTES_CHECK)
 
     // Allow implicit conversion:
-    bytes(const char *c)
-    : object(PYBIND11_BYTES_FROM_STRING(c), false) {
+    bytes(const char *c = "")
+        : object(PYBIND11_BYTES_FROM_STRING(c), stolen) {
         if (!m_ptr) pybind11_fail("Could not allocate bytes object!");
     }
 
     bytes(const char *c, size_t n)
-    : object(PYBIND11_BYTES_FROM_STRING_AND_SIZE(c, (ssize_t) n), false) {
+        : object(PYBIND11_BYTES_FROM_STRING_AND_SIZE(c, (ssize_t) n), stolen) {
         if (!m_ptr) pybind11_fail("Could not allocate bytes object!");
     }
 
@@ -538,7 +599,7 @@ public:
 inline bytes::bytes(const pybind11::str &s) {
     object temp = s;
     if (PyUnicode_Check(s.ptr())) {
-        temp = object(PyUnicode_AsUTF8String(s.ptr()), false);
+        temp = reinterpret_steal<object>(PyUnicode_AsUTF8String(s.ptr()));
         if (!temp)
             pybind11_fail("Unable to extract string contents! (encoding issue)");
     }
@@ -546,7 +607,7 @@ inline bytes::bytes(const pybind11::str &s) {
     ssize_t length;
     if (PYBIND11_BYTES_AS_STRING_AND_SIZE(temp.ptr(), &buffer, &length))
         pybind11_fail("Unable to extract string contents! (invalid type)");
-    auto obj = object(PYBIND11_BYTES_FROM_STRING_AND_SIZE(buffer, length), false);
+    auto obj = reinterpret_steal<object>(PYBIND11_BYTES_FROM_STRING_AND_SIZE(buffer, length));
     if (!obj)
         pybind11_fail("Could not allocate bytes object!");
     m_ptr = obj.release().ptr();
@@ -557,7 +618,7 @@ inline str::str(const bytes& b) {
     ssize_t length;
     if (PYBIND11_BYTES_AS_STRING_AND_SIZE(b.ptr(), &buffer, &length))
         pybind11_fail("Unable to extract bytes contents!");
-    auto obj = object(PyUnicode_FromStringAndSize(buffer, (ssize_t) length), false);
+    auto obj = reinterpret_steal<object>(PyUnicode_FromStringAndSize(buffer, (ssize_t) length));
     if (!obj)
         pybind11_fail("Could not allocate string object!");
     m_ptr = obj.release().ptr();
@@ -566,20 +627,30 @@ inline str::str(const bytes& b) {
 class none : public object {
 public:
     PYBIND11_OBJECT(none, object, detail::PyNone_Check)
-    none() : object(Py_None, true) { }
+    none() : object(Py_None, borrowed) { }
 };
 
 class bool_ : public object {
 public:
-    PYBIND11_OBJECT_DEFAULT(bool_, object, PyBool_Check)
+    PYBIND11_OBJECT_CVT(bool_, object, PyBool_Check, raw_bool)
+    bool_() : object(Py_False, borrowed) { }
     // Allow implicit conversion from and to `bool`:
-    bool_(bool value) : object(value ? Py_True : Py_False, true) { }
+    bool_(bool value) : object(value ? Py_True : Py_False, borrowed) { }
     operator bool() const { return m_ptr && PyLong_AsLong(m_ptr) != 0; }
+
+private:
+    /// Return the truth value of an object -- always returns a new reference
+    static PyObject *raw_bool(PyObject *op) {
+        const auto value = PyObject_IsTrue(op);
+        if (value == -1) return nullptr;
+        return handle(value ? Py_True : Py_False).inc_ref().ptr();
+    }
 };
 
 class int_ : public object {
 public:
-    PYBIND11_OBJECT_DEFAULT(int_, object, PYBIND11_LONG_CHECK)
+    PYBIND11_OBJECT_CVT(int_, object, PYBIND11_LONG_CHECK, PyNumber_Long)
+    int_() : object(PyLong_FromLong(0), stolen) { }
     // Allow implicit conversion from C++ integral types:
     template <typename T,
               detail::enable_if_t<std::is_integral<T>::value, int> = 0>
@@ -617,12 +688,12 @@ public:
 
 class float_ : public object {
 public:
-    PYBIND11_OBJECT_DEFAULT(float_, object, PyFloat_Check)
+    PYBIND11_OBJECT_CVT(float_, object, PyFloat_Check, PyNumber_Float)
     // Allow implicit conversion from float/double:
-    float_(float value) : object(PyFloat_FromDouble((double) value), false) {
+    float_(float value) : object(PyFloat_FromDouble((double) value), stolen) {
         if (!m_ptr) pybind11_fail("Could not allocate float object!");
     }
-    float_(double value) : object(PyFloat_FromDouble((double) value), false) {
+    float_(double value = .0) : object(PyFloat_FromDouble((double) value), stolen) {
         if (!m_ptr) pybind11_fail("Could not allocate float object!");
     }
     operator float() const { return (float) PyFloat_AsDouble(m_ptr); }
@@ -632,7 +703,8 @@ public:
 class weakref : public object {
 public:
     PYBIND11_OBJECT_DEFAULT(weakref, object, PyWeakref_Check)
-    explicit weakref(handle obj, handle callback = handle()) : object(PyWeakref_NewRef(obj.ptr(), callback.ptr()), false) {
+    explicit weakref(handle obj, handle callback = {})
+        : object(PyWeakref_NewRef(obj.ptr(), callback.ptr()), stolen) {
         if (!m_ptr) pybind11_fail("Could not allocate weak reference!");
     }
 };
@@ -657,9 +729,10 @@ public:
 class capsule : public object {
 public:
     PYBIND11_OBJECT_DEFAULT(capsule, object, PyCapsule_CheckExact)
-    capsule(PyObject *obj, bool borrowed) : object(obj, borrowed) { }
+    PYBIND11_DEPRECATED("Use reinterpret_borrow<capsule>() or reinterpret_steal<capsule>()")
+    capsule(PyObject *ptr, bool is_borrowed) : object(is_borrowed ? object(ptr, borrowed) : object(ptr, stolen)) { }
     explicit capsule(const void *value, void (*destruct)(PyObject *) = nullptr)
-        : object(PyCapsule_New(const_cast<void*>(value), nullptr, destruct), false) {
+        : object(PyCapsule_New(const_cast<void*>(value), nullptr, destruct), stolen) {
         if (!m_ptr) pybind11_fail("Could not allocate capsule object!");
     }
     template <typename T> operator T *() const {
@@ -671,8 +744,8 @@ public:
 
 class tuple : public object {
 public:
-    PYBIND11_OBJECT(tuple, object, PyTuple_Check)
-    explicit tuple(size_t size = 0) : object(PyTuple_New((ssize_t) size), false) {
+    PYBIND11_OBJECT_CVT(tuple, object, PyTuple_Check, PySequence_Tuple)
+    explicit tuple(size_t size = 0) : object(PyTuple_New((ssize_t) size), stolen) {
         if (!m_ptr) pybind11_fail("Could not allocate tuple object!");
     }
     size_t size() const { return (size_t) PyTuple_Size(m_ptr); }
@@ -681,12 +754,12 @@ public:
 
 class dict : public object {
 public:
-    PYBIND11_OBJECT(dict, object, PyDict_Check)
-    dict() : object(PyDict_New(), false) {
+    PYBIND11_OBJECT_CVT(dict, object, PyDict_Check, raw_dict)
+    dict() : object(PyDict_New(), stolen) {
         if (!m_ptr) pybind11_fail("Could not allocate dict object!");
     }
     template <typename... Args,
-              typename = detail::enable_if_t<detail::all_of_t<detail::is_keyword_or_ds, Args...>::value>,
+              typename = detail::enable_if_t<detail::all_of<detail::is_keyword_or_ds<Args>...>::value>,
               // MSVC workaround: it can't compile an out-of-line definition, so defer the collector
               typename collector = detail::deferred_t<detail::unpacking_collector<>, Args...>>
     explicit dict(Args &&...args) : dict(collector(std::forward<Args>(args)...).kwargs()) { }
@@ -697,6 +770,14 @@ public:
     void clear() const { PyDict_Clear(ptr()); }
     bool contains(handle key) const { return PyDict_Contains(ptr(), key.ptr()) == 1; }
     bool contains(const char *key) const { return PyDict_Contains(ptr(), pybind11::str(key).ptr()) == 1; }
+
+private:
+    /// Call the `dict` Python type -- always returns a new reference
+    static PyObject *raw_dict(PyObject *op) {
+        if (PyDict_Check(op))
+            return handle(op).inc_ref().ptr();
+        return PyObject_CallFunctionObjArgs((PyObject *) &PyDict_Type, op, nullptr);
+    }
 };
 
 class sequence : public object {
@@ -708,13 +789,15 @@ public:
 
 class list : public object {
 public:
-    PYBIND11_OBJECT(list, object, PyList_Check)
-    explicit list(size_t size = 0) : object(PyList_New((ssize_t) size), false) {
+    PYBIND11_OBJECT_CVT(list, object, PyList_Check, PySequence_List)
+    explicit list(size_t size = 0) : object(PyList_New((ssize_t) size), stolen) {
         if (!m_ptr) pybind11_fail("Could not allocate list object!");
     }
     size_t size() const { return (size_t) PyList_Size(m_ptr); }
     detail::list_accessor operator[](size_t index) const { return {*this, index}; }
-    void append(handle h) const { PyList_Append(m_ptr, h.ptr()); }
+    template <typename T> void append(T &&val) const {
+        PyList_Append(m_ptr, detail::object_or_cast(std::forward<T>(val)).ptr());
+    }
 };
 
 class args : public tuple { PYBIND11_OBJECT_DEFAULT(args, tuple, PyTuple_Check) };
@@ -722,12 +805,14 @@ class kwargs : public dict { PYBIND11_OBJECT_DEFAULT(kwargs, dict, PyDict_Check)
 
 class set : public object {
 public:
-    PYBIND11_OBJECT(set, object, PySet_Check)
-    set() : object(PySet_New(nullptr), false) {
+    PYBIND11_OBJECT_CVT(set, object, PySet_Check, PySet_New)
+    set() : object(PySet_New(nullptr), stolen) {
         if (!m_ptr) pybind11_fail("Could not allocate set object!");
     }
     size_t size() const { return (size_t) PySet_Size(m_ptr); }
-    bool add(const object &object) const { return PySet_Add(m_ptr, object.ptr()) == 0; }
+    template <typename T> bool add(T &&val) const {
+        return PySet_Add(m_ptr, detail::object_or_cast(std::forward<T>(val)).ptr()) == 0;
+    }
     void clear() const { PySet_Clear(m_ptr); }
 };
 
@@ -783,7 +868,7 @@ public:
             pybind11_fail("Unable to create memoryview from buffer descriptor");
     }
 
-    PYBIND11_OBJECT_DEFAULT(memoryview, object, PyMemoryView_Check)
+    PYBIND11_OBJECT_CVT(memoryview, object, PyMemoryView_Check, PyMemoryView_FromObject)
 };
 
 inline size_t len(handle h) {
@@ -793,37 +878,45 @@ inline size_t len(handle h) {
     return (size_t) result;
 }
 
-NAMESPACE_BEGIN(detail)
-template <typename D> iterator object_api<D>::begin() const { return {PyObject_GetIter(derived().ptr()), false}; }
-template <typename D> iterator object_api<D>::end() const { return {nullptr, false}; }
-template <typename D> item_accessor object_api<D>::operator[](handle key) const { return {derived(), object(key, true)}; }
-template <typename D> item_accessor object_api<D>::operator[](const char *key) const { return {derived(), pybind11::str(key)}; }
-template <typename D> obj_attr_accessor object_api<D>::attr(handle key) const { return {derived(), object(key, true)}; }
-template <typename D> str_attr_accessor object_api<D>::attr(const char *key) const { return {derived(), key}; }
-template <typename D> args_proxy object_api<D>::operator*() const { return args_proxy(derived().ptr()); }
-template <typename D> template <typename T> bool object_api<D>::contains(T &&key) const {
-    return attr("__contains__")(std::forward<T>(key)).template cast<bool>();
-}
-
-template <typename D>
-pybind11::str object_api<D>::str() const {
-    PyObject *str_value = PyObject_Str(derived().ptr());
+inline str repr(handle h) {
+    PyObject *str_value = PyObject_Repr(h.ptr());
+    if (!str_value) throw error_already_set();
 #if PY_MAJOR_VERSION < 3
     PyObject *unicode = PyUnicode_FromEncodedObject(str_value, "utf-8", nullptr);
     Py_XDECREF(str_value); str_value = unicode;
+    if (!str_value) throw error_already_set();
 #endif
-    return {str_value, false};
+    return reinterpret_steal<str>(str_value);
 }
 
-template <typename D>
-pybind11::str object_api<D>::repr() const {
-    PyObject *str_value = PyObject_Repr(derived().ptr());
-#if PY_MAJOR_VERSION < 3
-    PyObject *unicode = PyUnicode_FromEncodedObject(str_value, "utf-8", nullptr);
-    Py_XDECREF(str_value); str_value = unicode;
-#endif
-    return {str_value, false};
+NAMESPACE_BEGIN(detail)
+template <typename D> iterator object_api<D>::begin() const {
+    return reinterpret_steal<iterator>(PyObject_GetIter(derived().ptr()));
+}
+template <typename D> iterator object_api<D>::end() const {
+    return {};
+}
+template <typename D> item_accessor object_api<D>::operator[](handle key) const {
+    return {derived(), reinterpret_borrow<object>(key)};
+}
+template <typename D> item_accessor object_api<D>::operator[](const char *key) const {
+    return {derived(), pybind11::str(key)};
+}
+template <typename D> obj_attr_accessor object_api<D>::attr(handle key) const {
+    return {derived(), reinterpret_borrow<object>(key)};
 }
+template <typename D> str_attr_accessor object_api<D>::attr(const char *key) const {
+    return {derived(), key};
+}
+template <typename D> args_proxy object_api<D>::operator*() const {
+    return args_proxy(derived().ptr());
+}
+template <typename D> template <typename T> bool object_api<D>::contains(T &&key) const {
+    return attr("__contains__")(std::forward<T>(key)).template cast<bool>();
+}
+
+template <typename D>
+pybind11::str object_api<D>::str() const { return pybind11::str(derived()); }
 
 template <typename D>
 handle object_api<D>::get_type() const { return (PyObject *) Py_TYPE(derived().ptr()); }
diff --git a/pybind11/include/pybind11/stl.h b/pybind11/include/pybind11/stl.h
index e5c6e3c7e..4b557bd16 100644
--- a/pybind11/include/pybind11/stl.h
+++ b/pybind11/include/pybind11/stl.h
@@ -16,12 +16,28 @@
 #include <unordered_map>
 #include <iostream>
 #include <list>
+#include <valarray>
 
 #if defined(_MSC_VER)
 #pragma warning(push)
 #pragma warning(disable: 4127) // warning C4127: Conditional expression is constant
 #endif
 
+#ifdef __has_include
+// std::optional (but including it in c++14 mode isn't allowed)
+#  if defined(PYBIND11_CPP17) && __has_include(<optional>)
+#    include <optional>
+#    define PYBIND11_HAS_OPTIONAL 1
+#  endif
+// std::experimental::optional (but not allowed in c++11 mode)
+#  if defined(PYBIND11_CPP14) && __has_include(<experimental/optional>)
+#    include <experimental/optional>
+#    if __cpp_lib_experimental_optional  // just in case
+#      define PYBIND11_HAS_EXP_OPTIONAL 1
+#    endif
+#  endif
+#endif
+
 NAMESPACE_BEGIN(pybind11)
 NAMESPACE_BEGIN(detail)
 
@@ -30,15 +46,15 @@ template <typename Type, typename Key> struct set_caster {
     using key_conv = make_caster<Key>;
 
     bool load(handle src, bool convert) {
-        pybind11::set s(src, true);
-        if (!s.check())
+        if (!isinstance<pybind11::set>(src))
             return false;
+        auto s = reinterpret_borrow<pybind11::set>(src);
         value.clear();
         key_conv conv;
         for (auto entry : s) {
             if (!conv.load(entry, convert))
                 return false;
-            value.insert((Key) conv);
+            value.insert(cast_op<Key>(conv));
         }
         return true;
     }
@@ -46,7 +62,7 @@ template <typename Type, typename Key> struct set_caster {
     static handle cast(const type &src, return_value_policy policy, handle parent) {
         pybind11::set s;
         for (auto const &value: src) {
-            object value_ = object(key_conv::cast(value, policy, parent), false);
+            auto value_ = reinterpret_steal<object>(key_conv::cast(value, policy, parent));
             if (!value_ || !s.add(value_))
                 return handle();
         }
@@ -57,14 +73,13 @@ template <typename Type, typename Key> struct set_caster {
 };
 
 template <typename Type, typename Key, typename Value> struct map_caster {
-    using type = Type;
     using key_conv   = make_caster<Key>;
     using value_conv = make_caster<Value>;
 
     bool load(handle src, bool convert) {
-        dict d(src, true);
-        if (!d.check())
+        if (!isinstance<dict>(src))
             return false;
+        auto d = reinterpret_borrow<dict>(src);
         key_conv kconv;
         value_conv vconv;
         value.clear();
@@ -72,16 +87,16 @@ template <typename Type, typename Key, typename Value> struct map_caster {
             if (!kconv.load(it.first.ptr(), convert) ||
                 !vconv.load(it.second.ptr(), convert))
                 return false;
-            value.emplace((Key) kconv, (Value) vconv);
+            value.emplace(cast_op<Key>(kconv), cast_op<Value>(vconv));
         }
         return true;
     }
 
-    static handle cast(const type &src, return_value_policy policy, handle parent) {
+    static handle cast(const Type &src, return_value_policy policy, handle parent) {
         dict d;
         for (auto const &kv: src) {
-            object key = object(key_conv::cast(kv.first, policy, parent), false);
-            object value = object(value_conv::cast(kv.second, policy, parent), false);
+            auto key = reinterpret_steal<object>(key_conv::cast(kv.first, policy, parent));
+            auto value = reinterpret_steal<object>(value_conv::cast(kv.second, policy, parent));
             if (!key || !value)
                 return handle();
             d[key] = value;
@@ -89,41 +104,42 @@ template <typename Type, typename Key, typename Value> struct map_caster {
         return d.release();
     }
 
-    PYBIND11_TYPE_CASTER(type, _("Dict[") + key_conv::name() + _(", ") + value_conv::name() + _("]"));
+    PYBIND11_TYPE_CASTER(Type, _("Dict[") + key_conv::name() + _(", ") + value_conv::name() + _("]"));
 };
 
 template <typename Type, typename Value> struct list_caster {
-    using type = Type;
     using value_conv = make_caster<Value>;
 
     bool load(handle src, bool convert) {
-        sequence s(src, true);
-        if (!s.check())
+        if (!isinstance<sequence>(src))
             return false;
+        auto s = reinterpret_borrow<sequence>(src);
         value_conv conv;
         value.clear();
         reserve_maybe(s, &value);
         for (auto it : s) {
             if (!conv.load(it, convert))
                 return false;
-            value.push_back((Value) conv);
+            value.push_back(cast_op<Value>(conv));
         }
         return true;
     }
 
+private:
     template <typename T = Type,
               enable_if_t<std::is_same<decltype(std::declval<T>().reserve(0)), void>::value, int> = 0>
     void reserve_maybe(sequence s, Type *) { value.reserve(s.size()); }
     void reserve_maybe(sequence, void *) { }
 
+public:
     static handle cast(const Type &src, return_value_policy policy, handle parent) {
         list l(src.size());
         size_t index = 0;
         for (auto const &value: src) {
-            object value_ = object(value_conv::cast(value, policy, parent), false);
+            auto value_ = reinterpret_steal<object>(value_conv::cast(value, policy, parent));
             if (!value_)
                 return handle();
-            PyList_SET_ITEM(l.ptr(), index++, value_.release().ptr()); // steals a reference
+            PyList_SET_ITEM(l.ptr(), (ssize_t) index++, value_.release().ptr()); // steals a reference
         }
         return l.release();
     }
@@ -137,40 +153,59 @@ template <typename Type, typename Alloc> struct type_caster<std::vector<Type, Al
 template <typename Type, typename Alloc> struct type_caster<std::list<Type, Alloc>>
  : list_caster<std::list<Type, Alloc>, Type> { };
 
-template <typename Type, size_t Size> struct type_caster<std::array<Type, Size>> {
-    using array_type = std::array<Type, Size>;
-    using value_conv = make_caster<Type>;
+template <typename ArrayType, typename Value, bool Resizable, size_t Size = 0> struct array_caster {
+    using value_conv = make_caster<Value>;
 
+private:
+    template <bool R = Resizable>
+    bool require_size(enable_if_t<R, size_t> size) {
+        if (value.size() != size)
+            value.resize(size);
+        return true;
+    }
+    template <bool R = Resizable>
+    bool require_size(enable_if_t<!R, size_t> size) {
+        return size == Size;
+    }
+
+public:
     bool load(handle src, bool convert) {
-        list l(src, true);
-        if (!l.check())
+        if (!isinstance<list>(src))
             return false;
-        if (l.size() != Size)
+        auto l = reinterpret_borrow<list>(src);
+        if (!require_size(l.size()))
             return false;
         value_conv conv;
         size_t ctr = 0;
         for (auto it : l) {
             if (!conv.load(it, convert))
                 return false;
-            value[ctr++] = (Type) conv;
+            value[ctr++] = cast_op<Value>(conv);
         }
         return true;
     }
 
-    static handle cast(const array_type &src, return_value_policy policy, handle parent) {
-        list l(Size);
+    static handle cast(const ArrayType &src, return_value_policy policy, handle parent) {
+        list l(src.size());
         size_t index = 0;
         for (auto const &value: src) {
-            object value_ = object(value_conv::cast(value, policy, parent), false);
+            auto value_ = reinterpret_steal<object>(value_conv::cast(value, policy, parent));
             if (!value_)
                 return handle();
-            PyList_SET_ITEM(l.ptr(), index++, value_.release().ptr()); // steals a reference
+            PyList_SET_ITEM(l.ptr(), (ssize_t) index++, value_.release().ptr()); // steals a reference
         }
         return l.release();
     }
-    PYBIND11_TYPE_CASTER(array_type, _("List[") + value_conv::name() + _("[") + _<Size>() + _("]]"));
+
+    PYBIND11_TYPE_CASTER(ArrayType, _("List[") + value_conv::name() + _<Resizable>(_(""), _("[") + _<Size>() + _("]")) + _("]"));
 };
 
+template <typename Type, size_t Size> struct type_caster<std::array<Type, Size>>
+ : array_caster<std::array<Type, Size>, Type, false, Size> { };
+
+template <typename Type> struct type_caster<std::valarray<Type>>
+ : array_caster<std::valarray<Type>, Type, true> { };
+
 template <typename Key, typename Compare, typename Alloc> struct type_caster<std::set<Key, Compare, Alloc>>
   : set_caster<std::set<Key, Compare, Alloc>, Key> { };
 
@@ -183,10 +218,54 @@ template <typename Key, typename Value, typename Compare, typename Alloc> struct
 template <typename Key, typename Value, typename Hash, typename Equal, typename Alloc> struct type_caster<std::unordered_map<Key, Value, Hash, Equal, Alloc>>
   : map_caster<std::unordered_map<Key, Value, Hash, Equal, Alloc>, Key, Value> { };
 
+// This type caster is intended to be used for std::optional and std::experimental::optional
+template<typename T> struct optional_caster {
+    using value_conv = make_caster<typename T::value_type>;
+
+    static handle cast(const T& src, return_value_policy policy, handle parent) {
+        if (!src)
+            return none().inc_ref();
+        return value_conv::cast(*src, policy, parent);
+    }
+
+    bool load(handle src, bool convert) {
+        if (!src) {
+            return false;
+        } else if (src.is_none()) {
+            value = {};  // nullopt
+            return true;
+        }
+        value_conv inner_caster;
+        if (!inner_caster.load(src, convert))
+            return false;
+
+        value.emplace(cast_op<typename T::value_type>(inner_caster));
+        return true;
+    }
+
+    PYBIND11_TYPE_CASTER(T, _("Optional[") + value_conv::name() + _("]"));
+};
+
+#if PYBIND11_HAS_OPTIONAL
+template<typename T> struct type_caster<std::optional<T>>
+    : public optional_caster<std::optional<T>> {};
+
+template<> struct type_caster<std::nullopt_t>
+    : public void_caster<std::nullopt_t> {};
+#endif
+
+#if PYBIND11_HAS_EXP_OPTIONAL
+template<typename T> struct type_caster<std::experimental::optional<T>>
+    : public optional_caster<std::experimental::optional<T>> {};
+
+template<> struct type_caster<std::experimental::nullopt_t>
+    : public void_caster<std::experimental::nullopt_t> {};
+#endif
+
 NAMESPACE_END(detail)
 
 inline std::ostream &operator<<(std::ostream &os, const handle &obj) {
-    os << (std::string) obj.str();
+    os << (std::string) str(obj);
     return os;
 }
 
diff --git a/pybind11/include/pybind11/stl_bind.h b/pybind11/include/pybind11/stl_bind.h
index 24963aaa0..d1d45e2c0 100644
--- a/pybind11/include/pybind11/stl_bind.h
+++ b/pybind11/include/pybind11/stl_bind.h
@@ -60,18 +60,21 @@ struct is_comparable<T, enable_if_t<container_traits<T>::is_pair>> {
 };
 
 /* Fallback functions */
-template <typename, typename, typename... Args> void vector_if_copy_constructible(const Args&...) { }
-template <typename, typename, typename... Args> void vector_if_equal_operator(const Args&...) { }
-template <typename, typename, typename... Args> void vector_if_insertion_operator(const Args&...) { }
-
-template<typename Vector, typename Class_, enable_if_t<std::is_copy_constructible<typename Vector::value_type>::value, int> = 0>
-void vector_if_copy_constructible(Class_ &cl) {
-    cl.def(pybind11::init<const Vector &>(),
-           "Copy constructor");
+template <typename, typename, typename... Args> void vector_if_copy_constructible(const Args &...) { }
+template <typename, typename, typename... Args> void vector_if_equal_operator(const Args &...) { }
+template <typename, typename, typename... Args> void vector_if_insertion_operator(const Args &...) { }
+template <typename, typename, typename... Args> void vector_modifiers(const Args &...) { }
+
+template<typename Vector, typename Class_>
+void vector_if_copy_constructible(enable_if_t<
+    std::is_copy_constructible<Vector>::value &&
+    std::is_copy_constructible<typename Vector::value_type>::value, Class_> &cl) {
+
+    cl.def(pybind11::init<const Vector &>(), "Copy constructor");
 }
 
-template<typename Vector, typename Class_, enable_if_t<is_comparable<Vector>::value, int> = 0>
-void vector_if_equal_operator(Class_ &cl) {
+template<typename Vector, typename Class_>
+void vector_if_equal_operator(enable_if_t<is_comparable<Vector>::value, Class_> &cl) {
     using T = typename Vector::value_type;
 
     cl.def(self == self);
@@ -106,71 +109,34 @@ void vector_if_equal_operator(Class_ &cl) {
     );
 }
 
-template <typename Vector, typename Class_> auto vector_if_insertion_operator(Class_ &cl, std::string const &name)
-    -> decltype(std::declval<std::ostream&>() << std::declval<typename Vector::value_type>(), void()) {
-    using size_type = typename Vector::size_type;
-
-    cl.def("__repr__",
-           [name](Vector &v) {
-            std::ostringstream s;
-            s << name << '[';
-            for (size_type i=0; i < v.size(); ++i) {
-                s << v[i];
-                if (i != v.size() - 1)
-                    s << ", ";
-            }
-            s << ']';
-            return s.str();
-        },
-        "Return the canonical string representation of this list."
-    );
-}
-
-NAMESPACE_END(detail)
-
-//
-// std::vector
-//
-template <typename Vector, typename holder_type = std::unique_ptr<Vector>, typename... Args>
-pybind11::class_<Vector, holder_type> bind_vector(pybind11::module &m, std::string const &name, Args&&... args) {
+// Vector modifiers -- requires a copyable vector_type:
+// (Technically, some of these (pop and __delitem__) don't actually require copyability, but it seems
+// silly to allow deletion but not insertion, so include them here too.)
+template <typename Vector, typename Class_>
+void vector_modifiers(enable_if_t<std::is_copy_constructible<typename Vector::value_type>::value, Class_> &cl) {
     using T = typename Vector::value_type;
     using SizeType = typename Vector::size_type;
     using DiffType = typename Vector::difference_type;
-    using ItType   = typename Vector::iterator;
-    using Class_ = pybind11::class_<Vector, holder_type>;
-
-    Class_ cl(m, name.c_str(), std::forward<Args>(args)...);
-
-    cl.def(pybind11::init<>());
-
-    // Register copy constructor (if possible)
-    detail::vector_if_copy_constructible<Vector, Class_>(cl);
-
-    // Register comparison-related operators and functions (if possible)
-    detail::vector_if_equal_operator<Vector, Class_>(cl);
 
-    // Register stream insertion operator (if possible)
-    detail::vector_if_insertion_operator<Vector, Class_>(cl, name);
+    cl.def("append",
+           [](Vector &v, const T &value) { v.push_back(value); },
+           arg("x"),
+           "Add an item to the end of the list");
 
     cl.def("__init__", [](Vector &v, iterable it) {
         new (&v) Vector();
         try {
             v.reserve(len(it));
             for (handle h : it)
-               v.push_back(h.cast<typename Vector::value_type>());
+               v.push_back(h.cast<T>());
         } catch (...) {
             v.~Vector();
             throw;
         }
     });
 
-    cl.def("append",
-           [](Vector &v, const T &value) { v.push_back(value); },
-           arg("x"),
-           "Add an item to the end of the list");
-
     cl.def("extend",
-       [](Vector &v, Vector &src) {
+       [](Vector &v, const Vector &src) {
            v.reserve(v.size() + src.size());
            v.insert(v.end(), src.begin(), src.end());
        },
@@ -209,21 +175,6 @@ pybind11::class_<Vector, holder_type> bind_vector(pybind11::module &m, std::stri
         "Remove and return the item at index ``i``"
     );
 
-    cl.def("__bool__",
-        [](const Vector &v) -> bool {
-            return !v.empty();
-        },
-        "Check whether the list is nonempty"
-    );
-
-    cl.def("__getitem__",
-        [](const Vector &v, SizeType i) -> T {
-            if (i >= v.size())
-                throw pybind11::index_error();
-            return v[i];
-        }
-    );
-
     cl.def("__setitem__",
         [](Vector &v, SizeType i, const T &t) {
             if (i >= v.size())
@@ -232,26 +183,6 @@ pybind11::class_<Vector, holder_type> bind_vector(pybind11::module &m, std::stri
         }
     );
 
-    cl.def("__delitem__",
-        [](Vector &v, SizeType i) {
-            if (i >= v.size())
-                throw pybind11::index_error();
-            v.erase(v.begin() + typename Vector::difference_type(i));
-        },
-        "Delete list elements using a slice object"
-    );
-
-    cl.def("__len__", &Vector::size);
-
-    cl.def("__iter__",
-           [](Vector &v) {
-               return pybind11::make_iterator<
-                   return_value_policy::reference_internal, ItType, ItType, T>(
-                   v.begin(), v.end());
-           },
-           pybind11::keep_alive<0, 1>() /* Essential: keep list alive while iterator exists */
-    );
-
     /// Slicing protocol
     cl.def("__getitem__",
         [](const Vector &v, slice slice) -> Vector * {
@@ -290,6 +221,15 @@ pybind11::class_<Vector, holder_type> bind_vector(pybind11::module &m, std::stri
         "Assign list elements using a slice object"
     );
 
+    cl.def("__delitem__",
+        [](Vector &v, SizeType i) {
+            if (i >= v.size())
+                throw pybind11::index_error();
+            v.erase(v.begin() + DiffType(i));
+        },
+        "Delete the list elements at index ``i``"
+    );
+
     cl.def("__delitem__",
         [](Vector &v, slice slice) {
             size_t start, stop, step, slicelength;
@@ -309,6 +249,123 @@ pybind11::class_<Vector, holder_type> bind_vector(pybind11::module &m, std::stri
         "Delete list elements using a slice object"
     );
 
+}
+
+// If the type has an operator[] that doesn't return a reference (most notably std::vector<bool>),
+// we have to access by copying; otherwise we return by reference.
+template <typename Vector> using vector_needs_copy = negation<
+    std::is_same<decltype(std::declval<Vector>()[typename Vector::size_type()]), typename Vector::value_type &>>;
+
+// The usual case: access and iterate by reference
+template <typename Vector, typename Class_>
+void vector_accessor(enable_if_t<!vector_needs_copy<Vector>::value, Class_> &cl) {
+    using T = typename Vector::value_type;
+    using SizeType = typename Vector::size_type;
+    using ItType   = typename Vector::iterator;
+
+    cl.def("__getitem__",
+        [](Vector &v, SizeType i) -> T & {
+            if (i >= v.size())
+                throw pybind11::index_error();
+            return v[i];
+        },
+        return_value_policy::reference_internal // ref + keepalive
+    );
+
+    cl.def("__iter__",
+           [](Vector &v) {
+               return pybind11::make_iterator<
+                   return_value_policy::reference_internal, ItType, ItType, T&>(
+                   v.begin(), v.end());
+           },
+           keep_alive<0, 1>() /* Essential: keep list alive while iterator exists */
+    );
+}
+
+// The case for special objects, like std::vector<bool>, that have to be returned-by-copy:
+template <typename Vector, typename Class_>
+void vector_accessor(enable_if_t<vector_needs_copy<Vector>::value, Class_> &cl) {
+    using T = typename Vector::value_type;
+    using SizeType = typename Vector::size_type;
+    using ItType   = typename Vector::iterator;
+    cl.def("__getitem__",
+        [](const Vector &v, SizeType i) -> T {
+            if (i >= v.size())
+                throw pybind11::index_error();
+            return v[i];
+        }
+    );
+
+    cl.def("__iter__",
+           [](Vector &v) {
+               return pybind11::make_iterator<
+                   return_value_policy::copy, ItType, ItType, T>(
+                   v.begin(), v.end());
+           },
+           keep_alive<0, 1>() /* Essential: keep list alive while iterator exists */
+    );
+}
+
+template <typename Vector, typename Class_> auto vector_if_insertion_operator(Class_ &cl, std::string const &name)
+    -> decltype(std::declval<std::ostream&>() << std::declval<typename Vector::value_type>(), void()) {
+    using size_type = typename Vector::size_type;
+
+    cl.def("__repr__",
+           [name](Vector &v) {
+            std::ostringstream s;
+            s << name << '[';
+            for (size_type i=0; i < v.size(); ++i) {
+                s << v[i];
+                if (i != v.size() - 1)
+                    s << ", ";
+            }
+            s << ']';
+            return s.str();
+        },
+        "Return the canonical string representation of this list."
+    );
+}
+
+NAMESPACE_END(detail)
+
+//
+// std::vector
+//
+template <typename Vector, typename holder_type = std::unique_ptr<Vector>, typename... Args>
+pybind11::class_<Vector, holder_type> bind_vector(pybind11::module &m, std::string const &name, Args&&... args) {
+    using Class_ = pybind11::class_<Vector, holder_type>;
+
+    Class_ cl(m, name.c_str(), std::forward<Args>(args)...);
+
+    cl.def(pybind11::init<>());
+
+    // Register copy constructor (if possible)
+    detail::vector_if_copy_constructible<Vector, Class_>(cl);
+
+    // Register comparison-related operators and functions (if possible)
+    detail::vector_if_equal_operator<Vector, Class_>(cl);
+
+    // Register stream insertion operator (if possible)
+    detail::vector_if_insertion_operator<Vector, Class_>(cl, name);
+
+    // Modifiers require copyable vector value type
+    detail::vector_modifiers<Vector, Class_>(cl);
+
+    // Accessor and iterator; return by value if copyable, otherwise we return by ref + keep-alive
+    detail::vector_accessor<Vector, Class_>(cl);
+
+    cl.def("__bool__",
+        [](const Vector &v) -> bool {
+            return !v.empty();
+        },
+        "Check whether the list is nonempty"
+    );
+
+    cl.def("__len__", &Vector::size);
+
+
+
+
 #if 0
     // C++ style functions deprecated, leaving it here as an example
     cl.def(pybind11::init<size_type>());
@@ -361,9 +418,12 @@ pybind11::class_<Vector, holder_type> bind_vector(pybind11::module &m, std::stri
 NAMESPACE_BEGIN(detail)
 
 /* Fallback functions */
-template <typename, typename, typename... Args> void map_if_insertion_operator(const Args&...) { }
+template <typename, typename, typename... Args> void map_if_insertion_operator(const Args &...) { }
+template <typename, typename, typename... Args> void map_assignment(const Args &...) { }
 
-template <typename Map, typename Class_, typename... Args> void map_if_copy_assignable(Class_ &cl, const Args&...) {
+// Map assignment when copy-assignable: just copy the value
+template <typename Map, typename Class_>
+void map_assignment(enable_if_t<std::is_copy_assignable<typename Map::mapped_type>::value, Class_> &cl) {
     using KeyType = typename Map::key_type;
     using MappedType = typename Map::mapped_type;
 
@@ -376,19 +436,23 @@ template <typename Map, typename Class_, typename... Args> void map_if_copy_assi
     );
 }
 
-template<typename Map, typename Class_, enable_if_t<!std::is_copy_assignable<typename Map::mapped_type>::value, int> = 0>
-void map_if_copy_assignable(Class_ &cl) {
+// Not copy-assignable, but still copy-constructible: we can update the value by erasing and reinserting
+template<typename Map, typename Class_>
+void map_assignment(enable_if_t<
+        !std::is_copy_assignable<typename Map::mapped_type>::value &&
+        std::is_copy_constructible<typename Map::mapped_type>::value,
+        Class_> &cl) {
     using KeyType = typename Map::key_type;
     using MappedType = typename Map::mapped_type;
 
     cl.def("__setitem__",
            [](Map &m, const KeyType &k, const MappedType &v) {
                // We can't use m[k] = v; because value type might not be default constructable
-               auto r = m.insert(std::make_pair(k, v));
+               auto r = m.emplace(k, v);
                if (!r.second) {
-                   // value type might be const so the only way to insert it is to erase it first...
+                   // value type is not copy assignable so the only way to insert it is to erase it first...
                    m.erase(r.first);
-                   m.insert(std::make_pair(k, v));
+                   m.emplace(k, v);
                }
            }
     );
@@ -415,6 +479,8 @@ template <typename Map, typename Class_> auto map_if_insertion_operator(Class_ &
         "Return the canonical string representation of this map."
     );
 }
+
+
 NAMESPACE_END(detail)
 
 template <typename Map, typename holder_type = std::unique_ptr<Map>, typename... Args>
@@ -446,15 +512,17 @@ pybind11::class_<Map, holder_type> bind_map(module &m, const std::string &name,
     );
 
     cl.def("__getitem__",
-           [](Map &m, const KeyType &k) -> MappedType {
-               auto it = m.find(k);
-               if (it == m.end())
-                  throw pybind11::key_error();
-               return it->second;
-           }
+        [](Map &m, const KeyType &k) -> MappedType & {
+            auto it = m.find(k);
+            if (it == m.end())
+              throw pybind11::key_error();
+           return it->second;
+        },
+        return_value_policy::reference_internal // ref + keepalive
     );
 
-    detail::map_if_copy_assignable<Map, Class_>(cl);
+    // Assignment provided only if the type is copyable
+    detail::map_assignment<Map, Class_>(cl);
 
     cl.def("__delitem__",
            [](Map &m, const KeyType &k) {
diff --git a/pybind11/pybind11/__init__.py b/pybind11/pybind11/__init__.py
index dc0ddf6f3..a765692fe 100644
--- a/pybind11/pybind11/__init__.py
+++ b/pybind11/pybind11/__init__.py
@@ -1,4 +1,4 @@
-from ._version import version_info, __version__
+from ._version import version_info, __version__  # noqa: F401 imported but unused
 
 
 def get_include(*args, **kwargs):
diff --git a/pybind11/pybind11/_version.py b/pybind11/pybind11/_version.py
index be66f0732..e67a37027 100644
--- a/pybind11/pybind11/_version.py
+++ b/pybind11/pybind11/_version.py
@@ -1,2 +1,2 @@
-version_info = (1, 9, 'dev0')
+version_info = (2, 0, 1)
 __version__ = '.'.join(map(str, version_info))
diff --git a/pybind11/setup.cfg b/pybind11/setup.cfg
index 3c6e79cf3..9e5e88d82 100644
--- a/pybind11/setup.cfg
+++ b/pybind11/setup.cfg
@@ -1,2 +1,10 @@
 [bdist_wheel]
 universal=1
+
+[flake8]
+max-line-length = 99
+show_source = True
+exclude = .git, __pycache__, build, dist, docs, tools, venv
+ignore =
+    # required for pretty matrix formating: multiple spaces after `,` and `[`
+    E201, E241
diff --git a/pybind11/setup.py b/pybind11/setup.py
index 425e6c57a..a25f1af40 100644
--- a/pybind11/setup.py
+++ b/pybind11/setup.py
@@ -18,17 +18,20 @@ setup(
     headers=[
         'include/pybind11/attr.h',
         'include/pybind11/cast.h',
+        'include/pybind11/chrono.h',
+        'include/pybind11/common.h',
         'include/pybind11/complex.h',
         'include/pybind11/descr.h',
         'include/pybind11/eigen.h',
+        'include/pybind11/eval.h',
+        'include/pybind11/functional.h',
         'include/pybind11/numpy.h',
+        'include/pybind11/operators.h',
+        'include/pybind11/options.h',
         'include/pybind11/pybind11.h',
+        'include/pybind11/pytypes.h',
         'include/pybind11/stl.h',
         'include/pybind11/stl_bind.h',
-        'include/pybind11/common.h',
-        'include/pybind11/functional.h',
-        'include/pybind11/operators.h',
-        'include/pybind11/pytypes.h',
         'include/pybind11/typeid.h'
     ],
     classifiers=[
@@ -43,14 +46,15 @@ setup(
         'Programming Language :: Python :: 3.3',
         'Programming Language :: Python :: 3.4',
         'Programming Language :: Python :: 3.5',
-        'License :: OSI Approved :: BSD License',
+        'Programming Language :: Python :: 3.6',
+        'License :: OSI Approved :: BSD License'
     ],
     keywords='C++11, Python bindings',
-    long_description="""pybind11 is a lightweight header library that exposes
-C++ types in Python and vice versa, mainly to create Python bindings of
+    long_description="""pybind11 is a lightweight header-only library that
+exposes C++ types in Python and vice versa, mainly to create Python bindings of
 existing C++ code. Its goals and syntax are similar to the excellent
-Boost.Python library by David Abrahams: to minimize boilerplate code in
-traditional extension modules by inferring type information using compile-time
+Boost.Python by David Abrahams: to minimize boilerplate code in traditional
+extension modules by inferring type information using compile-time
 introspection.
 
 The main issue with Boost.Python-and the reason for creating such a similar
@@ -63,9 +67,9 @@ become an excessively large and unnecessary dependency.
 
 Think of this library as a tiny self-contained version of Boost.Python with
 everything stripped away that isn't relevant for binding generation. Without
-comments, the core header files only require ~2.5K lines of code and depend on
-Python (2.7 or 3.x) and the C++ standard library. This compact implementation
-was possible thanks to some of the new C++11 language features (specifically:
-tuples, lambda functions and variadic templates). Since its creation, this
-library has grown beyond Boost.Python in many ways, leading to dramatically
-simpler binding code in many common situations.""")
+comments, the core header files only require ~4K lines of code and depend on
+Python (2.7 or 3.x, or PyPy2.7 >= 5.7) and the C++ standard library. This
+compact implementation was possible thanks to some of the new C++11 language
+features (specifically: tuples, lambda functions and variadic templates). Since
+its creation, this library has grown beyond Boost.Python in many ways, leading
+to dramatically simpler binding code in many common situations.""")
diff --git a/pybind11/tests/CMakeLists.txt b/pybind11/tests/CMakeLists.txt
index 7470c5d93..763ad54a0 100644
--- a/pybind11/tests/CMakeLists.txt
+++ b/pybind11/tests/CMakeLists.txt
@@ -5,6 +5,7 @@ if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
     "MinSizeRel" "RelWithDebInfo")
 endif()
 
+# Full set of test files (you can override these; see below)
 set(PYBIND11_TEST_FILES
   test_alias_initialization.cpp
   test_buffers.cpp
@@ -13,6 +14,7 @@ set(PYBIND11_TEST_FILES
   test_class_args.cpp
   test_constants_and_functions.cpp
   test_copy_move_policies.cpp
+  test_docstring_options.cpp
   test_eigen.cpp
   test_enum.cpp
   test_eval.cpp
@@ -37,6 +39,14 @@ set(PYBIND11_TEST_FILES
   test_virtual_functions.cpp
 )
 
+# Invoking cmake with something like:
+#     cmake -DPYBIND11_TEST_OVERRIDE="test_issues.cpp;test_picking.cpp" ..
+# lets you override the tests that get compiled and run.  You can restore to all tests with:
+#     cmake -DPYBIND11_TEST_OVERRIDE= ..
+if (PYBIND11_TEST_OVERRIDE)
+  set(PYBIND11_TEST_FILES ${PYBIND11_TEST_OVERRIDE})
+endif()
+
 string(REPLACE ".cpp" ".py" PYBIND11_PYTEST_FILES "${PYBIND11_TEST_FILES}")
 
 # Check if Eigen is available; if not, remove from PYBIND11_TEST_FILES (but
@@ -78,7 +88,7 @@ endif()
 
 # Make sure pytest is found or produce a fatal error
 if(NOT PYBIND11_PYTEST_FOUND)
-  execute_process(COMMAND ${PYTHON_EXECUTABLE} -m pytest --version --noconftest OUTPUT_QUIET ERROR_QUIET
+  execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "import pytest" OUTPUT_QUIET ERROR_QUIET
                   RESULT_VARIABLE PYBIND11_EXEC_PYTHON_ERR)
   if(PYBIND11_EXEC_PYTHON_ERR)
     message(FATAL_ERROR "Running the tests requires pytest.  Please install it manually (try: ${PYTHON_EXECUTABLE} -m pip install pytest)")
@@ -89,3 +99,66 @@ endif()
 # A single command to compile and run the tests
 add_custom_target(pytest COMMAND ${PYTHON_EXECUTABLE} -m pytest -rws ${PYBIND11_PYTEST_FILES}
                   DEPENDS pybind11_tests WORKING_DIRECTORY ${testdir})
+
+if(PYBIND11_TEST_OVERRIDE)
+  add_custom_command(TARGET pytest POST_BUILD
+    COMMAND ${CMAKE_COMMAND} -E echo "Note: not all tests run: -DPYBIND11_TEST_OVERRIDE is in effect")
+endif()
+
+# And another to show the .so size and, if a previous size, compare it:
+add_custom_command(TARGET pybind11_tests POST_BUILD
+  COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/tools/libsize.py
+  $<TARGET_FILE:pybind11_tests> ${CMAKE_CURRENT_BINARY_DIR}/sosize-$<TARGET_FILE_NAME:pybind11_tests>.txt)
+
+# Test CMake build using functions and targets from subdirectory or installed location
+add_custom_target(test_cmake_build)
+if(NOT CMAKE_VERSION VERSION_LESS 3.1)
+  # 3.0 needed for interface library for subdirectory_target/installed_target
+  # 3.1 needed for cmake -E env for testing
+
+  include(CMakeParseArguments)
+  function(pybind11_add_build_test name)
+    cmake_parse_arguments(ARG "INSTALL" "" "" ${ARGN})
+
+    set(build_options "-DCMAKE_PREFIX_PATH=${PROJECT_BINARY_DIR}/mock_install"
+                      "-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}"
+                      "-DPYTHON_EXECUTABLE=${PYTHON_EXECUTABLE}"
+                      "-DPYBIND11_CPP_STANDARD=${PYBIND11_CPP_STANDARD}")
+    if(NOT ARG_INSTALL)
+      list(APPEND build_options "-DPYBIND11_PROJECT_DIR=${PROJECT_SOURCE_DIR}")
+    endif()
+
+    add_custom_target(test_${name} ${CMAKE_CTEST_COMMAND}
+      --quiet --output-log test_cmake_build/${name}.log
+      --build-and-test "${CMAKE_CURRENT_SOURCE_DIR}/test_cmake_build/${name}"
+                       "${CMAKE_CURRENT_BINARY_DIR}/test_cmake_build/${name}"
+      --build-config Release
+      --build-noclean
+      --build-generator ${CMAKE_GENERATOR}
+      $<$<BOOL:${CMAKE_GENERATOR_PLATFORM}>:--build-generator-platform> ${CMAKE_GENERATOR_PLATFORM}
+      --build-makeprogram ${CMAKE_MAKE_PROGRAM}
+      --build-target check
+      --build-options ${build_options}
+    )
+    if(ARG_INSTALL)
+      add_dependencies(test_${name} mock_install)
+    endif()
+    add_dependencies(test_cmake_build test_${name})
+  endfunction()
+
+  pybind11_add_build_test(subdirectory_function)
+  pybind11_add_build_test(subdirectory_target)
+
+  if(PYBIND11_INSTALL)
+    add_custom_target(mock_install ${CMAKE_COMMAND}
+      "-DCMAKE_INSTALL_PREFIX=${PROJECT_BINARY_DIR}/mock_install"
+      -P "${PROJECT_BINARY_DIR}/cmake_install.cmake"
+    )
+
+    pybind11_add_build_test(installed_function INSTALL)
+    pybind11_add_build_test(installed_target INSTALL)
+  endif()
+endif()
+
+# Run all the tests
+add_custom_target(check DEPENDS pytest test_cmake_build)
diff --git a/pybind11/tests/conftest.py b/pybind11/tests/conftest.py
index 1d7a24aa1..b69fd6cb2 100644
--- a/pybind11/tests/conftest.py
+++ b/pybind11/tests/conftest.py
@@ -8,13 +8,14 @@ import pytest
 import textwrap
 import difflib
 import re
-import os
 import sys
 import contextlib
+import platform
+import gc
 
 _unicode_marker = re.compile(r'u(\'[^\']*\')')
-_long_marker    = re.compile(r'([0-9])L')
-_hexadecimal    = re.compile(r'0x[0-9a-fA-F]+')
+_long_marker = re.compile(r'([0-9])L')
+_hexadecimal = re.compile(r'0x[0-9a-fA-F]+')
 
 
 def _strip_and_dedent(s):
@@ -177,6 +178,13 @@ def suppress(exception):
         pass
 
 
+def gc_collect():
+    ''' Run the garbage collector twice (needed when running
+    reference counting tests with PyPy) '''
+    gc.collect()
+    gc.collect()
+
+
 def pytest_namespace():
     """Add import suppression and test requirements to `pytest` namespace"""
     try:
@@ -191,6 +199,7 @@ def pytest_namespace():
         from pybind11_tests import have_eigen
     except ImportError:
         have_eigen = False
+    pypy = platform.python_implementation() == "PyPy"
 
     skipif = pytest.mark.skipif
     return {
@@ -201,6 +210,8 @@ def pytest_namespace():
                                            reason="eigen and/or numpy are not installed"),
         'requires_eigen_and_scipy': skipif(not have_eigen or not scipy,
                                            reason="eigen and/or scipy are not installed"),
+        'unsupported_on_pypy': skipif(pypy, reason="unsupported on PyPy"),
+        'gc_collect': gc_collect
     }
 
 
@@ -218,7 +229,7 @@ def _test_import_pybind11():
     """
     # noinspection PyBroadException
     try:
-        import pybind11_tests
+        import pybind11_tests  # noqa: F401 imported but unused
     except Exception as e:
         print("Failed to import pybind11_tests from pytest:")
         print("  {}: {}".format(type(e).__name__, e))
diff --git a/pybind11/tests/constructor_stats.h b/pybind11/tests/constructor_stats.h
index 5dd215f19..de5c133c1 100644
--- a/pybind11/tests/constructor_stats.h
+++ b/pybind11/tests/constructor_stats.h
@@ -56,7 +56,7 @@ from the ConstructorStats instance `.values()` method.
 In some cases, when you need to track instances of a C++ class not registered with pybind11, you
 need to add a function returning the ConstructorStats for the C++ class; this can be done with:
 
-    m.def("get_special_cstats", &ConstructorStats::get<SpecialClass>, py::return_value_policy::reference_internal)
+    m.def("get_special_cstats", &ConstructorStats::get<SpecialClass>, py::return_value_policy::reference)
 
 Finally, you can suppress the output messages, but keep the constructor tracking (for
 inspection/testing in python) by using the functions with `print_` replaced with `track_` (e.g.
@@ -85,27 +85,51 @@ public:
         created(inst);
         copy_constructions++;
     }
+
     void move_created(void *inst) {
         created(inst);
         move_constructions++;
     }
+
     void default_created(void *inst) {
         created(inst);
         default_constructions++;
     }
+
     void created(void *inst) {
         ++_instances[inst];
-    };
+    }
+
     void destroyed(void *inst) {
         if (--_instances[inst] < 0)
-            throw std::runtime_error("cstats.destroyed() called with unknown instance; potential double-destruction or a missing cstats.created()");
+            throw std::runtime_error("cstats.destroyed() called with unknown "
+                                     "instance; potential double-destruction "
+                                     "or a missing cstats.created()");
     }
 
-    int alive() {
+    static void gc() {
         // Force garbage collection to ensure any pending destructors are invoked:
+#if defined(PYPY_VERSION)
+        PyObject *globals = PyEval_GetGlobals();
+        PyObject *result = PyRun_String(
+            "import gc\n"
+            "for i in range(2):"
+            "    gc.collect()\n",
+            Py_file_input, globals, globals);
+        if (result == nullptr)
+            throw py::error_already_set();
+        Py_DECREF(result);
+#else
         py::module::import("gc").attr("collect")();
+#endif
+    }
+
+    int alive() {
+        gc();
         int total = 0;
-        for (const auto &p : _instances) if (p.second > 0) total += p.second;
+        for (const auto &p : _instances)
+            if (p.second > 0)
+                total += p.second;
         return total;
     }
 
@@ -134,6 +158,9 @@ public:
 
     // Gets constructor stats from a C++ type
     template <typename T> static ConstructorStats& get() {
+#if defined(PYPY_VERSION)
+        gc();
+#endif
         return get(typeid(T));
     }
 
diff --git a/pybind11/tests/pybind11_tests.cpp b/pybind11/tests/pybind11_tests.cpp
index 35981a0a6..9c593eee1 100644
--- a/pybind11/tests/pybind11_tests.cpp
+++ b/pybind11/tests/pybind11_tests.cpp
@@ -39,7 +39,7 @@ PYBIND11_PLUGIN(pybind11_tests) {
     for (const auto &initializer : initializers())
         initializer(m);
 
-    if (!py::hasattr(m, "have_eigen")) m.attr("have_eigen") = py::cast(false);
+    if (!py::hasattr(m, "have_eigen")) m.attr("have_eigen") = false;
 
     return m.ptr();
 }
diff --git a/pybind11/tests/test_alias_initialization.py b/pybind11/tests/test_alias_initialization.py
index b6d9e84ca..fb90cfc7b 100644
--- a/pybind11/tests/test_alias_initialization.py
+++ b/pybind11/tests/test_alias_initialization.py
@@ -1,12 +1,12 @@
 import pytest
-import gc
 
-def test_alias_delay_initialization(capture, msg):
 
-    # A only initializes its trampoline class when we inherit from it; if we
-    # just create and use an A instance directly, the trampoline initialization
-    # is bypassed and we only initialize an A() instead (for performance
-    # reasons)
+def test_alias_delay_initialization1(capture):
+    """
+    A only initializes its trampoline class when we inherit from it; if we just
+    create and use an A instance directly, the trampoline initialization is
+    bypassed and we only initialize an A() instead (for performance reasons).
+    """
     from pybind11_tests import A, call_f
 
     class B(A):
@@ -21,7 +21,7 @@ def test_alias_delay_initialization(capture, msg):
         a = A()
         call_f(a)
         del a
-        gc.collect()
+        pytest.gc_collect()
     assert capture == "A.f()"
 
     # Python version
@@ -29,7 +29,7 @@ def test_alias_delay_initialization(capture, msg):
         b = B()
         call_f(b)
         del b
-        gc.collect()
+        pytest.gc_collect()
     assert capture == """
         PyA.PyA()
         PyA.f()
@@ -37,14 +37,14 @@ def test_alias_delay_initialization(capture, msg):
         PyA.~PyA()
     """
 
-def test_alias_delay_initialization(capture, msg):
-    from pybind11_tests import A2, call_f
 
-    # A2, unlike the above, is configured to always initialize the alias; while
-    # the extra initialization and extra class layer has small virtual dispatch
-    # performance penalty, it also allows us to do more things with the
-    # trampoline class such as defining local variables and performing
-    # construction/destruction.
+def test_alias_delay_initialization2(capture):
+    """A2, unlike the above, is configured to always initialize the alias; while
+    the extra initialization and extra class layer has small virtual dispatch
+    performance penalty, it also allows us to do more things with the trampoline
+    class such as defining local variables and performing construction/destruction.
+    """
+    from pybind11_tests import A2, call_f
 
     class B2(A2):
         def __init__(self):
@@ -58,7 +58,7 @@ def test_alias_delay_initialization(capture, msg):
         a2 = A2()
         call_f(a2)
         del a2
-        gc.collect()
+        pytest.gc_collect()
     assert capture == """
         PyA2.PyA2()
         PyA2.f()
@@ -71,7 +71,7 @@ def test_alias_delay_initialization(capture, msg):
         b2 = B2()
         call_f(b2)
         del b2
-        gc.collect()
+        pytest.gc_collect()
     assert capture == """
         PyA2.PyA2()
         PyA2.f()
diff --git a/pybind11/tests/test_buffers.cpp b/pybind11/tests/test_buffers.cpp
index c3a7a9e02..057250d29 100644
--- a/pybind11/tests/test_buffers.cpp
+++ b/pybind11/tests/test_buffers.cpp
@@ -75,7 +75,7 @@ private:
 };
 
 test_initializer buffers([](py::module &m) {
-    py::class_<Matrix> mtx(m, "Matrix");
+    py::class_<Matrix> mtx(m, "Matrix", py::buffer_protocol());
 
     mtx.def(py::init<size_t, size_t>())
         /// Construct from a buffer
diff --git a/pybind11/tests/test_buffers.py b/pybind11/tests/test_buffers.py
index f0ea964d9..956839c1c 100644
--- a/pybind11/tests/test_buffers.py
+++ b/pybind11/tests/test_buffers.py
@@ -5,6 +5,33 @@ with pytest.suppress(ImportError):
     import numpy as np
 
 
+@pytest.requires_numpy
+def test_from_python():
+    with pytest.raises(RuntimeError) as excinfo:
+        Matrix(np.array([1, 2, 3]))  # trying to assign a 1D array
+    assert str(excinfo.value) == "Incompatible buffer format!"
+
+    m3 = np.array([[1, 2, 3], [4, 5, 6]]).astype(np.float32)
+    m4 = Matrix(m3)
+
+    for i in range(m4.rows()):
+        for j in range(m4.cols()):
+            assert m3[i, j] == m4[i, j]
+
+    cstats = ConstructorStats.get(Matrix)
+    assert cstats.alive() == 1
+    del m3, m4
+    assert cstats.alive() == 0
+    assert cstats.values() == ["2x3 matrix"]
+    assert cstats.copy_constructions == 0
+    # assert cstats.move_constructions >= 0  # Don't invoke any
+    assert cstats.copy_assignments == 0
+    assert cstats.move_assignments == 0
+
+
+# PyPy: Memory leak in the "np.array(m, copy=False)" call
+# https://bitbucket.org/pypy/pypy/issues/2444
+@pytest.unsupported_on_pypy
 @pytest.requires_numpy
 def test_to_python():
     m = Matrix(5, 5)
@@ -23,35 +50,13 @@ def test_to_python():
     cstats = ConstructorStats.get(Matrix)
     assert cstats.alive() == 1
     del m
+    pytest.gc_collect()
     assert cstats.alive() == 1
     del m2  # holds an m reference
+    pytest.gc_collect()
     assert cstats.alive() == 0
     assert cstats.values() == ["5x5 matrix"]
     assert cstats.copy_constructions == 0
     # assert cstats.move_constructions >= 0  # Don't invoke any
     assert cstats.copy_assignments == 0
     assert cstats.move_assignments == 0
-
-
-@pytest.requires_numpy
-def test_from_python():
-    with pytest.raises(RuntimeError) as excinfo:
-        Matrix(np.array([1, 2, 3]))  # trying to assign a 1D array
-    assert str(excinfo.value) == "Incompatible buffer format!"
-
-    m3 = np.array([[1, 2, 3], [4, 5, 6]]).astype(np.float32)
-    m4 = Matrix(m3)
-
-    for i in range(m4.rows()):
-        for j in range(m4.cols()):
-            assert m3[i, j] == m4[i, j]
-
-    cstats = ConstructorStats.get(Matrix)
-    assert cstats.alive() == 1
-    del m3, m4
-    assert cstats.alive() == 0
-    assert cstats.values() == ["2x3 matrix"]
-    assert cstats.copy_constructions == 0
-    # assert cstats.move_constructions >= 0  # Don't invoke any
-    assert cstats.copy_assignments == 0
-    assert cstats.move_assignments == 0
diff --git a/pybind11/tests/test_chrono.py b/pybind11/tests/test_chrono.py
index b1d4dc72f..94ca55c76 100644
--- a/pybind11/tests/test_chrono.py
+++ b/pybind11/tests/test_chrono.py
@@ -47,7 +47,7 @@ def test_chrono_duration_roundtrip():
     from pybind11_tests import test_chrono3
     import datetime
 
-    # Get the difference betwen two times (a timedelta)
+    # Get the difference between two times (a timedelta)
     date1 = datetime.datetime.today()
     date2 = datetime.datetime.today()
     diff = date2 - date1
diff --git a/pybind11/tests/test_class_args.py b/pybind11/tests/test_class_args.py
index a1fc21a7d..40cbcec9f 100644
--- a/pybind11/tests/test_class_args.py
+++ b/pybind11/tests/test_class_args.py
@@ -1,7 +1,8 @@
 
-import pytest
 
 def test_class_args():
-    # There's basically nothing to test here; just make sure the code compiled and declared its definition
+    """There's basically nothing to test here; just make sure the code compiled
+    and declared its definition
+    """
     from pybind11_tests import class_args_noop
     class_args_noop()
diff --git a/pybind11/tests/test_cmake_build/installed_function/CMakeLists.txt b/pybind11/tests/test_cmake_build/installed_function/CMakeLists.txt
new file mode 100644
index 000000000..e0c20a8a3
--- /dev/null
+++ b/pybind11/tests/test_cmake_build/installed_function/CMakeLists.txt
@@ -0,0 +1,12 @@
+cmake_minimum_required(VERSION 2.8.12)
+project(test_installed_module CXX)
+
+set(CMAKE_MODULE_PATH "")
+
+find_package(pybind11 CONFIG REQUIRED)
+message(STATUS "Found pybind11 v${pybind11_VERSION}: ${pybind11_INCLUDE_DIRS}")
+
+pybind11_add_module(test_cmake_build SHARED NO_EXTRAS ../main.cpp)
+
+add_custom_target(check ${CMAKE_COMMAND} -E env PYTHONPATH=$<TARGET_FILE_DIR:test_cmake_build>
+                  ${PYTHON_EXECUTABLE} ${PROJECT_SOURCE_DIR}/../test.py ${PROJECT_NAME})
diff --git a/pybind11/tests/test_cmake_build/installed_target/CMakeLists.txt b/pybind11/tests/test_cmake_build/installed_target/CMakeLists.txt
new file mode 100644
index 000000000..dd206592f
--- /dev/null
+++ b/pybind11/tests/test_cmake_build/installed_target/CMakeLists.txt
@@ -0,0 +1,18 @@
+cmake_minimum_required(VERSION 3.0)
+project(test_installed_target CXX)
+
+set(CMAKE_MODULE_PATH "")
+
+find_package(pybind11 CONFIG REQUIRED)
+message(STATUS "Found pybind11 v${pybind11_VERSION}: ${pybind11_INCLUDE_DIRS}")
+
+add_library(test_cmake_build MODULE ../main.cpp)
+
+target_link_libraries(test_cmake_build PRIVATE pybind11::module)
+
+# make sure result is, for example, test_installed_target.so, not libtest_installed_target.dylib
+set_target_properties(test_cmake_build PROPERTIES PREFIX "${PYTHON_MODULE_PREFIX}"
+                                                  SUFFIX "${PYTHON_MODULE_EXTENSION}")
+
+add_custom_target(check ${CMAKE_COMMAND} -E env PYTHONPATH=$<TARGET_FILE_DIR:test_cmake_build>
+                  ${PYTHON_EXECUTABLE} ${PROJECT_SOURCE_DIR}/../test.py ${PROJECT_NAME})
diff --git a/pybind11/tests/test_cmake_build/main.cpp b/pybind11/tests/test_cmake_build/main.cpp
new file mode 100644
index 000000000..e0f5b69c9
--- /dev/null
+++ b/pybind11/tests/test_cmake_build/main.cpp
@@ -0,0 +1,10 @@
+#include <pybind11/pybind11.h>
+namespace py = pybind11;
+
+PYBIND11_PLUGIN(test_cmake_build) {
+    py::module m("test_cmake_build");
+
+    m.def("add", [](int i, int j) { return i + j; });
+
+    return m.ptr();
+}
diff --git a/pybind11/tests/test_cmake_build/subdirectory_function/CMakeLists.txt b/pybind11/tests/test_cmake_build/subdirectory_function/CMakeLists.txt
new file mode 100644
index 000000000..278007aeb
--- /dev/null
+++ b/pybind11/tests/test_cmake_build/subdirectory_function/CMakeLists.txt
@@ -0,0 +1,8 @@
+cmake_minimum_required(VERSION 2.8.12)
+project(test_subdirectory_module CXX)
+
+add_subdirectory(${PYBIND11_PROJECT_DIR} pybind11)
+pybind11_add_module(test_cmake_build THIN_LTO ../main.cpp)
+
+add_custom_target(check ${CMAKE_COMMAND} -E env PYTHONPATH=$<TARGET_FILE_DIR:test_cmake_build>
+                  ${PYTHON_EXECUTABLE} ${PROJECT_SOURCE_DIR}/../test.py ${PROJECT_NAME})
diff --git a/pybind11/tests/test_cmake_build/subdirectory_target/CMakeLists.txt b/pybind11/tests/test_cmake_build/subdirectory_target/CMakeLists.txt
new file mode 100644
index 000000000..6b142d62a
--- /dev/null
+++ b/pybind11/tests/test_cmake_build/subdirectory_target/CMakeLists.txt
@@ -0,0 +1,15 @@
+cmake_minimum_required(VERSION 3.0)
+project(test_subdirectory_target CXX)
+
+add_subdirectory(${PYBIND11_PROJECT_DIR} pybind11)
+
+add_library(test_cmake_build MODULE ../main.cpp)
+
+target_link_libraries(test_cmake_build PRIVATE pybind11::module)
+
+# make sure result is, for example, test_installed_target.so, not libtest_installed_target.dylib
+set_target_properties(test_cmake_build PROPERTIES PREFIX "${PYTHON_MODULE_PREFIX}"
+                                                  SUFFIX "${PYTHON_MODULE_EXTENSION}")
+
+add_custom_target(check ${CMAKE_COMMAND} -E env PYTHONPATH=$<TARGET_FILE_DIR:test_cmake_build>
+                  ${PYTHON_EXECUTABLE} ${PROJECT_SOURCE_DIR}/../test.py ${PROJECT_NAME})
diff --git a/pybind11/tests/test_cmake_build/test.py b/pybind11/tests/test_cmake_build/test.py
new file mode 100644
index 000000000..1467a61dc
--- /dev/null
+++ b/pybind11/tests/test_cmake_build/test.py
@@ -0,0 +1,5 @@
+import sys
+import test_cmake_build
+
+assert test_cmake_build.add(1, 2) == 3
+print("{} imports, runs, and adds: 1 + 2 = 3".format(sys.argv[1]))
diff --git a/pybind11/tests/test_constants_and_functions.cpp b/pybind11/tests/test_constants_and_functions.cpp
index 29a27965a..653bdf6b6 100644
--- a/pybind11/tests/test_constants_and_functions.cpp
+++ b/pybind11/tests/test_constants_and_functions.cpp
@@ -23,6 +23,9 @@ std::string test_function3(int i) {
     return "test_function(" + std::to_string(i) + ")";
 }
 
+py::str test_function4(int, float) { return "test_function(int, float)"; }
+py::str test_function4(float, int) { return "test_function(float, int)"; }
+
 py::bytes return_bytes() {
     const char *data = "\x01\x00\x02\x00";
     return std::string(data, 4);
@@ -38,6 +41,26 @@ std::string print_bytes(py::bytes bytes) {
     return ret;
 }
 
+// Test that we properly handle C++17 exception specifiers (which are part of the function signature
+// in C++17).  These should all still work before C++17, but don't affect the function signature.
+namespace test_exc_sp {
+int f1(int x) noexcept { return x+1; }
+int f2(int x) noexcept(true) { return x+2; }
+int f3(int x) noexcept(false) { return x+3; }
+int f4(int x) throw() { return x+4; } // Deprecated equivalent to noexcept(true)
+struct C {
+    int m1(int x) noexcept { return x-1; }
+    int m2(int x) const noexcept { return x-2; }
+    int m3(int x) noexcept(true) { return x-3; }
+    int m4(int x) const noexcept(true) { return x-4; }
+    int m5(int x) noexcept(false) { return x-5; }
+    int m6(int x) const noexcept(false) { return x-6; }
+    int m7(int x) throw() { return x-7; }
+    int m8(int x) const throw() { return x-8; }
+};
+}
+
+
 test_initializer constants_and_functions([](py::module &m) {
     m.attr("some_constant") = py::int_(14);
 
@@ -45,6 +68,14 @@ test_initializer constants_and_functions([](py::module &m) {
     m.def("test_function", &test_function2);
     m.def("test_function", &test_function3);
 
+#if defined(PYBIND11_OVERLOAD_CAST)
+    m.def("test_function", py::overload_cast<int, float>(&test_function4));
+    m.def("test_function", py::overload_cast<float, int>(&test_function4));
+#else
+    m.def("test_function", static_cast<py::str (*)(int, float)>(&test_function4));
+    m.def("test_function", static_cast<py::str (*)(float, int)>(&test_function4));
+#endif
+
     py::enum_<MyEnum>(m, "MyEnum")
         .value("EFirstEntry", EFirstEntry)
         .value("ESecondEntry", ESecondEntry)
@@ -52,4 +83,22 @@ test_initializer constants_and_functions([](py::module &m) {
 
     m.def("return_bytes", &return_bytes);
     m.def("print_bytes", &print_bytes);
+
+    using namespace test_exc_sp;
+    py::module m2 = m.def_submodule("exc_sp");
+    py::class_<C>(m2, "C")
+        .def(py::init<>())
+        .def("m1", &C::m1)
+        .def("m2", &C::m2)
+        .def("m3", &C::m3)
+        .def("m4", &C::m4)
+        .def("m5", &C::m5)
+        .def("m6", &C::m6)
+        .def("m7", &C::m7)
+        .def("m8", &C::m8)
+        ;
+    m2.def("f1", f1);
+    m2.def("f2", f2);
+    m2.def("f3", f3);
+    m2.def("f4", f4);
 });
diff --git a/pybind11/tests/test_constants_and_functions.py b/pybind11/tests/test_constants_and_functions.py
index 2c6321e05..2a570d2e5 100644
--- a/pybind11/tests/test_constants_and_functions.py
+++ b/pybind11/tests/test_constants_and_functions.py
@@ -14,8 +14,30 @@ def test_function_overloading():
     assert test_function(MyEnum.EFirstEntry) == "test_function(enum=1)"
     assert test_function(MyEnum.ESecondEntry) == "test_function(enum=2)"
 
+    assert test_function(1, 1.0) == "test_function(int, float)"
+    assert test_function(2.0, 2) == "test_function(float, int)"
+
 
 def test_bytes():
     from pybind11_tests import return_bytes, print_bytes
 
     assert print_bytes(return_bytes()) == "bytes[1 0 2 0]"
+
+
+def test_exception_specifiers():
+    from pybind11_tests.exc_sp import C, f1, f2, f3, f4
+
+    c = C()
+    assert c.m1(2) == 1
+    assert c.m2(3) == 1
+    assert c.m3(5) == 2
+    assert c.m4(7) == 3
+    assert c.m5(10) == 5
+    assert c.m6(14) == 8
+    assert c.m7(20) == 13
+    assert c.m8(29) == 21
+
+    assert f1(33) == 34
+    assert f2(53) == 55
+    assert f3(86) == 89
+    assert f4(140) == 144
diff --git a/pybind11/tests/test_copy_move_policies.cpp b/pybind11/tests/test_copy_move_policies.cpp
index de1c6e647..6f7907c1f 100644
--- a/pybind11/tests/test_copy_move_policies.cpp
+++ b/pybind11/tests/test_copy_move_policies.cpp
@@ -21,7 +21,7 @@ struct lacking_copy_ctor : public empty<lacking_copy_ctor> {
     lacking_copy_ctor(const lacking_copy_ctor& other) = delete;
 };
 
-template <> lacking_copy_ctor empty<lacking_copy_ctor>::instance_ {};
+template <> lacking_copy_ctor empty<lacking_copy_ctor>::instance_ = {};
 
 struct lacking_move_ctor : public empty<lacking_move_ctor> {
     lacking_move_ctor() {}
@@ -29,7 +29,7 @@ struct lacking_move_ctor : public empty<lacking_move_ctor> {
     lacking_move_ctor(lacking_move_ctor&& other) = delete;
 };
 
-template <> lacking_move_ctor empty<lacking_move_ctor>::instance_ {};
+template <> lacking_move_ctor empty<lacking_move_ctor>::instance_ = {};
 
 test_initializer copy_move_policies([](py::module &m) {
     py::class_<lacking_copy_ctor>(m, "lacking_copy_ctor")
diff --git a/pybind11/tests/test_copy_move_policies.py b/pybind11/tests/test_copy_move_policies.py
index 07f09f693..edcf38075 100644
--- a/pybind11/tests/test_copy_move_policies.py
+++ b/pybind11/tests/test_copy_move_policies.py
@@ -13,5 +13,3 @@ def test_lacking_move_ctor():
     with pytest.raises(RuntimeError) as excinfo:
         lacking_move_ctor.get_one()
     assert "the object is neither movable nor copyable!" in str(excinfo.value)
-
-
diff --git a/pybind11/tests/test_docstring_options.cpp b/pybind11/tests/test_docstring_options.cpp
new file mode 100644
index 000000000..74178c272
--- /dev/null
+++ b/pybind11/tests/test_docstring_options.cpp
@@ -0,0 +1,53 @@
+/*
+    tests/test_docstring_options.cpp -- generation of docstrings and signatures
+
+    Copyright (c) 2016 Wenzel Jakob <wenzel.jakob@epfl.ch>
+
+    All rights reserved. Use of this source code is governed by a
+    BSD-style license that can be found in the LICENSE file.
+*/
+
+#include "pybind11_tests.h"
+
+struct DocstringTestFoo {
+    int value;
+    void setValue(int v) { value = v; }
+    int getValue() const { return value; }
+};
+
+test_initializer docstring_generation([](py::module &m) {
+
+    {
+        py::options options;
+        options.disable_function_signatures();
+
+        m.def("test_function1", [](int, int) {}, py::arg("a"), py::arg("b"));
+        m.def("test_function2", [](int, int) {}, py::arg("a"), py::arg("b"), "A custom docstring");
+
+        options.enable_function_signatures();
+
+        m.def("test_function3", [](int, int) {}, py::arg("a"), py::arg("b"));
+        m.def("test_function4", [](int, int) {}, py::arg("a"), py::arg("b"), "A custom docstring");
+
+        options.disable_function_signatures().disable_user_defined_docstrings();
+
+        m.def("test_function5", [](int, int) {}, py::arg("a"), py::arg("b"), "A custom docstring");
+
+        {
+            py::options nested_options;
+            nested_options.enable_user_defined_docstrings();
+            m.def("test_function6", [](int, int) {}, py::arg("a"), py::arg("b"), "A custom docstring");
+        }
+    }
+
+    m.def("test_function7", [](int, int) {}, py::arg("a"), py::arg("b"), "A custom docstring");
+
+    {
+        py::options options;
+        options.disable_user_defined_docstrings();
+
+        py::class_<DocstringTestFoo>(m, "DocstringTestFoo", "This is a class docstring")
+            .def_property("value_prop", &DocstringTestFoo::getValue, &DocstringTestFoo::setValue, "This is a property docstring")
+        ;
+    }
+});
diff --git a/pybind11/tests/test_docstring_options.py b/pybind11/tests/test_docstring_options.py
new file mode 100644
index 000000000..66ad6b89f
--- /dev/null
+++ b/pybind11/tests/test_docstring_options.py
@@ -0,0 +1,32 @@
+
+
+def test_docstring_options():
+    from pybind11_tests import (test_function1, test_function2, test_function3,
+                                test_function4, test_function5, test_function6,
+                                test_function7, DocstringTestFoo)
+
+    # options.disable_function_signatures()
+    assert not test_function1.__doc__
+
+    assert test_function2.__doc__ == "A custom docstring"
+
+    # options.enable_function_signatures()
+    assert test_function3.__doc__ .startswith("test_function3(a: int, b: int) -> None")
+
+    assert test_function4.__doc__ .startswith("test_function4(a: int, b: int) -> None")
+    assert test_function4.__doc__ .endswith("A custom docstring\n")
+
+    # options.disable_function_signatures()
+    # options.disable_user_defined_docstrings()
+    assert not test_function5.__doc__
+
+    # nested options.enable_user_defined_docstrings()
+    assert test_function6.__doc__ == "A custom docstring"
+
+    # RAII destructor
+    assert test_function7.__doc__ .startswith("test_function7(a: int, b: int) -> None")
+    assert test_function7.__doc__ .endswith("A custom docstring\n")
+
+    # Suppression of user-defined docstrings for non-function objects
+    assert not DocstringTestFoo.__doc__
+    assert not DocstringTestFoo.value_prop.__doc__
diff --git a/pybind11/tests/test_eigen.cpp b/pybind11/tests/test_eigen.cpp
index a9cb9f21c..588cdceb3 100644
--- a/pybind11/tests/test_eigen.cpp
+++ b/pybind11/tests/test_eigen.cpp
@@ -40,7 +40,7 @@ test_initializer eigen([](py::module &m) {
     typedef Eigen::SparseMatrix<float, Eigen::RowMajor> SparseMatrixR;
     typedef Eigen::SparseMatrix<float> SparseMatrixC;
 
-    m.attr("have_eigen") = py::cast(true);
+    m.attr("have_eigen") = true;
 
     // Non-symmetric matrix with zero elements
     Eigen::MatrixXf mat(5, 6);
diff --git a/pybind11/tests/test_eigen.py b/pybind11/tests/test_eigen.py
index 2a58f8c40..b0092fc8b 100644
--- a/pybind11/tests/test_eigen.py
+++ b/pybind11/tests/test_eigen.py
@@ -110,9 +110,15 @@ def test_special_matrix_objects():
 def test_dense_signature(doc):
     from pybind11_tests import double_col, double_row, double_mat_rm
 
-    assert doc(double_col) == "double_col(arg0: numpy.ndarray[float32[m, 1]]) -> numpy.ndarray[float32[m, 1]]"
-    assert doc(double_row) == "double_row(arg0: numpy.ndarray[float32[1, n]]) -> numpy.ndarray[float32[1, n]]"
-    assert doc(double_mat_rm) == "double_mat_rm(arg0: numpy.ndarray[float32[m, n]]) -> numpy.ndarray[float32[m, n]]"
+    assert doc(double_col) == """
+        double_col(arg0: numpy.ndarray[float32[m, 1]]) -> numpy.ndarray[float32[m, 1]]
+    """
+    assert doc(double_row) == """
+        double_row(arg0: numpy.ndarray[float32[1, n]]) -> numpy.ndarray[float32[1, n]]
+    """
+    assert doc(double_mat_rm) == """
+        double_mat_rm(arg0: numpy.ndarray[float32[m, n]]) -> numpy.ndarray[float32[m, n]]
+    """
 
 
 @pytest.requires_eigen_and_scipy
@@ -131,5 +137,9 @@ def test_sparse():
 def test_sparse_signature(doc):
     from pybind11_tests import sparse_passthrough_r, sparse_passthrough_c
 
-    assert doc(sparse_passthrough_r) == "sparse_passthrough_r(arg0: scipy.sparse.csr_matrix[float32]) -> scipy.sparse.csr_matrix[float32]"
-    assert doc(sparse_passthrough_c) == "sparse_passthrough_c(arg0: scipy.sparse.csc_matrix[float32]) -> scipy.sparse.csc_matrix[float32]"
+    assert doc(sparse_passthrough_r) == """
+        sparse_passthrough_r(arg0: scipy.sparse.csr_matrix[float32]) -> scipy.sparse.csr_matrix[float32]
+    """  # noqa: E501 line too long
+    assert doc(sparse_passthrough_c) == """
+        sparse_passthrough_c(arg0: scipy.sparse.csc_matrix[float32]) -> scipy.sparse.csc_matrix[float32]
+    """  # noqa: E501 line too long
diff --git a/pybind11/tests/test_enum.cpp b/pybind11/tests/test_enum.cpp
index 87cb7d0d4..09f334cdb 100644
--- a/pybind11/tests/test_enum.cpp
+++ b/pybind11/tests/test_enum.cpp
@@ -19,6 +19,12 @@ enum class ScopedEnum {
     Three
 };
 
+enum Flags {
+    Read = 4,
+    Write = 2,
+    Execute = 1
+};
+
 class ClassWithUnscopedEnum {
 public:
     enum EMode {
@@ -38,15 +44,20 @@ std::string test_scoped_enum(ScopedEnum z) {
 test_initializer enums([](py::module &m) {
     m.def("test_scoped_enum", &test_scoped_enum);
 
-    py::enum_<UnscopedEnum>(m, "UnscopedEnum")
+    py::enum_<UnscopedEnum>(m, "UnscopedEnum", py::arithmetic())
         .value("EOne", EOne)
         .value("ETwo", ETwo)
         .export_values();
 
-    py::enum_<ScopedEnum>(m, "ScopedEnum")
+    py::enum_<ScopedEnum>(m, "ScopedEnum", py::arithmetic())
         .value("Two", ScopedEnum::Two)
-        .value("Three", ScopedEnum::Three)
-        ;
+        .value("Three", ScopedEnum::Three);
+
+    py::enum_<Flags>(m, "Flags", py::arithmetic())
+        .value("Read", Flags::Read)
+        .value("Write", Flags::Write)
+        .value("Execute", Flags::Execute)
+        .export_values();
 
     py::class_<ClassWithUnscopedEnum> exenum_class(m, "ClassWithUnscopedEnum");
     exenum_class.def_static("test_function", &ClassWithUnscopedEnum::test_function);
diff --git a/pybind11/tests/test_enum.py b/pybind11/tests/test_enum.py
index efabae7f7..de5f3c6f6 100644
--- a/pybind11/tests/test_enum.py
+++ b/pybind11/tests/test_enum.py
@@ -16,6 +16,20 @@ def test_unscoped_enum():
     assert int(UnscopedEnum.ETwo) == 2
     assert str(UnscopedEnum(2)) == "UnscopedEnum.ETwo"
 
+    # order
+    assert UnscopedEnum.EOne < UnscopedEnum.ETwo
+    assert UnscopedEnum.EOne < 2
+    assert UnscopedEnum.ETwo > UnscopedEnum.EOne
+    assert UnscopedEnum.ETwo > 1
+    assert UnscopedEnum.ETwo <= 2
+    assert UnscopedEnum.ETwo >= 2
+    assert UnscopedEnum.EOne <= UnscopedEnum.ETwo
+    assert UnscopedEnum.EOne <= 2
+    assert UnscopedEnum.ETwo >= UnscopedEnum.EOne
+    assert UnscopedEnum.ETwo >= 1
+    assert not (UnscopedEnum.ETwo < UnscopedEnum.EOne)
+    assert not (2 < UnscopedEnum.EOne)
+
 
 def test_scoped_enum():
     from pybind11_tests import ScopedEnum, test_scoped_enum
@@ -30,6 +44,14 @@ def test_scoped_enum():
     with pytest.raises(TypeError):
         assert z != 3
 
+    # order
+    assert ScopedEnum.Two < ScopedEnum.Three
+    assert ScopedEnum.Three > ScopedEnum.Two
+    assert ScopedEnum.Two <= ScopedEnum.Three
+    assert ScopedEnum.Two <= ScopedEnum.Two
+    assert ScopedEnum.Two >= ScopedEnum.Two
+    assert ScopedEnum.Three >= ScopedEnum.Two
+
 
 def test_implicit_conversion():
     from pybind11_tests import ClassWithUnscopedEnum
@@ -61,3 +83,26 @@ def test_implicit_conversion():
     x[f(second)] = 4
     # Hashing test
     assert str(x) == "{EMode.EFirstMode: 3, EMode.ESecondMode: 4}"
+
+
+def test_binary_operators():
+    from pybind11_tests import Flags
+
+    assert int(Flags.Read) == 4
+    assert int(Flags.Write) == 2
+    assert int(Flags.Execute) == 1
+    assert int(Flags.Read | Flags.Write | Flags.Execute) == 7
+    assert int(Flags.Read | Flags.Write) == 6
+    assert int(Flags.Read | Flags.Execute) == 5
+    assert int(Flags.Write | Flags.Execute) == 3
+    assert int(Flags.Write | 1) == 3
+
+    state = Flags.Read | Flags.Write
+    assert (state & Flags.Read) != 0
+    assert (state & Flags.Write) != 0
+    assert (state & Flags.Execute) == 0
+    assert (state & 1) == 0
+
+    state2 = ~state
+    assert state2 == -7
+    assert int(state ^ state2) == -1
diff --git a/pybind11/tests/test_eval_call.py b/pybind11/tests/test_eval_call.py
index a3349e281..53c7e721f 100644
--- a/pybind11/tests/test_eval_call.py
+++ b/pybind11/tests/test_eval_call.py
@@ -1,4 +1,4 @@
 # This file is called from 'test_eval.py'
 
 if 'call_test2' in locals():
-    call_test2(y)
+    call_test2(y)  # noqa: F821 undefined name
diff --git a/pybind11/tests/test_exceptions.cpp b/pybind11/tests/test_exceptions.cpp
index ca2afa642..706b500f2 100644
--- a/pybind11/tests/test_exceptions.cpp
+++ b/pybind11/tests/test_exceptions.cpp
@@ -88,7 +88,7 @@ void throws_logic_error() {
 
 struct PythonCallInDestructor {
     PythonCallInDestructor(const py::dict &d) : d(d) {}
-    ~PythonCallInDestructor() { d["good"] = py::cast(true); }
+    ~PythonCallInDestructor() { d["good"] = true; }
 
     py::dict d;
 };
diff --git a/pybind11/tests/test_exceptions.py b/pybind11/tests/test_exceptions.py
index a9b4b0574..0025e4eb6 100644
--- a/pybind11/tests/test_exceptions.py
+++ b/pybind11/tests/test_exceptions.py
@@ -69,6 +69,6 @@ def test_custom(msg):
     with pytest.raises(MyException5) as excinfo:
         try:
             throws5()
-        except MyException5_1 as e:
+        except MyException5_1:
             raise RuntimeError("Exception error: caught child from parent")
     assert msg(excinfo.value) == "this is a helper-defined translated exception"
diff --git a/pybind11/tests/test_inheritance.cpp b/pybind11/tests/test_inheritance.cpp
index f43edc261..2ec0b4a7a 100644
--- a/pybind11/tests/test_inheritance.cpp
+++ b/pybind11/tests/test_inheritance.cpp
@@ -83,4 +83,18 @@ test_initializer inheritance([](py::module &m) {
         return new BaseClass();
     });
     m.def("return_none", []() -> BaseClass* { return nullptr; });
+
+    m.def("test_isinstance", [](py::list l) {
+        struct Unregistered { }; // checks missing type_info code path
+
+        return py::make_tuple(
+            py::isinstance<py::tuple>(l[0]),
+            py::isinstance<py::dict>(l[1]),
+            py::isinstance<Pet>(l[2]),
+            py::isinstance<Pet>(l[3]),
+            py::isinstance<Dog>(l[4]),
+            py::isinstance<Rabbit>(l[5]),
+            py::isinstance<Unregistered>(l[6])
+        );
+    });
 });
diff --git a/pybind11/tests/test_inheritance.py b/pybind11/tests/test_inheritance.py
index 351fe6b2c..7bb52be02 100644
--- a/pybind11/tests/test_inheritance.py
+++ b/pybind11/tests/test_inheritance.py
@@ -45,3 +45,11 @@ def test_automatic_upcasting():
     assert type(return_class_n(2)).__name__ == "DerivedClass2"
     assert type(return_class_n(0)).__name__ == "BaseClass"
     assert type(return_class_n(1)).__name__ == "DerivedClass1"
+
+
+def test_isinstance():
+    from pybind11_tests import test_isinstance, Pet, Dog
+
+    objects = [tuple(), dict(), Pet("Polly", "parrot")] + [Dog("Molly")] * 4
+    expected = (True, True, True, True, True, False, False)
+    assert test_isinstance(objects) == expected
diff --git a/pybind11/tests/test_issues.cpp b/pybind11/tests/test_issues.cpp
index 29c4057f1..4c59a1b12 100644
--- a/pybind11/tests/test_issues.cpp
+++ b/pybind11/tests/test_issues.cpp
@@ -11,7 +11,7 @@
 #include "constructor_stats.h"
 #include <pybind11/stl.h>
 #include <pybind11/operators.h>
-
+#include <pybind11/complex.h>
 
 #define TRACKERS(CLASS) CLASS() { print_default_created(this); } ~CLASS() { print_destroyed(this); }
 struct NestABase { int value = -2; TRACKERS(NestABase) };
@@ -36,6 +36,45 @@ OpTest2 operator+(const OpTest2 &, const OpTest1 &) {
     return OpTest2();
 }
 
+// #461
+class Dupe1 {
+public:
+    Dupe1(int v) : v_{v} {}
+    int get_value() const { return v_; }
+private:
+    int v_;
+};
+class Dupe2 {};
+class Dupe3 {};
+class DupeException : public std::runtime_error {};
+
+// #478
+template <typename T> class custom_unique_ptr {
+public:
+    custom_unique_ptr() { print_default_created(this); }
+    custom_unique_ptr(T *ptr) : _ptr{ptr} { print_created(this, ptr); }
+    custom_unique_ptr(custom_unique_ptr<T> &&move) : _ptr{move._ptr} { move._ptr = nullptr; print_move_created(this); }
+    custom_unique_ptr &operator=(custom_unique_ptr<T> &&move) { print_move_assigned(this); if (_ptr) destruct_ptr(); _ptr = move._ptr; move._ptr = nullptr; return *this; }
+    custom_unique_ptr(const custom_unique_ptr<T> &) = delete;
+    void operator=(const custom_unique_ptr<T> &copy) = delete;
+    ~custom_unique_ptr() { print_destroyed(this); if (_ptr) destruct_ptr(); }
+private:
+    T *_ptr = nullptr;
+    void destruct_ptr() { delete _ptr; }
+};
+PYBIND11_DECLARE_HOLDER_TYPE(T, custom_unique_ptr<T>);
+
+/// Issue #528: templated constructor
+struct TplConstrClass {
+    template <typename T> TplConstrClass(const T &arg) : str{arg} {}
+    std::string str;
+    bool operator==(const TplConstrClass &t) const { return t.str == str; }
+};
+namespace std {
+template <> struct hash<TplConstrClass> { size_t operator()(const TplConstrClass &t) const { return std::hash<std::string>()(t.str); } };
+}
+
+
 void init_issues(py::module &m) {
     py::module m2 = m.def_submodule("issues");
 
@@ -237,6 +276,125 @@ void init_issues(py::module &m) {
     static std::vector<int> list = { 1, 2, 3 };
     m2.def("make_iterator_1", []() { return py::make_iterator<py::return_value_policy::copy>(list); });
     m2.def("make_iterator_2", []() { return py::make_iterator<py::return_value_policy::automatic>(list); });
+
+    static std::vector<std::string> nothrows;
+    // Issue 461: registering two things with the same name:
+    py::class_<Dupe1>(m2, "Dupe1")
+        .def("get_value", &Dupe1::get_value)
+        ;
+    m2.def("dupe1_factory", [](int v) { return new Dupe1(v); });
+
+    py::class_<Dupe2>(m2, "Dupe2");
+    py::exception<DupeException>(m2, "DupeException");
+
+    try {
+        m2.def("Dupe1", [](int v) { return new Dupe1(v); });
+        nothrows.emplace_back("Dupe1");
+    }
+    catch (std::runtime_error &) {}
+    try {
+        py::class_<Dupe3>(m2, "dupe1_factory");
+        nothrows.emplace_back("dupe1_factory");
+    }
+    catch (std::runtime_error &) {}
+    try {
+        py::exception<Dupe3>(m2, "Dupe2");
+        nothrows.emplace_back("Dupe2");
+    }
+    catch (std::runtime_error &) {}
+    try {
+        m2.def("DupeException", []() { return 30; });
+        nothrows.emplace_back("DupeException1");
+    }
+    catch (std::runtime_error &) {}
+    try {
+        py::class_<DupeException>(m2, "DupeException");
+        nothrows.emplace_back("DupeException2");
+    }
+    catch (std::runtime_error &) {}
+    m2.def("dupe_exception_failures", []() {
+        py::list l;
+        for (auto &e : nothrows) l.append(py::cast(e));
+        return l;
+    });
+
+    /// Issue #471: shared pointer instance not dellocated
+    class SharedChild : public std::enable_shared_from_this<SharedChild> {
+    public:
+        SharedChild() { print_created(this); }
+        ~SharedChild() { print_destroyed(this); }
+    };
+
+    class SharedParent {
+    public:
+        SharedParent() : child(std::make_shared<SharedChild>()) { }
+        const SharedChild &get_child() const { return *child; }
+
+    private:
+        std::shared_ptr<SharedChild> child;
+    };
+
+    py::class_<SharedChild, std::shared_ptr<SharedChild>>(m, "SharedChild");
+    py::class_<SharedParent, std::shared_ptr<SharedParent>>(m, "SharedParent")
+        .def(py::init<>())
+        .def("get_child", &SharedParent::get_child, py::return_value_policy::reference);
+
+    /// Issue/PR #478: unique ptrs constructed and freed without destruction
+    class SpecialHolderObj {
+    public:
+        int val = 0;
+        SpecialHolderObj *ch = nullptr;
+        SpecialHolderObj(int v, bool make_child = true) : val{v}, ch{make_child ? new SpecialHolderObj(val+1, false) : nullptr}
+        { print_created(this, val); }
+        ~SpecialHolderObj() { delete ch; print_destroyed(this); }
+        SpecialHolderObj *child() { return ch; }
+    };
+
+    py::class_<SpecialHolderObj, custom_unique_ptr<SpecialHolderObj>>(m, "SpecialHolderObj")
+        .def(py::init<int>())
+        .def("child", &SpecialHolderObj::child, pybind11::return_value_policy::reference_internal)
+        .def_readwrite("val", &SpecialHolderObj::val)
+        .def_static("holder_cstats", &ConstructorStats::get<custom_unique_ptr<SpecialHolderObj>>,
+                py::return_value_policy::reference);
+
+    /// Issue #484: number conversion generates unhandled exceptions
+    m2.def("test_complex", [](float x) { py::print("{}"_s.format(x)); });
+    m2.def("test_complex", [](std::complex<float> x) { py::print("({}, {})"_s.format(x.real(), x.imag())); });
+
+    /// Issue #511: problem with inheritance + overwritten def_static
+    struct MyBase {
+        static std::unique_ptr<MyBase> make() {
+            return std::unique_ptr<MyBase>(new MyBase());
+        }
+    };
+
+    struct MyDerived : MyBase {
+        static std::unique_ptr<MyDerived> make() {
+            return std::unique_ptr<MyDerived>(new MyDerived());
+        }
+    };
+
+    py::class_<MyBase>(m2, "MyBase")
+        .def_static("make", &MyBase::make);
+
+    py::class_<MyDerived, MyBase>(m2, "MyDerived")
+        .def_static("make", &MyDerived::make)
+        .def_static("make2", &MyDerived::make);
+
+    py::dict d;
+    std::string bar = "bar";
+    d["str"] = bar;
+    d["num"] = 3.7;
+
+    /// Issue #528: templated constructor
+    m2.def("tpl_constr_vector", [](std::vector<TplConstrClass> &) {});
+    m2.def("tpl_constr_map", [](std::unordered_map<TplConstrClass, TplConstrClass> &) {});
+    m2.def("tpl_constr_set", [](std::unordered_set<TplConstrClass> &) {});
+#if defined(PYBIND11_HAS_OPTIONAL)
+    m2.def("tpl_constr_optional", [](std::optional<TplConstrClass> &) {});
+#elif defined(PYBIND11_HAS_EXP_OPTIONAL)
+    m2.def("tpl_constr_optional", [](std::experimental::optional<TplConstrClass> &) {});
+#endif
 }
 
 // MSVC workaround: trying to use a lambda here crashes MSCV
diff --git a/pybind11/tests/test_issues.py b/pybind11/tests/test_issues.py
index e2ab0b45c..e60b5ca90 100644
--- a/pybind11/tests/test_issues.py
+++ b/pybind11/tests/test_issues.py
@@ -1,5 +1,5 @@
 import pytest
-import gc
+from pybind11_tests import ConstructorStats
 
 
 def test_regressions():
@@ -54,12 +54,12 @@ def test_shared_ptr_gc():
     el = ElementList()
     for i in range(10):
         el.add(ElementA(i))
-    gc.collect()
+    pytest.gc_collect()
     for i, v in enumerate(el.get()):
         assert i == v.value()
 
 
-def test_no_id(capture, msg):
+def test_no_id(msg):
     from pybind11_tests.issues import get_element, expect_float, expect_int
 
     with pytest.raises(TypeError) as excinfo:
@@ -82,7 +82,6 @@ def test_no_id(capture, msg):
     assert expect_float(12) == 12
 
 
-
 def test_str_issue(msg):
     """Issue #283: __str__ called on uninitialized instance when constructor arguments invalid"""
     from pybind11_tests.issues import StrIssue
@@ -96,7 +95,7 @@ def test_str_issue(msg):
             1. m.issues.StrIssue(arg0: int)
             2. m.issues.StrIssue()
 
-        Invoked with: no, such, constructor
+        Invoked with: 'no', 'such', 'constructor'
     """
 
 
@@ -130,13 +129,13 @@ def test_nested():
     assert c.b.a.as_base().value == 42
 
     del c
-    gc.collect()
+    pytest.gc_collect()
     del a  # Should't delete while abase is still alive
-    gc.collect()
+    pytest.gc_collect()
 
     assert abase.value == 42
     del abase, b
-    gc.collect()
+    pytest.gc_collect()
 
 
 def test_move_fallback():
@@ -146,13 +145,14 @@ def test_move_fallback():
     m1 = get_moveissue1(1)
     assert m1.value == 1
 
+
 def test_override_ref():
     from pybind11_tests.issues import OverrideTest
     o = OverrideTest("asdf")
 
     # Not allowed (see associated .cpp comment)
-    #i = o.str_ref()
-    #assert o.str_ref() == "asdf"
+    # i = o.str_ref()
+    # assert o.str_ref() == "asdf"
     assert o.str_value() == "asdf"
 
     assert o.A_value().value == "hi"
@@ -161,18 +161,22 @@ def test_override_ref():
     a.value = "bye"
     assert a.value == "bye"
 
+
 def test_operators_notimplemented(capture):
     from pybind11_tests.issues import OpTest1, OpTest2
     with capture:
-        C1, C2 = OpTest1(), OpTest2()
-        C1 + C1
-        C2 + C2
-        C2 + C1
-        C1 + C2
-    assert capture == """Add OpTest1 with OpTest1
-Add OpTest2 with OpTest2
-Add OpTest2 with OpTest1
-Add OpTest2 with OpTest1"""
+        c1, c2 = OpTest1(), OpTest2()
+        c1 + c1
+        c2 + c2
+        c2 + c1
+        c1 + c2
+    assert capture == """
+        Add OpTest1 with OpTest1
+        Add OpTest2 with OpTest2
+        Add OpTest2 with OpTest1
+        Add OpTest2 with OpTest1
+    """
+
 
 def test_iterator_rvpolicy():
     """ Issue 388: Can't make iterators via make_iterator() with different r/v policies """
@@ -181,4 +185,67 @@ def test_iterator_rvpolicy():
 
     assert list(make_iterator_1()) == [1, 2, 3]
     assert list(make_iterator_2()) == [1, 2, 3]
-    assert(type(make_iterator_1()) != type(make_iterator_2()))
+    assert not isinstance(make_iterator_1(), type(make_iterator_2()))
+
+
+def test_dupe_assignment():
+    """ Issue 461: overwriting a class with a function """
+    from pybind11_tests.issues import dupe_exception_failures
+    assert dupe_exception_failures() == []
+
+
+def test_enable_shared_from_this_with_reference_rvp():
+    """ Issue #471: shared pointer instance not dellocated """
+    from pybind11_tests import SharedParent, SharedChild
+
+    parent = SharedParent()
+    child = parent.get_child()
+
+    cstats = ConstructorStats.get(SharedChild)
+    assert cstats.alive() == 1
+    del child, parent
+    assert cstats.alive() == 0
+
+
+def test_non_destructed_holders():
+    """ Issue #478: unique ptrs constructed and freed without destruction """
+    from pybind11_tests import SpecialHolderObj
+
+    a = SpecialHolderObj(123)
+    b = a.child()
+
+    assert a.val == 123
+    assert b.val == 124
+
+    cstats = SpecialHolderObj.holder_cstats()
+    assert cstats.alive() == 1
+    del b
+    assert cstats.alive() == 1
+    del a
+    assert cstats.alive() == 0
+
+
+def test_complex_cast(capture):
+    """ Issue #484: number conversion generates unhandled exceptions """
+    from pybind11_tests.issues import test_complex
+
+    with capture:
+        test_complex(1)
+        test_complex(2j)
+
+    assert capture == """
+        1.0
+        (0.0, 2.0)
+    """
+
+
+def test_inheritance_override_def_static():
+    from pybind11_tests.issues import MyBase, MyDerived
+
+    b = MyBase.make()
+    d1 = MyDerived.make2()
+    d2 = MyDerived.make()
+
+    assert isinstance(b, MyBase)
+    assert isinstance(d1, MyDerived)
+    assert isinstance(d2, MyDerived)
diff --git a/pybind11/tests/test_keep_alive.py b/pybind11/tests/test_keep_alive.py
index c83d5d28b..bfd7d40c3 100644
--- a/pybind11/tests/test_keep_alive.py
+++ b/pybind11/tests/test_keep_alive.py
@@ -1,4 +1,4 @@
-import gc
+import pytest
 
 
 def test_keep_alive_argument(capture):
@@ -9,14 +9,14 @@ def test_keep_alive_argument(capture):
     assert capture == "Allocating parent."
     with capture:
         p.addChild(Child())
-        gc.collect()
+        pytest.gc_collect()
     assert capture == """
         Allocating child.
         Releasing child.
     """
     with capture:
         del p
-        gc.collect()
+        pytest.gc_collect()
     assert capture == "Releasing parent."
 
     with capture:
@@ -24,11 +24,11 @@ def test_keep_alive_argument(capture):
     assert capture == "Allocating parent."
     with capture:
         p.addChildKeepAlive(Child())
-        gc.collect()
+        pytest.gc_collect()
     assert capture == "Allocating child."
     with capture:
         del p
-        gc.collect()
+        pytest.gc_collect()
     assert capture == """
         Releasing parent.
         Releasing child.
@@ -43,14 +43,14 @@ def test_keep_alive_return_value(capture):
     assert capture == "Allocating parent."
     with capture:
         p.returnChild()
-        gc.collect()
+        pytest.gc_collect()
     assert capture == """
         Allocating child.
         Releasing child.
     """
     with capture:
         del p
-        gc.collect()
+        pytest.gc_collect()
     assert capture == "Releasing parent."
 
     with capture:
@@ -58,11 +58,11 @@ def test_keep_alive_return_value(capture):
     assert capture == "Allocating parent."
     with capture:
         p.returnChildKeepAlive()
-        gc.collect()
+        pytest.gc_collect()
     assert capture == "Allocating child."
     with capture:
         del p
-        gc.collect()
+        pytest.gc_collect()
     assert capture == """
         Releasing parent.
         Releasing child.
@@ -77,11 +77,11 @@ def test_return_none(capture):
     assert capture == "Allocating parent."
     with capture:
         p.returnNullChildKeepAliveChild()
-        gc.collect()
+        pytest.gc_collect()
     assert capture == ""
     with capture:
         del p
-        gc.collect()
+        pytest.gc_collect()
     assert capture == "Releasing parent."
 
     with capture:
@@ -89,10 +89,9 @@ def test_return_none(capture):
     assert capture == "Allocating parent."
     with capture:
         p.returnNullChildKeepAliveParent()
-        gc.collect()
+        pytest.gc_collect()
     assert capture == ""
     with capture:
         del p
-        gc.collect()
+        pytest.gc_collect()
     assert capture == "Releasing parent."
-
diff --git a/pybind11/tests/test_methods_and_attributes.cpp b/pybind11/tests/test_methods_and_attributes.cpp
index 82c81f724..f7d6d6855 100644
--- a/pybind11/tests/test_methods_and_attributes.cpp
+++ b/pybind11/tests/test_methods_and_attributes.cpp
@@ -50,6 +50,11 @@ public:
     int *internal4() { return &value; }                           // return by pointer
     const int *internal5() { return &value; }                     // return by const pointer
 
+    py::str overloaded(int, float) { return "(int, float)"; }
+    py::str overloaded(float, int) { return "(float, int)"; }
+    py::str overloaded(int, float) const { return "(int, float) const"; }
+    py::str overloaded(float, int) const { return "(float, int) const"; }
+
     int value = 0;
 };
 
@@ -66,6 +71,24 @@ struct TestProperties {
 
 int TestProperties::static_value = 1;
 
+struct SimpleValue { int value = 1; };
+
+struct TestPropRVP {
+    SimpleValue v1;
+    SimpleValue v2;
+    static SimpleValue sv1;
+    static SimpleValue sv2;
+
+    const SimpleValue &get1() const { return v1; }
+    const SimpleValue &get2() const { return v2; }
+    SimpleValue get_rvalue() const { return v2; }
+    void set1(int v) { v1.value = v; }
+    void set2(int v) { v2.value = v; }
+};
+
+SimpleValue TestPropRVP::sv1{};
+SimpleValue TestPropRVP::sv2{};
+
 class DynamicClass {
 public:
     DynamicClass() { print_default_created(this); }
@@ -99,11 +122,21 @@ test_initializer methods_and_attributes([](py::module &m) {
         .def("internal3", &ExampleMandA::internal3)
         .def("internal4", &ExampleMandA::internal4)
         .def("internal5", &ExampleMandA::internal5)
+#if defined(PYBIND11_OVERLOAD_CAST)
+        .def("overloaded", py::overload_cast<int, float>(&ExampleMandA::overloaded))
+        .def("overloaded", py::overload_cast<float, int>(&ExampleMandA::overloaded))
+        .def("overloaded_const", py::overload_cast<int, float>(&ExampleMandA::overloaded, py::const_))
+        .def("overloaded_const", py::overload_cast<float, int>(&ExampleMandA::overloaded, py::const_))
+#else
+        .def("overloaded", static_cast<py::str (ExampleMandA::*)(int, float)>(&ExampleMandA::overloaded))
+        .def("overloaded", static_cast<py::str (ExampleMandA::*)(float, int)>(&ExampleMandA::overloaded))
+        .def("overloaded_const", static_cast<py::str (ExampleMandA::*)(int, float) const>(&ExampleMandA::overloaded))
+        .def("overloaded_const", static_cast<py::str (ExampleMandA::*)(float, int) const>(&ExampleMandA::overloaded))
+#endif
         .def("__str__", &ExampleMandA::toString)
-        .def_readwrite("value", &ExampleMandA::value)
-        ;
+        .def_readwrite("value", &ExampleMandA::value);
 
-    py::class_<TestProperties>(m, "TestProperties")
+    py::class_<TestProperties>(m, "TestProperties", py::metaclass())
         .def(py::init<>())
         .def_readonly("def_readonly", &TestProperties::value)
         .def_readwrite("def_readwrite", &TestProperties::value)
@@ -117,9 +150,37 @@ test_initializer methods_and_attributes([](py::module &m) {
                              [](py::object) { return TestProperties::static_get(); },
                              [](py::object, int v) { return TestProperties::static_set(v); });
 
+    py::class_<SimpleValue>(m, "SimpleValue")
+        .def_readwrite("value", &SimpleValue::value);
+
+    auto static_get1 = [](py::object) -> const SimpleValue & { return TestPropRVP::sv1; };
+    auto static_get2 = [](py::object) -> const SimpleValue & { return TestPropRVP::sv2; };
+    auto static_set1 = [](py::object, int v) { TestPropRVP::sv1.value = v; };
+    auto static_set2 = [](py::object, int v) { TestPropRVP::sv2.value = v; };
+    auto rvp_copy = py::return_value_policy::copy;
+
+    py::class_<TestPropRVP>(m, "TestPropRVP", py::metaclass())
+        .def(py::init<>())
+        .def_property_readonly("ro_ref", &TestPropRVP::get1)
+        .def_property_readonly("ro_copy", &TestPropRVP::get2, rvp_copy)
+        .def_property_readonly("ro_func", py::cpp_function(&TestPropRVP::get2, rvp_copy))
+        .def_property("rw_ref", &TestPropRVP::get1, &TestPropRVP::set1)
+        .def_property("rw_copy", &TestPropRVP::get2, &TestPropRVP::set2, rvp_copy)
+        .def_property("rw_func", py::cpp_function(&TestPropRVP::get2, rvp_copy), &TestPropRVP::set2)
+        .def_property_readonly_static("static_ro_ref", static_get1)
+        .def_property_readonly_static("static_ro_copy", static_get2, rvp_copy)
+        .def_property_readonly_static("static_ro_func", py::cpp_function(static_get2, rvp_copy))
+        .def_property_static("static_rw_ref", static_get1, static_set1)
+        .def_property_static("static_rw_copy", static_get2, static_set2, rvp_copy)
+        .def_property_static("static_rw_func", py::cpp_function(static_get2, rvp_copy), static_set2)
+        .def_property_readonly("rvalue", &TestPropRVP::get_rvalue)
+        .def_property_readonly_static("static_rvalue", [](py::object) { return SimpleValue(); });
+
+#if !defined(PYPY_VERSION)
     py::class_<DynamicClass>(m, "DynamicClass", py::dynamic_attr())
         .def(py::init());
 
     py::class_<CppDerivedDynamicClass, DynamicClass>(m, "CppDerivedDynamicClass")
         .def(py::init());
+#endif
 });
diff --git a/pybind11/tests/test_methods_and_attributes.py b/pybind11/tests/test_methods_and_attributes.py
index f4116c3b6..840ee707b 100644
--- a/pybind11/tests/test_methods_and_attributes.py
+++ b/pybind11/tests/test_methods_and_attributes.py
@@ -31,6 +31,11 @@ def test_methods_and_attributes():
     assert instance1.internal4() == 320
     assert instance1.internal5() == 320
 
+    assert instance1.overloaded(1, 1.0) == "(int, float)"
+    assert instance1.overloaded(2.0, 2) == "(float, int)"
+    assert instance1.overloaded_const(3, 3.0) == "(int, float) const"
+    assert instance1.overloaded_const(4.0, 4) == "(float, int) const"
+
     assert instance1.value == 320
     instance1.value = 100
     assert str(instance1) == "ExampleMandA[value=100]"
@@ -85,6 +90,54 @@ def test_static_properties():
     assert Type.def_property_static == 3
 
 
+@pytest.mark.parametrize("access", ["ro", "rw", "static_ro", "static_rw"])
+def test_property_return_value_policies(access):
+    from pybind11_tests import TestPropRVP
+
+    if not access.startswith("static"):
+        obj = TestPropRVP()
+    else:
+        obj = TestPropRVP
+
+    ref = getattr(obj, access + "_ref")
+    assert ref.value == 1
+    ref.value = 2
+    assert getattr(obj, access + "_ref").value == 2
+    ref.value = 1  # restore original value for static properties
+
+    copy = getattr(obj, access + "_copy")
+    assert copy.value == 1
+    copy.value = 2
+    assert getattr(obj, access + "_copy").value == 1
+
+    copy = getattr(obj, access + "_func")
+    assert copy.value == 1
+    copy.value = 2
+    assert getattr(obj, access + "_func").value == 1
+
+
+def test_property_rvalue_policy():
+    """When returning an rvalue, the return value policy is automatically changed from
+    `reference(_internal)` to `move`. The following would not work otherwise.
+    """
+    from pybind11_tests import TestPropRVP
+
+    instance = TestPropRVP()
+    o = instance.rvalue
+    assert o.value == 1
+
+
+def test_property_rvalue_policy_static():
+    """When returning an rvalue, the return value policy is automatically changed from
+    `reference(_internal)` to `move`. The following would not work otherwise.
+    """
+    from pybind11_tests import TestPropRVP
+    o = TestPropRVP.static_rvalue
+    assert o.value == 1
+
+
+# https://bitbucket.org/pypy/pypy/issues/2447
+@pytest.unsupported_on_pypy
 def test_dynamic_attributes():
     from pybind11_tests import DynamicClass, CppDerivedDynamicClass
 
@@ -127,6 +180,8 @@ def test_dynamic_attributes():
         assert cstats.alive() == 0
 
 
+# https://bitbucket.org/pypy/pypy/issues/2447
+@pytest.unsupported_on_pypy
 def test_cyclic_gc():
     from pybind11_tests import DynamicClass
 
diff --git a/pybind11/tests/test_multiple_inheritance.cpp b/pybind11/tests/test_multiple_inheritance.cpp
index 3cb12b68d..c57cb852a 100644
--- a/pybind11/tests/test_multiple_inheritance.cpp
+++ b/pybind11/tests/test_multiple_inheritance.cpp
@@ -10,7 +10,6 @@
 
 #include "pybind11_tests.h"
 
-
 struct Base1 {
     Base1(int i) : i(i) { }
     int foo() { return i; }
diff --git a/pybind11/tests/test_multiple_inheritance.py b/pybind11/tests/test_multiple_inheritance.py
index 7e1e08633..c10298d70 100644
--- a/pybind11/tests/test_multiple_inheritance.py
+++ b/pybind11/tests/test_multiple_inheritance.py
@@ -1,7 +1,4 @@
-import pytest
-
-
-def test_multiple_inheritance_cpp(msg):
+def test_multiple_inheritance_cpp():
     from pybind11_tests import MIType
 
     mt = MIType(3, 4)
@@ -10,7 +7,7 @@ def test_multiple_inheritance_cpp(msg):
     assert mt.bar() == 4
 
 
-def test_multiple_inheritance_mix1(msg):
+def test_multiple_inheritance_mix1():
     from pybind11_tests import Base2
 
     class Base1:
@@ -31,7 +28,7 @@ def test_multiple_inheritance_mix1(msg):
     assert mt.bar() == 4
 
 
-def test_multiple_inheritance_mix2(msg):
+def test_multiple_inheritance_mix2():
     from pybind11_tests import Base1
 
     class Base2:
@@ -52,7 +49,7 @@ def test_multiple_inheritance_mix2(msg):
     assert mt.bar() == 4
 
 
-def test_multiple_inheritance_virtbase(msg):
+def test_multiple_inheritance_virtbase():
     from pybind11_tests import Base12a, bar_base2a, bar_base2a_sharedptr
 
     class MITypePy(Base12a):
diff --git a/pybind11/tests/test_numpy_array.cpp b/pybind11/tests/test_numpy_array.cpp
index ec4ddacb9..14c4c2999 100644
--- a/pybind11/tests/test_numpy_array.cpp
+++ b/pybind11/tests/test_numpy_array.cpp
@@ -18,11 +18,11 @@
 using arr = py::array;
 using arr_t = py::array_t<uint16_t, 0>;
 
-template<typename... Ix> arr data(const arr& a, Ix&&... index) {
+template<typename... Ix> arr data(const arr& a, Ix... index) {
     return arr(a.nbytes() - a.offset_at(index...), (const uint8_t *) a.data(index...));
 }
 
-template<typename... Ix> arr data_t(const arr_t& a, Ix&&... index) {
+template<typename... Ix> arr data_t(const arr_t& a, Ix... index) {
     return arr(a.size() - a.index_at(index...), a.data(index...));
 }
 
@@ -40,26 +40,26 @@ arr_t& mutate_data_t(arr_t& a) {
     return a;
 }
 
-template<typename... Ix> arr& mutate_data(arr& a, Ix&&... index) {
+template<typename... Ix> arr& mutate_data(arr& a, Ix... index) {
     auto ptr = (uint8_t *) a.mutable_data(index...);
     for (size_t i = 0; i < a.nbytes() - a.offset_at(index...); i++)
         ptr[i] = (uint8_t) (ptr[i] * 2);
     return a;
 }
 
-template<typename... Ix> arr_t& mutate_data_t(arr_t& a, Ix&&... index) {
+template<typename... Ix> arr_t& mutate_data_t(arr_t& a, Ix... index) {
     auto ptr = a.mutable_data(index...);
     for (size_t i = 0; i < a.size() - a.index_at(index...); i++)
         ptr[i]++;
     return a;
 }
 
-template<typename... Ix> size_t index_at(const arr& a, Ix&&... idx) { return a.index_at(idx...); }
-template<typename... Ix> size_t index_at_t(const arr_t& a, Ix&&... idx) { return a.index_at(idx...); }
-template<typename... Ix> size_t offset_at(const arr& a, Ix&&... idx) { return a.offset_at(idx...); }
-template<typename... Ix> size_t offset_at_t(const arr_t& a, Ix&&... idx) { return a.offset_at(idx...); }
-template<typename... Ix> size_t at_t(const arr_t& a, Ix&&... idx) { return a.at(idx...); }
-template<typename... Ix> arr_t& mutate_at_t(arr_t& a, Ix&&... idx) { a.mutable_at(idx...)++; return a; }
+template<typename... Ix> size_t index_at(const arr& a, Ix... idx) { return a.index_at(idx...); }
+template<typename... Ix> size_t index_at_t(const arr_t& a, Ix... idx) { return a.index_at(idx...); }
+template<typename... Ix> size_t offset_at(const arr& a, Ix... idx) { return a.offset_at(idx...); }
+template<typename... Ix> size_t offset_at_t(const arr_t& a, Ix... idx) { return a.offset_at(idx...); }
+template<typename... Ix> size_t at_t(const arr_t& a, Ix... idx) { return a.at(idx...); }
+template<typename... Ix> arr_t& mutate_at_t(arr_t& a, Ix... idx) { a.mutable_at(idx...)++; return a; }
 
 #define def_index_fn(name, type) \
     sm.def(#name, [](type a) { return name(a); }); \
@@ -124,4 +124,30 @@ test_initializer numpy_array([](py::module &m) {
             return py::array_t<int>({2}, {4}, a.data, obj);
         }
     );
+
+    sm.def("function_taking_uint64", [](uint64_t) { });
+
+    sm.def("isinstance_untyped", [](py::object yes, py::object no) {
+        return py::isinstance<py::array>(yes) && !py::isinstance<py::array>(no);
+    });
+
+    sm.def("isinstance_typed", [](py::object o) {
+        return py::isinstance<py::array_t<double>>(o) && !py::isinstance<py::array_t<int>>(o);
+    });
+
+    sm.def("default_constructors", []() {
+        return py::dict(
+            "array"_a=py::array(),
+            "array_t<int32>"_a=py::array_t<std::int32_t>(),
+            "array_t<double>"_a=py::array_t<double>()
+        );
+    });
+
+    sm.def("converting_constructors", [](py::object o) {
+        return py::dict(
+            "array"_a=py::array(o),
+            "array_t<int32>"_a=py::array_t<std::int32_t>(o),
+            "array_t<double>"_a=py::array_t<double>(o)
+        );
+    });
 });
diff --git a/pybind11/tests/test_numpy_array.py b/pybind11/tests/test_numpy_array.py
index ae1954a65..b96790c39 100644
--- a/pybind11/tests/test_numpy_array.py
+++ b/pybind11/tests/test_numpy_array.py
@@ -1,5 +1,4 @@
 import pytest
-import gc
 
 with pytest.suppress(ImportError):
     import numpy as np
@@ -144,10 +143,10 @@ def test_bounds_check(arr):
              mutate_data, mutate_data_t, at_t, mutate_at_t)
     for func in funcs:
         with pytest.raises(IndexError) as excinfo:
-            index_at(arr, 2, 0)
+            func(arr, 2, 0)
         assert str(excinfo.value) == 'index 2 is out of bounds for axis 0 with size 2'
         with pytest.raises(IndexError) as excinfo:
-            index_at(arr, 0, 4)
+            func(arr, 0, 4)
         assert str(excinfo.value) == 'index 4 is out of bounds for axis 1 with size 3'
 
 
@@ -166,50 +165,49 @@ def test_make_c_f_array():
 def test_wrap():
     from pybind11_tests.array import wrap
 
-    def assert_references(A, B):
-        assert A is not B
-        assert A.__array_interface__['data'][0] == \
-               B.__array_interface__['data'][0]
-        assert A.shape == B.shape
-        assert A.strides == B.strides
-        assert A.flags.c_contiguous == B.flags.c_contiguous
-        assert A.flags.f_contiguous == B.flags.f_contiguous
-        assert A.flags.writeable == B.flags.writeable
-        assert A.flags.aligned == B.flags.aligned
-        assert A.flags.updateifcopy == B.flags.updateifcopy
-        assert np.all(A == B)
-        assert not B.flags.owndata
-        assert B.base is A
-        if A.flags.writeable and A.ndim == 2:
-            A[0, 0] = 1234
-            assert B[0, 0] == 1234
-
-    A1 = np.array([1, 2], dtype=np.int16)
-    assert A1.flags.owndata and A1.base is None
-    A2 = wrap(A1)
-    assert_references(A1, A2)
-
-    A1 = np.array([[1, 2], [3, 4]], dtype=np.float32, order='F')
-    assert A1.flags.owndata and A1.base is None
-    A2 = wrap(A1)
-    assert_references(A1, A2)
-
-    A1 = np.array([[1, 2], [3, 4]], dtype=np.float32, order='C')
-    A1.flags.writeable = False
-    A2 = wrap(A1)
-    assert_references(A1, A2)
-
-    A1 = np.random.random((4, 4, 4))
-    A2 = wrap(A1)
-    assert_references(A1, A2)
-
-    A1 = A1.transpose()
-    A2 = wrap(A1)
-    assert_references(A1, A2)
-
-    A1 = A1.diagonal()
-    A2 = wrap(A1)
-    assert_references(A1, A2)
+    def assert_references(a, b):
+        assert a is not b
+        assert a.__array_interface__['data'][0] == b.__array_interface__['data'][0]
+        assert a.shape == b.shape
+        assert a.strides == b.strides
+        assert a.flags.c_contiguous == b.flags.c_contiguous
+        assert a.flags.f_contiguous == b.flags.f_contiguous
+        assert a.flags.writeable == b.flags.writeable
+        assert a.flags.aligned == b.flags.aligned
+        assert a.flags.updateifcopy == b.flags.updateifcopy
+        assert np.all(a == b)
+        assert not b.flags.owndata
+        assert b.base is a
+        if a.flags.writeable and a.ndim == 2:
+            a[0, 0] = 1234
+            assert b[0, 0] == 1234
+
+    a1 = np.array([1, 2], dtype=np.int16)
+    assert a1.flags.owndata and a1.base is None
+    a2 = wrap(a1)
+    assert_references(a1, a2)
+
+    a1 = np.array([[1, 2], [3, 4]], dtype=np.float32, order='F')
+    assert a1.flags.owndata and a1.base is None
+    a2 = wrap(a1)
+    assert_references(a1, a2)
+
+    a1 = np.array([[1, 2], [3, 4]], dtype=np.float32, order='C')
+    a1.flags.writeable = False
+    a2 = wrap(a1)
+    assert_references(a1, a2)
+
+    a1 = np.random.random((4, 4, 4))
+    a2 = wrap(a1)
+    assert_references(a1, a2)
+
+    a1 = a1.transpose()
+    a2 = wrap(a1)
+    assert_references(a1, a2)
+
+    a1 = a1.diagonal()
+    a2 = wrap(a1)
+    assert_references(a1, a2)
 
 
 @pytest.requires_numpy
@@ -221,7 +219,7 @@ def test_numpy_view(capture):
         ac_view_2 = ac.numpy_view()
         assert np.all(ac_view_1 == np.array([1, 2], dtype=np.int32))
         del ac
-        gc.collect()
+        pytest.gc_collect()
     assert capture == """
         ArrayClass()
         ArrayClass::numpy_view()
@@ -234,7 +232,43 @@ def test_numpy_view(capture):
     with capture:
         del ac_view_1
         del ac_view_2
-        gc.collect()
+        pytest.gc_collect()
+        pytest.gc_collect()
     assert capture == """
         ~ArrayClass()
     """
+
+
+@pytest.unsupported_on_pypy
+@pytest.requires_numpy
+def test_cast_numpy_int64_to_uint64():
+    from pybind11_tests.array import function_taking_uint64
+    function_taking_uint64(123)
+    function_taking_uint64(np.uint64(123))
+
+
+@pytest.requires_numpy
+def test_isinstance():
+    from pybind11_tests.array import isinstance_untyped, isinstance_typed
+
+    assert isinstance_untyped(np.array([1, 2, 3]), "not an array")
+    assert isinstance_typed(np.array([1.0, 2.0, 3.0]))
+
+
+@pytest.requires_numpy
+def test_constructors():
+    from pybind11_tests.array import default_constructors, converting_constructors
+
+    defaults = default_constructors()
+    for a in defaults.values():
+        assert a.size == 0
+    assert defaults["array"].dtype == np.array([]).dtype
+    assert defaults["array_t<int32>"].dtype == np.int32
+    assert defaults["array_t<double>"].dtype == np.float64
+
+    results = converting_constructors([1, 2, 3])
+    for a in results.values():
+        np.testing.assert_array_equal(a, [1, 2, 3])
+    assert results["array"].dtype == np.int_
+    assert results["array_t<int32>"].dtype == np.int32
+    assert results["array_t<double>"].dtype == np.float64
diff --git a/pybind11/tests/test_numpy_dtypes.cpp b/pybind11/tests/test_numpy_dtypes.cpp
index 86e6e68cc..3894f6a30 100644
--- a/pybind11/tests/test_numpy_dtypes.cpp
+++ b/pybind11/tests/test_numpy_dtypes.cpp
@@ -67,6 +67,11 @@ struct StringStruct {
     std::array<char, 3> b;
 };
 
+PYBIND11_PACKED(struct StructWithUglyNames {
+    int8_t __x__;
+    uint64_t __y__;
+});
+
 enum class E1 : int64_t { A = -1, B = 1 };
 enum E2 : uint8_t { X = 1, Y = 2 };
 
@@ -191,13 +196,14 @@ py::list print_format_descriptors() {
 
 py::list print_dtypes() {
     const auto dtypes = {
-        py::dtype::of<SimpleStruct>().str(),
-        py::dtype::of<PackedStruct>().str(),
-        py::dtype::of<NestedStruct>().str(),
-        py::dtype::of<PartialStruct>().str(),
-        py::dtype::of<PartialNestedStruct>().str(),
-        py::dtype::of<StringStruct>().str(),
-        py::dtype::of<EnumStruct>().str()
+        py::str(py::dtype::of<SimpleStruct>()),
+        py::str(py::dtype::of<PackedStruct>()),
+        py::str(py::dtype::of<NestedStruct>()),
+        py::str(py::dtype::of<PartialStruct>()),
+        py::str(py::dtype::of<PartialNestedStruct>()),
+        py::str(py::dtype::of<StringStruct>()),
+        py::str(py::dtype::of<EnumStruct>()),
+        py::str(py::dtype::of<StructWithUglyNames>())
     };
     auto l = py::list();
     for (const auto &s : dtypes) {
@@ -276,11 +282,24 @@ py::list test_dtype_ctors() {
     dict["itemsize"] = py::int_(20);
     list.append(py::dtype::from_args(dict));
     list.append(py::dtype(names, formats, offsets, 20));
-    list.append(py::dtype(py::buffer_info((void *) 0, 1, "I", 1)));
-    list.append(py::dtype(py::buffer_info((void *) 0, 1, "T{i:a:f:b:}", 1)));
+    list.append(py::dtype(py::buffer_info((void *) 0, sizeof(unsigned int), "I", 1)));
+    list.append(py::dtype(py::buffer_info((void *) 0, 0, "T{i:a:f:b:}", 1)));
     return list;
 }
 
+struct TrailingPaddingStruct {
+    int32_t a;
+    char b;
+};
+
+py::dtype trailing_padding_dtype() {
+    return py::dtype::of<TrailingPaddingStruct>();
+}
+
+py::dtype buffer_to_dtype(py::buffer& buf) {
+    return py::dtype(buf.request());
+}
+
 py::list test_dtype_methods() {
     py::list list;
     auto dt1 = py::dtype::of<int32_t>();
@@ -298,6 +317,9 @@ test_initializer numpy_dtypes([](py::module &m) {
         return;
     }
 
+    // typeinfo may be registered before the dtype descriptor for scalar casts to work...
+    py::class_<SimpleStruct>(m, "SimpleStruct");
+
     PYBIND11_NUMPY_DTYPE(SimpleStruct, x, y, z);
     PYBIND11_NUMPY_DTYPE(PackedStruct, x, y, z);
     PYBIND11_NUMPY_DTYPE(NestedStruct, a, b);
@@ -305,6 +327,12 @@ test_initializer numpy_dtypes([](py::module &m) {
     PYBIND11_NUMPY_DTYPE(PartialNestedStruct, a);
     PYBIND11_NUMPY_DTYPE(StringStruct, a, b);
     PYBIND11_NUMPY_DTYPE(EnumStruct, e1, e2);
+    PYBIND11_NUMPY_DTYPE(TrailingPaddingStruct, a, b);
+
+    // ... or after
+    py::class_<PackedStruct>(m, "PackedStruct");
+
+    PYBIND11_NUMPY_DTYPE_EX(StructWithUglyNames, __x__, "x", __y__, "y");
 
     m.def("create_rec_simple", &create_recarray<SimpleStruct>);
     m.def("create_rec_packed", &create_recarray<PackedStruct>);
@@ -324,6 +352,12 @@ test_initializer numpy_dtypes([](py::module &m) {
     m.def("test_array_ctors", &test_array_ctors);
     m.def("test_dtype_ctors", &test_dtype_ctors);
     m.def("test_dtype_methods", &test_dtype_methods);
+    m.def("trailing_padding_dtype", &trailing_padding_dtype);
+    m.def("buffer_to_dtype", &buffer_to_dtype);
+    m.def("f_simple", [](SimpleStruct s) { return s.y * 10; });
+    m.def("f_packed", [](PackedStruct s) { return s.y * 10; });
+    m.def("f_nested", [](NestedStruct s) { return s.a.y * 10; });
+    m.def("register_dtype", []() { PYBIND11_NUMPY_DTYPE(SimpleStruct, x, y, z); });
 });
 
 #undef PYBIND11_PACKED
diff --git a/pybind11/tests/test_numpy_dtypes.py b/pybind11/tests/test_numpy_dtypes.py
index 22f5c662f..52ebe0ede 100644
--- a/pybind11/tests/test_numpy_dtypes.py
+++ b/pybind11/tests/test_numpy_dtypes.py
@@ -1,11 +1,20 @@
+import re
 import pytest
+
 with pytest.suppress(ImportError):
     import numpy as np
 
-    simple_dtype = np.dtype({'names': ['x', 'y', 'z'],
-                             'formats': ['?', 'u4', 'f4'],
-                             'offsets': [0, 4, 8]})
-    packed_dtype = np.dtype([('x', '?'), ('y', 'u4'), ('z', 'f4')])
+
+@pytest.fixture(scope='module')
+def simple_dtype():
+    return np.dtype({'names': ['x', 'y', 'z'],
+                     'formats': ['?', 'u4', 'f4'],
+                     'offsets': [0, 4, 8]})
+
+
+@pytest.fixture(scope='module')
+def packed_dtype():
+    return np.dtype([('x', '?'), ('y', 'u4'), ('z', 'f4')])
 
 
 def assert_equal(actual, expected_data, expected_dtype):
@@ -18,31 +27,35 @@ def test_format_descriptors():
 
     with pytest.raises(RuntimeError) as excinfo:
         get_format_unbound()
-    assert 'unsupported buffer format' in str(excinfo.value)
+    assert re.match('^NumPy type info missing for .*UnboundStruct.*$', str(excinfo.value))
 
     assert print_format_descriptors() == [
-        "T{=?:x:3x=I:y:=f:z:}",
-        "T{=?:x:=I:y:=f:z:}",
-        "T{=T{=?:x:3x=I:y:=f:z:}:a:=T{=?:x:=I:y:=f:z:}:b:}",
-        "T{=?:x:3x=I:y:=f:z:12x}",
-        "T{8x=T{=?:x:3x=I:y:=f:z:12x}:a:8x}",
-        "T{=3s:a:=3s:b:}",
-        'T{=q:e1:=B:e2:}'
+        "T{?:x:3xI:y:f:z:}",
+        "T{?:x:=I:y:=f:z:}",
+        "T{T{?:x:3xI:y:f:z:}:a:T{?:x:=I:y:=f:z:}:b:}",
+        "T{?:x:3xI:y:f:z:12x}",
+        "T{8xT{?:x:3xI:y:f:z:12x}:a:8x}",
+        "T{3s:a:3s:b:}",
+        'T{q:e1:B:e2:}'
     ]
 
 
 @pytest.requires_numpy
-def test_dtype():
-    from pybind11_tests import print_dtypes, test_dtype_ctors, test_dtype_methods
+def test_dtype(simple_dtype):
+    from pybind11_tests import (print_dtypes, test_dtype_ctors, test_dtype_methods,
+                                trailing_padding_dtype, buffer_to_dtype)
 
     assert print_dtypes() == [
         "{'names':['x','y','z'], 'formats':['?','<u4','<f4'], 'offsets':[0,4,8], 'itemsize':12}",
         "[('x', '?'), ('y', '<u4'), ('z', '<f4')]",
-        "[('a', {'names':['x','y','z'], 'formats':['?','<u4','<f4'], 'offsets':[0,4,8], 'itemsize':12}), ('b', [('x', '?'), ('y', '<u4'), ('z', '<f4')])]",
+        "[('a', {'names':['x','y','z'], 'formats':['?','<u4','<f4'], 'offsets':[0,4,8],"
+        " 'itemsize':12}), ('b', [('x', '?'), ('y', '<u4'), ('z', '<f4')])]",
         "{'names':['x','y','z'], 'formats':['?','<u4','<f4'], 'offsets':[0,4,8], 'itemsize':24}",
-        "{'names':['a'], 'formats':[{'names':['x','y','z'], 'formats':['?','<u4','<f4'], 'offsets':[0,4,8], 'itemsize':24}], 'offsets':[8], 'itemsize':40}",
+        "{'names':['a'], 'formats':[{'names':['x','y','z'], 'formats':['?','<u4','<f4'],"
+        " 'offsets':[0,4,8], 'itemsize':24}], 'offsets':[8], 'itemsize':40}",
         "[('a', 'S3'), ('b', 'S3')]",
-        "[('e1', '<i8'), ('e2', 'u1')]"
+        "[('e1', '<i8'), ('e2', 'u1')]",
+        "[('x', 'i1'), ('y', '<u8')]"
     ]
 
     d1 = np.dtype({'names': ['a', 'b'], 'formats': ['int32', 'float64'],
@@ -54,9 +67,11 @@ def test_dtype():
     assert test_dtype_methods() == [np.dtype('int32'), simple_dtype, False, True,
                                     np.dtype('int32').itemsize, simple_dtype.itemsize]
 
+    assert trailing_padding_dtype() == buffer_to_dtype(np.zeros(1, trailing_padding_dtype()))
+
 
 @pytest.requires_numpy
-def test_recarray():
+def test_recarray(simple_dtype, packed_dtype):
     from pybind11_tests import (create_rec_simple, create_rec_packed, create_rec_nested,
                                 print_rec_simple, print_rec_packed, print_rec_nested,
                                 create_rec_partial, create_rec_partial_nested)
@@ -105,7 +120,8 @@ def test_recarray():
     ]
 
     arr = create_rec_partial(3)
-    assert str(arr.dtype) == "{'names':['x','y','z'], 'formats':['?','<u4','<f4'], 'offsets':[0,4,8], 'itemsize':24}"
+    assert str(arr.dtype) == \
+        "{'names':['x','y','z'], 'formats':['?','<u4','<f4'], 'offsets':[0,4,8], 'itemsize':24}"
     partial_dtype = arr.dtype
     assert '' not in arr.dtype.fields
     assert partial_dtype.itemsize > simple_dtype.itemsize
@@ -113,7 +129,9 @@ def test_recarray():
     assert_equal(arr, elements, packed_dtype)
 
     arr = create_rec_partial_nested(3)
-    assert str(arr.dtype) == "{'names':['a'], 'formats':[{'names':['x','y','z'], 'formats':['?','<u4','<f4'], 'offsets':[0,4,8], 'itemsize':24}], 'offsets':[8], 'itemsize':40}"
+    assert str(arr.dtype) == \
+        "{'names':['a'], 'formats':[{'names':['x','y','z'], 'formats':['?','<u4','<f4']," \
+        " 'offsets':[0,4,8], 'itemsize':24}], 'offsets':[8], 'itemsize':40}"
     assert '' not in arr.dtype.fields
     assert '' not in arr.dtype.fields['a'][0].fields
     assert arr.dtype.itemsize > partial_dtype.itemsize
@@ -174,3 +192,34 @@ def test_signature(doc):
     from pybind11_tests import create_rec_nested
 
     assert doc(create_rec_nested) == "create_rec_nested(arg0: int) -> numpy.ndarray[NestedStruct]"
+
+
+@pytest.requires_numpy
+def test_scalar_conversion():
+    from pybind11_tests import (create_rec_simple, f_simple,
+                                create_rec_packed, f_packed,
+                                create_rec_nested, f_nested,
+                                create_enum_array)
+
+    n = 3
+    arrays = [create_rec_simple(n), create_rec_packed(n),
+              create_rec_nested(n), create_enum_array(n)]
+    funcs = [f_simple, f_packed, f_nested]
+
+    for i, func in enumerate(funcs):
+        for j, arr in enumerate(arrays):
+            if i == j and i < 2:
+                assert [func(arr[k]) for k in range(n)] == [k * 10 for k in range(n)]
+            else:
+                with pytest.raises(TypeError) as excinfo:
+                    func(arr[0])
+                assert 'incompatible function arguments' in str(excinfo.value)
+
+
+@pytest.requires_numpy
+def test_register_dtype():
+    from pybind11_tests import register_dtype
+
+    with pytest.raises(RuntimeError) as excinfo:
+        register_dtype()
+    assert 'dtype is already registered' in str(excinfo.value)
diff --git a/pybind11/tests/test_numpy_vectorize.py b/pybind11/tests/test_numpy_vectorize.py
index 73da819a4..718646efa 100644
--- a/pybind11/tests/test_numpy_vectorize.py
+++ b/pybind11/tests/test_numpy_vectorize.py
@@ -71,4 +71,6 @@ def test_type_selection():
 def test_docs(doc):
     from pybind11_tests import vectorized_func
 
-    assert doc(vectorized_func) == "vectorized_func(arg0: numpy.ndarray[int], arg1: numpy.ndarray[float], arg2: numpy.ndarray[float]) -> object"
+    assert doc(vectorized_func) == """
+        vectorized_func(arg0: numpy.ndarray[int], arg1: numpy.ndarray[float], arg2: numpy.ndarray[float]) -> object
+    """  # noqa: E501 line too long
diff --git a/pybind11/tests/test_opaque_types.py b/pybind11/tests/test_opaque_types.py
index 8a6a4c3f3..7781943b4 100644
--- a/pybind11/tests/test_opaque_types.py
+++ b/pybind11/tests/test_opaque_types.py
@@ -39,7 +39,7 @@ def test_pointers(msg):
             1. (arg0: capsule) -> int
 
         Invoked with: [1, 2, 3]
-    """
+    """  # noqa: E501 line too long
 
     assert return_null_str() is None
     assert get_null_str_value(return_null_str()) is not None
diff --git a/pybind11/tests/test_operator_overloading.py b/pybind11/tests/test_operator_overloading.py
index e0d42391e..02ccb9633 100644
--- a/pybind11/tests/test_operator_overloading.py
+++ b/pybind11/tests/test_operator_overloading.py
@@ -1,4 +1,3 @@
-
 def test_operator_overloading():
     from pybind11_tests import Vector2, Vector, ConstructorStats
 
diff --git a/pybind11/tests/test_pickling.cpp b/pybind11/tests/test_pickling.cpp
index 3941dc593..52b1dbc30 100644
--- a/pybind11/tests/test_pickling.cpp
+++ b/pybind11/tests/test_pickling.cpp
@@ -57,6 +57,7 @@ test_initializer pickling([](py::module &m) {
             p.setExtra2(t[2].cast<int>());
         });
 
+#if !defined(PYPY_VERSION)
     py::class_<PickleableWithDict>(m, "PickleableWithDict", py::dynamic_attr())
         .def(py::init<std::string>())
         .def_readwrite("value", &PickleableWithDict::value)
@@ -70,7 +71,7 @@ test_initializer pickling([](py::module &m) {
                 throw std::runtime_error("Invalid state!");
             /* Cast and construct */
             auto& p = self.cast<PickleableWithDict&>();
-            new (&p) Pickleable(t[0].cast<std::string>());
+            new (&p) PickleableWithDict(t[0].cast<std::string>());
 
             /* Assign C++ state */
             p.extra = t[1].cast<int>();
@@ -78,4 +79,5 @@ test_initializer pickling([](py::module &m) {
             /* Assign Python state */
             self.attr("__dict__") = t[2];
         });
+#endif
 });
diff --git a/pybind11/tests/test_pickling.py b/pybind11/tests/test_pickling.py
index 5e62e1fcc..548c618af 100644
--- a/pybind11/tests/test_pickling.py
+++ b/pybind11/tests/test_pickling.py
@@ -1,3 +1,5 @@
+import pytest
+
 try:
     import cPickle as pickle  # Use cPickle on Python 2.7
 except ImportError:
@@ -18,6 +20,7 @@ def test_roundtrip():
     assert p2.extra2() == p.extra2()
 
 
+@pytest.unsupported_on_pypy
 def test_roundtrip_with_dict():
     from pybind11_tests import PickleableWithDict
 
diff --git a/pybind11/tests/test_python_types.cpp b/pybind11/tests/test_python_types.cpp
index 678a56d15..e1598e9ef 100644
--- a/pybind11/tests/test_python_types.cpp
+++ b/pybind11/tests/test_python_types.cpp
@@ -37,7 +37,8 @@ public:
     py::set get_set() {
         py::set set;
         set.add(py::str("key1"));
-        set.add(py::str("key2"));
+        set.add("key2");
+        set.add(std::string("key3"));
         return set;
     }
 
@@ -59,7 +60,7 @@ public:
     /* Create, manipulate, and return a Python list */
     py::list get_list() {
         py::list list;
-        list.append(py::str("value"));
+        list.append("value");
         py::print("Entry at position 0:", list[0]);
         list[0] = py::str("overwritten");
         return list;
@@ -77,6 +78,10 @@ public:
         return std::array<std::string, 2> {{ "array entry 1" , "array entry 2"}};
     }
 
+    std::valarray<int> get_valarray() {
+        return std::valarray<int>({ 1, 4, 9 });
+    }
+
     /* Easily iterate over a dictionary using a C++11 range-based for loop */
     void print_dict(py::dict dict) {
         for (auto item : dict)
@@ -132,6 +137,12 @@ public:
             py::print("array item {}: {}"_s.format(index++, item));
     }
 
+    void print_valarray(std::valarray<int> &varray) {
+        int index = 0;
+        for (auto item : varray)
+            py::print("valarray item {}: {}"_s.format(index++, item));
+    }
+
     void throw_exception() {
         throw std::runtime_error("This exception was intentionally thrown.");
     }
@@ -153,8 +164,8 @@ public:
     }
 
     void test_print(const py::object& obj) {
-        py::print(obj.str());
-        py::print(obj.repr());
+        py::print(py::str(obj));
+        py::print(py::repr(obj));
     }
 
     static int value;
@@ -164,10 +175,17 @@ public:
 int ExamplePythonTypes::value = 0;
 const int ExamplePythonTypes::value2 = 5;
 
+struct MoveOutContainer {
+    struct Value { int value; };
+
+    std::list<Value> move_list() const { return {{0}, {1}, {2}}; }
+};
+
+
 test_initializer python_types([](py::module &m) {
     /* No constructor is explicitly defined below. An exception is raised when
        trying to construct it directly from Python */
-    py::class_<ExamplePythonTypes>(m, "ExamplePythonTypes", "Example 2 documentation")
+    py::class_<ExamplePythonTypes>(m, "ExamplePythonTypes", "Example 2 documentation", py::metaclass())
         .def("get_dict", &ExamplePythonTypes::get_dict, "Return a Python dictionary")
         .def("get_dict_2", &ExamplePythonTypes::get_dict_2, "Return a C++ dictionary")
         .def("get_list", &ExamplePythonTypes::get_list, "Return a Python list")
@@ -175,6 +193,7 @@ test_initializer python_types([](py::module &m) {
         .def("get_set", &ExamplePythonTypes::get_set, "Return a Python set")
         .def("get_set2", &ExamplePythonTypes::get_set_2, "Return a C++ set")
         .def("get_array", &ExamplePythonTypes::get_array, "Return a C++ array")
+        .def("get_valarray", &ExamplePythonTypes::get_valarray, "Return a C++ valarray")
         .def("print_dict", &ExamplePythonTypes::print_dict, "Print entries of a Python dictionary")
         .def("print_dict_2", &ExamplePythonTypes::print_dict_2, "Print entries of a C++ dictionary")
         .def("print_set", &ExamplePythonTypes::print_set, "Print entries of a Python set")
@@ -182,6 +201,7 @@ test_initializer python_types([](py::module &m) {
         .def("print_list", &ExamplePythonTypes::print_list, "Print entries of a Python list")
         .def("print_list_2", &ExamplePythonTypes::print_list_2, "Print entries of a C++ list")
         .def("print_array", &ExamplePythonTypes::print_array, "Print entries of a C++ array")
+        .def("print_valarray", &ExamplePythonTypes::print_valarray, "Print entries of a C++ valarray")
         .def("pair_passthrough", &ExamplePythonTypes::pair_passthrough, "Return a pair in reversed order")
         .def("tuple_passthrough", &ExamplePythonTypes::tuple_passthrough, "Return a triple in reversed order")
         .def("throw_exception", &ExamplePythonTypes::throw_exception, "Throw an exception")
@@ -192,8 +212,7 @@ test_initializer python_types([](py::module &m) {
         .def("test_print", &ExamplePythonTypes::test_print, "test the print function")
         .def_static("new_instance", &ExamplePythonTypes::new_instance, "Return an instance")
         .def_readwrite_static("value", &ExamplePythonTypes::value, "Static value member")
-        .def_readonly_static("value2", &ExamplePythonTypes::value2, "Static value member (readonly)")
-        ;
+        .def_readonly_static("value2", &ExamplePythonTypes::value2, "Static value member (readonly)");
 
     m.def("test_print_function", []() {
         py::print("Hello, World!");
@@ -250,7 +269,7 @@ test_initializer python_types([](py::module &m) {
             d["missing_attr_chain"] = "raised"_s;
         }
 
-        d["is_none"] = py::cast(o.attr("basic_attr").is_none());
+        d["is_none"] = o.attr("basic_attr").is_none();
 
         d["operator()"] = o.attr("func")(1);
         d["operator*"] = o.attr("func")(*o.attr("begin_end"));
@@ -260,13 +279,13 @@ test_initializer python_types([](py::module &m) {
 
     m.def("test_tuple_accessor", [](py::tuple existing_t) {
         try {
-            existing_t[0] = py::cast(1);
+            existing_t[0] = 1;
         } catch (const py::error_already_set &) {
             // --> Python system error
             // Only new tuples (refcount == 1) are mutable
             auto new_t = py::tuple(3);
             for (size_t i = 0; i < new_t.size(); ++i) {
-                new_t[i] = py::cast(i);
+                new_t[i] = i;
             }
             return new_t;
         }
@@ -275,18 +294,136 @@ test_initializer python_types([](py::module &m) {
 
     m.def("test_accessor_assignment", []() {
         auto l = py::list(1);
-        l[0] = py::cast(0);
+        l[0] = 0;
 
         auto d = py::dict();
         d["get"] = l[0];
         auto var = l[0];
         d["deferred_get"] = var;
-        l[0] = py::cast(1);
+        l[0] = 1;
         d["set"] = l[0];
-        var = py::cast(99); // this assignment should not overwrite l[0]
+        var = 99; // this assignment should not overwrite l[0]
         d["deferred_set"] = l[0];
         d["var"] = var;
 
         return d;
     });
+
+    bool has_optional = false, has_exp_optional = false;
+#ifdef PYBIND11_HAS_OPTIONAL
+    has_optional = true;
+    using opt_int = std::optional<int>;
+    m.def("double_or_zero", [](const opt_int& x) -> int {
+        return x.value_or(0) * 2;
+    });
+    m.def("half_or_none", [](int x) -> opt_int {
+        return x ? opt_int(x / 2) : opt_int();
+    });
+    m.def("test_nullopt", [](opt_int x) {
+        return x.value_or(42);
+    }, py::arg_v("x", std::nullopt, "None"));
+#endif
+
+#ifdef PYBIND11_HAS_EXP_OPTIONAL
+    has_exp_optional = true;
+    using exp_opt_int = std::experimental::optional<int>;
+    m.def("double_or_zero_exp", [](const exp_opt_int& x) -> int {
+        return x.value_or(0) * 2;
+    });
+    m.def("half_or_none_exp", [](int x) -> exp_opt_int {
+        return x ? exp_opt_int(x / 2) : exp_opt_int();
+    });
+    m.def("test_nullopt_exp", [](exp_opt_int x) {
+        return x.value_or(42);
+    }, py::arg_v("x", std::experimental::nullopt, "None"));
+#endif
+
+    m.attr("has_optional") = has_optional;
+    m.attr("has_exp_optional") = has_exp_optional;
+
+    m.def("test_default_constructors", []() {
+        return py::dict(
+            "str"_a=py::str(),
+            "bool"_a=py::bool_(),
+            "int"_a=py::int_(),
+            "float"_a=py::float_(),
+            "tuple"_a=py::tuple(),
+            "list"_a=py::list(),
+            "dict"_a=py::dict(),
+            "set"_a=py::set()
+        );
+    });
+
+    m.def("test_converting_constructors", [](py::dict d) {
+        return py::dict(
+            "str"_a=py::str(d["str"]),
+            "bool"_a=py::bool_(d["bool"]),
+            "int"_a=py::int_(d["int"]),
+            "float"_a=py::float_(d["float"]),
+            "tuple"_a=py::tuple(d["tuple"]),
+            "list"_a=py::list(d["list"]),
+            "dict"_a=py::dict(d["dict"]),
+            "set"_a=py::set(d["set"]),
+            "memoryview"_a=py::memoryview(d["memoryview"])
+        );
+    });
+
+    m.def("test_cast_functions", [](py::dict d) {
+        // When converting between Python types, obj.cast<T>() should be the same as T(obj)
+        return py::dict(
+            "str"_a=d["str"].cast<py::str>(),
+            "bool"_a=d["bool"].cast<py::bool_>(),
+            "int"_a=d["int"].cast<py::int_>(),
+            "float"_a=d["float"].cast<py::float_>(),
+            "tuple"_a=d["tuple"].cast<py::tuple>(),
+            "list"_a=d["list"].cast<py::list>(),
+            "dict"_a=d["dict"].cast<py::dict>(),
+            "set"_a=d["set"].cast<py::set>(),
+            "memoryview"_a=d["memoryview"].cast<py::memoryview>()
+        );
+    });
+
+    py::class_<MoveOutContainer::Value>(m, "MoveOutContainerValue")
+        .def_readonly("value", &MoveOutContainer::Value::value);
+
+    py::class_<MoveOutContainer>(m, "MoveOutContainer")
+        .def(py::init<>())
+        .def_property_readonly("move_list", &MoveOutContainer::move_list);
+
+    m.def("get_implicit_casting", []() {
+        py::dict d;
+        d["char*_i1"] = "abc";
+        const char *c2 = "abc";
+        d["char*_i2"] = c2;
+        d["char*_e"] = py::cast(c2);
+        d["char*_p"] = py::str(c2);
+
+        d["int_i1"] = 42;
+        int i = 42;
+        d["int_i2"] = i;
+        i++;
+        d["int_e"] = py::cast(i);
+        i++;
+        d["int_p"] = py::int_(i);
+
+        d["str_i1"] = std::string("str");
+        std::string s2("str1");
+        d["str_i2"] = s2;
+        s2[3] = '2';
+        d["str_e"] = py::cast(s2);
+        s2[3] = '3';
+        d["str_p"] = py::str(s2);
+
+        py::list l(2);
+        l[0] = 3;
+        l[1] = py::cast(6);
+        l.append(9);
+        l.append(py::cast(12));
+        l.append(py::int_(15));
+
+        return py::dict(
+            "d"_a=d,
+            "l"_a=l
+        );
+    });
 });
diff --git a/pybind11/tests/test_python_types.py b/pybind11/tests/test_python_types.py
index 9bba8249c..cb28e1ff1 100644
--- a/pybind11/tests/test_python_types.py
+++ b/pybind11/tests/test_python_types.py
@@ -1,6 +1,12 @@
 import pytest
 
-from pybind11_tests import ExamplePythonTypes, ConstructorStats
+from pybind11_tests import ExamplePythonTypes, ConstructorStats, has_optional, has_exp_optional
+
+
+def test_repr():
+    # In Python 3.3+, repr() accesses __qualname__
+    assert "ExamplePythonTypes__Meta" in repr(type(ExamplePythonTypes))
+    assert "ExamplePythonTypes" in repr(ExamplePythonTypes)
 
 
 def test_static():
@@ -38,12 +44,13 @@ def test_instance(capture):
     """
     with capture:
         set_result = instance.get_set()
-        set_result.add('key3')
+        set_result.add('key4')
         instance.print_set(set_result)
     assert capture.unordered == """
         key: key1
         key: key2
         key: key3
+        key: key4
     """
     with capture:
         set_result = instance.get_set2()
@@ -87,12 +94,24 @@ def test_instance(capture):
         array item 0: array entry 1
         array item 1: array entry 2
     """
+    varray_result = instance.get_valarray()
+    assert varray_result == [1, 4, 9]
+    with capture:
+        instance.print_valarray(varray_result)
+    assert capture.unordered == """
+        valarray item 0: 1
+        valarray item 1: 4
+        valarray item 2: 9
+    """
     with pytest.raises(RuntimeError) as excinfo:
         instance.throw_exception()
     assert str(excinfo.value) == "This exception was intentionally thrown."
 
     assert instance.pair_passthrough((True, "test")) == ("test", True)
     assert instance.tuple_passthrough((True, "test", 5)) == (5, "test", True)
+    # Any sequence can be cast to a std::pair or std::tuple
+    assert instance.pair_passthrough([True, "test"]) == ("test", True)
+    assert instance.tuple_passthrough([True, "test", 5]) == (5, "test", True)
 
     assert instance.get_bytes_from_string().decode() == "foo"
     assert instance.get_bytes_from_str().decode() == "bar"
@@ -119,8 +138,12 @@ def test_instance(capture):
     assert cstats.alive() == 0
 
 
-def test_docs(doc):
+# PyPy does not seem to propagate the tp_docs field at the moment
+def test_class_docs(doc):
     assert doc(ExamplePythonTypes) == "Example 2 documentation"
+
+
+def test_method_docs(doc):
     assert doc(ExamplePythonTypes.get_dict) == """
         get_dict(self: m.ExamplePythonTypes) -> dict
 
@@ -161,6 +184,11 @@ def test_docs(doc):
 
         Return a C++ array
     """
+    assert doc(ExamplePythonTypes.get_valarray) == """
+        get_valarray(self: m.ExamplePythonTypes) -> List[int]
+
+        Return a C++ valarray
+    """
     assert doc(ExamplePythonTypes.print_dict) == """
         print_dict(self: m.ExamplePythonTypes, arg0: dict) -> None
 
@@ -205,7 +233,7 @@ def test_docs(doc):
         tuple_passthrough(self: m.ExamplePythonTypes, arg0: Tuple[bool, str, int]) -> Tuple[int, str, bool]
 
         Return a triple in reversed order
-    """
+    """  # noqa: E501 line too long
     assert doc(ExamplePythonTypes.throw_exception) == """
         throw_exception(self: m.ExamplePythonTypes) -> None
 
@@ -295,3 +323,90 @@ def test_accessors():
     assert d["set"] == 1
     assert d["deferred_set"] == 1
     assert d["var"] == 99
+
+
+@pytest.mark.skipif(not has_optional, reason='no <optional>')
+def test_optional():
+    from pybind11_tests import double_or_zero, half_or_none, test_nullopt
+
+    assert double_or_zero(None) == 0
+    assert double_or_zero(42) == 84
+    pytest.raises(TypeError, double_or_zero, 'foo')
+
+    assert half_or_none(0) is None
+    assert half_or_none(42) == 21
+    pytest.raises(TypeError, half_or_none, 'foo')
+
+    assert test_nullopt() == 42
+    assert test_nullopt(None) == 42
+    assert test_nullopt(42) == 42
+    assert test_nullopt(43) == 43
+
+
+@pytest.mark.skipif(not has_exp_optional, reason='no <experimental/optional>')
+def test_exp_optional():
+    from pybind11_tests import double_or_zero_exp, half_or_none_exp, test_nullopt_exp
+
+    assert double_or_zero_exp(None) == 0
+    assert double_or_zero_exp(42) == 84
+    pytest.raises(TypeError, double_or_zero_exp, 'foo')
+
+    assert half_or_none_exp(0) is None
+    assert half_or_none_exp(42) == 21
+    pytest.raises(TypeError, half_or_none_exp, 'foo')
+
+    assert test_nullopt_exp() == 42
+    assert test_nullopt_exp(None) == 42
+    assert test_nullopt_exp(42) == 42
+    assert test_nullopt_exp(43) == 43
+
+
+def test_constructors():
+    """C++ default and converting constructors are equivalent to type calls in Python"""
+    from pybind11_tests import (test_default_constructors, test_converting_constructors,
+                                test_cast_functions)
+
+    types = [str, bool, int, float, tuple, list, dict, set]
+    expected = {t.__name__: t() for t in types}
+    assert test_default_constructors() == expected
+
+    data = {
+        str: 42,
+        bool: "Not empty",
+        int: "42",
+        float: "+1e3",
+        tuple: range(3),
+        list: range(3),
+        dict: [("two", 2), ("one", 1), ("three", 3)],
+        set: [4, 4, 5, 6, 6, 6],
+        memoryview: b'abc'
+    }
+    inputs = {k.__name__: v for k, v in data.items()}
+    expected = {k.__name__: k(v) for k, v in data.items()}
+    assert test_converting_constructors(inputs) == expected
+    assert test_cast_functions(inputs) == expected
+
+
+def test_move_out_container():
+    """Properties use the `reference_internal` policy by default. If the underlying function
+    returns an rvalue, the policy is automatically changed to `move` to avoid referencing
+    a temporary. In case the return value is a container of user-defined types, the policy
+    also needs to be applied to the elements, not just the container."""
+    from pybind11_tests import MoveOutContainer
+
+    c = MoveOutContainer()
+    moved_out_list = c.move_list
+    assert [x.value for x in moved_out_list] == [0, 1, 2]
+
+
+def test_implicit_casting():
+    """Tests implicit casting when assigning or appending to dicts and lists."""
+    from pybind11_tests import get_implicit_casting
+
+    z = get_implicit_casting()
+    assert z['d'] == {
+        'char*_i1': 'abc', 'char*_i2': 'abc', 'char*_e': 'abc', 'char*_p': 'abc',
+        'str_i1': 'str', 'str_i2': 'str1', 'str_e': 'str2', 'str_p': 'str3',
+        'int_i1': 42, 'int_i2': 42, 'int_e': 43, 'int_p': 44
+    }
+    assert z['l'] == [3, 6, 9, 12, 15]
diff --git a/pybind11/tests/test_sequences_and_iterators.cpp b/pybind11/tests/test_sequences_and_iterators.cpp
index 0a88cef6f..323b4bf00 100644
--- a/pybind11/tests/test_sequences_and_iterators.cpp
+++ b/pybind11/tests/test_sequences_and_iterators.cpp
@@ -129,7 +129,8 @@ private:
 // map-like functionality.
 class StringMap {
 public:
-    StringMap(std::unordered_map<std::string, std::string> init = {})
+    StringMap() = default;
+    StringMap(std::unordered_map<std::string, std::string> init)
         : map(std::move(init)) {}
 
     void set(std::string key, std::string val) {
diff --git a/pybind11/tests/test_sequences_and_iterators.py b/pybind11/tests/test_sequences_and_iterators.py
index c83c4e57c..76b9f43f6 100644
--- a/pybind11/tests/test_sequences_and_iterators.py
+++ b/pybind11/tests/test_sequences_and_iterators.py
@@ -2,7 +2,7 @@ import pytest
 
 
 def isclose(a, b, rel_tol=1e-05, abs_tol=0.0):
-    """Like to math.isclose() from Python 3.5"""
+    """Like math.isclose() from Python 3.5"""
     return abs(a - b) <= max(rel_tol * max(abs(a), abs(b)), abs_tol)
 
 
diff --git a/pybind11/tests/test_smart_ptr.cpp b/pybind11/tests/test_smart_ptr.cpp
index 7d50f0dd7..4d1e77e32 100644
--- a/pybind11/tests/test_smart_ptr.cpp
+++ b/pybind11/tests/test_smart_ptr.cpp
@@ -82,7 +82,11 @@ private:
 };
 
 /// Make pybind aware of the ref-counted wrapper type (s)
-PYBIND11_DECLARE_HOLDER_TYPE(T, ref<T>); // Required for custom holder type
+
+// ref<T> is a wrapper for 'Object' which uses intrusive reference counting
+// It is always possible to construct a ref<T> from an Object* pointer without
+// possible incosistencies, hence the 'true' argument at the end.
+PYBIND11_DECLARE_HOLDER_TYPE(T, ref<T>, true);
 PYBIND11_DECLARE_HOLDER_TYPE(T, std::shared_ptr<T>); // Not required any more for std::shared_ptr,
                                                      // but it should compile without error
 
@@ -125,6 +129,18 @@ test_initializer smart_ptr([](py::module &m) {
     py::class_<MyObject1, ref<MyObject1>>(m, "MyObject1", obj)
         .def(py::init<int>());
 
+    m.def("test_object1_refcounting",
+        []() -> bool {
+            ref<MyObject1> o = new MyObject1(0);
+            bool good = o->getRefCount() == 1;
+            py::object o2 = py::cast(o, py::return_value_policy::reference);
+            // always request (partial) ownership for objects with intrusive
+            // reference counting even when using the 'reference' RVP
+            good &= o->getRefCount() == 2;
+            return good;
+        }
+    );
+
     m.def("make_object_1", &make_object_1);
     m.def("make_object_2", &make_object_2);
     m.def("make_myobject1_1", &make_myobject1_1);
@@ -165,3 +181,60 @@ test_initializer smart_ptr([](py::module &m) {
     // Expose constructor stats for the ref type
     m.def("cstats_ref", &ConstructorStats::get<ref_tag>);
 });
+
+struct SharedPtrRef {
+    struct A {
+        A() { print_created(this); }
+        A(const A &) { print_copy_created(this); }
+        A(A &&) { print_move_created(this); }
+        ~A() { print_destroyed(this); }
+    };
+
+    A value = {};
+    std::shared_ptr<A> shared = std::make_shared<A>();
+};
+
+struct SharedFromThisRef {
+    struct B : std::enable_shared_from_this<B> {
+        B() { print_created(this); }
+        B(const B &) : std::enable_shared_from_this<B>() { print_copy_created(this); }
+        B(B &&) : std::enable_shared_from_this<B>() { print_move_created(this); }
+        ~B() { print_destroyed(this); }
+    };
+
+    B value = {};
+    std::shared_ptr<B> shared = std::make_shared<B>();
+};
+
+test_initializer smart_ptr_and_references([](py::module &pm) {
+    auto m = pm.def_submodule("smart_ptr");
+
+    using A = SharedPtrRef::A;
+    py::class_<A, std::shared_ptr<A>>(m, "A");
+
+    py::class_<SharedPtrRef>(m, "SharedPtrRef")
+        .def(py::init<>())
+        .def_readonly("ref", &SharedPtrRef::value)
+        .def_property_readonly("copy", [](const SharedPtrRef &s) { return s.value; },
+                               py::return_value_policy::copy)
+        .def_readonly("holder_ref", &SharedPtrRef::shared)
+        .def_property_readonly("holder_copy", [](const SharedPtrRef &s) { return s.shared; },
+                               py::return_value_policy::copy)
+        .def("set_ref", [](SharedPtrRef &, const A &) { return true; })
+        .def("set_holder", [](SharedPtrRef &, std::shared_ptr<A>) { return true; });
+
+    using B = SharedFromThisRef::B;
+    py::class_<B, std::shared_ptr<B>>(m, "B");
+
+    py::class_<SharedFromThisRef>(m, "SharedFromThisRef")
+        .def(py::init<>())
+        .def_readonly("bad_wp", &SharedFromThisRef::value)
+        .def_property_readonly("ref", [](const SharedFromThisRef &s) -> const B & { return *s.shared; })
+        .def_property_readonly("copy", [](const SharedFromThisRef &s) { return s.value; },
+                               py::return_value_policy::copy)
+        .def_readonly("holder_ref", &SharedFromThisRef::shared)
+        .def_property_readonly("holder_copy", [](const SharedFromThisRef &s) { return s.shared; },
+                               py::return_value_policy::copy)
+        .def("set_ref", [](SharedFromThisRef &, const B &) { return true; })
+        .def("set_holder", [](SharedFromThisRef &, std::shared_ptr<B>) { return true; });
+});
diff --git a/pybind11/tests/test_smart_ptr.py b/pybind11/tests/test_smart_ptr.py
index acb0f6172..a6867b485 100644
--- a/pybind11/tests/test_smart_ptr.py
+++ b/pybind11/tests/test_smart_ptr.py
@@ -1,3 +1,4 @@
+import pytest
 from pybind11_tests import ConstructorStats
 
 
@@ -114,7 +115,13 @@ def test_smart_ptr(capture):
     assert cstats.copy_assignments == 30
     assert cstats.move_assignments == 0
 
-def test_unique_nodelete(capture):
+
+def test_smart_ptr_refcounting():
+    from pybind11_tests import test_object1_refcounting
+    assert test_object1_refcounting()
+
+
+def test_unique_nodelete():
     from pybind11_tests import MyObject4
     o = MyObject4(23)
     assert o.value == 23
@@ -122,4 +129,75 @@ def test_unique_nodelete(capture):
     assert cstats.alive() == 1
     del o
     cstats = ConstructorStats.get(MyObject4)
-    assert cstats.alive() == 1 # Leak, but that's intentional
+    assert cstats.alive() == 1  # Leak, but that's intentional
+
+
+def test_shared_ptr_and_references():
+    from pybind11_tests.smart_ptr import SharedPtrRef, A
+
+    s = SharedPtrRef()
+    stats = ConstructorStats.get(A)
+    assert stats.alive() == 2
+
+    ref = s.ref  # init_holder_helper(holder_ptr=false, owned=false)
+    assert stats.alive() == 2
+    assert s.set_ref(ref)
+    with pytest.raises(RuntimeError) as excinfo:
+        assert s.set_holder(ref)
+    assert "Unable to cast from non-held to held instance" in str(excinfo.value)
+
+    copy = s.copy  # init_holder_helper(holder_ptr=false, owned=true)
+    assert stats.alive() == 3
+    assert s.set_ref(copy)
+    assert s.set_holder(copy)
+
+    holder_ref = s.holder_ref  # init_holder_helper(holder_ptr=true, owned=false)
+    assert stats.alive() == 3
+    assert s.set_ref(holder_ref)
+    assert s.set_holder(holder_ref)
+
+    holder_copy = s.holder_copy  # init_holder_helper(holder_ptr=true, owned=true)
+    assert stats.alive() == 3
+    assert s.set_ref(holder_copy)
+    assert s.set_holder(holder_copy)
+
+    del ref, copy, holder_ref, holder_copy, s
+    assert stats.alive() == 0
+
+
+def test_shared_ptr_from_this_and_references():
+    from pybind11_tests.smart_ptr import SharedFromThisRef, B
+
+    s = SharedFromThisRef()
+    stats = ConstructorStats.get(B)
+    assert stats.alive() == 2
+
+    ref = s.ref  # init_holder_helper(holder_ptr=false, owned=false, bad_wp=false)
+    assert stats.alive() == 2
+    assert s.set_ref(ref)
+    assert s.set_holder(ref)  # std::enable_shared_from_this can create a holder from a reference
+
+    bad_wp = s.bad_wp  # init_holder_helper(holder_ptr=false, owned=false, bad_wp=true)
+    assert stats.alive() == 2
+    assert s.set_ref(bad_wp)
+    with pytest.raises(RuntimeError) as excinfo:
+        assert s.set_holder(bad_wp)
+    assert "Unable to cast from non-held to held instance" in str(excinfo.value)
+
+    copy = s.copy  # init_holder_helper(holder_ptr=false, owned=true, bad_wp=false)
+    assert stats.alive() == 3
+    assert s.set_ref(copy)
+    assert s.set_holder(copy)
+
+    holder_ref = s.holder_ref  # init_holder_helper(holder_ptr=true, owned=false, bad_wp=false)
+    assert stats.alive() == 3
+    assert s.set_ref(holder_ref)
+    assert s.set_holder(holder_ref)
+
+    holder_copy = s.holder_copy  # init_holder_helper(holder_ptr=true, owned=true, bad_wp=false)
+    assert stats.alive() == 3
+    assert s.set_ref(holder_copy)
+    assert s.set_holder(holder_copy)
+
+    del ref, bad_wp, copy, holder_ref, holder_copy, s
+    assert stats.alive() == 0
diff --git a/pybind11/tests/test_stl_binders.cpp b/pybind11/tests/test_stl_binders.cpp
index e390376dc..ce0b33257 100644
--- a/pybind11/tests/test_stl_binders.cpp
+++ b/pybind11/tests/test_stl_binders.cpp
@@ -11,8 +11,14 @@
 
 #include <pybind11/stl_bind.h>
 #include <map>
+#include <deque>
 #include <unordered_map>
 
+#ifdef _MSC_VER
+// We get some really long type names here which causes MSVC to emit warnings
+#  pragma warning(disable: 4503) // warning C4503: decorated name length exceeded, name was truncated
+#endif
+
 class El {
 public:
     El() = delete;
@@ -26,6 +32,32 @@ std::ostream & operator<<(std::ostream &s, El const&v) {
     return s;
 }
 
+/// Issue #487: binding std::vector<E> with E non-copyable
+class E_nc {
+public:
+    explicit E_nc(int i) : value{i} {}
+    E_nc(const E_nc &) = delete;
+    E_nc &operator=(const E_nc &) = delete;
+    E_nc(E_nc &&) = default;
+    E_nc &operator=(E_nc &&) = default;
+
+    int value;
+};
+
+template <class Container> Container *one_to_n(int n) {
+    auto v = new Container();
+    for (int i = 1; i <= n; i++)
+        v->emplace_back(i);
+    return v;
+}
+
+template <class Map> Map *times_ten(int n) {
+    auto m = new Map();
+    for (int i = 1; i <= n; i++)
+        m->emplace(int(i), E_nc(10*i));
+    return m;
+}
+
 test_initializer stl_binder_vector([](py::module &m) {
     py::class_<El>(m, "El")
         .def(py::init<int>());
@@ -36,6 +68,7 @@ test_initializer stl_binder_vector([](py::module &m) {
     py::bind_vector<std::vector<El>>(m, "VectorEl");
 
     py::bind_vector<std::vector<std::vector<El>>>(m, "VectorVectorEl");
+
 });
 
 test_initializer stl_binder_map([](py::module &m) {
@@ -44,4 +77,24 @@ test_initializer stl_binder_map([](py::module &m) {
 
     py::bind_map<std::map<std::string, double const>>(m, "MapStringDoubleConst");
     py::bind_map<std::unordered_map<std::string, double const>>(m, "UnorderedMapStringDoubleConst");
+
 });
+
+test_initializer stl_binder_noncopyable([](py::module &m) {
+    py::class_<E_nc>(m, "ENC")
+        .def(py::init<int>())
+        .def_readwrite("value", &E_nc::value);
+
+    py::bind_vector<std::vector<E_nc>>(m, "VectorENC");
+    m.def("get_vnc", &one_to_n<std::vector<E_nc>>, py::return_value_policy::reference);
+
+    py::bind_vector<std::deque<E_nc>>(m, "DequeENC");
+    m.def("get_dnc", &one_to_n<std::deque<E_nc>>, py::return_value_policy::reference);
+
+    py::bind_map<std::map<int, E_nc>>(m, "MapENC");
+    m.def("get_mnc", &times_ten<std::map<int, E_nc>>, py::return_value_policy::reference);
+
+    py::bind_map<std::unordered_map<int, E_nc>>(m, "UmapENC");
+    m.def("get_umnc", &times_ten<std::unordered_map<int, E_nc>>, py::return_value_policy::reference);
+});
+
diff --git a/pybind11/tests/test_stl_binders.py b/pybind11/tests/test_stl_binders.py
index 302635715..c9bcc7935 100644
--- a/pybind11/tests/test_stl_binders.py
+++ b/pybind11/tests/test_stl_binders.py
@@ -58,30 +58,17 @@ def test_map_string_double():
     m['a'] = 1
     m['b'] = 2.5
 
-    keys = []
-    for k in m: keys.append(k)
-    assert keys == ['a', 'b']
-
-    key_values = []
-    for k, v in m.items(): key_values.append( (k, v) )
-    assert key_values == [('a', 1), ('b', 2.5) ]
-
+    assert list(m) == ['a', 'b']
+    assert list(m.items()) == [('a', 1), ('b', 2.5)]
     assert str(m) == "MapStringDouble{a: 1, b: 2.5}"
 
-
     um = UnorderedMapStringDouble()
     um['ua'] = 1.1
     um['ub'] = 2.6
 
-    keys = []
-    for k in um: keys.append(k)
-    assert sorted(keys) == ['ua', 'ub']
-
-    key_values = []
-    for k, v in um.items(): key_values.append( (k, v) )
-    assert sorted(key_values) == [('ua', 1.1), ('ub', 2.6) ]
-
-    str(um)
+    assert sorted(list(um)) == ['ua', 'ub']
+    assert sorted(list(um.items())) == [('ua', 1.1), ('ub', 2.6)]
+    assert "UnorderedMapStringDouble" in str(um)
 
 
 def test_map_string_double_const():
@@ -97,3 +84,57 @@ def test_map_string_double_const():
     umc['b'] = 21.5
 
     str(umc)
+
+
+def test_noncopyable_vector():
+    from pybind11_tests import get_vnc
+
+    vnc = get_vnc(5)
+    for i in range(0, 5):
+        assert vnc[i].value == i + 1
+
+    for i, j in enumerate(vnc, start=1):
+        assert j.value == i
+
+
+def test_noncopyable_deque():
+    from pybind11_tests import get_dnc
+
+    dnc = get_dnc(5)
+    for i in range(0, 5):
+        assert dnc[i].value == i + 1
+
+    i = 1
+    for j in dnc:
+        assert(j.value == i)
+        i += 1
+
+
+def test_noncopyable_map():
+    from pybind11_tests import get_mnc
+
+    mnc = get_mnc(5)
+    for i in range(1, 6):
+        assert mnc[i].value == 10 * i
+
+    vsum = 0
+    for k, v in mnc.items():
+        assert v.value == 10 * k
+        vsum += v.value
+
+    assert vsum == 150
+
+
+def test_noncopyable_unordered_map():
+    from pybind11_tests import get_umnc
+
+    mnc = get_umnc(5)
+    for i in range(1, 6):
+        assert mnc[i].value == 10 * i
+
+    vsum = 0
+    for k, v in mnc.items():
+        assert v.value == 10 * k
+        vsum += v.value
+
+    assert vsum == 150
diff --git a/pybind11/tests/test_virtual_functions.py b/pybind11/tests/test_virtual_functions.py
index 5d55d5ece..b11c699df 100644
--- a/pybind11/tests/test_virtual_functions.py
+++ b/pybind11/tests/test_virtual_functions.py
@@ -36,7 +36,9 @@ def test_override(capture, msg):
     ex12 = ExampleVirt(10)
     with capture:
         assert runExampleVirt(ex12, 20) == 30
-    assert capture == "Original implementation of ExampleVirt::run(state=10, value=20, str1=default1, str2=default2)"
+    assert capture == """
+        Original implementation of ExampleVirt::run(state=10, value=20, str1=default1, str2=default2)
+    """  # noqa: E501 line too long
 
     with pytest.raises(RuntimeError) as excinfo:
         runExampleVirtVirtual(ex12)
@@ -48,7 +50,7 @@ def test_override(capture, msg):
     assert capture == """
         ExtendedExampleVirt::run(20), calling parent..
         Original implementation of ExampleVirt::run(state=11, value=21, str1=override1, str2=default2)
-    """
+    """  # noqa: E501 line too long
     with capture:
         assert runExampleVirtBool(ex12p) is False
     assert capture == "ExtendedExampleVirt::run_bool()"
@@ -62,7 +64,7 @@ def test_override(capture, msg):
     assert capture == """
         ExtendedExampleVirt::run(50), calling parent..
         Original implementation of ExampleVirt::run(state=17, value=51, str1=override1, str2=override2)
-    """
+    """  # noqa: E501 line too long
 
     cstats = ConstructorStats.get(ExampleVirt)
     assert cstats.alive() == 3
@@ -76,20 +78,20 @@ def test_override(capture, msg):
 def test_inheriting_repeat():
     from pybind11_tests import A_Repeat, B_Repeat, C_Repeat, D_Repeat, A_Tpl, B_Tpl, C_Tpl, D_Tpl
 
-    class VI_AR(A_Repeat):
+    class AR(A_Repeat):
         def unlucky_number(self):
             return 99
 
-    class VI_AT(A_Tpl):
+    class AT(A_Tpl):
         def unlucky_number(self):
             return 999
 
-    obj = VI_AR()
+    obj = AR()
     assert obj.say_something(3) == "hihihi"
     assert obj.unlucky_number() == 99
     assert obj.say_everything() == "hi 99"
 
-    obj = VI_AT()
+    obj = AT()
     assert obj.say_something(3) == "hihihi"
     assert obj.unlucky_number() == 999
     assert obj.say_everything() == "hi 999"
@@ -106,46 +108,46 @@ def test_inheriting_repeat():
         assert obj.lucky_number() == 888.0
         assert obj.say_everything() == "B says hi 1 times 4444"
 
-    class VI_CR(C_Repeat):
+    class CR(C_Repeat):
         def lucky_number(self):
             return C_Repeat.lucky_number(self) + 1.25
 
-    obj = VI_CR()
+    obj = CR()
     assert obj.say_something(3) == "B says hi 3 times"
     assert obj.unlucky_number() == 4444
     assert obj.lucky_number() == 889.25
     assert obj.say_everything() == "B says hi 1 times 4444"
 
-    class VI_CT(C_Tpl):
+    class CT(C_Tpl):
         pass
 
-    obj = VI_CT()
+    obj = CT()
     assert obj.say_something(3) == "B says hi 3 times"
     assert obj.unlucky_number() == 4444
     assert obj.lucky_number() == 888.0
     assert obj.say_everything() == "B says hi 1 times 4444"
 
-    class VI_CCR(VI_CR):
+    class CCR(CR):
         def lucky_number(self):
-            return VI_CR.lucky_number(self) * 10
+            return CR.lucky_number(self) * 10
 
-    obj = VI_CCR()
+    obj = CCR()
     assert obj.say_something(3) == "B says hi 3 times"
     assert obj.unlucky_number() == 4444
     assert obj.lucky_number() == 8892.5
     assert obj.say_everything() == "B says hi 1 times 4444"
 
-    class VI_CCT(VI_CT):
+    class CCT(CT):
         def lucky_number(self):
-            return VI_CT.lucky_number(self) * 1000
+            return CT.lucky_number(self) * 1000
 
-    obj = VI_CCT()
+    obj = CCT()
     assert obj.say_something(3) == "B says hi 3 times"
     assert obj.unlucky_number() == 4444
     assert obj.lucky_number() == 888000.0
     assert obj.say_everything() == "B says hi 1 times 4444"
 
-    class VI_DR(D_Repeat):
+    class DR(D_Repeat):
         def unlucky_number(self):
             return 123
 
@@ -158,15 +160,15 @@ def test_inheriting_repeat():
         assert obj.lucky_number() == 888.0
         assert obj.say_everything() == "B says hi 1 times 4444"
 
-    obj = VI_DR()
+    obj = DR()
     assert obj.say_something(3) == "B says hi 3 times"
     assert obj.unlucky_number() == 123
     assert obj.lucky_number() == 42.0
     assert obj.say_everything() == "B says hi 1 times 123"
 
-    class VI_DT(D_Tpl):
+    class DT(D_Tpl):
         def say_something(self, times):
-            return "VI_DT says:" + (' quack' * times)
+            return "DT says:" + (' quack' * times)
 
         def unlucky_number(self):
             return 1234
@@ -174,33 +176,39 @@ def test_inheriting_repeat():
         def lucky_number(self):
             return -4.25
 
-    obj = VI_DT()
-    assert obj.say_something(3) == "VI_DT says: quack quack quack"
+    obj = DT()
+    assert obj.say_something(3) == "DT says: quack quack quack"
     assert obj.unlucky_number() == 1234
     assert obj.lucky_number() == -4.25
-    assert obj.say_everything() == "VI_DT says: quack 1234"
+    assert obj.say_everything() == "DT says: quack 1234"
 
-    class VI_DT2(VI_DT):
+    class DT2(DT):
         def say_something(self, times):
-            return "VI_DT2: " + ('QUACK' * times)
+            return "DT2: " + ('QUACK' * times)
 
         def unlucky_number(self):
             return -3
 
-    class VI_BT(B_Tpl):
+    class BT(B_Tpl):
         def say_something(self, times):
-            return "VI_BT" * times
+            return "BT" * times
+
         def unlucky_number(self):
             return -7
+
         def lucky_number(self):
             return -1.375
 
-    obj = VI_BT()
-    assert obj.say_something(3) == "VI_BTVI_BTVI_BT"
+    obj = BT()
+    assert obj.say_something(3) == "BTBTBT"
     assert obj.unlucky_number() == -7
     assert obj.lucky_number() == -1.375
-    assert obj.say_everything() == "VI_BT -7"
+    assert obj.say_everything() == "BT -7"
+
 
+# PyPy: Reference count > 1 causes call with noncopyable instance
+# to fail in ncv1.print_nc()
+@pytest.unsupported_on_pypy
 @pytest.mark.skipif(not hasattr(pybind11_tests, 'NCVirt'),
                     reason="NCVirt test broken on ICPC")
 def test_move_support():
diff --git a/pybind11/tools/FindPythonLibsNew.cmake b/pybind11/tools/FindPythonLibsNew.cmake
index f303a0206..dc44a9df5 100644
--- a/pybind11/tools/FindPythonLibsNew.cmake
+++ b/pybind11/tools/FindPythonLibsNew.cmake
@@ -82,11 +82,12 @@ print(s.get_config_var('SO'));
 print(hasattr(sys, 'gettotalrefcount')+0);
 print(struct.calcsize('@P'));
 print(s.get_config_var('LDVERSION') or s.get_config_var('VERSION'));
+print(s.get_config_var('LIBDIR') or '');
+print(s.get_config_var('MULTIARCH') or '');
 "
     RESULT_VARIABLE _PYTHON_SUCCESS
     OUTPUT_VARIABLE _PYTHON_VALUES
-    ERROR_VARIABLE _PYTHON_ERROR_VALUE
-    OUTPUT_STRIP_TRAILING_WHITESPACE)
+    ERROR_VARIABLE _PYTHON_ERROR_VALUE)
 
 if(NOT _PYTHON_SUCCESS MATCHES 0)
     if(PythonLibsNew_FIND_REQUIRED)
@@ -108,6 +109,8 @@ list(GET _PYTHON_VALUES 4 PYTHON_MODULE_EXTENSION)
 list(GET _PYTHON_VALUES 5 PYTHON_IS_DEBUG)
 list(GET _PYTHON_VALUES 6 PYTHON_SIZEOF_VOID_P)
 list(GET _PYTHON_VALUES 7 PYTHON_LIBRARY_SUFFIX)
+list(GET _PYTHON_VALUES 8 PYTHON_LIBDIR)
+list(GET _PYTHON_VALUES 9 PYTHON_MULTIARCH)
 
 # Make sure the Python has the same pointer-size as the chosen compiler
 # Skip if CMAKE_SIZEOF_VOID_P is not defined
@@ -137,28 +140,25 @@ string(REGEX REPLACE "\\\\" "/" PYTHON_SITE_PACKAGES ${PYTHON_SITE_PACKAGES})
 if(CMAKE_HOST_WIN32)
     set(PYTHON_LIBRARY
         "${PYTHON_PREFIX}/libs/Python${PYTHON_LIBRARY_SUFFIX}.lib")
-        
-    # when run in a venv, PYTHON_PREFIX points to it. But the libraries remain in the 
+
+    # when run in a venv, PYTHON_PREFIX points to it. But the libraries remain in the
     # original python installation. They may be found relative to PYTHON_INCLUDE_DIR.
     if(NOT EXISTS "${PYTHON_LIBRARY}")
         get_filename_component(_PYTHON_ROOT ${PYTHON_INCLUDE_DIR} DIRECTORY)
         set(PYTHON_LIBRARY
             "${_PYTHON_ROOT}/libs/Python${PYTHON_LIBRARY_SUFFIX}.lib")
     endif()
-        
+
     # raise an error if the python libs are still not found.
     if(NOT EXISTS "${PYTHON_LIBRARY}")
         message(FATAL_ERROR "Python libraries not found")
     endif()
-    
-elseif(APPLE)
-    set(PYTHON_LIBRARY
-        "${PYTHON_PREFIX}/lib/libpython${PYTHON_LIBRARY_SUFFIX}.dylib")
+
 else()
-    if(${PYTHON_SIZEOF_VOID_P} MATCHES 8)
-        set(_PYTHON_LIBS_SEARCH "${PYTHON_PREFIX}/lib64" "${PYTHON_PREFIX}/lib")
+    if(PYTHON_MULTIARCH)
+        set(_PYTHON_LIBS_SEARCH "${PYTHON_LIBDIR}/${PYTHON_MULTIARCH}" "${PYTHON_LIBDIR}")
     else()
-        set(_PYTHON_LIBS_SEARCH "${PYTHON_PREFIX}/lib")
+        set(_PYTHON_LIBS_SEARCH "${PYTHON_LIBDIR}")
     endif()
     #message(STATUS "Searching for Python libs in ${_PYTHON_LIBS_SEARCH}")
     # Probably this needs to be more involved. It would be nice if the config
diff --git a/pybind11/tools/check-style.sh b/pybind11/tools/check-style.sh
index bc52dc863..b87cb16e6 100755
--- a/pybind11/tools/check-style.sh
+++ b/pybind11/tools/check-style.sh
@@ -5,8 +5,12 @@
 # This script currently checks for
 #
 # 1. use of tabs instead of spaces
-# 2. trailing spaces
-# 3. missing space between keyword and parenthesis, e.g.: for(, if(, while(
+# 2. MSDOS-style CRLF endings
+# 3. trailing spaces
+# 4. missing space between keyword and parenthesis, e.g.: for(, if(, while(
+# 5. Missing space between right parenthesis and brace, e.g. 'for (...){'
+# 6. opening brace on its own line. It should always be on the same line as the
+#    if/while/for/do statment.
 # 
 # Invoke as: tools/check-style.sh
 #
@@ -26,6 +30,19 @@ while read -u 3 f; do
     echo "    $f"
 done
 
+found=
+# The mt=41 sets a red background for matched MS-DOS CRLF line endings
+exec 3< <(GREP_COLORS='mt=41' grep -IUlr $'\r' include/ tests/*.{cpp,py,h} docs/*.rst --color=always)
+while read -u 3 f; do
+    if [ -z "$found" ]; then
+        echo -e '\e[31m\e[01mError: found CRLF characters in the following files:\e[0m'
+        found=1
+        errors=1
+    fi
+
+    echo "    $f"
+done
+
 found=
 # The mt=41 sets a red background for matched trailing spaces
 exec 3< <(GREP_COLORS='mt=41' grep '\s\+$' include/ tests/*.{cpp,py,h} docs/*.rst -rn --color=always)
@@ -40,7 +57,7 @@ while read -u 3 f; do
 done
 
 found=
-exec 3< <(grep '\<\(if\|for\|while\)(\|){' include/ tests/*.{cpp,py,h} -rn --color=always)
+exec 3< <(grep '\<\(if\|for\|while\|catch\)(\|){' include/ tests/*.{cpp,py,h} -rn --color=always)
 while read -u 3 line; do
     if [ -z "$found" ]; then
         echo -e '\e[31m\e[01mError: found the following coding style problems:\e[0m'
@@ -51,4 +68,16 @@ while read -u 3 line; do
     echo "    $line"
 done
 
+found=
+exec 3< <(GREP_COLORS='mt=41' grep '^\s*{\s*$' include/ docs/*.rst -rn --color=always)
+while read -u 3 f; do
+    if [ -z "$found" ]; then
+        echo -e '\e[31m\e[01mError: braces should occur on the same line as the if/while/.. statement. Found issues in the following files: \e[0m'
+        found=1
+        errors=1
+    fi
+
+    echo "    $f"
+done
+
 exit $errors
diff --git a/pybind11/tools/libsize.py b/pybind11/tools/libsize.py
new file mode 100644
index 000000000..5dcb8b0d0
--- /dev/null
+++ b/pybind11/tools/libsize.py
@@ -0,0 +1,38 @@
+from __future__ import print_function, division
+import os
+import sys
+
+# Internal build script for generating debugging test .so size.
+# Usage:
+#     python libsize.py file.so save.txt -- displays the size of file.so and, if save.txt exists, compares it to the
+#                                           size in it, then overwrites save.txt with the new size for future runs.
+
+if len(sys.argv) != 3:
+    sys.exit("Invalid arguments: usage: python libsize.py file.so save.txt")
+
+lib = sys.argv[1]
+save = sys.argv[2]
+
+if not os.path.exists(lib):
+    sys.exit("Error: requested file ({}) does not exist".format(lib))
+
+libsize = os.path.getsize(lib)
+
+print("------", os.path.basename(lib), "file size:", libsize, end='')
+
+if os.path.exists(save):
+    with open(save) as sf:
+        oldsize = int(sf.readline())
+
+    if oldsize > 0:
+        change = libsize - oldsize
+        if change == 0:
+            print(" (no change)")
+        else:
+            print(" (change of {:+} bytes = {:+.2%})".format(change, change / oldsize))
+else:
+    print()
+
+with open(save, 'w') as sf:
+    sf.write(str(libsize))
+
diff --git a/pybind11/tools/pybind11Config.cmake.in b/pybind11/tools/pybind11Config.cmake.in
new file mode 100644
index 000000000..a4206c166
--- /dev/null
+++ b/pybind11/tools/pybind11Config.cmake.in
@@ -0,0 +1,92 @@
+# pybind11Config.cmake
+# --------------------
+#
+# PYBIND11 cmake module.
+# This module sets the following variables in your project::
+#
+#   pybind11_FOUND - true if pybind11 and all required components found on the system
+#   pybind11_VERSION - pybind11 version in format Major.Minor.Release
+#   pybind11_INCLUDE_DIRS - Directories where pybind11 and python headers are located.
+#   pybind11_INCLUDE_DIR - Directory where pybind11 headers are located.
+#   pybind11_DEFINITIONS - Definitions necessary to use pybind11, namely USING_pybind11.
+#   pybind11_LIBRARIES - compile flags and python libraries (as needed) to link against.
+#   pybind11_LIBRARY - empty.
+#   CMAKE_MODULE_PATH - appends location of accompanying FindPythonLibsNew.cmake and
+#                       pybind11Tools.cmake modules.
+#
+#
+# Available components: None
+#
+#
+# Exported targets::
+#
+# If pybind11 is found, this module defines the following :prop_tgt:`IMPORTED`
+# target. Python headers, libraries (as needed by platform), and C++ standard
+# are attached to the target. Set PythonLibsNew variables to influence
+# python detection and PYBIND11_CPP_STANDARD (-std=c++11 or -std=c++14) to
+# influence standard setting. ::
+#
+#   pybind11::module - the main pybind11 interface library for extension modules (i.e., headers)
+#
+#   find_package(pybind11 CONFIG REQUIRED)
+#   message(STATUS "Found pybind11: ${pybind11_INCLUDE_DIR} (found version ${pybind11_VERSION} & Py${PYTHON_VERSION_STRING})")
+#   add_library(mylib MODULE main.cpp)
+#   target_link_libraries(mylib pybind11::module)
+#
+# Suggested usage::
+#
+# find_package with version info is not recommended except for release versions. ::
+#
+#   find_package(pybind11 CONFIG)
+#   find_package(pybind11 2.0 EXACT CONFIG REQUIRED)
+#
+#
+# The following variables can be set to guide the search for this package::
+#
+#   pybind11_DIR - CMake variable, set to directory containing this Config file
+#   CMAKE_PREFIX_PATH - CMake variable, set to root directory of this package
+#   PATH - environment variable, set to bin directory of this package
+#   CMAKE_DISABLE_FIND_PACKAGE_pybind11 - CMake variable, disables
+#     find_package(pybind11) when not REQUIRED, perhaps to force internal build
+
+@PACKAGE_INIT@
+
+set(PN pybind11)
+
+# location of pybind11/pybind11.h
+set(${PN}_INCLUDE_DIR "${PACKAGE_PREFIX_DIR}/@CMAKE_INSTALL_INCLUDEDIR@")
+
+set(${PN}_LIBRARY "")
+set(${PN}_DEFINITIONS USING_${PN})
+
+check_required_components(${PN})
+
+# make detectable the FindPythonLibsNew.cmake module
+list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_LIST_DIR})
+
+include(pybind11Tools)
+
+if(NOT (CMAKE_VERSION VERSION_LESS 3.0))
+#-----------------------------------------------------------------------------
+# Don't include targets if this file is being picked up by another
+# project which has already built this as a subproject
+#-----------------------------------------------------------------------------
+if(NOT TARGET ${PN}::pybind11)
+    include("${CMAKE_CURRENT_LIST_DIR}/${PN}Targets.cmake")
+
+    find_package(PythonLibsNew ${PYBIND11_PYTHON_VERSION} MODULE REQUIRED)
+    set_property(TARGET ${PN}::module APPEND PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${PYTHON_INCLUDE_DIRS})
+    if(WIN32 OR CYGWIN)
+      set_property(TARGET ${PN}::module APPEND PROPERTY INTERFACE_LINK_LIBRARIES ${PYTHON_LIBRARIES})
+    endif()
+
+    select_cxx_standard()
+    set_property(TARGET ${PN}::module APPEND PROPERTY INTERFACE_COMPILE_OPTIONS "${PYBIND11_CPP_STANDARD}")
+
+    get_property(_iid TARGET ${PN}::module PROPERTY INTERFACE_INCLUDE_DIRECTORIES)
+    get_property(_ill TARGET ${PN}::module PROPERTY INTERFACE_LINK_LIBRARIES)
+    get_property(_ico TARGET ${PN}::module PROPERTY INTERFACE_COMPILE_OPTIONS)
+    set(${PN}_INCLUDE_DIRS ${_iid})
+    set(${PN}_LIBRARIES ${_ico} ${_ill})
+endif()
+endif()
diff --git a/pybind11/tools/pybind11Tools.cmake b/pybind11/tools/pybind11Tools.cmake
new file mode 100644
index 000000000..fba76adee
--- /dev/null
+++ b/pybind11/tools/pybind11Tools.cmake
@@ -0,0 +1,163 @@
+# tools/pybind11Tools.cmake -- Build system for the pybind11 modules
+#
+# Copyright (c) 2015 Wenzel Jakob <wenzel@inf.ethz.ch>
+#
+# All rights reserved. Use of this source code is governed by a
+# BSD-style license that can be found in the LICENSE file.
+
+cmake_minimum_required(VERSION 2.8.12)
+
+# Add a CMake parameter for choosing a desired Python version
+set(PYBIND11_PYTHON_VERSION "" CACHE STRING "Python version to use for compiling modules")
+
+set(Python_ADDITIONAL_VERSIONS 3.7 3.6 3.5 3.4)
+find_package(PythonLibsNew ${PYBIND11_PYTHON_VERSION} REQUIRED)
+
+include(CheckCXXCompilerFlag)
+include(CMakeParseArguments)
+
+function(select_cxx_standard)
+  if(NOT MSVC AND NOT PYBIND11_CPP_STANDARD)
+    check_cxx_compiler_flag("-std=c++14" HAS_CPP14_FLAG)
+    check_cxx_compiler_flag("-std=c++11" HAS_CPP11_FLAG)
+
+    if (HAS_CPP14_FLAG)
+      set(PYBIND11_CPP_STANDARD -std=c++14)
+    elseif (HAS_CPP11_FLAG)
+      set(PYBIND11_CPP_STANDARD -std=c++11)
+    else()
+      message(FATAL_ERROR "Unsupported compiler -- pybind11 requires C++11 support!")
+    endif()
+
+    set(PYBIND11_CPP_STANDARD ${PYBIND11_CPP_STANDARD} CACHE STRING
+        "C++ standard flag, e.g. -std=c++11 or -std=c++14. Defaults to latest available." FORCE)
+  endif()
+endfunction()
+
+# Internal: find the appropriate LTO flag for this compiler
+macro(_pybind11_find_lto_flag output_var prefer_thin_lto)
+  if(${prefer_thin_lto})
+    # Check for ThinLTO support (Clang)
+    check_cxx_compiler_flag("-flto=thin" HAS_THIN_LTO_FLAG)
+    set(${output_var} $<${HAS_THIN_LTO_FLAG}:-flto=thin>)
+  endif()
+
+  if(NOT ${prefer_thin_lto} OR NOT HAS_THIN_LTO_FLAG)
+    if(NOT CMAKE_CXX_COMPILER_ID MATCHES "Intel")
+      # Check for Link Time Optimization support (GCC/Clang)
+      check_cxx_compiler_flag("-flto" HAS_LTO_FLAG)
+      set(${output_var} $<${HAS_LTO_FLAG}:-flto>)
+    else()
+      # Intel equivalent to LTO is called IPO
+      check_cxx_compiler_flag("-ipo" HAS_IPO_FLAG)
+      set(${output_var} $<${HAS_IPO_FLAG}:-ipo>)
+    endif()
+  endif()
+endmacro()
+
+# Build a Python extension module:
+# pybind11_add_module(<name> [MODULE | SHARED] [EXCLUDE_FROM_ALL]
+#                     [NO_EXTRAS] [THIN_LTO] source1 [source2 ...])
+#
+function(pybind11_add_module target_name)
+  set(options MODULE SHARED EXCLUDE_FROM_ALL NO_EXTRAS THIN_LTO)
+  cmake_parse_arguments(ARG "${options}" "" "" ${ARGN})
+
+  if(ARG_MODULE AND ARG_SHARED)
+    message(FATAL_ERROR "Can't be both MODULE and SHARED")
+  elseif(ARG_SHARED)
+    set(lib_type SHARED)
+  else()
+    set(lib_type MODULE)
+  endif()
+
+  if(ARG_EXCLUDE_FROM_ALL)
+    set(exclude_from_all EXCLUDE_FROM_ALL)
+  endif()
+
+  add_library(${target_name} ${lib_type} ${exclude_from_all} ${ARG_UNPARSED_ARGUMENTS})
+
+  target_include_directories(${target_name}
+    PRIVATE ${PYBIND11_INCLUDE_DIR}  # from project CMakeLists.txt
+    PRIVATE ${pybind11_INCLUDE_DIR}  # from pybind11Config
+    PRIVATE ${PYTHON_INCLUDE_DIRS})
+
+  # The prefix and extension are provided by FindPythonLibsNew.cmake
+  set_target_properties(${target_name} PROPERTIES PREFIX "${PYTHON_MODULE_PREFIX}")
+  set_target_properties(${target_name} PROPERTIES SUFFIX "${PYTHON_MODULE_EXTENSION}")
+
+  if(WIN32 OR CYGWIN)
+    # Link against the Python shared library on Windows
+    target_link_libraries(${target_name} PRIVATE ${PYTHON_LIBRARIES})
+  elseif(APPLE)
+    # It's quite common to have multiple copies of the same Python version
+    # installed on one's system. E.g.: one copy from the OS and another copy
+    # that's statically linked into an application like Blender or Maya.
+    # If we link our plugin library against the OS Python here and import it
+    # into Blender or Maya later on, this will cause segfaults when multiple
+    # conflicting Python instances are active at the same time (even when they
+    # are of the same version).
+
+    # Windows is not affected by this issue since it handles DLL imports
+    # differently. The solution for Linux and Mac OS is simple: we just don't
+    # link against the Python library. The resulting shared library will have
+    # missing symbols, but that's perfectly fine -- they will be resolved at
+    # import time.
+
+    target_link_libraries(${target_name} PRIVATE "-undefined dynamic_lookup")
+
+    if(ARG_SHARED)
+      # Suppress CMake >= 3.0 warning for shared libraries
+      set_target_properties(${target_name} PROPERTIES MACOSX_RPATH ON)
+    endif()
+  endif()
+
+  select_cxx_standard()
+  if(NOT MSVC)
+    # Make sure C++11/14 are enabled
+    target_compile_options(${target_name} PUBLIC ${PYBIND11_CPP_STANDARD})
+  endif()
+
+  if(ARG_NO_EXTRAS)
+    return()
+  endif()
+
+  if(NOT MSVC)
+    # Enable link time optimization and set the default symbol
+    # visibility to hidden (very important to obtain small binaries)
+    string(TOUPPER "${CMAKE_BUILD_TYPE}" U_CMAKE_BUILD_TYPE)
+    if (NOT ${U_CMAKE_BUILD_TYPE} MATCHES DEBUG)
+      # Link Time Optimization
+      if(NOT CYGWIN)
+        _pybind11_find_lto_flag(lto_flag ARG_THIN_LTO)
+        target_compile_options(${target_name} PRIVATE ${lto_flag})
+      endif()
+
+      # Default symbol visibility
+      target_compile_options(${target_name} PRIVATE "-fvisibility=hidden")
+
+      # Strip unnecessary sections of the binary on Linux/Mac OS
+      if(CMAKE_STRIP)
+        if(APPLE)
+          add_custom_command(TARGET ${target_name} POST_BUILD
+                             COMMAND ${CMAKE_STRIP} -u -r $<TARGET_FILE:${target_name}>)
+        else()
+          add_custom_command(TARGET ${target_name} POST_BUILD
+                             COMMAND ${CMAKE_STRIP} $<TARGET_FILE:${target_name}>)
+        endif()
+      endif()
+    endif()
+  elseif(MSVC)
+    # /MP enables multithreaded builds (relevant when there are many files), /bigobj is
+    # needed for bigger binding projects due to the limit to 64k addressable sections
+    target_compile_options(${target_name} PRIVATE /MP /bigobj)
+
+    # Enforce link time code generation on MSVC, except in debug mode
+    target_compile_options(${target_name} PRIVATE $<$<NOT:$<CONFIG:Debug>>:/GL>)
+
+    # Fancy generator expressions don't work with linker flags, for reasons unknown
+    set_property(TARGET ${target_name} APPEND_STRING PROPERTY LINK_FLAGS_RELEASE /LTCG)
+    set_property(TARGET ${target_name} APPEND_STRING PROPERTY LINK_FLAGS_MINSIZEREL /LTCG)
+    set_property(TARGET ${target_name} APPEND_STRING PROPERTY LINK_FLAGS_RELWITHDEBINFO /LTCG)
+  endif()
+endfunction()
-- 
GitLab