Merge remote-tracking branch 'origin/MSD-ExcitedStates' into New_MSD_algo

This commit is contained in:
Ye Luo 2021-02-05 19:15:52 -06:00
commit 05f0567916
466 changed files with 30254 additions and 3978 deletions

View File

@ -14,7 +14,8 @@ _Delete the items that do not apply_
- Code style update (formatting, renaming)
- Refactoring (no functional changes, no api changes)
- Build related changes
- Documentation content changes
- Testing changes (e.g. new unit/integration/performance tests)
- Documentation changes
- Other (please describe):
### Does this introduce a breaking change?

View File

@ -4,6 +4,10 @@ IF ( CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.0 )
MESSAGE(FATAL_ERROR "Requires clang 7.0 or higher ")
ENDIF()
IF ( CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 11.0.0 AND QMC_CXX_STANDARD EQUAL 17 AND BUILD_AFQMC )
MESSAGE(FATAL_ERROR "Avoid Clang 11.0.0 which cannot compile AFQMC properly with C++17!")
ENDIF()
# Set the std
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99")
@ -12,10 +16,14 @@ IF(QMC_OMP)
SET(ENABLE_OPENMP 1)
IF(ENABLE_OFFLOAD AND NOT CMAKE_SYSTEM_NAME STREQUAL "CrayLinuxEnvironment")
SET(OFFLOAD_TARGET "nvptx64-nvidia-cuda" CACHE STRING "Offload target architecture")
SET(CLANG_OPENMP_OFFLOAD_FLAGS "-fopenmp-targets=${OFFLOAD_TARGET}")
IF(DEFINED OFFLOAD_ARCH)
SET(CLANG_OPENMP_OFFLOAD_FLAGS "-fopenmp-targets=${OFFLOAD_TARGET} -Xopenmp-target=${OFFLOAD_TARGET} -march=${OFFLOAD_ARCH}")
ELSE()
SET(CLANG_OPENMP_OFFLOAD_FLAGS "-fopenmp-targets=${OFFLOAD_TARGET}")
SET(CLANG_OPENMP_OFFLOAD_FLAGS "${CLANG_OPENMP_OFFLOAD_FLAGS} -Xopenmp-target=${OFFLOAD_TARGET} -march=${OFFLOAD_ARCH}")
ENDIF()
IF(OFFLOAD_TARGET MATCHES "nvptx64")
SET(CLANG_OPENMP_OFFLOAD_FLAGS "${CLANG_OPENMP_OFFLOAD_FLAGS} -Wno-unknown-cuda-version")
ENDIF()
# Intel clang compiler needs a different flag for the host side OpenMP library when offload is used.

View File

@ -10,7 +10,12 @@ SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99")
IF(QMC_OMP)
SET(ENABLE_OPENMP 1)
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fopenmp")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")
IF(ENABLE_OFFLOAD)
SET(OFFLOAD_TARGET "nvptx-none" CACHE STRING "Offload target architecture")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp -foffload=${OFFLOAD_TARGET} -foffload=\"-lm -latomic\"")
ELSE()
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")
ENDIF()
ENDIF(QMC_OMP)
# Set gnu specific flags (which we always want)

View File

@ -9,7 +9,16 @@ SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -c99")
IF(QMC_OMP)
SET(ENABLE_OPENMP 1)
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mp=allcores")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mp=allcores")
IF(ENABLE_OFFLOAD AND NOT CMAKE_SYSTEM_NAME STREQUAL "CrayLinuxEnvironment")
MESSAGE(WARNING "QMCPACK OpenMP offload is not ready for NVIDIA HPC compiler.")
IF(NOT DEFINED OFFLOAD_ARCH)
MESSAGE(FATAL_ERROR "NVIDIA HPC compiler requires -gpu=ccXX option set based on the target GPU architecture! "
"Please add -DOFFLOAD_ARCH=ccXX to cmake. For example, cc70 is for Volta.")
ENDIF()
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mp=gpu -gpu=${OFFLOAD_ARCH}")
ELSE()
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mp=allcores")
ENDIF()
ENDIF(QMC_OMP)
ADD_DEFINITIONS( -Drestrict=__restrict__ )

View File

@ -37,29 +37,31 @@ FUNCTION( COPY_DIRECTORY_USING_SYMLINK SRC_DIR DST_DIR )
ENDFOREACH()
ENDFUNCTION()
# Copy files, but symlink the *.h5 files (which are the large ones)
FUNCTION( COPY_DIRECTORY_SYMLINK_H5 SRC_DIR DST_DIR)
# Copy everything but *.h5 files and pseudopotential files
FILE(COPY "${SRC_DIR}/" DESTINATION "${DST_DIR}"
PATTERN "*.h5" EXCLUDE
PATTERN "*.opt.xml" EXCLUDE
PATTERN "*.ncpp.xml" EXCLUDE
PATTERN "*.BFD.xml" EXCLUDE)
# Now find and symlink the *.h5 files and pseudopotential files
FILE(GLOB_RECURSE H5 "${SRC_DIR}/*.h5" "${SRC_DIR}/*.opt.xml" "${SRC_DIR}/*.ncpp.xml" "${SRC_DIR}/*.BFD.xml")
FOREACH(F IN LISTS H5)
FILE(RELATIVE_PATH R "${SRC_DIR}" "${F}")
#MESSAGE("Creating symlink from ${SRC_DIR}/${R} to ${DST_DIR}/${R}")
EXECUTE_PROCESS(COMMAND ${CMAKE_COMMAND} -E create_symlink "${SRC_DIR}/${R}" "${DST_DIR}/${R}")
# Copy selected files only: h5, pseudopotential, wavefunction, and structure files, plus the one input file used, are copied.
FUNCTION( COPY_DIRECTORY_USING_SYMLINK_LIMITED SRC_DIR DST_DIR ${ARGN})
FILE(MAKE_DIRECTORY "${DST_DIR}")
# Find all the files but not subdirectories
FILE(GLOB FILE_FOLDER_NAMES LIST_DIRECTORIES TRUE
"${SRC_DIR}/qmc_ref" "${SRC_DIR}/qmc-ref" "${SRC_DIR}/*.h5"
"${SRC_DIR}/*.opt.xml" "${SRC_DIR}/*.ncpp.xml" "${SRC_DIR}/*.BFD.xml"
"${SRC_DIR}/*.ccECP.xml"
"${SRC_DIR}/*.py" "${SRC_DIR}/*.sh" "${SRC_DIR}/*.restart.xml"
"${SRC_DIR}/Li.xml" "${SRC_DIR}/H.xml" "${SRC_DIR}/*.L2_test.xml" "${SRC_DIR}/*.opt_L2.xml"
"${SRC_DIR}/*.wfnoj.xml" "${SRC_DIR}/*.wfj.xml" "${SRC_DIR}/*.wfs*.xml"
"${SRC_DIR}/*.wfn*.xml" "${SRC_DIR}/*.cuspInfo.xml" "${SRC_DIR}/*.H*.xml"
"${SRC_DIR}/*.structure.xml" "${SRC_DIR}/*ptcl.xml")
FOREACH(F IN LISTS FILE_FOLDER_NAMES)
EXECUTE_PROCESS( COMMAND ln -sf "${F}" "." WORKING_DIRECTORY ${DST_DIR})
ENDFOREACH()
FOREACH(F IN LISTS ARGN)
EXECUTE_PROCESS( COMMAND ln -sf "${SRC_DIR}/${F}" "." WORKING_DIRECTORY ${DST_DIR})
ENDFOREACH()
ENDFUNCTION()
# Control copy vs. symlink with top-level variable
FUNCTION( COPY_DIRECTORY_MAYBE_USING_SYMLINK SRC_DIR DST_DIR )
FUNCTION( COPY_DIRECTORY_MAYBE_USING_SYMLINK SRC_DIR DST_DIR ${ARGN})
IF (QMC_SYMLINK_TEST_FILES)
COPY_DIRECTORY_USING_SYMLINK("${SRC_DIR}" "${DST_DIR}")
#COPY_DIRECTORY_SYMLINK_H5("${SRC_DIR}" "${DST_DIR}" )
COPY_DIRECTORY_USING_SYMLINK_LIMITED("${SRC_DIR}" "${DST_DIR}" ${ARGN})
ELSE()
COPY_DIRECTORY("${SRC_DIR}" "${DST_DIR}")
ENDIF()
@ -123,7 +125,13 @@ ENDFUNCTION()
# Runs qmcpack
# Note that TEST_ADDED is an output variable
FUNCTION( RUN_QMC_APP TESTNAME SRC_DIR PROCS THREADS TEST_ADDED TEST_LABELS ${ARGN} )
COPY_DIRECTORY_MAYBE_USING_SYMLINK( "${SRC_DIR}" "${CMAKE_CURRENT_BINARY_DIR}/${TESTNAME}" )
# restrict ARGN to only one file or empty
LIST(LENGTH ARGN INPUT_FILE_LENGTH)
IF(INPUT_FILE_LENGTH GREATER 1)
MESSAGE(FATAL_ERROR "Incorrect invocation of RUN_QMC_APP by ${TESTNAME}. ARGN value is \"${ARGN}\"")
ENDIF()
COPY_DIRECTORY_MAYBE_USING_SYMLINK( "${SRC_DIR}" "${CMAKE_CURRENT_BINARY_DIR}/${TESTNAME}" "${ARGN}")
SET( TEST_ADDED_TEMP FALSE )
SET( TEST_LABELS_TEMP "" )
RUN_QMC_APP_NO_COPY( ${TESTNAME} ${CMAKE_CURRENT_BINARY_DIR}/${TESTNAME} ${PROCS} ${THREADS} TEST_ADDED_TEMP TEST_LABELS_TEMP ${ARGN} )
@ -251,7 +259,7 @@ ENDFUNCTION()
function(QMC_RUN_AND_CHECK_CUSTOM_SCALAR)
set(OPTIONS SHOULD_FAIL)
set(ONE_VALUE_ARGS BASE_NAME BASE_DIR PREFIX INPUT_FILE PROCS THREADS SERIES SCALAR_VALUES)
set(ONE_VALUE_ARGS BASE_NAME BASE_DIR PREFIX INPUT_FILE PROCS THREADS SERIES SCALAR_VALUES EQUILIBRATION)
# Eventually we may want to support multiple SERIES/SCALAR_VALUES pairs
#SET(MULTI_VALUE_ARGS SERIES SCALAR_VALUES)
@ -272,6 +280,11 @@ function(QMC_RUN_AND_CHECK_CUSTOM_SCALAR)
set(PREFIX ${QRC_PREFIX})
set(INPUT_FILE ${QRC_INPUT_FILE})
set(EQUIL 2)
if (DEFINED QRC_EQUILIBRATION)
set(EQUIL ${QRC_EQUILIBRATION})
endif()
set( TEST_ADDED FALSE )
set( TEST_LABELS "")
set( FULL_NAME "${BASE_NAME}-${PROCS}-${THREADS}" )
@ -307,7 +320,7 @@ function(QMC_RUN_AND_CHECK_CUSTOM_SCALAR)
else()
set( TEST_NAME "${FULL_NAME}-${SCALAR_NAME}" )
endif()
set(CHECK_CMD ${CMAKE_SOURCE_DIR}/tests/scripts/check_scalars.py --ns 3 --series ${SERIES} -p ${PREFIX} -e 2 --name ${SCALAR_NAME} --ref-value ${SCALAR_VALUE} --ref-error ${SCALAR_ERROR})
set(CHECK_CMD ${CMAKE_SOURCE_DIR}/tests/scripts/check_scalars.py --ns 3 --series ${SERIES} -p ${PREFIX} -e ${EQUIL} --name ${SCALAR_NAME} --ref-value ${SCALAR_VALUE} --ref-error ${SCALAR_ERROR})
add_test( NAME ${TEST_NAME}
COMMAND ${CHECK_CMD}
WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${FULL_NAME}"
@ -394,6 +407,6 @@ ENDFUNCTION()
# Print THE_MESSAGE if verbose configuration is enabled
FUNCTION ( MESSAGE_VERBOSE THE_MESSAGE )
IF ( QMC_VERBOSE_CONFIGURATION )
MESSAGE( ${THE_MESSAGE} )
MESSAGE(STATUS ${THE_MESSAGE})
ENDIF()
ENDFUNCTION()

View File

@ -4,6 +4,7 @@
# MODULE_NAME - input, name of module to test for
# MODULE_PRESENT - output - True/False based on success of the import
FUNCTION (TEST_PYTHON_MODULE MODULE_NAME MODULE_PRESENT)
MESSAGE_VERBOSE("Checking import python module ${MODULE_NAME}")
EXECUTE_PROCESS(
COMMAND ${qmcpack_SOURCE_DIR}/tests/scripts/test_import.py ${MODULE_NAME}
OUTPUT_VARIABLE TMP_OUTPUT_VAR

View File

@ -29,6 +29,8 @@ SET (PROJECT_CMAKE ${qmcpack_SOURCE_DIR}/CMake)
SET(QMCPACK_UNIT_TEST_DIR ${qmcpack_BINARY_DIR}/tests/bin)
INCLUDE(CMake/macros.cmake)
######################################################################
# Verify Python3 available
######################################################################
@ -59,7 +61,6 @@ ENDIF()
######################################################################
# CTest
######################################################################
INCLUDE( "${qmcpack_SOURCE_DIR}/CMake/macros.cmake" )
SET( DROP_METHOD "http" )
SET( DROP_SITE "cdash.qmcpack.org" )
SET( DROP_LOCATION "/CDash/submit.php?project=QMCPACK" )
@ -261,6 +262,9 @@ SET(BUILD_FCIQMC 0 CACHE BOOL "Build with FCIQMC")
OPTION(QMC_BUILD_STATIC "Link to static libraries" OFF)
OPTION(ENABLE_TIMERS "Enable internal timers" ON)
OPTION(ENABLE_STACKTRACE "Enable use of boost::stacktrace" OFF)
OPTION(USE_VTUNE_API "Enable use of VTune ittnotify APIs" OFF)
CMAKE_DEPENDENT_OPTION(USE_VTUNE_TASKS "USE VTune ittnotify task annotation" OFF "ENABLE_TIMERS AND USE_VTUNE_API" OFF)
CMAKE_DEPENDENT_OPTION(USE_NVTX_API "Enable/disable NVTX regions in CUDA code." OFF "ENABLE_TIMERS AND (QMC_CUDA OR ENABLE_CUDA)" OFF)
######################################################################
# Install options
@ -709,22 +713,22 @@ IF(QMC_CUDA OR ENABLE_CUDA)
SET(HAVE_CUDA 1)
MESSAGE(" CUDA_NVCC_FLAGS=${CUDA_NVCC_FLAGS}")
ELSE(QMC_CUDA OR ENABLE_CUDA)
MESSAGE(STATUS "Disabling CUDA")
MESSAGE(STATUS "CUDA disabled")
ENDIF(QMC_CUDA OR ENABLE_CUDA)
OPTION(USE_NVTX_API "Enable/disable NVTX regions in CUDA code." OFF)
IF(USE_NVTX_API)
IF(HAVE_CUDA OR ENABLE_OFFLOAD)
FIND_LIBRARY(NVTX_API_LIB
NAME nvToolsExt
HINTS ${CUDA_TOOLKIT_ROOT_DIR}
PATH_SUFFIXES lib lib64)
IF(NOT NVTX_API_LIB)
MESSAGE(FATAL_ERROR "USE_NVTX_API set but NVTX_API_LIB not found")
ENDIF(NOT NVTX_API_LIB)
MESSAGE("CUDA nvToolsExt library: ${NVTX_API_LIB}")
LINK_LIBRARIES(${NVTX_API_LIB})
ENDIF(HAVE_CUDA OR ENABLE_OFFLOAD)
MESSAGE(STATUS "Enabling use of CUDA NVTX APIs")
FIND_LIBRARY(NVTX_API_LIB
NAME nvToolsExt
HINTS ${CUDA_TOOLKIT_ROOT_DIR}
PATH_SUFFIXES lib lib64)
IF(NOT NVTX_API_LIB)
MESSAGE(FATAL_ERROR "USE_NVTX_API set but NVTX_API_LIB not found")
ENDIF(NOT NVTX_API_LIB)
MESSAGE("CUDA nvToolsExt library: ${NVTX_API_LIB}")
LINK_LIBRARIES(${NVTX_API_LIB})
ELSE()
MESSAGE(STATUS "CUDA NVTX APIs disabled")
ENDIF(USE_NVTX_API)
#-------------------------------------------------------------------
@ -762,25 +766,20 @@ ENDIF(ENABLE_HIP)
#include qmcpack/src build/src
INCLUDE_DIRECTORIES( ${PROJECT_SOURCE_DIR}/src ${PROJECT_BINARY_DIR}/src)
IF (USE_VTUNE_TASKS)
IF (NOT ENABLE_TIMERS)
MESSAGE(FATAL_ERROR "USE_VTUNE_TASKS is set, but timers are not enabled. Set ENABLE_TIMERS=ON.")
ENDIF()
SET(USE_VTUNE_API 1)
ENDIF()
IF (USE_VTUNE_API)
include(CheckIncludeFileCXX)
CHECK_INCLUDE_FILE_CXX(ittnotify.h HAVE_ITTNOTIFY_H)
IF (NOT HAVE_ITTNOTIFY_H)
MESSAGE(FATAL_ERROR "USE_VTUNE_API is defined, but the ittnotify.h include file is not found. Check that the correct include directory is present in CMAKE_CXX_FLAGS.")
ENDIF()
MESSAGE(STATUS "Enabling use of VTune ittnotify APIs")
FIND_PATH(VTUNE_ITTNOTIFY_INCLUDE_DIR ittnotify.h HINTS ${VTUNE_ROOT} $ENV{VTUNE_ROOT} PATH_SUFFIXES include REQUIRED)
MESSAGE(STATUS "Found VTUNE_ITTNOTIFY_INCLUDE_DIR ${VTUNE_ITTNOTIFY_INCLUDE_DIR}")
FIND_LIBRARY(VTUNE_ITTNOTIFY_LIBRARY ittnotify HINTS ${VTUNE_ROOT} $ENV{VTUNE_ROOT} PATH_SUFFIXES lib64 lib REQUIRED)
MESSAGE(STATUS "Found VTUNE_ITTNOTIFY_LIBRARY ${VTUNE_ITTNOTIFY_LIBRARY}")
FIND_LIBRARY(VTUNE_ITTNOTIFY_LIBRARY ittnotify)
IF (NOT VTUNE_ITTNOTIFY_LIBRARY)
MESSAGE(FATAL_ERROR "USE_VTUNE_API is defined, but the ittnotify library is not found. Check that correct library path is present in CMAKE_LIBRARY_PATH.")
INCLUDE_DIRECTORIES(${VTUNE_ITTNOTIFY_INCLUDE_DIR})
LINK_LIBRARIES(${VTUNE_ITTNOTIFY_LIBRARY})
IF (USE_VTUNE_TASKS)
MESSAGE(STATUS "VTune ittnotify tasks enabled")
ENDIF()
LINK_LIBRARIES("${VTUNE_ITTNOTIFY_LIBRARY}")
ELSE()
MESSAGE(STATUS "VTune ittnotify APIs disabled")
ENDIF()
OPTION(QMC_EXP_THREADING "Experimental non openmp threading models" OFF)

View File

@ -3,22 +3,33 @@
Development Guide
=================
The section gives guidance on how to extend the functionality of QMCPACK. Future examples will likely include topics such as the
addition of a Jastrow function or a new QMC method.
QMCPACK coding standards
------------------------
This chapter presents what we collectively have agreed are best practices for the code. This includes formatting style, naming
conventions, documentation conventions, and certain prescriptions for C++ language use. At the moment only the formatting can be
enforced in an objective fashion.
New development should follow these guidelines, and contributors are expected to adhere to them as they represent an integral part
of our effort to continue QMCPACK as a world-class, sustainable QMC code. Although some of the source code has a ways to go to
live up to these ideas, new code, even in old files, should follow the new conventions not the local conventions of the file
whenever possible. Work on the code with continuous improvement in mind rather than a commitment to stasis.
The `current workflow conventions`_ for the project are described in the wiki on the GitHub repository. It will save you and all
the maintainers considerable time if you read these and ask questions up front.
A PR should follow these standards before inclusion in the mainline. You can be sure of properly following the formatting
conventions if you use clang-format. The mechanics of clang-format setup and use can be found at
https://github.com/QMCPACK/qmcpack/wiki/Source-formatting.
The clang-format file found at ``qmcpack/src/.clang-format`` should be run over all code touched in a PR before a pull request is
prepared. We also encourage developers to run clang-tidy with the ``qmcpack/src/.clang-tidy`` configuration over all new code.
As much as possible, try to break up refactoring, reformatting, features, and bug fixes into separate, small PRs. Aim for something
that would take a reviewer no more than an hour. In this way we can maintain a good collective development velocity.
.. _current workflow conventions: https://github.com/QMCPACK/qmcpack/wiki/Development-workflow
@ -33,7 +44,7 @@ Each file should start with the header.
// This file is distributed under the University of Illinois/NCSA Open Source License.
// See LICENSE file in top directory for details.
//
// Copyright (c) 2020 QMCPACK developers
// Copyright (c) 2021 QMCPACK developers
//
// File developed by: Name, email, affiliation
//
@ -45,37 +56,78 @@ If you make significant changes to an existing file, add yourself to the list of
File organization
~~~~~~~~~~~~~~~~~
Header files should be placed in the same directory as their implementations. Unit tests should be written for all new
functionality. These tests should be placed in a ``tests`` subdirectory below the implementations.
File names
~~~~~~~~~~
Each class should be defined in a separate file with the same name as the class name. Use separate ``.cpp`` implementation files
whenever possible to aid in incremental compilation.
The filenames of tests are composed of the prefix ``test_`` and the filename of the object tested. The filenames of *fake* and
*mock* objects used in tests are composed of the prefixes ``fake_`` and ``mock_``, respectively, and the filename of the object
that is imitated.
Header files
~~~~~~~~~~~~
All header files should be self-contained (i.e., not dependent on following any other header when it is included). Nor should they
include files that are not necessary for their use (i.e., headers needed only by the implementation). Implementation files should
not include files only for the benefit of files they include.
There are many header files that currently violate this. Each header must use ``#define`` guards to prevent multiple inclusion.
The symbol name of the ``#define`` guards should be ``NAMESPACE(s)_CLASSNAME_H``.
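As a minimal sketch, a guard for a hypothetical class ``MyClass`` in the ``qmcplusplus`` namespace would look like:

::

  // MyClass.h -- hypothetical example used only to illustrate the guard naming
  #ifndef QMCPLUSPLUS_MYCLASS_H
  #define QMCPLUSPLUS_MYCLASS_H

  namespace qmcplusplus
  {
  class MyClass
  {
  };
  } // namespace qmcplusplus

  #endif // QMCPLUSPLUS_MYCLASS_H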
Includes
~~~~~~~~
Related header files should be included without any path. Header files from external projects and standard libraries should be
included using the ``<iostream>`` convention, while headers that are part of the QMCPACK project should be included using the
``"our_header.h"`` convention.
We are now using a new header file inclusion style following the modern CMake transition in QMCPACK, while the legacy code may
still use the legacy style. Newly written code and refactored code should be transitioned to the new style.
New style for modern CMake
^^^^^^^^^^^^^^^^^^^^^^^^^^
In QMCPACK, include paths are handled by modern CMake target dependencies. Every top-level folder is at least one target. For
example, ``src/Particle/CMakeLists.txt`` defines the `qmcparticle` target. It propagates the include path ``qmcpack/src/Particle``
to compile command lines in CMake via
::
TARGET_INCLUDE_DIRECTORIES(qmcparticle PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}")
For this reason, the file ``qmcpack/src/Particle/Lattice/ParticleBConds3DSoa.h`` should be included as
::
#include "Lattice/ParticleBConds3DSoa.h"
If the compiled file is not part of the same target as `qmcparticle`, the target it belongs to should have a dependency on
`qmcparticle`. For example, test source files under ``qmcpack/src/Particle/tests`` are not part of `qmcparticle` and thus require
the following additional CMake setting
::
TARGET_LINK_LIBRARIES(${UTEST_EXE} qmcparticle)
Legacy style
^^^^^^^^^^^^
Header files should be included with the full path based on the ``src`` directory. For example, the file
``qmcpack/src/QMCWaveFunctions/SPOSet.h`` should be included as
::
#include "QMCWaveFunctions/SPOSet.h"
Even if the included file is located in the same directory as the including file, this rule should be obeyed.
Ordering
^^^^^^^^
For readability, we suggest using the following standard order of includes:
@ -94,7 +146,9 @@ In each section the included files should be sorted in alphabetical order.
Naming
------
Description and ease of implementation should be balanced such that the code remains self-documenting within a
single terminal window. If an extremely short variable name is used, its scope must be shorter than :math:`\sim 40` lines. An
exception is made for template parameters, which must be in all CAPS.
Namespace names
~~~~~~~~~~~~~~~
@ -104,13 +158,14 @@ Namespace names should be one word, lowercase.
Type and class names
~~~~~~~~~~~~~~~~~~~~
Type and class names should start with a capital letter and have a capital letter for each new word. Underscores (``_``) are not
allowed.
Variable names
~~~~~~~~~~~~~~
Variable names should not begin with a capital letter, which is reserved for type and class names. Underscores (``_``) should be
used to separate words.
Class data members
~~~~~~~~~~~~~~~~~~
@ -134,8 +189,7 @@ Named lambda expressions follow the naming convention for functions:
Macro names
~~~~~~~~~~~
Macro names should be all uppercase and can include underscores (``_``). The underscore is not allowed as first or last character.
Test case and test names
~~~~~~~~~~~~~~~~~~~~~~~~
@ -197,15 +251,15 @@ Do not put the file name after the ``\file`` Doxygen command. Doxygen will fill
Class docs
^^^^^^^^^^
Every class should have a short description (in the header of the file) of what it is and what it does. Comments for public class
member functions follow the same rules as general function comments. Comments for private members are allowed but are not
mandatory.
Function docs
^^^^^^^^^^^^^
For function parameters whose type is non-const reference or pointer to non-const memory, it should be specified if they are input
(In:), output (Out:) or input-output parameters (InOut:).
Example:
@ -238,7 +292,9 @@ Formatting and "style"
Use the provided clang-format style in ``src/.clang-format`` to format ``.h``, ``.hpp``, ``.cu``, and ``.cpp`` files. Many of the following rules will be applied to the code by clang-format, which should allow you to ignore most of them if you always run it on your modified code.
You should use clang-format support and the ``.clang-format`` file with your editor, use a Git pre-commit hook to run clang-format,
or run clang-format manually on every file you modify. However, if you see numerous formatting updates outside of the code you
have modified, first commit the formatting changes in a separate PR.
Indentation
~~~~~~~~~~~
@ -253,8 +309,8 @@ The length of each line of your code should be at most *120* characters.
Horizontal spacing
~~~~~~~~~~~~~~~~~~
No trailing white spaces should be added to any line. Use no space before a comma (``,``) and a semicolon (``;``), and add a space
after them if they are not at the end of a line.
Preprocessor directives
~~~~~~~~~~~~~~~~~~~~~~~
@ -275,8 +331,8 @@ Do not put any space between an unary operator and its argument.
Types
~~~~~
The ``using`` syntax is preferred to ``typedef`` for type aliases. If the actual type is not excessively long or complex, simply
use it; renaming simple types makes code less understandable.
Pointers and references
~~~~~~~~~~~~~~~~~~~~~~~
@ -306,10 +362,8 @@ The angle brackets of templates should not have any external or internal padding
Vertical spacing
~~~~~~~~~~~~~~~~
Use empty lines when it helps to improve the readability of the code, but do not use too many. Do not use empty lines after a
brace that opens a scope or before a brace that closes a scope. Each file should contain an empty line at the end of the file.
Some editors add an empty line automatically, some do not.
Variable declarations and definitions
@ -352,9 +406,8 @@ Variable declarations and definitions
Function declarations and definitions
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The return type should be on the same line as the function name. Parameters should also be on the same line unless they do not fit
on it, in which case one parameter per line aligned with the first parameter should be used.
Also include the parameter names in the declaration of a function, that is,
@ -366,11 +419,11 @@ Also include the parameter names in the declaration of a function, that is,
// avoid
double function(double, double, double);
//dont do this
// dont do this
double function(BigTemplatedSomething<double> a, BigTemplatedSomething<double> b,
BigTemplatedSomething<double> c);
//do this
// do this
double function(BigTemplatedSomething<double> a,
BigTemplatedSomething<double> b,
BigTemplatedSomething<double> c);
@ -530,9 +583,9 @@ Examples:
Namespace formatting
~~~~~~~~~~~~~~~~~~~~
The content of namespaces is not indented. A comment should indicate when a namespace is closed. (clang-format will add these if
absent). If nested namespaces are used, a comment with the full namespace is required after opening a set of namespaces or an
inner namespace.
Examples:
@ -572,22 +625,28 @@ Examples:
QMCPACK C++ guidance
--------------------
The guidance here, like any advice on how to program, should not be treated as a set of rules but rather the hard-won wisdom of
many hours of suffering development. In the past, many rules were ignored, and the absolute worst results of that will affect
whatever code you need to work with. Your PR should go much smoother if you do not ignore them.
Encapsulation
~~~~~~~~~~~~~
A class is not just a naming scheme for a set of variables and functions. It should provide a logical set of methods, could
contain the state of a logical object, and might allow access to object data through a well-defined interface of related variables,
while maximally preserving the ability to change the internal implementation of the class.
Do not use ``struct`` as a way to avoid controlling access to the class. Only in the rare cases where a class is a fully public
data structure is ``struct`` appropriate. Ignore (or fix one of) the many examples of this in QMCPACK.
Do not use inheritance primarily as a means to break encapsulation. If your class could aggregate or compose another class, do
that, and access it solely through its public interface. This will reduce dependencies.
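A minimal sketch of composition accessed only through a public interface (the classes here are hypothetical, not QMCPACK types):

::

  #include <string>

  class LogBuffer
  {
  public:
    void append(const std::string& s) { data_ += s; }
  private:
    std::string data_;
  };

  // WalkerLogger composes a LogBuffer instead of inheriting from it
  // and reaches it only through its public append() method.
  class WalkerLogger
  {
  public:
    void log(const std::string& msg) { buffer_.append(msg); }
  private:
    LogBuffer buffer_;
  };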
Casting
~~~~~~~
In C++ source, avoid C style casts; they are difficult to search for and imprecise in function. An exception is made for
controlling implicit conversion of simple numerical types.
Explicit C++ style casts make it clear what the safety of the cast is and what sort of conversion is expected to be possible.
@ -609,8 +668,8 @@ Explicit C++ style casts make it clear what the safety of the cast is and what s
Pre-increment and pre-decrement
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Use the pre-increment (pre-decrement) operator when a variable is incremented (decremented) and the value of the expression is not
used. In particular, use the pre-increment (pre-decrement) operator for loop counters where i is not used:
::
@ -624,7 +683,8 @@ In particular, use the pre-increment (pre-decrement) operator for loop counters
doSomething(i);
}
The post-increment and post-decrement operators create an unnecessary copy that the compiler cannot optimize away in the case of
iterators or other classes with overloaded increment and decrement operators.
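A schematic of where that copy comes from (an illustrative iterator, not a real QMCPACK class):

::

  struct Iter
  {
    int* p;
    Iter& operator++() { ++p; return *this; }                   // pre-increment: advances in place, no copy
    Iter operator++(int) { Iter old(*this); ++p; return old; }  // post-increment: must return a copy of the old state
  };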
Alternative operator representations
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -664,23 +724,47 @@ Use of const
int getCount() const { return count_;}
}
Smart pointers
~~~~~~~~~~~~~~
Use of smart pointers is being adopted to help make QMCPACK memory-leak free. Prior to C++11, C++ used C-style pointers. A C-style
pointer can have several meanings, and the ownership of a piece of heap memory may not be clear. This leads to confusion and causes
memory leaks if pointers are not managed properly. Since C++11, smart pointers resolve this issue; in addition, they require
developers to think about the ownership and lifetime of declared pointer objects.
std::unique_ptr
^^^^^^^^^^^^^^^
A unique pointer is the unique owner of a piece of allocated memory. Pointers in per-walker data structures with distinct contents
should be unique pointers. For example, every walker has a trial wavefunction object which contains an SPO object pointer. Because
the SPO object has a vector to store SPO evaluation results, it cannot be shared between two trial wavefunction objects. For this
reason, the SPO object pointer should be a unique pointer.
In QMCPACK, most raw pointers can be directly replaced with ``std::unique_ptr``.
Corresponding uses of the ``new`` operator can be replaced with ``std::make_unique``.
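A minimal sketch of such a replacement (``SPOScratch`` is a hypothetical per-walker object, not an actual QMCPACK class):

::

  #include <memory>
  #include <vector>

  struct SPOScratch
  {
    explicit SPOScratch(int norb) : values(norb) {}
    std::vector<double> values; // per-walker evaluation results, cannot be shared
  };

  void buildWalker()
  {
    // before: raw pointer with manual cleanup
    //   SPOScratch* spo = new SPOScratch(8);  ...  delete spo;
    // after: unique ownership, released automatically at the end of the scope
    auto spo = std::make_unique<SPOScratch>(8);
  }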
std::shared_ptr
^^^^^^^^^^^^^^^
A shared pointer is the shared owner of a piece of allocated memory. Transferring ownership of a pointer from one place to another
should use C++ move semantics rather than shared pointers. Contents shared between walkers may be candidates for shared pointers.
For example, although the Jastrow factor object must be unique per walker, the pointer to the parameter data structure can be a
shared pointer: during Jastrow optimization, any update to the parameter data managed by the shared pointer takes effect
immediately in all the Jastrow objects. In another example, spline coefficients are managed by a shared pointer, which achieves a
single copy in memory shared by an SPOSet and all of its clones.
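A minimal sketch of sharing read-mostly data between an object and its clones (hypothetical types used only for illustration):

::

  #include <memory>
  #include <vector>

  struct SplineCoefs
  {
    std::vector<double> data; // large, read-mostly table
  };

  struct SPOSetLike
  {
    std::shared_ptr<SplineCoefs> coefs; // one copy in memory, shared by all clones
  };

  void cloneExample()
  {
    SPOSetLike master{std::make_shared<SplineCoefs>()};
    SPOSetLike clone{master.coefs}; // the clone shares the same coefficients
  }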
Scalar estimator implementation
-------------------------------
Introduction: Life of a specialized OperatorBase
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Almost all observables in QMCPACK are implemented as specialized derived classes of the OperatorBase base class. Each observable
is instantiated in HamiltonianFactory and added to QMCHamiltonian for tracking. QMCHamiltonian tracks two types of observables:
main and auxiliary. Main observables contribute to the local energy. These observables are elements of the simulated Hamiltonian
such as kinetic or potential energy. Auxiliary observables are expectation values of matrix elements that do not contribute to the
local energy. These observables do not affect the dynamics of the simulation. In the code, the main observables are labeled by the
“physical” flag; the auxiliary observables have “physical” set to false.
Initialization
^^^^^^^^^^^^^^

View File

@ -30,29 +30,15 @@ VTune API
~~~~~~~~~
If the variable ``USE_VTUNE_API`` is set, QMCPACK will check that the
include file (``ittnotify.h``) and the library (``libittnotify.a``) can
be found.
To provide CMake with the VTune paths, add the include path to ``CMAKE_CXX_FLAGS`` and the library path to ``CMAKE_LIBRARY_PATH``.
include file (``ittnotify.h``) and the library (``libittnotify.a``) can be found.
To provide CMake with the VTune search paths, add ``VTUNE_ROOT`` which contains ``include`` and ``lib64`` sub-directories.
An example of options to be passed to CMake:
::
-DCMAKE_CXX_FLAGS=-I/opt/intel/vtune_amplifier_xe/include \
-DCMAKE_LIBRARY_PATH=/opt/intel/vtune_amplifier_xe/lib64
-DUSE_VTUNE_API=ON \
-DVTUNE_ROOT=/opt/intel/vtune_amplifier_xe
Timers as Tasks
~~~~~~~~~~~~~~~
@ -71,6 +57,19 @@ For the command line, set the ``enable-user-tasks`` knob to ``true``. For exampl
Collection with the timers set at "fine" can generate too much task data in the profile.
Collection with the timers at "medium" collects a more reasonable amount of task data.
NVIDIA Tools Extensions
-----------------------
NVIDIA's Tools Extensions (NVTX) API enables programmers to annotate their source code when used with the NVIDIA profilers.
NVTX API
~~~~~~~~
If the variable ``USE_NVTX_API`` is set, QMCPACK will add the library (``libnvToolsExt.so``) to the QMCPACK target. To add NVTX annotations
to a function, it is necessary to include the ``nvToolsExt.h`` header file and then make the appropriate calls into the NVTX API. For more information
about the NVTX API, see https://docs.nvidia.com/cuda/profiler-users-guide/index.html#nvtx. Any additional calls to the NVTX API should be guarded by
the ``USE_NVTX_API`` compiler define.
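For example, a guarded annotation might look like the following sketch (the annotated function is a hypothetical placeholder):

::

  #ifdef USE_NVTX_API
  #include <nvToolsExt.h>
  #endif

  void advanceWalkers() // hypothetical function to be profiled
  {
  #ifdef USE_NVTX_API
    nvtxRangePushA("advanceWalkers"); // open a named NVTX range
  #endif
    // ... work to be timed ...
  #ifdef USE_NVTX_API
    nvtxRangePop(); // close the range
  #endif
  }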
Scitools Understand
-------------------

View File

@ -43,6 +43,7 @@ User's Guide and Developer's Manual
external_tools
contributing
unit_testing
integration_tests
design_features
developing
appendices

View File

@ -0,0 +1,76 @@
.. _integration_tests:
Integration Tests
=================
Unlike unit tests, which require only a specific part of QMCPACK to be built, integration tests require the qmcpack
executable. Tests in this category are based on realistic simulations, although the amount of statistics collected depends on the
sub-category:
* Deterministic integration tests must be 100% reliable, quick to run, and always pass. They usually run one or a few walkers for
  only a few steps and complete in a few seconds. They are used to rapidly identify changes as part of the continuous integration
  testing, to verify installations, and for development work.
* Short integration tests mostly run 16 walkers for a few hundred steps within a minute. These are usually stochastic and should
  pass with very high reliability.
* Long integration tests mostly run 16 walkers in a few thousand steps within 10 minutes. These are usually stochastic and should
pass with very high reliability.
To keep overall testing costs down, electron counts are usually kept small while still being large enough to comprehensively test
the code, e.g., 3-10. The complete test set, except for the long tests, must be runnable on a laptop or modest workstation in a
reasonable amount of time.
Integration test organization
-----------------------------
Integration tests are placed under directories such as ``tests/heg``, ``tests/solids`` and ``tests/molecules`` from the top
directory, with one sub-directory for each simulation system. Each test source directory contains input XML files, orbital h5
files, pseudopotential files and reference data (qmc_ref). These files may be shared by a few tests to minimize duplicated files.
When cmake is invoked in the build directory, one directory per test is created and the files needed by that test are soft-linked
into it; it serves as the working directory when that test is executed. To minimize the number of file operations and keep the
cmake execution fast, there are limitations on the file names used by tests. The allowed filenames are given below and implemented
in the COPY_DIRECTORY_USING_SYMLINK_LIMITED function in CMake/macros.cmake.
::
qmc-ref/qmc_ref for reference data folder.
*.opt.xml/*.ncpp.xml/*.BFD.xml/*.ccECP.xml for pseudo-potential files.
*.py/*.sh for result checking helper scripts.
*.wfj.xml/*.wfnoj.xml/*.wfs.xml for standalone wavefunction input files.
*.structure.xml/*.ptcl.xml for standalone structure/particleset input files.
How to add an integration test
------------------------------
#. Generate reference data using a very long (many blocks >=2000) and possibly wide run (many nodes). This reduces both the
   error bar and the error bar of the error bar (10x more samples than the long test, 100x more than the short test). A folder
   named qmc-ref containing input.xml, scalar.dat, and the output file is required with the commit. The number of blocks should be
   about 200 to avoid large text files (a simple way to obtain these files is to repeat the reference run with 10x fewer blocks and
   10x more steps).
#. Generate the short/long run input files. Use the reference error bar to appropriately estimate the error bar for the long and
   short tests. These error bars are sqrt(10+1) and sqrt(100+1) times larger than the very long reference (see the worked sketch
   after this list). The 10x grading is not a hard requirement, but ref >= 10x long and long >= 10x short are required.
#. Short tests must be less than 20 sec VMC, 1 min OPT/DMC on a 16-core Xeon processor. Long tests are preferably in the 5-10 min
   range. For systems containing more than just a few electrons, submitting only a long test may be appropriate.
#. Deterministic tests require a different approach: use of a fixed seed value, and for example, 3 blocks of 2 steps and a single
walker. The intent of these tests is to exercise the code paths but keep the run short enough that the numerical deviations do
not build up. Different reference data may be needed for mixed precision vs full precision runs.
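A worked sketch of the error-bar arithmetic behind these factors (one plausible reading): the long test uses roughly 10x fewer
samples than the reference and the short test roughly 100x fewer, so their own error bars are about
:math:`\sqrt{10}\,\sigma_{\mathrm{ref}}` and :math:`\sqrt{100}\,\sigma_{\mathrm{ref}}`; comparing a test mean against the reference
mean then combines both uncertainties in quadrature:

.. math::

   \sigma_{\mathrm{long}} \approx \sqrt{10\,\sigma_{\mathrm{ref}}^{2} + \sigma_{\mathrm{ref}}^{2}} = \sqrt{10+1}\,\sigma_{\mathrm{ref}},
   \qquad
   \sigma_{\mathrm{short}} \approx \sqrt{100+1}\,\sigma_{\mathrm{ref}}.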
Suggested procedure to add a test
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#. Study some of the existing tests and their CMakeLists.txt configuration file to see what is required and the typical system
sizes and run lengths used.
#. Perform a run ~30s in length on a 16 core machine (200 blocks min) using the CPU version of the code with 16 MPI and 1 thread
per MPI. Decide if the resulting error bar is meaningful for a test. If so, short and long tests should be created. If not,
possibly only a long test is appropriate.
#. Perform a reference run by increasing steps and blocks by 10x each (2000 blocks) and obtain reference mean and error bars.
   Long and short test error bars are then sqrt(10+1) and sqrt(100+1) times the reference, respectively.
#. Generate reference scalar data by redoing the reference run with 200 blocks and 100x steps. These data should be committed
   in a qmc-ref directory with the test.
#. Create short (1x blocks, 1x steps) and long (1x blocks, 10x steps) input files (200 blocks each). Make one set of input files
for CPU runs (walkers=1) and another for GPU runs (walkers=16).
#. Create CMakeLists.txt by following the example in other tests. CPU runs should include at least a 4 MPI, 4 thread test since
this tests OpenMP, MPI, and any possible interactions between them. A GPU test should have 1 MPI and 16 threads.
#. Create a README file with information describing the tests and the reference data.
#. Check that the tests run properly with ctest on your local machine.
#. Submit a pull request with the final tests.

View File

@ -222,6 +222,8 @@ attribute:
+-----------------------------+------------+--------------------------+---------+-------------------------------------------+
| ``source`` | Text | Any | Ion0 | Particle set with atomic positions. |
+-----------------------------+------------+--------------------------+---------+-------------------------------------------+
| ``skip_checks`` | Text | Yes/no | No | skips checks for ion information in h5 |
+-----------------------------+------------+--------------------------+---------+-------------------------------------------+
.. centered:: Table 3 Options for the ``determinantset`` xml-block associated with B-spline single particle orbital sets.
@ -272,6 +274,10 @@ Additional information:
access host memory via zero-copy. Although the performance penalty
introduced by it is significant, it allows large calculations to go
through.
- ``skip_checks``. When the wave function is converted with convertpw4qmc instead
  of pw2qmcpack, ionic information is missing. This flag bypasses the requirement
  that the ionic information in the eshdf.h5 file match the input xml.
.. _gaussianbasis:

View File

@ -36,6 +36,8 @@ Quantum Monte Carlo Methods
+----------------+--------------+--------------+-------------+---------------------------------+
| ``trace`` | text | | no | ??? |
+----------------+--------------+--------------+-------------+---------------------------------+
| ``profiling`` | text | yes/no | no | Activate resume/pause control |
+----------------+--------------+--------------+-------------+---------------------------------+
| ``checkpoint`` | integer | -1, 0, n | -1 | Checkpoint frequency |
+----------------+--------------+--------------+-------------+---------------------------------+
| ``record`` | integer | n | 0 | Save configuration every n steps |
@ -59,6 +61,17 @@ Additional information:
computing device can be chosen by this switch. With a regular
CPU-only compilation, this option is not effective.
- ``profiling``: Performance profiling tools by default profile complete application executions.
  This is largely unnecessary if the focus is a QMC section rather than the initialization
  and any additional QMC sections used for equilibrating walkers.
  Setting this flag to ``yes`` for the QMC sections of interest and starting the tool with
  data collection paused from the beginning helps reduce the profiling workload
  and the amount of collected data. Additional restrictions may be imposed by the profiling tools.
  For example, NVIDIA profilers can only be turned on and off once, and thus only the first QMC
  section with ``profiling="yes"`` will be profiled.
  VTune instead allows pausing and resuming an unlimited number of times, and thus multiple
  selected QMC sections can be profiled in a single run.
- ``checkpoint``: This enables and disables checkpointing and specifies the
  frequency of output. Possible values are:
@ -214,11 +227,11 @@ Additional information:
recompute) by default when not using mixed precision. Recomputing
introduces a performance penalty dependent on system size.
- ``spinMoves`` Determines whether or not the spin variables are sampled following
:cite:`Melton2016-1` and :cite:`Melton2016-2`. If a relativistic calculation is desired using pseudopotentials,
spin variable sampling is required.
- ``spinMass`` If spin sampling is on using ``spinMoves`` == yes, the spin mass determines the rate
of spin sampling, resulting in an effective spin timestep :math:`\tau_s = \frac{\tau}{\mu_s}`.
An example VMC section for a simple VMC run:
@ -765,7 +778,7 @@ Additional information and recommendations:
- For reporting quantities such as a final energy and associated uncertainty,
an average over many descent steps can be taken. The parameters for
``collection_step`` and ``compute_step`` help automate this task.
After the descent iteration specified by ``collection_step``, a
history of local energy values will be kept for determining a final
error and average, which will be computed and given in the output

View File

@ -1 +1,4 @@
sphinxcontrib.bibtex
sphinx>=3.0
sphinx_rtd_theme
sphinxcontrib-bibtex<2.0

View File

@ -65,7 +65,7 @@ where we now utilize determinants of spinors, as opposed to the usual product of
</sposet_builder>
<determinantset>
<slaterdeterminant>
<determinant id="det" group="u" sposet="myspo" size="10"\>
<determinant id="det" group="u" sposet="myspo" size="10"/>
</slaterdeterminant>
</determinantset>
<jastrow type="One-Body" name="J1" function="bspline" source="ion0" print="yes">

View File

@ -2,6 +2,7 @@
#define CATCH_COMPLEX_APPROX
#include <complex>
#include <limits>
// Copy and modify the Approx class to handle complex numbers

View File

@ -3,6 +3,7 @@
#include <complex>
#include <cmath>
#include <limits>
// Copy and modify the ComplexApprox class to handle complex numbers for log(complex)

View File

@ -60,6 +60,7 @@ try:
qmcpack_input = import_nexus_module('qmcpack_input')
QmcpackInput = qmcpack_input.QmcpackInput
spindensity_xml = qmcpack_input.spindensity
spindensity_new_xml = qmcpack_input.spindensity_new # temporary
del qmcpack_input
except:
from generic import obj
@ -71,6 +72,7 @@ except:
from numerics import simplestats,simstats
from qmcpack_input import QmcpackInput
from qmcpack_input import spindensity as spindensity_xml
from qmcpack_input import spindensity_new as spindensity_new_xml # temporary
#end try
@ -901,29 +903,55 @@ class QMCDensityProcessor(QDBase):
structure = s
cell = s.axes.copy()
qi.pluralize()
est = qi.get('estimators')
if est is not None and len(est)==1 and 'LocalEnergy' in est:
ham = qi.get('hamiltonian')
# collect every bunch of estimators strewn throughout the input file
est_sources = []
ham = qi.get('hamiltonian')
if ham is not None:
if 'estimators' in ham:
est = ham.estimators
elif len(ham)==1:
est = ham.first().get('estimators')
est_sources.append(ham.estimators)
elif len(ham)>0:
ham = ham.first() # hamiltonian collection
if 'estimators' in ham:
est_sources.append(ham.estimators)
#end if
#end if
#end if
if est is not None:
opt.grids = obj()
calcs = qi.get('calculations')
if calcs is not None:
for series in sorted(calcs.keys()):
qmc = calcs[series]
if 'estimators' in qmc:
est_sources.append(qmc.estimators)
#end if
#end for
#end if
# collect input information from the first spin density instance with a particular name
# assume all other instances that share this name have identical inputs
grids = obj()
for est in est_sources:
for name,xml in est.items():
if isinstance(xml,spindensity_xml):
# the if statement below is a nasty hack to temporarily support
# the name mismatch between the input file and stat.h5 currently
# enforced by the (new) batched spin density class in qmcpack
if isinstance(xml,spindensity_new_xml):
name = 'SpinDensity' # override all user input, just like qmcpack
#end if
if name not in grids and isinstance(xml,(spindensity_xml,spindensity_new_xml)):
sd = xml
if 'grid' in sd:
opt.grids[name] = sd.grid
grids[name] = sd.grid
elif 'dr' in sd:
opt.grids[name] = get_grid(s,sd.dr)
grids[name] = get_grid(s,sd.dr)
else:
self.error('could not identify grid data for spin density named "{0}"\nin QMCPACK input file: {1}'.format(name,opt.input))
#end if
#end if
#end for
#end for
if len(grids)>0:
opt.grids = grids
else:
self.error('Could not find any spin density estimators in the input file provided.\nInput file provided: {}'.format(opt.input))
#end if
#end if

View File

@ -191,7 +191,7 @@ def process_scalar_files(scalar_files,equils=None,reblock_factors=None,series_st
error('must provide one equilibration length per scalar file\nnumber of equils provided: {0}\nnumber of scalar files provided: {1}\nequils provided: {2}\nscalar files provided: {3}'.format(len(equils),len(scalar_files),equils,scalar_files))
#end if
if isinstance(reblock_factors,int):
if isinstance(reblock_factors,(int,np.int_)):
reblock_factors = len(scalar_files)*[reblock_factors]
elif reblock_factors is not None and len(reblock_factors)!=len(scalar_files):
error('must provide one reblocking factor per scalar file\nnumber of reblock_factors provided: {0}\nnumber of scalar files provided: {1}\nreblock_factors provided: {2}\nscalar files provided: {3}'.format(len(reblock_factors),len(scalar_files),reblock_factors,scalar_files))

View File

@ -2114,8 +2114,13 @@ class Cori(NerscMachine):
self.procs_per_node = 2
self.cores_per_node = 32
self.ram_per_node = 128
elif 'amd' in job.constraint:
self.nodes = 20
self.procs_per_node = 2
self.cores_per_node = 32
self.ram_per_node = 2048
else:
self.error('SLURM input "constraint" must contain either "knl" or "haswell"\nyou provided: {0}'.format(job.constraint))
self.error('SLURM input "constraint" must contain either "knl", "haswell", or "amd" on Cori\nyou provided: {0}'.format(job.constraint))
#end if
if job.core_spec is not None:
self.cores_per_node -= job.core_spec
@ -2128,8 +2133,10 @@ class Cori(NerscMachine):
hyperthreads = 4
elif 'haswell' in job.constraint:
hyperthreads = 2
elif 'amd' in job.constraint:
hyperthreads = 2
else:
self.error('SLURM input "constraint" must contain either "knl" or "haswell"\nyou provided: {0}'.format(job.constraint))
self.error('SLURM input "constraint" must contain either "knl", "haswell" or "amd" on Cori\nyou provided: {0}'.format(job.constraint))
#end if
cpus_per_task = int(floor(float(self.cores_per_node)/job.processes_per_node))*hyperthreads
c='#!/bin/bash\n'

View File

@ -2259,7 +2259,9 @@ def generate_scf_input(prefix = 'pwscf',
use_folded = True,
group_atoms = False,
la2F = None,
nbnd = None
nbnd = None,
lspinorb = False,
noncolin = False,
):
if pseudos is None:
pseudos = []
@ -2328,6 +2330,12 @@ def generate_scf_input(prefix = 'pwscf',
pseudopotentials = pseudopotentials
)
if noncolin or lspinorb:
pw.system.set(
noncolin = noncolin or lspinorb,
lspinorb = lspinorb
)
#end if
if input_dft!=None:
pw.system.input_dft = input_dft
#end if
@ -2384,7 +2392,7 @@ def generate_scf_input(prefix = 'pwscf',
if not isinstance(start_mag,(dict,obj)):
PwscfInput.class_error('input start_mag must be of type dict or obj')
#end if
pw.system.start_mag = deepcopy(start_mag)
pw.system.starting_magnetization = deepcopy(start_mag)
#if 'tot_magnetization' in pw.system:
# del pw.system.tot_magnetization
##end if
@ -2582,7 +2590,7 @@ def generate_relax_input(prefix = 'pwscf',
if not isinstance(start_mag,(dict,obj)):
PwscfInput.class_error('input start_mag must be of type dict or obj')
#end if
pw.system.start_mag = deepcopy(start_mag)
pw.system.starting_magnetization = deepcopy(start_mag)
#if 'tot_magnetization' in pw.system:
# del pw.system.tot_magnetization
##end if
@ -2648,6 +2656,7 @@ def generate_relax_input(prefix = 'pwscf',
def generate_vcrelax_input(
press = None, # None = use pw.x default
cell_factor = None,
cell_dofree = None,
forc_conv_thr = None,
ion_dynamics = None,
press_conv_thr = None,
@ -2675,6 +2684,9 @@ def generate_vcrelax_input(
if press_conv_thr is not None:
pw.cell.set(press_conv_thr=press_conv_thr)
# end if
if cell_dofree is not None:
pw.cell.set(cell_dofree=cell_dofree)
# end if
return pw
# end def

View File

@ -2223,6 +2223,14 @@ class spindensity(QIxml):
identifier = 'name'
#end class spindensity
class spindensity_new(QIxml): # temporary
tag = 'estimator'
attributes = ['type','name','report','save_memory']
parameters = ['dr','grid','cell','center','corner','voronoi','test_moves']
write_types = obj(report=yesno,save_memory=yesno)
identifier = 'name'
#end class spindensity_new
class structurefactor(QIxml):
tag = 'estimator'
attributes = ['type','name','report']
@ -2323,6 +2331,7 @@ estimator = QIxmlFactory(
nearestneighbors = nearestneighbors,
dm1b = dm1b,
spindensity = spindensity,
spindensity_new = spindensity_new, # temporary
structurefactor = structurefactor,
force = force,
forwardwalking = forwardwalking,
@ -2520,6 +2529,15 @@ class rmc(QIxml):
write_types = obj(collect=yesno)
#end class rmc
class vmc_batch(QIxml):
collection_id = 'qmc'
tag = 'qmc'
attributes = ['method','move']
elements = ['estimator']
parameters = ['walkers','warmupsteps','blocks','steps','substeps','timestep','usedrift']
write_types = obj(usedrift=yesno)
#end class vmc_batch
class wftest(QIxml):
collection_id = 'qmc'
tag = 'qmc'
@ -2539,7 +2557,7 @@ class setparams(QIxml):
qmc = QIxmlFactory(
name = 'qmc',
types = dict(linear=linear,cslinear=cslinear,vmc=vmc,dmc=dmc,loop=loop,optimize=optimize_qmc,wftest=wftest,rmc=rmc,setparams=setparams),
types = dict(linear=linear,cslinear=cslinear,vmc=vmc,dmc=dmc,loop=loop,optimize=optimize_qmc,wftest=wftest,rmc=rmc,setparams=setparams,vmc_batch=vmc_batch),
typekey = 'method',
default = 'loop'
)
@ -2596,10 +2614,11 @@ classes = [ #standard classes
localenergy,energydensity,spacegrid,origin,axis,wavefunction,
determinantset,slaterdeterminant,basisset,grid,determinant,occupation,
jastrow1,jastrow2,jastrow3,
correlation,coefficients,loop,linear,cslinear,vmc,dmc,
correlation,coefficients,loop,linear,cslinear,vmc,dmc,vmc_batch,
atomicbasisset,basisgroup,init,var,traces,scalar_traces,particle_traces,array_traces,
reference_points,nearestneighbors,neighbor_trace,dm1b,
coefficient,radfunc,spindensity,structurefactor,
spindensity_new, # temporary
sposet,bspline_builder,composite_builder,heg_builder,include,
multideterminant,detlist,ci,mcwalkerset,csf,det,
optimize,cg_optimizer,flex_optimizer,optimize_qmc,wftest,kspace_jastrow,
@ -2816,6 +2835,9 @@ pressure.defaults.set(
momentum.defaults.set(
type='momentum'
)
spindensity_new.defaults.set( # temporary
type='spindensity_new',name='SpinDensityNew'
)
linear.defaults.set(
@ -2912,6 +2934,9 @@ dmc.defaults.set(
#nonlocalmoves = True,
#estimators = classcollection(localenergy)
)
vmc_batch.defaults.set(
method='vmc_batch',move='pbyp',
)

View File

@ -253,7 +253,7 @@ class WavefunctionAnalyzer(PropertyAnalyzer):
else:
rcut = rcut_cell
#end if
coeff = corr.coefficients.coeff
coeff = corr.coefficients.coeff.flatten()
jastrows[jname][jn] = Jastrow1B(func,coeff,rcut)
#end for
#end if
@ -270,7 +270,7 @@ class WavefunctionAnalyzer(PropertyAnalyzer):
#end if
s1 = corr.speciesa
s2 = corr.speciesb
coeff = corr.coefficients.coeff
coeff = corr.coefficients.coeff.flatten()
jastrows[jname][jn] = Jastrow2B(func,coeff,s1,s2,rcut)
#end for
#end if

View File

@ -38,7 +38,7 @@
namespace ma
{
inline static void gesvd(char jobu,
inline void gesvd(char jobu,
char jobvt,
int m,
int n,
@ -57,7 +57,7 @@ inline static void gesvd(char jobu,
//sgesvd(jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, work, lwork, info);
}
inline static void gesvd(char jobu,
inline void gesvd(char jobu,
char jobvt,
int m,
int n,
@ -76,7 +76,7 @@ inline static void gesvd(char jobu,
//dgesvd(jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, work, lwork, info);
}
inline static void gesvd(char jobu,
inline void gesvd(char jobu,
char jobvt,
int m,
int n,
@ -96,7 +96,7 @@ inline static void gesvd(char jobu,
//cgesvd(jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, work, lwork, rwork, info);
}
inline static void gesvd(char jobu,
inline void gesvd(char jobu,
char jobvt,
int m,
int n,
@ -117,7 +117,7 @@ inline static void gesvd(char jobu,
}
template<typename T>
inline static void gesvd_bufferSize(const int m, const int n, T* a, int& lwork)
inline void gesvd_bufferSize(const int m, const int n, T* a, int& lwork)
{
T work, S;
int status;
@ -127,7 +127,7 @@ inline static void gesvd_bufferSize(const int m, const int n, T* a, int& lwork)
}
template<typename T>
inline static void gesvd_bufferSize(const int m, const int n, std::complex<T>* a, int& lwork)
inline void gesvd_bufferSize(const int m, const int n, std::complex<T>* a, int& lwork)
{
std::complex<T> work;
T rwork;
@ -137,7 +137,7 @@ inline static void gesvd_bufferSize(const int m, const int n, std::complex<T>* a
lwork = int(real(work));
}
inline static void geev(char* jobvl,
inline void geev(char* jobvl,
char* jobvr,
int* n,
double* a,
@ -155,7 +155,7 @@ inline static void geev(char* jobvl,
dgeev(jobvl, jobvr, n, a, lda, alphar, alphai, vl, ldvl, vr, ldvr, work, lwork, info);
}
inline static void geev(char* jobvl,
inline void geev(char* jobvl,
char* jobvr,
int* n,
float* a,
@ -173,7 +173,7 @@ inline static void geev(char* jobvl,
sgeev(jobvl, jobvr, n, a, lda, alphar, alphai, vl, ldvl, vr, ldvr, work, lwork, info);
}
inline static void ggev(char* jobvl,
inline void ggev(char* jobvl,
char* jobvr,
int* n,
double* a,
@ -194,7 +194,7 @@ inline static void ggev(char* jobvl,
dggev(jobvl, jobvr, n, a, lda, b, ldb, alphar, alphai, beta, vl, ldvl, vr, ldvr, work, lwork, info);
}
inline static void ggev(char* jobvl,
inline void ggev(char* jobvl,
char* jobvr,
int* n,
float* a,
@ -215,7 +215,7 @@ inline static void ggev(char* jobvl,
sggev(jobvl, jobvr, n, a, lda, b, ldb, alphar, alphai, beta, vl, ldvl, vr, ldvr, work, lwork, info);
}
inline static void hevr(char& JOBZ,
inline void hevr(char& JOBZ,
char& RANGE,
char& UPLO,
int& N,
@ -256,7 +256,7 @@ inline static void hevr(char& JOBZ,
}
}
inline static void hevr(char& JOBZ,
inline void hevr(char& JOBZ,
char& RANGE,
char& UPLO,
int& N,
@ -297,7 +297,7 @@ inline static void hevr(char& JOBZ,
}
}
inline static void hevr(char& JOBZ,
inline void hevr(char& JOBZ,
char& RANGE,
char& UPLO,
int& N,
@ -338,7 +338,7 @@ inline static void hevr(char& JOBZ,
}
}
inline static void hevr(char& JOBZ,
inline void hevr(char& JOBZ,
char& RANGE,
char& UPLO,
int& N,
@ -379,7 +379,7 @@ inline static void hevr(char& JOBZ,
}
}
inline static void gvx(int ITYPE,
inline void gvx(int ITYPE,
char& JOBZ,
char& RANGE,
char& UPLO,
@ -417,7 +417,7 @@ inline static void gvx(int ITYPE,
}
}
inline static void gvx(int ITYPE,
inline void gvx(int ITYPE,
char& JOBZ,
char& RANGE,
char& UPLO,
@ -455,7 +455,7 @@ inline static void gvx(int ITYPE,
}
}
inline static void gvx(int ITYPE,
inline void gvx(int ITYPE,
char& JOBZ,
char& RANGE,
char& UPLO,
@ -493,7 +493,7 @@ inline static void gvx(int ITYPE,
}
}
inline static void gvx(int ITYPE,
inline void gvx(int ITYPE,
char& JOBZ,
char& RANGE,
char& UPLO,
@ -531,28 +531,28 @@ inline static void gvx(int ITYPE,
}
}
inline static void getrf_bufferSize(const int n, const int m, float* a, const int lda, int& lwork) { lwork = 0; }
inline static void getrf_bufferSize(const int n, const int m, double* a, const int lda, int& lwork) { lwork = 0; }
inline static void getrf_bufferSize(const int n, const int m, std::complex<float>* a, const int lda, int& lwork)
inline void getrf_bufferSize(const int n, const int m, float* a, const int lda, int& lwork) { lwork = 0; }
inline void getrf_bufferSize(const int n, const int m, double* a, const int lda, int& lwork) { lwork = 0; }
inline void getrf_bufferSize(const int n, const int m, std::complex<float>* a, const int lda, int& lwork)
{
lwork = 0;
}
inline static void getrf_bufferSize(const int n, const int m, std::complex<double>* a, const int lda, int& lwork)
inline void getrf_bufferSize(const int n, const int m, std::complex<double>* a, const int lda, int& lwork)
{
lwork = 0;
}
void static getrf(const int& n, const int& m, double* a, const int& n0, int* piv, int& st, double* work = nullptr)
inline void getrf(const int& n, const int& m, double* a, const int& n0, int* piv, int& st, double* work = nullptr)
{
dgetrf(n, m, a, n0, piv, st);
}
void static getrf(const int& n, const int& m, float* a, const int& n0, int* piv, int& st, float* work = nullptr)
inline void getrf(const int& n, const int& m, float* a, const int& n0, int* piv, int& st, float* work = nullptr)
{
sgetrf(n, m, a, n0, piv, st);
}
void static getrf(const int& n,
inline void getrf(const int& n,
const int& m,
std::complex<double>* a,
const int& n0,
@ -563,7 +563,7 @@ void static getrf(const int& n,
zgetrf(n, m, a, n0, piv, st);
}
void static getrf(const int& n,
inline void getrf(const int& n,
const int& m,
std::complex<float>* a,
const int& n0,
@ -574,31 +574,31 @@ void static getrf(const int& n,
cgetrf(n, m, a, n0, piv, st);
}
inline static void getrfBatched(const int n, float** a, int lda, int* piv, int* info, int batchSize)
inline void getrfBatched(const int n, float** a, int lda, int* piv, int* info, int batchSize)
{
for (int i = 0; i < batchSize; i++)
getrf(n, n, a[i], lda, piv + i * n, *(info + i));
}
inline static void getrfBatched(const int n, double** a, int lda, int* piv, int* info, int batchSize)
inline void getrfBatched(const int n, double** a, int lda, int* piv, int* info, int batchSize)
{
for (int i = 0; i < batchSize; i++)
getrf(n, n, a[i], lda, piv + i * n, *(info + i));
}
inline static void getrfBatched(const int n, std::complex<double>** a, int lda, int* piv, int* info, int batchSize)
inline void getrfBatched(const int n, std::complex<double>** a, int lda, int* piv, int* info, int batchSize)
{
for (int i = 0; i < batchSize; i++)
getrf(n, n, a[i], lda, piv + i * n, *(info + i));
}
inline static void getrfBatched(const int n, std::complex<float>** a, int lda, int* piv, int* info, int batchSize)
inline void getrfBatched(const int n, std::complex<float>** a, int lda, int* piv, int* info, int batchSize)
{
for (int i = 0; i < batchSize; i++)
getrf(n, n, a[i], lda, piv + i * n, *(info + i));
}
inline static void getri_bufferSize(int n, float const* a, int lda, int& lwork)
inline void getri_bufferSize(int n, float const* a, int lda, int& lwork)
{
float work;
int status;
@ -607,7 +607,7 @@ inline static void getri_bufferSize(int n, float const* a, int lda, int& lwork)
lwork = int(work);
}
inline static void getri_bufferSize(int n, double const* a, int lda, int& lwork)
inline void getri_bufferSize(int n, double const* a, int lda, int& lwork)
{
double work;
int status;
@ -616,7 +616,7 @@ inline static void getri_bufferSize(int n, double const* a, int lda, int& lwork)
lwork = int(work);
}
inline static void getri_bufferSize(int n, std::complex<float> const* a, int lda, int& lwork)
inline void getri_bufferSize(int n, std::complex<float> const* a, int lda, int& lwork)
{
std::complex<float> work;
int status;
@ -625,7 +625,7 @@ inline static void getri_bufferSize(int n, std::complex<float> const* a, int lda
lwork = int(real(work));
}
inline static void getri_bufferSize(int n, std::complex<double> const* a, int lda, int& lwork)
inline void getri_bufferSize(int n, std::complex<double> const* a, int lda, int& lwork)
{
std::complex<double> work;
int status;
@ -635,12 +635,12 @@ inline static void getri_bufferSize(int n, std::complex<double> const* a, int ld
}
void static getri(int n, float* restrict a, int n0, int const* restrict piv, float* restrict work, int n1, int& status)
inline void getri(int n, float* restrict a, int n0, int const* restrict piv, float* restrict work, int n1, int& status)
{
sgetri(n, a, n0, piv, work, n1, status);
}
void static getri(int n,
inline void getri(int n,
double* restrict a,
int n0,
int const* restrict piv,
@ -651,7 +651,7 @@ void static getri(int n,
dgetri(n, a, n0, piv, work, n1, status);
}
void static getri(int n,
inline void getri(int n,
std::complex<float>* restrict a,
int n0,
int const* restrict piv,
@ -662,7 +662,7 @@ void static getri(int n,
cgetri(n, a, n0, piv, work, n1, status);
}
void static getri(int n,
inline void getri(int n,
std::complex<double>* restrict a,
int n0,
int const* restrict piv,
@ -674,7 +674,7 @@ void static getri(int n,
}
template<typename T>
inline static void getriBatched(int n, T** a, int lda, int* piv, T** ainv, int ldc, int* info, int batchSize)
inline void getriBatched(int n, T** a, int lda, int* piv, T** ainv, int ldc, int* info, int batchSize)
{
for (int i = 0; i < batchSize; i++)
{
@ -684,7 +684,7 @@ inline static void getriBatched(int n, T** a, int lda, int* piv, T** ainv, int l
}
}
void static geqrf(int M,
inline void geqrf(int M,
int N,
std::complex<double>* A,
const int LDA,
@ -701,7 +701,7 @@ void static geqrf(int M,
#endif
}
void static geqrf(int M, int N, double* A, const int LDA, double* TAU, double* WORK, int LWORK, int& INFO)
inline void geqrf(int M, int N, double* A, const int LDA, double* TAU, double* WORK, int LWORK, int& INFO)
{
#ifdef __NO_QR__
INFO = 0;
@ -711,7 +711,7 @@ void static geqrf(int M, int N, double* A, const int LDA, double* TAU, double* W
#endif
}
void static geqrf(int M,
inline void geqrf(int M,
int N,
std::complex<float>* A,
const int LDA,
@ -728,7 +728,7 @@ void static geqrf(int M,
#endif
}
void static geqrf(int M, int N, float* A, const int LDA, float* TAU, float* WORK, int LWORK, int& INFO)
inline void geqrf(int M, int N, float* A, const int LDA, float* TAU, float* WORK, int LWORK, int& INFO)
{
#ifdef __NO_QR__
INFO = 0;
@ -739,7 +739,7 @@ void static geqrf(int M, int N, float* A, const int LDA, float* TAU, float* WORK
}
template<typename T>
static void geqrf_bufferSize(int m, int n, T* a, int lda, int& lwork)
inline void geqrf_bufferSize(int m, int n, T* a, int lda, int& lwork)
{
typename std::decay<T>::type work;
int status;
@ -748,7 +748,7 @@ static void geqrf_bufferSize(int m, int n, T* a, int lda, int& lwork)
lwork = int(real(work));
}
void static gelqf(int M,
inline void gelqf(int M,
int N,
std::complex<double>* A,
const int LDA,
@ -765,7 +765,7 @@ void static gelqf(int M,
#endif
}
void static gelqf(int M, int N, double* A, const int LDA, double* TAU, double* WORK, int LWORK, int& INFO)
inline void gelqf(int M, int N, double* A, const int LDA, double* TAU, double* WORK, int LWORK, int& INFO)
{
#ifdef __NO_QR__
INFO = 0;
@ -775,7 +775,7 @@ void static gelqf(int M, int N, double* A, const int LDA, double* TAU, double* W
#endif
}
void static gelqf(int M,
inline void gelqf(int M,
int N,
std::complex<float>* A,
const int LDA,
@ -792,7 +792,7 @@ void static gelqf(int M,
#endif
}
void static gelqf(int M, int N, float* A, const int LDA, float* TAU, float* WORK, int LWORK, int& INFO)
inline void gelqf(int M, int N, float* A, const int LDA, float* TAU, float* WORK, int LWORK, int& INFO)
{
#ifdef __NO_QR__
INFO = 0;
@ -803,7 +803,7 @@ void static gelqf(int M, int N, float* A, const int LDA, float* TAU, float* WORK
}
template<typename T>
static void gelqf_bufferSize(int m, int n, T* a, int lda, int& lwork)
inline void gelqf_bufferSize(int m, int n, T* a, int lda, int& lwork)
{
typename std::decay<T>::type work;
int status;
@ -812,7 +812,7 @@ static void gelqf_bufferSize(int m, int n, T* a, int lda, int& lwork)
lwork = int(real(work));
}
void static gqr(int M,
inline void gqr(int M,
int N,
int K,
std::complex<double>* A,
@ -830,7 +830,7 @@ void static gqr(int M,
#endif
}
void static gqr(int M, int N, int K, double* A, const int LDA, double* TAU, double* WORK, int LWORK, int& INFO)
inline void gqr(int M, int N, int K, double* A, const int LDA, double* TAU, double* WORK, int LWORK, int& INFO)
{
#ifdef __NO_QR__
INFO = 0;
@ -840,7 +840,7 @@ void static gqr(int M, int N, int K, double* A, const int LDA, double* TAU, doub
#endif
}
void static gqr(int M,
inline void gqr(int M,
int N,
int K,
std::complex<float>* A,
@ -858,7 +858,7 @@ void static gqr(int M,
#endif
}
void static gqr(int M, int N, int K, float* A, const int LDA, float* TAU, float* WORK, int LWORK, int& INFO)
inline void gqr(int M, int N, int K, float* A, const int LDA, float* TAU, float* WORK, int LWORK, int& INFO)
{
#ifdef __NO_QR__
INFO = 0;
@ -869,7 +869,7 @@ void static gqr(int M, int N, int K, float* A, const int LDA, float* TAU, float*
}
template<typename T>
static void gqr_bufferSize(int m, int n, int k, T* a, int lda, int& lwork)
inline void gqr_bufferSize(int m, int n, int k, T* a, int lda, int& lwork)
{
typename std::decay<T>::type work;
int status;
@ -878,7 +878,7 @@ static void gqr_bufferSize(int m, int n, int k, T* a, int lda, int& lwork)
lwork = int(real(work));
}
void static glq(int M,
inline void glq(int M,
int N,
int K,
std::complex<double>* A,
@ -896,7 +896,7 @@ void static glq(int M,
#endif
}
void static glq(int M, int N, int K, double* A, const int LDA, double* TAU, double* WORK, int LWORK, int& INFO)
inline void glq(int M, int N, int K, double* A, const int LDA, double* TAU, double* WORK, int LWORK, int& INFO)
{
#ifdef __NO_QR__
INFO = 0;
@ -906,7 +906,7 @@ void static glq(int M, int N, int K, double* A, const int LDA, double* TAU, doub
#endif
}
void static glq(int M,
inline void glq(int M,
int N,
int K,
std::complex<float>* A,
@ -924,7 +924,7 @@ void static glq(int M,
#endif
}
void static glq(int M, int N, int K, float* A, const int LDA, float* TAU, float* WORK, int const LWORK, int& INFO)
inline void glq(int M, int N, int K, float* A, const int LDA, float* TAU, float* WORK, int const LWORK, int& INFO)
{
#ifdef __NO_QR__
INFO = 0;
@ -935,7 +935,7 @@ void static glq(int M, int N, int K, float* A, const int LDA, float* TAU, float*
}
template<typename T>
static void glq_bufferSize(int m, int n, int k, T* a, int lda, int& lwork)
inline void glq_bufferSize(int m, int n, int k, T* a, int lda, int& lwork)
{
typename std::decay<T>::type work;
int status;
@ -945,7 +945,7 @@ static void glq_bufferSize(int m, int n, int k, T* a, int lda, int& lwork)
}
template<typename T1, typename T2>
inline static void matinvBatched(int n, T1* const* a, int lda, T2** ainv, int lda_inv, int* info, int batchSize)
inline void matinvBatched(int n, T1* const* a, int lda, T2** ainv, int lda_inv, int* info, int batchSize)
{
std::vector<int> piv(n);
int lwork(-1);

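These hunks drop static from the header-defined wrappers and keep plain inline, so every translation unit that includes the header shares one ODR-merged definition instead of carrying its own internal-linkage copy. A standalone sketch of that distinction (hypothetical helper, not the QMCPACK header):

// inline_vs_static_sketch.cpp -- illustration only, not QMCPACK code
#include <iostream>

// In a header, static would give this function internal linkage: every translation
// unit including the header gets its own copy (duplicated code, distinct addresses).
// Plain inline keeps external linkage and lets the linker merge the definitions,
// which is presumably what the wrapper changes above are after.
inline double scale_sum(const double* x, int n, double alpha)
{
  double s = 0.0;
  for (int i = 0; i < n; ++i)
    s += alpha * x[i];
  return s;
}

int main()
{
  double v[3] = {1.0, 2.0, 3.0};
  std::cout << scale_sum(v, 3, 0.5) << '\n'; // prints 3
  return 0;
}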
View File

@ -484,7 +484,7 @@ void halfRotateCholeskyMatrix(WALKER_TYPES type,
APP_ABORT(" Error: kN > NMO in halfRotateCholeskyMatrix. \n");
// map from [0:2*NMO) to [0:NMO) in collinear case
int k0_alpha, k0_beta, kN_alpha = 0, kN_beta = 0;
int k0_alpha = 0, k0_beta = 0, kN_alpha = 0, kN_beta = 0;
if (k0 >= 0)
{
k0_alpha = std::min(k0, NMO);
@ -495,10 +495,6 @@ void halfRotateCholeskyMatrix(WALKER_TYPES type,
kN_beta = std::max(kN, NMO) - NMO;
}
}
else
{
k0_alpha = k0_beta = kN_alpha = kN_beta = 0;
}
int ak0, ak1;
int Qdim = NAEA * (kN_alpha - k0_alpha) + NAEB * (kN_beta - k0_beta);

View File

@ -54,7 +54,7 @@ void ParallelExecutor<Executor::OPENMP>::operator()(int num_tasks, F&& f, Args&&
++nested_throw_count;
else
{
app_warning() << re.what();
app_error() << re.what() << std::flush;
++throw_count;
}
}
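The hunk above promotes the per-task exception report from app_warning() to app_error() with an explicit flush before the counter is bumped. A standalone sketch of the count-and-report, fail-once-at-the-end pattern (toy serial task loop, not the real ParallelExecutor):

// task_exception_sketch.cpp -- illustration only
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

// run every task, report each failure immediately, then fail once at the end
void run_tasks(const std::vector<int>& tasks)
{
  int throw_count = 0;
  for (int task_id : tasks)
  {
    try
    {
      if (task_id == 2) // stand-in for a task body that throws
        throw std::runtime_error("task " + std::to_string(task_id) + " failed");
    }
    catch (const std::runtime_error& re)
    {
      std::cerr << re.what() << std::endl; // flushed, so the message survives a later abort
      ++throw_count;
    }
  }
  if (throw_count > 0)
    throw std::runtime_error(std::to_string(throw_count) + " task(s) threw");
}

int main()
{
  try
  {
    run_tasks({0, 1, 2, 3});
  }
  catch (const std::exception& e)
  {
    std::cerr << "aborting: " << e.what() << '\n';
    return 1;
  }
  return 0;
}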

View File

@ -48,8 +48,7 @@ public:
if (n)
{
resize_impl(n);
if (allocator_traits<Alloc>::is_host_accessible)
std::fill_n(X, n, val);
allocator_traits<Alloc>::fill_n(X, n, val);
}
}
@ -64,6 +63,8 @@ public:
resize_impl(rhs.nLocal);
if (allocator_traits<Alloc>::is_host_accessible)
std::copy_n(rhs.data(), nLocal, X);
else
allocator_traits<Alloc>::fill_n(X, nLocal, T());
}
}
@ -76,6 +77,8 @@ public:
resize(rhs.nLocal);
if (allocator_traits<Alloc>::is_host_accessible)
std::copy_n(rhs.data(), nLocal, X);
else
allocator_traits<Alloc>::fill_n(X, nLocal, T());
return *this;
}
@ -101,15 +104,13 @@ public:
}
//! Destructor
virtual ~Vector()
{
free();
}
virtual ~Vector() { free(); }
// Attach to pre-allocated memory
inline void attachReference(T* ref, size_t n)
{
if (nAllocated) {
if (nAllocated)
{
free();
// std::cerr << "Allocated OhmmsVector attachReference called.\n" << std::endl;
// Nice idea but "default" constructed WFC elements in the batched driver make this a mess.
@ -126,28 +127,21 @@ public:
///resize
inline void resize(size_t n, Type_t val = Type_t())
{
static_assert(std::is_same<value_type, typename Alloc::value_type>::value, "Vector and Alloc data types must agree!");
static_assert(std::is_same<value_type, typename Alloc::value_type>::value,
"Vector and Alloc data types must agree!");
if (nLocal > nAllocated)
throw std::runtime_error("Resize not allowed on Vector constructed by initialized memory.");
if(allocator_traits<Alloc>::is_host_accessible)
if (n > nAllocated)
{
if (n > nAllocated)
{
resize_impl(n);
std::fill_n(X, n, val);
}
else
{
if (n > nLocal) std::fill_n(X + nLocal, n - nLocal, val);
nLocal = n;
}
resize_impl(n);
allocator_traits<Alloc>::fill_n(X, n, val);
}
else
{
if (n > nAllocated)
resize_impl(n);
else
nLocal = n;
if (n > nLocal)
allocator_traits<Alloc>::fill_n(X + nLocal, n - nLocal, val);
nLocal = n;
}
return;
}
@ -156,8 +150,8 @@ public:
inline void clear() { nLocal = 0; }
///zero
inline void zero() { std::fill_n(X, nAllocated, T()); }
inline void zero() { allocator_traits<Alloc>::fill_n(X, nAllocated, T()); }
///free
inline void free()
{

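The Vector changes above route zeroing and value fills through allocator_traits<Alloc>::fill_n so that allocators whose memory is not host accessible can redirect or skip the fill instead of letting std::fill_n touch device pointers. A self-contained sketch of that dispatch pattern (toy trait and allocator names, not QMCPACK's actual allocator_traits):

// allocator_fill_sketch.cpp -- illustration only; toy types, not QMCPACK's allocator_traits
#include <algorithm>
#include <iostream>
#include <memory>
#include <vector>

// Primary template: ordinary host allocators are host accessible and can use std::fill_n.
template<class Alloc>
struct toy_allocator_traits
{
  static constexpr bool is_host_accessible = true;
  template<class T>
  static void fill_n(T* ptr, std::size_t n, const T& value) { std::fill_n(ptr, n, value); }
};

// A hypothetical device allocator: host code must not dereference its pointers,
// so its trait turns fill_n into a no-op (a real one might enqueue a device fill).
template<class T>
struct FakeDeviceAllocator : std::allocator<T>
{};

template<class T>
struct toy_allocator_traits<FakeDeviceAllocator<T>>
{
  static constexpr bool is_host_accessible = false;
  static void fill_n(T*, std::size_t, const T&) {}
};

int main()
{
  std::vector<double> host(5);
  toy_allocator_traits<std::allocator<double>>::fill_n(host.data(), host.size(), 1.5);

  double* device_like = nullptr; // stand-in for memory the host cannot touch
  toy_allocator_traits<FakeDeviceAllocator<double>>::fill_n(device_like, 5, 1.5); // safe no-op

  std::cout << host[4] << ' ' << std::boolalpha
            << toy_allocator_traits<FakeDeviceAllocator<double>>::is_host_accessible << '\n'; // 1.5 false
  return 0;
}

The point of the indirection is that Vector itself no longer needs to branch on where X lives; the allocator's traits decide.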
View File

@ -15,10 +15,13 @@ SET(QMCEST_SRC
CSEnergyEstimator.cpp
LocalEnergyEstimator.cpp
RMCLocalEnergyEstimator.cpp
SpinDensityInput.cpp
EstimatorManagerBase.cpp
EstimatorManagerNew.cpp
EstimatorManagerCrowd.cpp
CollectablesEstimator.cpp)
CollectablesEstimator.cpp
OperatorEstBase.cpp
SpinDensityNew.cpp)
####################################
# create libqmcestimators

View File

@ -31,17 +31,35 @@ EstimatorManagerCrowd::EstimatorManagerCrowd(EstimatorManagerNew& em)
for (int i = 0; i < em.Estimators.size(); i++)
scalar_estimators_.push_back(em.Estimators[i]->clone());
MainEstimator = scalar_estimators_[EstimatorMap[MainEstimatorName]];
if (em.Collectables)
Collectables = em.Collectables->clone();
for (UPtr<OperatorEstBase>& upeb : em.operator_ests_)
{
operator_ests_.emplace_back(upeb->clone());
}
}
void EstimatorManagerCrowd::accumulate(int global_walkers, RefVector<MCPWalker>& walkers, RefVector<ParticleSet>& psets)
{
block_weight_ += walkers.size();
//Don't normalize; we only divide once after reduction.
//RealType norm = 1.0 / global_walkers;
int num_scalar_estimators = scalar_estimators_.size();
for (int i = 0; i < num_scalar_estimators; ++i)
scalar_estimators_[i]->accumulate(global_walkers, walkers, 1);
for (int i = 0; i < operator_ests_.size(); ++i)
operator_ests_[i]->accumulate(walkers, psets);
}
void EstimatorManagerCrowd::startBlock(int steps)
{
crowd_estimator_timer_.restart();
for (auto& uope : operator_ests_)
{
uope->startBlock(steps);
}
block_weight_ = 0.0;
}
void EstimatorManagerCrowd::stopBlock()
{
cpu_block_time_ = crowd_estimator_timer_.elapsed();

View File

@ -31,17 +31,18 @@ class MCWalkerConifugration;
class QMCHamiltonian;
class CollectablesEstimator;
/** Thread local portion of EstimatorManager
/** Thread local estimator container/accumulator
*
* Stepping away from the CloneManager + clones design, which creates EstimatorManagers
* that operate differently based on internal switches.
*
* see EstimatorManagerNew.h for full description of the new design.
*/
class EstimatorManagerCrowd : public EstimatorManagerInterface
{
public:
using MCPWalker = Walker<QMCTraits, PtclOnLatticeTraits>;
//enum { WEIGHT_INDEX=0, BLOCK_CPU_INDEX, ACCEPT_RATIO_INDEX, TOTAL_INDEX};
///name of the primary estimator name
std::string MainEstimatorName;
///the root file name
@ -50,11 +51,21 @@ public:
TinyVector<RealType, 4> RefEnergy;
///default constructor
EstimatorManagerCrowd() = delete;
///copy constructor
/** EstimatorManagerCrowd objects are always spawned from an EstimatorManagerNew
*
* This coupling should be removed.
*/
EstimatorManagerCrowd(EstimatorManagerNew& em);
///destructor
virtual ~EstimatorManagerCrowd(){};
/** Should be removed from the API
*
* This was necessary because the legacy code just clones EstimatorManager for
* each walker, but only the first one actually manages; the others are
* just estimator containers.
*/
inline bool is_manager() const { return false; }
///return the number of ScalarEstimators
@ -87,17 +98,10 @@ public:
scalar_estimators_[i]->setNumberOfBlocks(blocks);
}
void accumulate(int global_walkers, RefVector<MCPWalker>& walkers, RefVector<ParticleSet>& psets)
{
block_weight_ += walkers.size();
//Don't normalize we only divide once after reduction.
//RealType norm = 1.0 / global_walkers;
int num_scalar_estimators = scalar_estimators_.size();
for (int i = 0; i < num_scalar_estimators; ++i)
scalar_estimators_[i]->accumulate(global_walkers, walkers, 1);
}
void accumulate(int global_walkers, RefVector<MCPWalker>& walkers, RefVector<ParticleSet>& psets);
RefVector<EstimatorType> get_scalar_estimators() { return convertPtrToRefVector(scalar_estimators_); }
RefVector<qmcplusplus::OperatorEstBase> get_operator_estimators() { return convertUPtrToRefVector(operator_ests_); }
RealType get_block_weight() const { return block_weight_; }
double get_cpu_block_time() const { return cpu_block_time_; }
@ -155,6 +159,7 @@ protected:
///estimators of simple scalars
std::vector<EstimatorType*> scalar_estimators_;
std::vector<std::unique_ptr<OperatorEstBase>> operator_ests_;
private:
// This is needed for "efficiency" measure
Timer crowd_estimator_timer_;

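get_operator_estimators() above hands out a RefVector built from the owning vector of unique_ptr via convertUPtrToRefVector, so callers can accumulate through references while ownership stays with the manager. A standalone sketch of that owning-container / non-owning-view split (toy helper and types, not the QMCPACK utilities):

// refvector_sketch.cpp -- illustration only
#include <functional>
#include <iostream>
#include <memory>
#include <vector>

template<class T>
using RefVec = std::vector<std::reference_wrapper<T>>;

// non-owning view over an owning vector of unique_ptr, mirroring convertUPtrToRefVector
template<class T>
RefVec<T> make_ref_vector(std::vector<std::unique_ptr<T>>& owners)
{
  RefVec<T> refs;
  refs.reserve(owners.size());
  for (auto& up : owners)
    refs.emplace_back(*up);
  return refs;
}

struct ToyEstimator
{
  double weight = 0.0;
};

int main()
{
  std::vector<std::unique_ptr<ToyEstimator>> owned;
  owned.emplace_back(std::make_unique<ToyEstimator>());
  owned.emplace_back(std::make_unique<ToyEstimator>());

  for (ToyEstimator& est : make_ref_vector(owned)) // callers only see references
    est.weight += 1.0;

  std::cout << owned[0]->weight << ' ' << owned[1]->weight << '\n'; // 1 1
  return 0;
}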
View File

@ -11,8 +11,10 @@
#include <functional>
#include <numeric>
#include <algorithm>
#include "EstimatorManagerNew.h"
#include "SpinDensityNew.h"
#include "QMCHamiltonians/QMCHamiltonian.h"
#include "Message/Communicate.h"
#include "Message/CommOperators.h"
@ -122,14 +124,12 @@ void EstimatorManagerNew::reset()
//It's essential that this one is first, other code assumes weightInd always == 0
weightInd = BlockProperties.add("BlockWeight");
assert(weightInd == 0);
cpuInd = BlockProperties.add("BlockCPU");
cpuInd = BlockProperties.add("BlockCPU");
acceptRatioInd = BlockProperties.add("AcceptRatio");
BlockAverages.clear(); //cleaup the records
for (int i = 0; i < Estimators.size(); i++)
Estimators[i]->add2Record(BlockAverages);
max4ascii += BlockAverages.size();
if (Collectables)
Collectables->add2Record(BlockAverages);
}
void EstimatorManagerNew::addHeader(std::ostream& o)
@ -198,8 +198,10 @@ void EstimatorManagerNew::start(int blocks, bool record)
h_file = H5Fcreate(fname.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
for (int i = 0; i < Estimators.size(); i++)
Estimators[i]->registerObservables(h5desc, h_file);
if (Collectables)
Collectables->registerObservables(h5desc, h_file);
for (auto& uope : operator_ests_)
{
uope->registerOperatorEstimator(h5desc, h_file);
}
}
}
@ -209,17 +211,21 @@ void EstimatorManagerNew::startBlock(int steps)
BlockWeight = 0.0;
}
void EstimatorManagerNew::stopBlock(unsigned long accept, unsigned long reject, RealType block_weight, double cpu_block_time)
void EstimatorManagerNew::stopBlock(unsigned long accept,
unsigned long reject,
RealType block_weight,
double cpu_block_time)
{
//take block averages and update properties per block
PropertyCache[weightInd] = block_weight;
PropertyCache[cpuInd] = cpu_block_time;
makeBlockAverages(accept, reject);
reduceOperatorEstimators();
}
/** Called at end of block in Unified Driver
*
* Seems broken if there is more than one ScalarEstimator per crowd.
*/
QMCTraits::FullPrecRealType EstimatorManagerNew::collectScalarEstimators(
const RefVector<ScalarEstimatorBase>& estimators)
@ -253,6 +259,29 @@ QMCTraits::FullPrecRealType EstimatorManagerNew::collectScalarEstimators(
return tot_weight;
}
/** Reduces OperatorEstimator data from the Crowds to the manager's OperatorEstimator data
 *
 * The argument is a vector holding, for each crowd, a vector of references to its OperatorEstimators.
 * A particular OperatorEstimator's reduction via a call to collect may be straightforward
 * if the crowd-context OperatorEstimator holds a copy of the estimator data structure,
 * or more complex if it instead collects, for instance, a list of writes to locations
 * in the data structure.
*/
void EstimatorManagerNew::collectOperatorEstimators(const std::vector<RefVector<OperatorEstBase>>& crowd_op_ests)
{
for (int iop = 0; iop < operator_ests_.size(); ++iop)
{
RefVector<OperatorEstBase> this_op_est_for_all_crowds;
for (int icrowd = 0; icrowd < crowd_op_ests.size(); ++icrowd)
{
this_op_est_for_all_crowds.emplace_back(crowd_op_ests[icrowd][iop]);
}
operator_ests_[iop]->collect(this_op_est_for_all_crowds);
}
}
// blocks don't close frequently enough that we should be sweating the mpi transfers at all.
// all this Cache stuff is premature optimization because someone wanted to be very fancy
void EstimatorManagerNew::makeBlockAverages(unsigned long accepts, unsigned long rejects)
@ -263,11 +292,13 @@ void EstimatorManagerNew::makeBlockAverages(unsigned long accepts, unsigned long
// a pack into and out of an fp type that can be assured to hold the integral type exactly
// IMHO they should not be primarily stored in a vector with magic indexes
std::vector<unsigned long> accepts_and_rejects(my_comm_->size() * 2, 0);
accepts_and_rejects[my_comm_->rank()] = accepts;
accepts_and_rejects[my_comm_->rank()] = accepts;
accepts_and_rejects[my_comm_->size() + my_comm_->rank()] = rejects;
my_comm_->allreduce(accepts_and_rejects);
unsigned long total_block_accept = std::accumulate(accepts_and_rejects.begin(), accepts_and_rejects.begin() + my_comm_->size(), 0);
unsigned long total_block_reject = std::accumulate(accepts_and_rejects.begin() + my_comm_->size(), accepts_and_rejects.begin() + my_comm_->size() * 2, 0);
unsigned long total_block_accept =
std::accumulate(accepts_and_rejects.begin(), accepts_and_rejects.begin() + my_comm_->size(), 0);
unsigned long total_block_reject = std::accumulate(accepts_and_rejects.begin() + my_comm_->size(),
accepts_and_rejects.begin() + my_comm_->size() * 2, 0);
//Transfer FullPrecisionRead data
int n1 = AverageCache.size();
@ -306,8 +337,9 @@ void EstimatorManagerNew::makeBlockAverages(unsigned long accepts, unsigned long
}
// now we put the correct accept ratio in
PropertyCache[acceptRatioInd] = static_cast<FullPrecRealType>(total_block_accept) / static_cast<FullPrecRealType>(total_block_accept + total_block_reject);
PropertyCache[acceptRatioInd] = static_cast<FullPrecRealType>(total_block_accept) /
static_cast<FullPrecRealType>(total_block_accept + total_block_reject);
//add the block average to summarize
energyAccumulator(AverageCache[0]);
varAccumulator(SquaredAverageCache[0]);
@ -327,6 +359,50 @@ void EstimatorManagerNew::makeBlockAverages(unsigned long accepts, unsigned long
RecordCount++;
}
void EstimatorManagerNew::reduceOperatorEstimators()
{
if (operator_ests_.size() > 0)
{
std::vector<size_t> operator_data_sizes(operator_ests_.size());
RefVector<OperatorEstBase> ref_op_ests = convertUPtrToRefVector(operator_ests_);
for (int iop = 0; iop < operator_data_sizes.size(); ++iop)
{
operator_data_sizes[iop] = operator_ests_[iop]->get_data()->size();
}
// 1 larger because we put the weight in, to avoid depending on the Scalar estimators being reduced first.
size_t nops = *(std::max_element(operator_data_sizes.begin(), operator_data_sizes.end())) + 1;
std::vector<double> operator_send_buffer;
std::vector<double> operator_recv_buffer;
operator_send_buffer.reserve(nops);
operator_recv_buffer.reserve(nops);
for (int iop = 0; iop < operator_ests_.size(); ++iop)
{
auto& estimator = *operator_ests_[iop];
auto& data = estimator.get_data_ref();
size_t adjusted_size = data.size() + 1;
operator_send_buffer.resize(adjusted_size);
operator_recv_buffer.resize(adjusted_size);
auto cur = operator_send_buffer.begin();
std::copy_n(data.begin(), data.size(), cur);
operator_send_buffer[data.size()] = estimator.get_walkers_weight();
// This is necessary to use mpi3's C++ style reduce
#ifdef HAVE_MPI
my_comm_->comm.reduce_n(operator_send_buffer.begin(), adjusted_size, operator_recv_buffer.begin(), std::plus<>{},
0);
#else
operator_recv_buffer = operator_send_buffer;
#endif
if (my_comm_->rank() == 0)
{
std::copy_n(operator_recv_buffer.begin(), data.size(), data.begin());
size_t reduced_walker_weights = operator_recv_buffer[data.size()];
RealType invTotWgt = 1.0 / reduced_walker_weights;
operator_ests_[iop]->normalize(invTotWgt);
}
}
}
}
void EstimatorManagerNew::getApproximateEnergyVariance(RealType& e, RealType& var)
{
@ -356,7 +432,7 @@ EstimatorManagerNew::EstimatorType* EstimatorManagerNew::getEstimator(const std:
return Estimators[(*it).second];
}
bool EstimatorManagerNew::put(QMCHamiltonian& H, xmlNodePtr cur)
bool EstimatorManagerNew::put(QMCHamiltonian& H, const ParticleSet& pset, xmlNodePtr cur)
{
std::vector<std::string> extra;
cur = cur->children;
@ -394,6 +470,19 @@ bool EstimatorManagerNew::put(QMCHamiltonian& H, xmlNodePtr cur)
add(new CSEnergyEstimator(H, nPsi), MainEstimatorName);
app_log() << " Adding a default LocalEnergyEstimator for the MainEstimator " << std::endl;
}
else if (est_name == "SpinDensityNew")
{
SpinDensityInput spdi;
spdi.readXML(cur);
DataLocality dl = DataLocality::crowd;
if (spdi.get_save_memory())
dl = DataLocality::rank;
if (spdi.get_cell().explicitly_defined)
operator_ests_.emplace_back(std::make_unique<SpinDensityNew>(std::move(spdi), pset.mySpecies, dl));
else
operator_ests_.emplace_back(
std::make_unique<SpinDensityNew>(std::move(spdi), pset.Lattice, pset.mySpecies, dl));
}
else
extra.push_back(est_name);
}

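reduceOperatorEstimators above packs each estimator's flat data with its walker weight appended as the last element, reduces that buffer across ranks, and normalizes on rank 0 by the total weight. A serial sketch of the packing and normalization (plain summation stands in for the mpi3 reduce_n call):

// operator_reduce_sketch.cpp -- illustration only; summation replaces the MPI reduce
#include <iostream>
#include <vector>

int main()
{
  // pretend these are two ranks' copies of one estimator's data, each with its
  // accumulated walker weight appended as the final element
  std::vector<std::vector<double>> per_rank = {{2.0, 4.0, 6.0, /*weight*/ 2.0},
                                               {1.0, 2.0, 3.0, /*weight*/ 1.0}};

  // "reduce": element-wise sum into a receive buffer, which is what reduce_n produces on rank 0
  std::vector<double> recv(per_rank[0].size(), 0.0);
  for (const auto& buf : per_rank)
    for (std::size_t i = 0; i < buf.size(); ++i)
      recv[i] += buf[i];

  // rank 0 unpacks: the last slot is the total weight, the rest is the reduced data
  const double total_weight = recv.back();
  recv.pop_back();
  for (double& x : recv)
    x *= 1.0 / total_weight; // normalize(invTotWgt)

  for (double x : recv)
    std::cout << x << ' '; // 1 2 3
  std::cout << '\n';
  return 0;
}

Carrying the weight inside the same buffer is what lets the operator reduction avoid depending on the scalar estimators having been reduced first.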
View File

@ -12,12 +12,15 @@
#ifndef QMCPLUSPLUS_ESTIMATORMANAGERNEW_H
#define QMCPLUSPLUS_ESTIMATORMANAGERNEW_H
#include <memory>
#include "Configuration.h"
#include "Utilities/Timer.h"
#include "Utilities/PooledData.h"
#include "Message/Communicate.h"
#include "Estimators/ScalarEstimatorBase.h"
#include "Estimators/EstimatorManagerInterface.h"
#include "OperatorEstBase.h"
#include "Particle/Walker.h"
#include "OhmmsPETE/OhmmsVector.h"
#include "OhmmsData/HDFAttribIO.h"
@ -44,7 +47,7 @@ public:
using FullPrecRealType = QMCTraits::FullPrecRealType;
typedef ScalarEstimatorBase EstimatorType;
using FPRBuffer = std::vector<FullPrecRealType>;
using FPRBuffer = std::vector<FullPrecRealType>;
using MCPWalker = Walker<QMCTraits, PtclOnLatticeTraits>;
///name of the primary estimator name
@ -106,6 +109,16 @@ public:
*/
int add(EstimatorType* newestimator) { return add(newestimator, MainEstimatorName); }
/** add a "non" physical operator estimator
*
* this is a drastically reduced version of OperatorBase; right now it just supports
* what the SpinDensityNew estimator needs
*
* What is actually important is that it has its own locality aware data and
* EstimatorManagerNew doesn't know about or manage that data.
*/
int addEstOperator(OperatorEstBase& op_est);
///return a pointer to the estimator aname
EstimatorType* getEstimator(const std::string& a);
@ -116,7 +129,7 @@ public:
inline RealType variance(int i) const { return Estimators[i]->variance(); }
///process xml tag associated with estimators
bool put(QMCHamiltonian& H, xmlNodePtr cur);
bool put(QMCHamiltonian& H, const ParticleSet& pset, xmlNodePtr cur);
/** reset the estimator
*/
@ -157,6 +170,9 @@ public:
*/
RealType collectScalarEstimators(const RefVector<ScalarEstimatorBase>& scalar_estimators);
void collectOperatorEstimators(const std::vector<RefVector<OperatorEstBase>>& op_ests);
/** get the average of per-block energy and variance of all the blocks
* Note: this is not weighted average. It can be the same as weighted average only when block weights are identical.
*/
@ -232,6 +248,14 @@ protected:
std::vector<EstimatorType*> Estimators;
///convenient descriptors for hdf5
std::vector<observable_helper*> h5desc;
/** OperatorEst Observables
*
* since the operator estimators are also a closed set at compile time
* they could be treated just like the inputs.
* However, the idea of a shared interface is much more straightforward for
* them.
*/
std::vector<std::unique_ptr<OperatorEstBase>> operator_ests_;
/////estimators of composite data
//CompositeEstimatorSet* CompEstimators;
///Timer
@ -244,6 +268,8 @@ private:
/// collect data and write
void makeBlockAverages(unsigned long accept, unsigned long reject);
void reduceOperatorEstimators();
///add header to an std::ostream
void addHeader(std::ostream& o);
size_t FieldWidth;

View File

@ -0,0 +1,54 @@
//////////////////////////////////////////////////////////////////////////////////////
// This file is distributed under the University of Illinois/NCSA Open Source License.
// See LICENSE file in top directory for details.
//
// Copyright (c) 2020 QMCPACK developers.
//
// File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory
//
// File refactored from: OperatorEstBase.cpp
//////////////////////////////////////////////////////////////////////////////////////
/**@file
*/
#include "Message/Communicate.h"
#include "OperatorEstBase.h"
#include "QMCHamiltonians/QMCHamiltonian.h"
namespace qmcplusplus
{
OperatorEstBase::OperatorEstBase(DataLocality dl) : data_locality_(dl), walkers_weight_(0) {}
OperatorEstBase::OperatorEstBase(const OperatorEstBase& oth) : data_locality_(oth.data_locality_), walkers_weight_(0) {}
// I suspect this can be a pure function outside of the class.
// In this case at least we don't care to copy the data_ as we are going to reduce these later and don't want
// to end up with a multiplicative factor if we already have data.
OperatorEstBase::Data OperatorEstBase::createLocalData(size_t size, DataLocality data_locality)
{
Data new_data;
new_data = std::make_unique<std::vector<QMCT::RealType>>(size, 0);
return new_data;
}
void OperatorEstBase::collect(const RefVector<OperatorEstBase>& type_erased_operator_estimators)
{
for (OperatorEstBase& crowd_oeb : type_erased_operator_estimators)
{
std::transform(data_->begin(), data_->end(), crowd_oeb.get_data()->begin(), data_->begin(), std::plus<>{});
// For debugging purposes
walkers_weight_ += crowd_oeb.walkers_weight_;
crowd_oeb.walkers_weight_ = 0;
}
std::cout << "spindens walkers weight: " << walkers_weight_ << '\n';
}
void OperatorEstBase::normalize(QMCT::RealType invTotWgt)
{
auto& data = *data_;
for (QMCT::RealType& elem : data)
elem *= invTotWgt;
walkers_weight_ = 0;
}
} // namespace qmcplusplus

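OperatorEstBase::collect above folds each crowd's data element-wise into the rank-level buffer with std::transform and std::plus, and moves the crowd weights along with it. A minimal standalone version of that accumulation (toy data, not the OperatorEstBase types):

// crowd_collect_sketch.cpp -- illustration only
#include <algorithm>
#include <functional>
#include <iostream>
#include <utility>
#include <vector>

int main()
{
  std::vector<double> rank_data(4, 0.0); // the manager-level data_
  double rank_weight = 0.0;

  // two crowd-local copies of the same grid, each with its own accumulated walker weight
  std::vector<std::pair<std::vector<double>, double>> crowds = {{{1.0, 0.0, 2.0, 0.0}, 3.0},
                                                                {{0.0, 1.0, 0.0, 2.0}, 3.0}};

  for (auto& [crowd_data, crowd_weight] : crowds)
  {
    // same element-wise reduction as the std::transform/std::plus call above
    std::transform(rank_data.begin(), rank_data.end(), crowd_data.begin(), rank_data.begin(), std::plus<>{});
    rank_weight += crowd_weight;
    crowd_weight = 0.0; // crowd accumulators are zeroed once collected
  }

  for (double x : rank_data)
    std::cout << x << ' '; // 1 1 2 2
  std::cout << "| weight " << rank_weight << '\n'; // 6
  return 0;
}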
View File

@ -0,0 +1,108 @@
//////////////////////////////////////////////////////////////////////////////////////
// This file is distributed under the University of Illinois/NCSA Open Source License.
// See LICENSE file in top directory for details.
//
// Copyright (c) 2020 QMCPACK developers.
//
// File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory
//
// File refactored from: OperatorBase.h
//////////////////////////////////////////////////////////////////////////////////////
/**@file
*/
#ifndef QMCPLUSPLUS_OPERATORESTBASE_H
#define QMCPLUSPLUS_OPERATORESTBASE_H
#include "Particle/ParticleSet.h"
#include "OhmmsData/RecordProperty.h"
#include "Utilities/RandomGenerator.h"
#include "QMCHamiltonians/observable_helper.h"
#include "QMCWaveFunctions/OrbitalSetTraits.h"
#include "type_traits/DataLocality.h"
#include <bitset>
namespace qmcplusplus
{
class DistanceTableData;
class TrialWaveFunction;
/** @ingroup Estimators
* @brief An abstract class for gridded estimators
*
*/
class OperatorEstBase
{
public:
using QMCT = QMCTraits;
using MCPWalker = Walker<QMCTraits, PtclOnLatticeTraits>;
/** the type in this variant changes based on data locality
*/
using Data = UPtr<std::vector<QMCT::RealType>>;
/// locality for accumulation data
DataLocality data_locality_;
///name of this object
std::string myName;
QMCT::FullPrecRealType get_walkers_weight() const { return walkers_weight_; }
///constructor
OperatorEstBase(DataLocality dl);
OperatorEstBase(const OperatorEstBase& oth);
///virtual destructor
virtual ~OperatorEstBase() = default;
/** Accumulate whatever it is you are accumulating with respect to walkers
*
* This method is assumed to be called from the crowd context
* It provides parallelism with respect to computational effort of the estimator
* without causing a global sync.
* Depending on data locality the accumulation of the result may be different from
* the single thread write directly into the OperatorEstimator data.
*/
virtual void accumulate(RefVector<MCPWalker>& walkers, RefVector<ParticleSet>& psets) = 0;
/** Reduce estimator result data from crowds to rank
*
* This is assumed to be called from only one thread per crowds->rank
* reduction. The implication is that this happens during a global sync, or that there is a guarantee
* that the crowd operator estimators' accumulation data is not being written to.
*
* There could be concurrent operations inside the scope of the collect call.
*/
virtual void collect(const RefVector<OperatorEstBase>& oebs);
virtual void normalize(QMCT::RealType invToWgt);
virtual void startBlock(int steps) = 0;
std::vector<QMCT::RealType>& get_data_ref() { return *data_; }
Data& get_data() { return data_; };
/** add this OperatorEstimator's descriptors for hdf5
* @param h5desc contains a set of hdf5 descriptors for a scalar observable
* @param gid hdf5 group to which the observables belong
*
* The default implementation does nothing. The derived classes which compute
* big data, e.g. density, should override this function.
*/
virtual void registerOperatorEstimator(std::vector<observable_helper*>& h5desc, hid_t gid) const {}
virtual OperatorEstBase* clone() = 0;
QMCT::FullPrecRealType get_walkers_weight() { return walkers_weight_; }
protected:
QMCT::FullPrecRealType walkers_weight_;
/** data management
*/
static Data createLocalData(size_t size, DataLocality data_locality);
Data data_;
};
} // namespace qmcplusplus
#endif

View File

@ -0,0 +1,132 @@
//////////////////////////////////////////////////////////////////////////////////////
// This file is distributed under the University of Illinois/NCSA Open Source License.
// See LICENSE file in top directory for details.
//
// Copyright (c) 2020 QMCPACK developers.
//
// File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory
//
// Some code refactored from: SpinDensity.cpp
//////////////////////////////////////////////////////////////////////////////////////
#include "SpinDensityInput.h"
#include <cmath>
#include "OhmmsData/AttributeSet.h"
#include "Message/UniformCommunicateError.h"
namespace qmcplusplus
{
void SpinDensityInput::readXML(xmlNodePtr cur)
{
std::string write_report;
std::string save_memory;
OhmmsAttributeSet attrib;
attrib.add(myName_, "name");
attrib.add(write_report, "report");
attrib.add(save_memory, "save_memory");
attrib.put(cur);
Tensor<Real, DIM> axes;
int test_moves = 0;
xmlNodePtr element = cur->xmlChildrenNode;
while (element != NULL)
{
std::string ename((const char*)element->name);
if (ename == "parameter")
{
const XMLAttrString name(element, "name");
if (name == "dr")
{
have_dr_ = true;
putContent(dr_, element);
}
else if (name == "grid")
{
have_grid_ = true;
putContent(grid_, element);
}
else if (name == "corner")
{
have_corner_ = true;
putContent(corner_, element);
}
else if (name == "center")
{
have_center_ = true;
putContent(center_, element);
}
else if (name == "cell")
{
have_cell_ = true;
putContent(axes, element);
}
else if (name == "test_moves")
putContent(test_moves, element);
}
element = element->next;
}
if (have_dr_ && have_grid_)
throw UniformCommunicateError("SpinDensity input dr and grid are provided, this is ambiguous");
else if (!have_dr_ && !have_grid_)
throw UniformCommunicateError("SpinDensity input must provide dr or grid");
if (have_corner_ && have_center_)
throw UniformCommunicateError("SpinDensity input corner and center are provided, this is ambiguous");
if (have_cell_)
{
cell_.set(axes);
if (!have_corner_ && !have_center_)
throw UniformCommunicateError("SpinDensity input must provide corner or center with explicitly defined cell");
}
if (write_report == "yes")
write_report_ = true;
else
write_report_ = false;
if (save_memory == "yes")
save_memory_ = true;
else
save_memory_ = false;
// weird legacy stuff
// if (write_report == "yes")
// report(" ");
// if (test_moves > 0)
// test(test_moves, *Ptmp);
}
SpinDensityInput::DerivedParameters SpinDensityInput::calculateDerivedParameters(Lattice& lattice)
{
PosType corner = 0.0;
if (have_center_)
corner = center_ - lattice.Center;
else if (have_corner_)
corner = corner_;
TinyVector<int, DIM> grid;
if (have_dr_)
for (int d = 0; d < DIM; ++d)
grid[d] = (int)std::ceil(std::sqrt(dot(lattice.Rv[d], lattice.Rv[d])) / dr_[d]);
else if (have_grid_)
grid = grid_;
size_t npoints = 1;
for (int d = 0; d < DIM; ++d)
npoints *= grid[d];
TinyVector<int, DIM> gdims;
gdims[0] = npoints / grid[0];
for (int d = 1; d < DIM; ++d)
gdims[d] = gdims[d - 1] / grid[d];
return {corner, grid, gdims, npoints};
}
} // namespace qmcplusplus

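calculateDerivedParameters above turns either a dr spacing or an explicit grid into per-axis bin counts, row-major strides (gdims), and a total point count. A standalone numeric sketch of that arithmetic (toy orthorhombic cell, not the QMCPACK Lattice type):

// spin_density_grid_sketch.cpp -- illustration only
#include <array>
#include <cmath>
#include <cstddef>
#include <iostream>

int main()
{
  constexpr int DIM = 3;
  // toy orthorhombic cell: axis vectors of length 4, 6 and 8
  const std::array<std::array<double, DIM>, DIM> Rv = {{{4, 0, 0}, {0, 6, 0}, {0, 0, 8}}};
  const std::array<double, DIM> dr = {0.5, 0.5, 0.5}; // requested spacing

  std::array<int, DIM> grid;
  for (int d = 0; d < DIM; ++d)
  {
    const double len = std::sqrt(Rv[d][0] * Rv[d][0] + Rv[d][1] * Rv[d][1] + Rv[d][2] * Rv[d][2]);
    grid[d] = static_cast<int>(std::ceil(len / dr[d])); // grid[d] = ceil(|Rv[d]| / dr[d])
  }

  std::size_t npoints = 1;
  for (int d = 0; d < DIM; ++d)
    npoints *= grid[d];

  // gdims are the strides that flatten (i,j,k) into a single bin index
  std::array<int, DIM> gdims;
  gdims[0] = static_cast<int>(npoints) / grid[0];
  for (int d = 1; d < DIM; ++d)
    gdims[d] = gdims[d - 1] / grid[d];

  std::cout << "grid    = " << grid[0] << ' ' << grid[1] << ' ' << grid[2] << '\n';    // 8 12 16
  std::cout << "gdims   = " << gdims[0] << ' ' << gdims[1] << ' ' << gdims[2] << '\n'; // 192 16 1
  std::cout << "npoints = " << npoints << '\n';                                        // 1536
  return 0;
}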
View File

@ -0,0 +1,91 @@
//////////////////////////////////////////////////////////////////////////////////////
// This file is distributed under the University of Illinois/NCSA Open Source License.
// See LICENSE file in top directory for details.
//
// Copyright (c) 2020 QMCPACK developers.
//
// File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory
//
// Some code refactored from: SpinDensity.h
//////////////////////////////////////////////////////////////////////////////////////
#ifndef QMCPLUSPLUS_SPINDENSITYINPUT_H
#define QMCPLUSPLUS_SPINDENSITYINPUT_H
#include "Configuration.h"
#include "OhmmsData/ParameterSet.h"
#include "Containers/OhmmsPETE/TinyVector.h"
namespace qmcplusplus
{
/** Native representation for Spin Density Estimators inputs
*
* This class serves three purposes, all related to properly handling
* and verifying the spin density input.
* 1. Provide an immutable representation of the actual user input.
* 2. Parse the xml node of SpinDensityNew input.
* 3. Hold the logic for calculating derived parameters.
*
*/
class SpinDensityInput
{
public:
using Real = QMCTraits::RealType;
using POLT = PtclOnLatticeTraits;
using Lattice = POLT::ParticleLayout_t;
using PosType = QMCTraits::PosType;
static constexpr int DIM = QMCTraits::DIM;
public:
SpinDensityInput(){};
void readXML(xmlNodePtr cur);
Lattice get_cell() const { return cell_; }
PosType get_corner() const { return corner_; }
TinyVector<int, DIM> get_grid() const { return grid_; }
int get_npoints() const { return npoints_; }
bool get_write_report() const { return write_report_; }
bool get_save_memory() const { return save_memory_; }
struct DerivedParameters
{
PosType corner;
TinyVector<int, DIM> grid;
TinyVector<int, DIM> gdims;
size_t npoints;
};
/** Derived parameters of SpinDensity
*
* These require the cell the SpinDensity is evaluated over;
* the caller (SpinDensityNew) either gets this from the input and
* passes it back, or passes in the cell from the relevant ParticleSet.
*
*/
DerivedParameters
calculateDerivedParameters(Lattice& lattice);
private:
///name of this Estimator
std::string myName_;
Lattice cell_;
PosType corner_;
PosType dr_;
PosType center_;
TinyVector<int, DIM> grid_;
int npoints_;
bool write_report_;
bool save_memory_;
/** these are necessary for calculateDerivedParameters
*
* If we are going to later write out a canonical input for
* this input then they are needed as well.
*/
bool have_dr_ = false;
bool have_grid_ = false;
bool have_center_ = false;
bool have_corner_ = false;
bool have_cell_ = false;
};
} // namespace qmcplusplus
#endif /* QMCPLUSPLUS_SPINDENSITYINPUT_H */

View File

@ -0,0 +1,236 @@
//////////////////////////////////////////////////////////////////////////////////////
// This file is distributed under the University of Illinois/NCSA Open Source License.
// See LICENSE file in top directory for details.
//
// Copyright (c) 2020 QMCPACK developers.
//
// File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory
//
// File refactored from: SpinDensity.cpp
//////////////////////////////////////////////////////////////////////////////////////
#include "SpinDensityNew.h"
#include <iostream>
#include <numeric>
namespace qmcplusplus
{
SpinDensityNew::SpinDensityNew(SpinDensityInput&& input, const SpeciesSet& species, DataLocality dl)
: OperatorEstBase(dl), input_(std::move(input)), species_(species)
{
myName = "SpinDensity";
data_locality_ = dl;
if (input_.get_cell().explicitly_defined == true)
lattice_ = input_.get_cell();
else
throw std::runtime_error("If SpinDensityInput does not contain a cell definition you must call the constructor "
"with an explicit lattice defined");
derived_parameters_ = input_.calculateDerivedParameters(lattice_);
species_size_ = getSpeciesSize(species_);
data_ = createLocalData(getFullDataSize(), data_locality_);
}
SpinDensityNew::SpinDensityNew(SpinDensityInput&& input,
const Lattice& lattice,
const SpeciesSet& species,
const DataLocality dl)
: OperatorEstBase(dl), input_(std::move(input)), species_(species), lattice_(lattice)
{
myName = "SpinDensity";
std::cout << "SpinDensity constructor called\n";
data_locality_ = dl;
if (input_.get_cell().explicitly_defined == true)
throw std::runtime_error(
"SpinDensityNew should not be constructed with both a cell in its input and an lattice input arguement.");
else if (lattice_.explicitly_defined == false)
throw std::runtime_error("SpinDensityNew cannot be constructed from a lattice that is not explicitly defined");
derived_parameters_ = input_.calculateDerivedParameters(lattice_);
species_size_ = getSpeciesSize(species_);
data_ = createLocalData(getFullDataSize(), data_locality_);
}
std::vector<int> SpinDensityNew::getSpeciesSize(SpeciesSet& species)
{
std::vector<int> species_size;
int index = species.findAttribute("membersize");
if (index < 0)
throw std::runtime_error("SpinDensity(P) Species set does not have the required attribute 'membersize'");
for (int s = 0; s < species.size(); ++s)
species_size.push_back(species(index, s));
return species_size;
}
size_t SpinDensityNew::getFullDataSize() { return species_.size() * derived_parameters_.npoints; }
OperatorEstBase* SpinDensityNew::clone()
{
std::cout << "SpinDensity clone called\n";
return new SpinDensityNew(*this);
}
SpinDensityNew::SpinDensityNew(const SpinDensityNew& sdn)
: OperatorEstBase(sdn),
input_(sdn.input_),
species_(sdn.species_),
lattice_(sdn.lattice_),
derived_parameters_(sdn.derived_parameters_),
species_size_(sdn.species_size_)
{
if (data_locality_ == DataLocality::crowd)
{
size_t data_size = sdn.data_->size();
data_ = createLocalData(data_size, data_locality_);
}
else if (data_locality_ == DataLocality::rank)
{
assert(sdn.data_locality_ == DataLocality::rank);
data_locality_ = DataLocality::queue;
// at construction we don't know what the data requirement is going to be
// since it's steps-per-block dependent, so start with 10 steps' worth.
int num_particles = std::accumulate(species_size_.begin(), species_size_.end(), 0);
size_t data_size = num_particles * 20;
data_ = createLocalData(data_size, data_locality_);
}
}
void SpinDensityNew::startBlock(int steps)
{
if (data_locality_ == DataLocality::rank)
{
int num_particles = std::accumulate(species_size_.begin(), species_size_.end(), 0);
size_t data_size = num_particles * steps * 2;
data_->reserve(data_size);
data_->resize(0);
}
}
/** Gets called every step and writes to thread local data.
*
* I aimed for readability rather than doing the optimizer's job.
* The offsets into bare data are already bad enough.
*/
void SpinDensityNew::accumulate(RefVector<MCPWalker>& walkers, RefVector<ParticleSet>& psets)
{
auto& dp_ = derived_parameters_;
for (int iw = 0; iw < walkers.size(); ++iw)
{
MCPWalker& walker = walkers[iw];
ParticleSet& pset = psets[iw];
QMCT::RealType weight = walker.Weight;
// for testing
walkers_weight_ += weight;
int p = 0;
std::vector<QMCT::RealType>& data = *data_;
for (int s = 0; s < species_.size(); ++s)
for (int ps = 0; ps < species_size_[s]; ++ps, ++p)
{
QMCT::PosType u = lattice_.toUnit(pset.R[p] - dp_.corner);
size_t point = dp_.npoints * s;
for (int d = 0; d < QMCT::DIM; ++d)
point += dp_.gdims[d] * ((int)(dp_.grid[d] * (u[d] - std::floor(u[d])))); //periodic only
accumulateToData(point, weight);
}
}
};
void SpinDensityNew::accumulateToData(size_t point, QMCT::RealType weight)
{
if (data_locality_ == DataLocality::crowd)
{
(*data_)[point] += weight;
}
else if (data_locality_ == DataLocality::queue)
{
(*data_).push_back(point);
(*data_).push_back(weight);
}
else
{
throw std::runtime_error("You cannot accumulate to a SpinDensityNew with datalocality of this type");
}
}
void SpinDensityNew::collect(const RefVector<OperatorEstBase>& type_erased_operator_estimators)
{
if (data_locality_ == DataLocality::rank)
{
for (OperatorEstBase& crowd_oeb : type_erased_operator_estimators)
{
// This will throw a std::bad_cast in debug if the calling code hands the
// wrong type erased operator_estimator type into here.
// In release we don't want that overhead.
#ifndef NDEBUG
auto& oeb = dynamic_cast<SpinDensityNew&>(crowd_oeb);
#else
auto& oeb = static_cast<SpinDensityNew&>(crowd_oeb);
#endif
auto& data = oeb.get_data_ref();
for (int id = 0; id < data.size(); id += 2)
{
// This is a smell
size_t point{static_cast<size_t>(data[id])};
const QMCT::RealType weight{data[id + 1]};
(*data_)[point] += weight;
walkers_weight_ += weight;
}
oeb.walkers_weight_ = 0;
}
}
else if (data_locality_ == DataLocality::crowd)
{
OperatorEstBase::collect(type_erased_operator_estimators);
}
else
{
throw std::runtime_error("You cannot call collect on a SpinDensityNew with this DataLocality");
}
}
void SpinDensityNew::report(const std::string& pad)
{
auto& dp_ = derived_parameters_;
app_log() << pad << "SpinDensity report" << std::endl;
app_log() << pad << " dim = " << QMCT::DIM << std::endl;
app_log() << pad << " npoints = " << dp_.npoints << std::endl;
app_log() << pad << " grid = " << dp_.grid << std::endl;
app_log() << pad << " gdims = " << dp_.gdims << std::endl;
app_log() << pad << " corner = " << dp_.corner << std::endl;
app_log() << pad << " center = " << dp_.corner + lattice_.Center << std::endl;
app_log() << pad << " cell " << std::endl;
for (int d = 0; d < QMCT::DIM; ++d)
app_log() << pad << " " << d << " " << lattice_.Rv[d] << std::endl;
app_log() << pad << " end cell " << std::endl;
app_log() << pad << " nspecies = " << species_.size() << std::endl;
for (int s = 0; s < species_.size(); ++s)
app_log() << pad << " species[" << s << "]"
<< " = " << species_.speciesName[s] << " " << species_.attribName.size() << std::endl;
app_log() << pad << "end SpinDensity report" << std::endl;
}
void SpinDensityNew::registerOperatorEstimator(std::vector<observable_helper*>& h5desc, hid_t gid) const
{
hid_t sgid = H5Gcreate(gid, myName.c_str(), 0);
//vector<int> ng(DIM);
//for(int d=0;d<DIM;++d)
// ng[d] = grid[d];
std::vector<int> ng(1);
ng[0] = derived_parameters_.npoints;
for (int s = 0; s < species_.size(); ++s)
{
observable_helper* oh = new observable_helper(species_.speciesName[s]);
oh->set_dimensions(ng, 0);
oh->open(sgid);
h5desc.push_back(oh);
}
}
} // namespace qmcplusplus

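SpinDensityNew::accumulate above maps each particle to a bin through fractional (unit cell) coordinates, a periodic wrap, and the gdims strides, then adds the walker weight at that bin. A standalone sketch of the index computation for one particle (toy orthorhombic cell so toUnit reduces to a division; not QMCPACK code):

// spin_density_bin_sketch.cpp -- illustration only
#include <array>
#include <cmath>
#include <cstddef>
#include <iostream>
#include <vector>

int main()
{
  constexpr int DIM = 3;
  const std::array<double, DIM> cell = {4.0, 6.0, 8.0}; // toy orthorhombic cell lengths
  const std::array<int, DIM> grid = {8, 12, 16};        // bins per axis
  const std::array<int, DIM> gdims = {192, 16, 1};      // flattening strides
  const std::size_t npoints = 1536;

  std::vector<double> data(npoints, 0.0);             // one species' slab of the density grid
  const int species = 0;
  const double weight = 0.75;                         // walker.Weight
  const std::array<double, DIM> r = {1.1, -0.3, 9.0}; // particle position relative to the corner

  std::size_t point = npoints * species;
  for (int d = 0; d < DIM; ++d)
  {
    const double u = r[d] / cell[d];       // toUnit for an orthorhombic cell
    const double frac = u - std::floor(u); // periodic wrap into [0,1)
    point += gdims[d] * static_cast<int>(grid[d] * frac);
  }

  data[point] += weight; // DataLocality::crowd path: write straight into the local grid
  std::cout << "bin " << point << " holds " << data[point] << '\n'; // bin 562 holds 0.75
  return 0;
}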
View File

@ -0,0 +1,110 @@
//////////////////////////////////////////////////////////////////////////////////////
// This file is distributed under the University of Illinois/NCSA Open Source License.
// See LICENSE file in top directory for details.
//
// Copyright (c) 2020 QMCPACK developers.
//
// File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory
//
// File refactored from: SpinDensity.h
//////////////////////////////////////////////////////////////////////////////////////
#ifndef QMCPLUSPLUS_SPINDENSITYNEW_H
#define QMCPLUSPLUS_SPINDENSITYNEW_H
#include "SpinDensityInput.h"
#include <vector>
#include "Configuration.h"
#include "OperatorEstBase.h"
#include "Containers/OhmmsPETE/TinyVector.h"
#include "Utilities/SpeciesSet.h"
namespace qmcplusplus
{
/** Class that collects density per species of particle
*
* commonly used for spin up and down electrons
*
*/
class SpinDensityNew : public OperatorEstBase
{
public:
using POLT = PtclOnLatticeTraits;
using Lattice = POLT::ParticleLayout_t;
using QMCT = QMCTraits;
//data members
SpinDensityInput input_;
SpeciesSet species_;
/** @ingroup SpinDensity mutable parameters
*
* they should be limited to values that can be changed from input
* or are not present explicitly in the SpinDensityInput
* @{
*/
/// Lattice is always local since it is either in the input or a constructor argument.
Lattice lattice_;
SpinDensityInput::DerivedParameters derived_parameters_;
/**@}*/
// this is a bit of a mess to get from SpeciesSet
std::vector<int> species_size_;
/** Constructor for SpinDensityInput that contains an explicitly defined cell
*/
SpinDensityNew(SpinDensityInput&& sdi, const SpeciesSet& species, DataLocality dl = DataLocality::crowd);
/** Constructor for SpinDensityInput without explicitly defined cell
*
* The crystal lattice should come from the same particle set as the species set.
* In case you are tempted to just pass the ParticleSet: don't. It clouds the data dependence of
* constructing the estimator and creates a strong coupling between the classes.
*
* Ideally when validating input is built up enough there would be only one constructor with
* signature
*
* SpinDensityNew(SpinDensityInput&& sdi, SpinDensityInput::DerivedParameters&& dev_par, SpeciesSet species, DataLocality dl);
*/
SpinDensityNew(SpinDensityInput&& sdi, const Lattice&, const SpeciesSet& species, const DataLocality dl = DataLocality::crowd);
SpinDensityNew(const SpinDensityNew& sdn);
void startBlock(int steps) override;
//standard interface
OperatorEstBase* clone() override;
void accumulate(RefVector<MCPWalker>& walkers, RefVector<ParticleSet>& psets) override;
/** These absolutely must be of this derived type
*/
void collect(const RefVector<OperatorEstBase>& operator_estimators) override;
  /** this allows the EstimatorManagerNew to reduce without needing to know the details
   * of SpinDensityNew's data.
   *
   * The base-class default suffices until crowd-level SpinDensity estimators stop carrying their own copy of the density grid.
   */
  //void collect(const OperatorEstBase& oeb);
  /** this gets us into the hdf5 file
   *
   * Just parroting the existing pattern for now, not fully understood.
   * The hdf5 output is another big-state, tightly coupled design that needs to be unraveled and simplified.
   */
void registerOperatorEstimator(std::vector<observable_helper*>& h5desc, hid_t gid) const override;
private:
static std::vector<int> getSpeciesSize(SpeciesSet& species);
/** derived_parameters_ must be valid i.e. initialized with call to input_.calculateDerivedParameters
*/
size_t getFullDataSize();
void accumulateToData(size_t point, QMCT::RealType weight);
void reset();
void report(const std::string& pad);
};
} // namespace qmcplusplus
#endif /* QMCPLUSPLUS_SPINDENSITYNEW_H */
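As a brief sketch of the two construction paths documented above (the input, species set, and lattice setup are assumed to mirror the unit tests later in this commit, not shown here):

  // sdi carries an explicit cell; the lattice is taken from the input
  SpinDensityNew sdn_from_input(std::move(sdi), species_set);
  // sdi_no_cell lacks a cell; the lattice must be supplied by the caller
  SpinDensityNew sdn_from_lattice(std::move(sdi_no_cell), lattice, species_set);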

View File

@ -8,8 +8,6 @@
#//
#// File created by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory
#//////////////////////////////////////////////////////////////////////////////////////
SET(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${QMCPACK_UNIT_TEST_DIR})
@ -17,12 +15,12 @@ SET(SRC_DIR estimators)
SET(UTEST_EXE test_${SRC_DIR})
SET(UTEST_NAME deterministic-unit_test_${SRC_DIR})
SET(SRCS test_accumulator.cpp test_local_energy_est.cpp EstimatorManagerBaseTest.cpp EstimatorManagerNewTest.cpp test_manager.cpp test_EstimatorManagerNew.cpp test_trace_manager.cpp)
SET(SRCS test_accumulator.cpp test_local_energy_est.cpp FakeOperatorEstimator.cpp EstimatorManagerBaseTest.cpp EstimatorManagerNewTest.cpp test_manager.cpp test_EstimatorManagerNew.cpp test_trace_manager.cpp RandomForTest.cpp SpinDensityTesting.cpp test_SpinDensityInput.cpp test_SpinDensityNew.cpp)
ADD_EXECUTABLE(${UTEST_EXE} ${SRCS})
TARGET_LINK_LIBRARIES(${UTEST_EXE} catch_main qmcestimators_unit)
TARGET_LINK_LIBRARIES(${UTEST_EXE} catch_main qmcutil qmcestimators_unit)
IF(USE_OBJECT_TARGET)
TARGET_LINK_LIBRARIES(${UTEST_EXE} qmcestimators_unit qmcham_unit qmcwfs qmcparticle qmcutil containers platform_omptarget)
TARGET_LINK_LIBRARIES(${UTEST_EXE} qmcutil qmcestimators_unit qmcham_unit qmcwfs qmcparticle qmcutil containers platform_omptarget)
ENDIF()
ADD_UNIT_TEST(${UTEST_NAME} "${QMCPACK_UNIT_TEST_DIR}/${UTEST_EXE}")
@ -31,7 +29,7 @@ IF(HAVE_MPI)
SET(UTEST_EXE test_${SRC_DIR}_mpi)
SET(UTEST_NAME deterministic-unit_test_${SRC_DIR}_mpi)
#this is dependent on the directory creation and sym linking of earlier driver tests
SET(SRCS EstimatorManagerNewTest.cpp test_manager_mpi.cpp)
SET(SRCS FakeOperatorEstimator.cpp EstimatorManagerNewTest.cpp test_manager_mpi.cpp)
ADD_EXECUTABLE(${UTEST_EXE} ${SRCS})
IF(USE_OBJECT_TARGET)
TARGET_LINK_LIBRARIES(${UTEST_EXE} qmcestimators_unit qmcham_unit qmcdriver_unit qmcwfs qmcparticle qmcutil containers platform_omptarget)

View File

@ -12,10 +12,12 @@
#include "EstimatorManagerNewTest.h"
#include "Estimators/ScalarEstimatorBase.h"
#include "Platforms/Host/OutputManager.h"
#include "FakeOperatorEstimator.h"
namespace qmcplusplus {
namespace testing {
namespace qmcplusplus
{
namespace testing
{
EstimatorManagerNewTest::EstimatorManagerNewTest(Communicate* comm, int ranks) : em(comm), comm_(comm)
{
int num_ranks = comm_->size();
@ -23,7 +25,6 @@ EstimatorManagerNewTest::EstimatorManagerNewTest(Communicate* comm, int ranks) :
throw std::runtime_error("Bad Rank Count, test expects different number of ranks.");
app_log() << "running on " << num_ranks << '\n';
}
void EstimatorManagerNewTest::fakeSomeScalarSamples()
@ -50,11 +51,35 @@ void EstimatorManagerNewTest::fakeSomeScalarSamples()
em.get_SquaredAverageCache().resize(4);
}
void EstimatorManagerNewTest::fakeSomeOperatorEstimatorSamples(int rank)
{
em.operator_ests_.emplace_back(new FakeOperatorEstimator(comm_->size(), DataLocality::crowd));
FakeOperatorEstimator& foe = dynamic_cast<FakeOperatorEstimator&>(*(em.operator_ests_.back()));
std::vector<QMCT::RealType>& data = foe.get_data_ref();
data[rank] += rank;
data[rank * 10] += rank * 10;
foe.set_walker_weights(1);
}
std::vector<QMCTraits::RealType> EstimatorManagerNewTest::generateGoodOperatorData(int num_ranks)
{
std::vector<QMCT::RealType> good_data(num_ranks * 10, 0.0);
if (comm_->rank() == 0)
{
for (int ir = 0; ir < num_ranks; ++ir)
{
good_data[ir] += ir;
good_data[ir * 10] += ir * 10;
}
}
return good_data;
}
void EstimatorManagerNewTest::collectScalarEstimators()
{
RefVector<ScalarEstimatorBase> est_list = makeRefVector<ScalarEstimatorBase>(estimators_);
em.collectScalarEstimators(est_list);
}
}
}
} // namespace testing
} // namespace qmcplusplus
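To make the fake data concrete, a short worked example with illustrative numbers: for num_ranks = 2, rank 0 adds nothing (0 at index 0) while rank 1 adds 1 at index 1 and 10 at index 10. Summing those per-rank contributions reproduces generateGoodOperatorData, and since the MPI test below compares against good_data scaled by 1 / num_ranks, the reduced values it expects are good_data[i] / 2.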

View File

@ -30,21 +30,32 @@ namespace testing
class EstimatorManagerNewTest
{
public:
using QMCT = QMCTraits;
EstimatorManagerNewTest(Communicate* comm, int ranks);
/** Quickly add scalar samples using FakeEstimator mock estimator. */
void fakeSomeScalarSamples();
  /** Quickly add OperatorEstimator samples using the FakeOperatorEstimator mock estimator. */
void fakeSomeOperatorEstimatorSamples(int rank);
  /** call private EMB method and collect EMBT's estimators_ */
void collectScalarEstimators();
/** reduce the OperatorEstimators onto the EstimatorManagerNew copy. */
void collectOperatorEstimators();
/** for mpi test (it's trivial for 1 rank)
*
* only used by test_manager_mpi.cpp so implemented there.
*/
std::vector<QMCT::RealType> generateGoodOperatorData(int num_ranks);
bool testMakeBlockAverages();
void testReduceOperatorEstimators();
std::vector<QMCT::RealType>& get_operator_data() { return em.operator_ests_[0]->get_data_ref(); }
EstimatorManagerNew em;
private:
Communicate* comm_;
std::vector<FakeEstimator> estimators_;
};
}

View File

@ -0,0 +1,32 @@
//////////////////////////////////////////////////////////////////////////////////////
// This file is distributed under the University of Illinois/NCSA Open Source License.
// See LICENSE file in top directory for details.
//
// Copyright (c) 2020 QMCPACK developers.
//
// File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory
//
// File created by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory
//////////////////////////////////////////////////////////////////////////////////////
#include "FakeOperatorEstimator.h"
#include "type_traits/DataLocality.h"
namespace qmcplusplus
{
FakeOperatorEstimator::FakeOperatorEstimator(int num_ranks, DataLocality data_locality) :
OperatorEstBase(data_locality)
{
data_locality_ = data_locality;
data_ = createLocalData(num_ranks * 10, data_locality_);
}
FakeOperatorEstimator::FakeOperatorEstimator(const FakeOperatorEstimator& foe)
: OperatorEstBase(foe)
{
size_t data_size = foe.data_->size();
data_ = createLocalData(data_size, data_locality_);
}
}

View File

@ -0,0 +1,44 @@
//////////////////////////////////////////////////////////////////////////////////////
// This file is distributed under the University of Illinois/NCSA Open Source License.
// See LICENSE file in top directory for details.
//
// Copyright (c) 2020 QMCPACK developers.
//
// File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory
//
// File created by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory
//////////////////////////////////////////////////////////////////////////////////////
#ifndef QMCPLUSPLUS_FAKEOPERATORESTIMATOR_H
#define QMCPLUSPLUS_FAKEOPERATORESTIMATOR_H
#include "OperatorEstBase.h"
#include "Configuration.h"
#include "type_traits/DataLocality.h"
namespace qmcplusplus
{
class FakeOperatorEstimator : public OperatorEstBase
{
public:
using QMCT = QMCTraits;
FakeOperatorEstimator(int num_ranks, DataLocality data_locality);
FakeOperatorEstimator(const FakeOperatorEstimator& foe);
~FakeOperatorEstimator() override {};
void accumulate(RefVector<MCPWalker>& walkers, RefVector<ParticleSet>& psets) override {}
void registerOperatorEstimator(std::vector<observable_helper*>& h5dec, hid_t gid) const override {}
void startBlock(int nsteps) override {}
OperatorEstBase* clone() override { return new FakeOperatorEstimator(*this); }
void set_walker_weights(QMCT::RealType weight) { walkers_weight_ = weight; }
};
} // namespace qmcplusplus
#endif
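A minimal sketch of how this mock is exercised by the estimator-manager tests (the surrounding setup is assumed; the values are illustrative):

FakeOperatorEstimator foe(2 /*num_ranks*/, DataLocality::crowd); // data sized num_ranks * 10
foe.get_data_ref()[1] += 1;  // poke the flat data directly, as fakeSomeOperatorEstimatorSamples does
foe.set_walker_weights(1);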

View File

@ -0,0 +1,38 @@
//////////////////////////////////////////////////////////////////////////////////////
// This file is distributed under the University of Illinois/NCSA Open Source License.
// See LICENSE file in top directory for details.
//
// Copyright (c) 2020 QMCPACK developers.
//
// File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory
//
// File created by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory
//////////////////////////////////////////////////////////////////////////////////////
#include "RandomForTest.h"
#include <algorithm>
namespace qmcplusplus
{
namespace testing
{
RandomForTest::RandomForTest() { rng.init(0, 1, 111); }
std::vector<RandomForTest::Real> RandomForTest::getRealRandoms(int ncount)
{
std::vector<Real> rng_reals;
rng_reals.reserve(ncount);
std::generate_n(std::back_inserter(rng_reals), ncount, rng);
return rng_reals;
}
void RandomForTest::makeRngReals(std::vector<Real>& rngReals)
{
// until c++ std = 17
//std::generate(rng_reals.begin(), rng_reals.end(), rng());
for (auto& rng_real : rngReals)
rng_real = rng();
}
} // namespace testing
} // namespace qmcplusplus

View File

@ -0,0 +1,36 @@
//////////////////////////////////////////////////////////////////////////////////////
// This file is distributed under the University of Illinois/NCSA Open Source License.
// See LICENSE file in top directory for details.
//
// Copyright (c) 2020 QMCPACK developers.
//
// File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory
//
// File created by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory
//////////////////////////////////////////////////////////////////////////////////////
#ifndef QMCPLUSPLUS_RANDOMFORTEST_H
#define QMCPLUSPLUS_RANDOMFORTEST_H
#include <vector>
#include "Configuration.h"
#include "Utilities/StdRandom.h"
namespace qmcplusplus
{
namespace testing
{
class RandomForTest
{
public:
using Real = QMCTraits::RealType;
RandomForTest();
std::vector<Real> getRealRandoms(int ncount);
void makeRngReals(std::vector<Real>& rng_reals);
private:
StdRandom<Real> rng;
};
} // namespace testing
} // namespace qmcplusplus
#endif
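A quick usage sketch covering both ways of pulling reals from this helper (nothing beyond the interface declared above is assumed):

testing::RandomForTest rft;
std::vector<testing::RandomForTest::Real> vals(6);
rft.makeRngReals(vals);            // fill an existing vector in place
auto more = rft.getRealRandoms(3); // or receive a freshly generated vector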

View File

@ -0,0 +1,29 @@
//////////////////////////////////////////////////////////////////////////////////////
// This file is distributed under the University of Illinois/NCSA Open Source License.
// See LICENSE file in top directory for details.
//
// Copyright (c) 2020 QMCPACK developers.
//
// File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory
//
// File created by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory
//////////////////////////////////////////////////////////////////////////////////////
#include "SpinDensityTesting.h"
namespace qmcplusplus
{
namespace testing
{
Lattice makeTestLattice()
{
Lattice lattice;
lattice.BoxBConds = true; // periodic
lattice.R = ParticleSet::Tensor_t(3.37316115, 3.37316115, 0.00000000, 0.00000000, 3.37316115, 3.37316115, 3.37316115,
0.00000000, 3.37316115);
lattice.reset();
lattice.explicitly_defined = true;
return lattice;
}
} // namespace testing
} // namespace qmcplusplus

View File

@ -0,0 +1,29 @@
//////////////////////////////////////////////////////////////////////////////////////
// This file is distributed under the University of Illinois/NCSA Open Source License.
// See LICENSE file in top directory for details.
//
// Copyright (c) 2020 QMCPACK developers.
//
// File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory
//
// File created by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory
//////////////////////////////////////////////////////////////////////////////////////
#ifndef QMCPLUSPLUS_SPINDENSITYTESTING_H
#define QMCPLUSPLUS_SPINDENSITYTESTING_H
#include "ParticleSet.h"
namespace qmcplusplus
{
namespace testing
{
using POLT = PtclOnLatticeTraits;
using Lattice = POLT::ParticleLayout_t;
Lattice makeTestLattice();
} // namespace testing
} // namespace qmcplusplus
#endif

View File

@ -0,0 +1,72 @@
//////////////////////////////////////////////////////////////////////////////////////
// This file is distributed under the University of Illinois/NCSA Open Source License.
// See LICENSE file in top directory for details.
//
// Copyright (c) 2020 QMCPACK developers.
//
// File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Lab
//
// File created by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Lab
//////////////////////////////////////////////////////////////////////////////////////
#ifndef QMCPLUSPLUS_VALIDSPINDENSITYINPUT_H
#define QMCPLUSPLUS_VALIDSPINDENSITYINPUT_H
#include <array>
namespace qmcplusplus
{
namespace testing
{
// clang-format off
constexpr std::array<const char*, 3> valid_spin_density_input_sections{
R"(
<estimator name="spindensity_new" type="spindensity" report="yes">
<parameter name="grid">
10 10 10
</parameter>
<parameter name="center">
0.0 0.0 0.0
</parameter>
<parameter name="cell">
3.37316115 3.37316115 0.00000000
0.00000000 3.37316115 3.37316115
3.37316115 0.00000000 3.37316115
</parameter>
</estimator>
)",
R"(
<estimator name="spindensity_new" type="spindensity" report="yes">
<parameter name="dr">
.4777 .4777 .4777
</parameter>
<parameter name="center">
0.0 0.0 0.0
</parameter>
<parameter name="cell">
3.37316115 3.37316115 0.00000000
0.00000000 3.37316115 3.37316115
3.37316115 0.00000000 3.37316115
</parameter>
</estimator>
)",
R"(
<estimator name="spindensity_new" type="spindensity" report="yes">
<parameter name="dr">
.4777 .4777 .4777
</parameter>
<parameter name="center">
0.0 0.0 0.0
</parameter>
</estimator>
)"};
// clang-format on
constexpr int valid_spindensity_input_grid = 0;
constexpr int valid_spindensity_input_dr = 1;
constexpr int valid_spindensity_input_no_cell = 2;
} // namespace testing
} // namespace qmcplusplus
#endif /* QMCPLUSPLUS_VALIDSPINDENSITYINPUT_H */

View File

@ -66,6 +66,19 @@ TEST_CASE("EstimatorManagerNew::collectScalarEstimators", "[estimators]")
}
TEST_CASE("EstimatorManagerNew::collectOperatorEstimators", "[estimators]")
{
Communicate* c = OHMMS::Controller;
testing::EstimatorManagerNewTest embt(c, 1);
// by design we have done no averaging here
// the division by total weight happens only when a block is over and the
// accumulated data has been reduced down. So here there should just be simple sums.
}
TEST_CASE("EstimatorManagerNew adhoc addVector operator", "[estimators]")
{
int num_scalars = 3;

View File

@ -0,0 +1,57 @@
//////////////////////////////////////////////////////////////////////////////////////
// This file is distributed under the University of Illinois/NCSA Open Source License.
// See LICENSE file in top directory for details.
//
// Copyright (c) 2020 QMCPACK developers.
//
// File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Lab
//
// File created by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Lab
//////////////////////////////////////////////////////////////////////////////////////
#include "catch.hpp"
#include "SpinDensityInput.h"
#include "ValidSpinDensityInput.h"
#include "SpinDensityTesting.h"
#include "ParticleSet.h"
#include "OhmmsData/Libxml2Doc.h"
#include <stdio.h>
#include <sstream>
namespace qmcplusplus
{
TEST_CASE("SpinDensityInput::readXML", "[estimators]")
{
using POLT = PtclOnLatticeTraits;
using Lattice = POLT::ParticleLayout_t;
for (auto input_xml : testing::valid_spin_density_input_sections)
{
Libxml2Document doc;
bool okay = doc.parseFromString(input_xml);
REQUIRE(okay);
xmlNodePtr node = doc.getRoot();
SpinDensityInput sdi;
sdi.readXML(node);
Lattice lattice;
if (sdi.get_cell().explicitly_defined == true)
lattice = sdi.get_cell();
else
lattice = testing::makeTestLattice();
SpinDensityInput::DerivedParameters dev_par = sdi.calculateDerivedParameters(lattice);
CHECK(dev_par.npoints == 1000);
TinyVector<int, SpinDensityInput::DIM> grid(10, 10, 10);
CHECK(dev_par.grid == grid);
TinyVector<int, SpinDensityInput::DIM> gdims(100, 10, 1);
CHECK(dev_par.gdims == gdims);
}
}
} // namespace qmcplusplus
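The expected values in this test follow directly from the 10x10x10 grid of the first valid input section: npoints = 10 * 10 * 10 = 1000, and assuming the usual row-major strides the grid dimensions reduce to gdims = (10 * 10, 10, 1) = (100, 10, 1).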

View File

@ -0,0 +1,309 @@
//////////////////////////////////////////////////////////////////////////////////////
// This file is distributed under the University of Illinois/NCSA Open Source License.
// See LICENSE file in top directory for details.
//
// Copyright (c) 2020 QMCPACK developers.
//
// File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Lab
//
// File created by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Lab
//////////////////////////////////////////////////////////////////////////////////////
#include "catch.hpp"
#include "SpinDensityInput.h"
#include "ValidSpinDensityInput.h"
#include "SpinDensityNew.h"
#include "RandomForTest.h"
#include "Utilities/SpeciesSet.h"
#include "ParticleSet.h"
#include "SpinDensityTesting.h"
#include "OhmmsData/Libxml2Doc.h"
#include <stdio.h>
#include <sstream>
namespace qmcplusplus
{
void accumulateFromPsets(int ncrowds, SpinDensityNew& sdn, UPtrVector<OperatorEstBase>& crowd_sdns)
{
using QMCT = QMCTraits;
for (int iops = 0; iops < ncrowds; ++iops)
{
std::vector<OperatorEstBase::MCPWalker> walkers;
int nwalkers = 4;
for (int iw = 0; iw < nwalkers; ++iw)
walkers.emplace_back(2);
std::vector<ParticleSet> psets;
crowd_sdns.emplace_back(std::make_unique<SpinDensityNew>(sdn));
SpinDensityNew& crowd_sdn = dynamic_cast<SpinDensityNew&>(*(crowd_sdns.back()));
for (int iw = 0; iw < nwalkers; ++iw)
{
psets.emplace_back();
ParticleSet& pset = psets.back();
pset.create(2);
pset.R[0] = ParticleSet::PosType(0.00000000, 0.00000000, 0.00000000);
pset.R[1] = ParticleSet::PosType(0.68658058, 0.68658058, 0.68658058);
}
auto ref_walkers = makeRefVector<OperatorEstBase::MCPWalker>(walkers);
auto ref_psets = makeRefVector<ParticleSet>(psets);
crowd_sdn.accumulate(ref_walkers, ref_psets);
}
}
void randomUpdateAccumulate(testing::RandomForTest& rft, UPtrVector<OperatorEstBase>& crowd_sdns)
{
using QMCT = QMCTraits;
for (auto& uptr_crowd_sdn : crowd_sdns)
{
std::vector<OperatorEstBase::MCPWalker> walkers;
int nwalkers = 4;
for (int iw = 0; iw < nwalkers; ++iw)
walkers.emplace_back(2);
std::vector<ParticleSet> psets;
SpinDensityNew& crowd_sdn = dynamic_cast<SpinDensityNew&>(*(uptr_crowd_sdn));
std::vector<QMCT::RealType> rng_reals(nwalkers * QMCT::DIM * 2);
rft.makeRngReals(rng_reals);
auto it_rng_reals = rng_reals.begin();
for (int iw = 0; iw < nwalkers; ++iw)
{
psets.emplace_back();
ParticleSet& pset = psets.back();
pset.create(2);
      // assign components one at a time; the evaluation order of constructor arguments is unspecified
      for (int ip = 0; ip < 2; ++ip)
        for (int d = 0; d < QMCT::DIM; ++d)
          pset.R[ip][d] = *it_rng_reals++;
}
auto ref_walkers = makeRefVector<OperatorEstBase::MCPWalker>(walkers);
auto ref_psets = makeRefVector<ParticleSet>(psets);
crowd_sdn.accumulate(ref_walkers, ref_psets);
}
}
TEST_CASE("SpinDensityNew::SpinDensityNew(SPInput, SpeciesSet)", "[estimators]")
{
Libxml2Document doc;
bool okay = doc.parseFromString(testing::valid_spin_density_input_sections[testing::valid_spindensity_input_grid]);
REQUIRE(okay);
xmlNodePtr node = doc.getRoot();
SpinDensityInput sdi;
sdi.readXML(node);
SpeciesSet species_set;
int ispecies = species_set.addSpecies("C");
int iattribute = species_set.addAttribute("membersize");
species_set(iattribute, ispecies) = 2;
SpinDensityInput sdi_copy = sdi;
SpinDensityNew(std::move(sdi), species_set);
CrystalLattice<OHMMS_PRECISION, OHMMS_DIM> lattice;
CHECK_THROWS(SpinDensityNew(std::move(sdi_copy), lattice, species_set));
}
TEST_CASE("SpinDensityNew::SpinDensityNew(SPInput, Lattice, SpeciesSet)", "[estimators]")
{
Libxml2Document doc;
bool okay = doc.parseFromString(testing::valid_spin_density_input_sections[testing::valid_spindensity_input_no_cell]);
REQUIRE(okay);
xmlNodePtr node = doc.getRoot();
SpinDensityInput sdi;
sdi.readXML(node);
SpeciesSet species_set;
int ispecies = species_set.addSpecies("C");
int iattribute = species_set.addAttribute("membersize");
species_set(iattribute, ispecies) = 2;
auto lattice = testing::makeTestLattice();
SpinDensityNew(std::move(sdi), lattice, species_set);
}
TEST_CASE("SpinDensityNew::accumulate", "[estimators]")
{
using MCPWalker = OperatorEstBase::MCPWalker;
using QMCT = QMCTraits;
Libxml2Document doc;
bool okay = doc.parseFromString(testing::valid_spin_density_input_sections[0]);
REQUIRE(okay);
xmlNodePtr node = doc.getRoot();
SpinDensityInput sdi;
sdi.readXML(node);
SpeciesSet species_set;
int ispecies = species_set.addSpecies("u");
ispecies = species_set.addSpecies("d");
CHECK(ispecies == 1);
int iattribute = species_set.addAttribute("membersize");
species_set(iattribute, 0) = 1;
species_set(iattribute, 1) = 1;
SpinDensityNew sdn(std::move(sdi), species_set);
std::vector<MCPWalker> walkers;
int nwalkers = 4;
for (int iw = 0; iw < nwalkers; ++iw)
walkers.emplace_back(2);
std::vector<ParticleSet> psets;
for (int iw = 0; iw < nwalkers; ++iw)
{
psets.emplace_back();
ParticleSet& pset = psets.back();
pset.create(2);
pset.R[0] = ParticleSet::PosType(0.00000000, 0.00000000, 0.00000000);
pset.R[1] = ParticleSet::PosType(1.68658058, 1.68658058, 1.68658058);
}
auto ref_walkers = makeRefVector<MCPWalker>(walkers);
auto ref_psets = makeRefVector<ParticleSet>(psets);
sdn.accumulate(ref_walkers, ref_psets);
std::vector<QMCT::RealType>& data_ref = sdn.get_data_ref();
// There should be a check that the discretization of particle locations expressed in lattice coords
// is correct. This just checks it hasn't changed from how it was in SpinDensity which lacked testing.
CHECK(data_ref[555] == 4);
CHECK(data_ref[1777] == 4);
}
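// Note on the indices above: the flat data layout is assumed (via getFullDataSize) to be one
// npoints-sized block per species, so with npoints = 1000 index 555 lies in the up block and
// 1777 = 1000 + 777 in the down block; each bin holds 4 because all four identical walkers
// land on the same grid points.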
TEST_CASE("SpinDensityNew::collect(DataLocality::crowd)", "[estimators]")
{
{
using MCPWalker = OperatorEstBase::MCPWalker;
using QMCT = QMCTraits;
Libxml2Document doc;
bool okay = doc.parseFromString(testing::valid_spin_density_input_sections[0]);
REQUIRE(okay);
xmlNodePtr node = doc.getRoot();
SpinDensityInput sdi;
sdi.readXML(node);
SpeciesSet species_set;
int ispecies = species_set.addSpecies("u");
ispecies = species_set.addSpecies("d");
CHECK(ispecies == 1);
int iattribute = species_set.addAttribute("membersize");
species_set(iattribute, 0) = 1;
species_set(iattribute, 1) = 1;
SpinDensityNew sdn(std::move(sdi), species_set);
UPtrVector<OperatorEstBase> crowd_sdns;
int ncrowds = 2;
accumulateFromPsets(ncrowds, sdn, crowd_sdns);
RefVector<OperatorEstBase> crowd_oeb_refs = convertUPtrToRefVector(crowd_sdns);
sdn.collect(crowd_oeb_refs);
std::vector<QMCT::RealType>& data_ref = sdn.get_data_ref();
// There should be a check that the discretization of particle locations expressed in lattice coords
// is correct. This just checks it hasn't changed from how it was in SpinDensity which lacked testing.
CHECK(data_ref[555] == 4 * ncrowds);
CHECK(data_ref[1666] == 4 * ncrowds);
}
}
TEST_CASE("SpinDensityNew::collect(DataLocality::rank)", "[estimators]")
{
{
using MCPWalker = OperatorEstBase::MCPWalker;
using QMCT = QMCTraits;
Libxml2Document doc;
bool okay = doc.parseFromString(testing::valid_spin_density_input_sections[0]);
REQUIRE(okay);
xmlNodePtr node = doc.getRoot();
SpinDensityInput sdi;
sdi.readXML(node);
SpeciesSet species_set;
int ispecies = species_set.addSpecies("u");
ispecies = species_set.addSpecies("d");
CHECK(ispecies == 1);
int iattribute = species_set.addAttribute("membersize");
species_set(iattribute, 0) = 1;
species_set(iattribute, 1) = 1;
SpinDensityNew sdn(std::move(sdi), species_set, DataLocality::rank);
auto lattice = testing::makeTestLattice();
UPtrVector<OperatorEstBase> crowd_sdns;
int ncrowds = 2;
accumulateFromPsets(ncrowds, sdn, crowd_sdns);
RefVector<OperatorEstBase> crowd_oeb_refs = convertUPtrToRefVector(crowd_sdns);
sdn.collect(crowd_oeb_refs);
std::vector<QMCT::RealType>& data_ref = sdn.get_data_ref();
// There should be a check that the discretization of particle locations expressed in lattice coords
// is correct. This just checks it hasn't changed from how it was in SpinDensity which lacked testing.
CHECK(data_ref[555] == 4 * ncrowds);
CHECK(data_ref[1666] == 4 * ncrowds);
}
}
TEST_CASE("SpinDensityNew algorithm comparison", "[estimators]")
{
using MCPWalker = OperatorEstBase::MCPWalker;
using QMCT = QMCTraits;
Libxml2Document doc;
bool okay = doc.parseFromString(testing::valid_spin_density_input_sections[0]);
REQUIRE(okay);
xmlNodePtr node = doc.getRoot();
SpinDensityInput sdi;
sdi.readXML(node);
SpeciesSet species_set;
int ispecies = species_set.addSpecies("u");
ispecies = species_set.addSpecies("d");
CHECK(ispecies == 1);
int iattribute = species_set.addAttribute("membersize");
species_set(iattribute, 0) = 1;
species_set(iattribute, 1) = 1;
SpinDensityInput sdi_copy = sdi;
int ncrowds = 3;
int nsteps = 4;
SpinDensityNew sdn_rank(std::move(sdi), species_set, DataLocality::rank);
UPtrVector<OperatorEstBase> crowd_sdns_rank;
accumulateFromPsets(ncrowds, sdn_rank, crowd_sdns_rank);
testing::RandomForTest rng_for_test_rank;
for (int i = 0; i < nsteps; ++i)
randomUpdateAccumulate(rng_for_test_rank, crowd_sdns_rank);
RefVector<OperatorEstBase> crowd_oeb_refs_rank = convertUPtrToRefVector(crowd_sdns_rank);
sdn_rank.collect(crowd_oeb_refs_rank);
std::vector<QMCT::RealType>& data_ref_rank = sdn_rank.get_data_ref();
SpinDensityNew sdn_crowd(std::move(sdi), species_set, DataLocality::crowd);
UPtrVector<OperatorEstBase> crowd_sdns_crowd;
accumulateFromPsets(ncrowds, sdn_crowd, crowd_sdns_crowd);
testing::RandomForTest rng_for_test_crowd;
for (int i = 0; i < nsteps; ++i)
randomUpdateAccumulate(rng_for_test_crowd, crowd_sdns_crowd);
RefVector<OperatorEstBase> crowd_oeb_refs_crowd = convertUPtrToRefVector(crowd_sdns_crowd);
sdn_crowd.collect(crowd_oeb_refs_crowd);
std::vector<QMCT::RealType>& data_ref_crowd = sdn_crowd.get_data_ref();
for (size_t i = 0; i < data_ref_rank.size(); ++i)
{
    if (data_ref_crowd[i] != data_ref_rank[i])
    {
      FAIL_CHECK("crowd local " << data_ref_crowd[i] << " != rank local " << data_ref_rank[i] << " at index " << i);
      break;
    }
}
}
} // namespace qmcplusplus

View File

@ -19,12 +19,12 @@
namespace qmcplusplus
{
namespace testing {
namespace testing
{
bool EstimatorManagerNewTest::testMakeBlockAverages()
{
if(em.my_comm_->rank() == 1) {
if (em.my_comm_->rank() == 1)
{
estimators_[1].scalars[0](3.0);
estimators_[1].scalars[1](3.0);
estimators_[1].scalars[2](3.0);
@ -33,8 +33,8 @@ bool EstimatorManagerNewTest::testMakeBlockAverages()
// manipulation of state to arrive at to be tested state.
// - From EstimatorManagerBase::reset
em.weightInd = em.BlockProperties.add("BlockWeight");
em.cpuInd = em.BlockProperties.add("BlockCPU");
em.weightInd = em.BlockProperties.add("BlockWeight");
em.cpuInd = em.BlockProperties.add("BlockCPU");
em.acceptRatioInd = em.BlockProperties.add("AcceptRatio");
// - From EstimatorManagerBase::start
@ -45,12 +45,11 @@ bool EstimatorManagerNewTest::testMakeBlockAverages()
// - 2 with 1 sample 1
// - 1 with 2
double block_weight = 0;
std::for_each(estimators_.begin(),estimators_.end(),[&block_weight](auto& est){
block_weight += est.scalars[0].count();
});
std::for_each(estimators_.begin(), estimators_.end(),
[&block_weight](auto& est) { block_weight += est.scalars[0].count(); });
em.PropertyCache[em.weightInd] = block_weight;
em.PropertyCache[em.cpuInd] = 1.0;
RefVector<ScalarEstimatorBase> est_list = makeRefVector<ScalarEstimatorBase>(estimators_);
em.collectScalarEstimators(est_list);
@ -60,29 +59,58 @@ bool EstimatorManagerNewTest::testMakeBlockAverages()
return true;
}
}
void EstimatorManagerNewTest::testReduceOperatorEstimators() { em.reduceOperatorEstimators(); }
} // namespace testing
TEST_CASE("EstimatorManagerNew::makeBlockAverages()", "[estimators]")
{
Communicate* c = OHMMS::Controller;
int num_ranks = c->size();
int num_ranks = c->size();
testing::EstimatorManagerNewTest embt(c, num_ranks);
embt.fakeSomeScalarSamples();
embt.testMakeBlockAverages();
// right now only rank() == 0 gets the actual averages
if(c->rank() == 0)
if (c->rank() == 0)
{
double correct_value = ( 5.0 * num_ranks + 3.0) / (4 * (num_ranks -1) + 5);
double correct_value = (5.0 * num_ranks + 3.0) / (4 * (num_ranks - 1) + 5);
CHECK(embt.em.get_AverageCache()[0] == Approx(correct_value));
correct_value = (8.0 * num_ranks + 3.0 ) / (4 * (num_ranks -1) + 5);
correct_value = (8.0 * num_ranks + 3.0) / (4 * (num_ranks - 1) + 5);
CHECK(embt.em.get_AverageCache()[1] == Approx(correct_value));
correct_value = (11.0 * num_ranks + 3.0 ) / (4 * (num_ranks -1) + 5);
correct_value = (11.0 * num_ranks + 3.0) / (4 * (num_ranks - 1) + 5);
CHECK(embt.em.get_AverageCache()[2] == Approx(correct_value));
correct_value = (14.0 * num_ranks + 3.0 ) / (4 * (num_ranks -1) + 5);
correct_value = (14.0 * num_ranks + 3.0) / (4 * (num_ranks - 1) + 5);
CHECK(embt.em.get_AverageCache()[3] == Approx(correct_value));
}
}
TEST_CASE("EstimatorManagerNew::reduceOperatorestimators()", "[estimators]")
{
Communicate* c = OHMMS::Controller;
int num_ranks = c->size();
testing::EstimatorManagerNewTest embt(c, num_ranks);
embt.fakeSomeOperatorEstimatorSamples(c->rank());
std::vector<QMCTraits::RealType> good_data = embt.generateGoodOperatorData(num_ranks);
embt.testReduceOperatorEstimators();
if (c->rank() == 0)
{
auto& test_data = embt.get_operator_data();
QMCTraits::RealType norm = 1.0 / static_cast<QMCTraits::RealType>(num_ranks);
for (size_t i = 0; i < test_data.size(); ++i)
{
QMCTraits::RealType norm_good_data = good_data[i] * norm;
if (norm_good_data != test_data[i])
{
FAIL_CHECK("norm_good_data " << norm_good_data << " != test_data " << test_data[i] << " at index " << i);
break;
}
}
}
}
} // namespace qmcplusplus

View File

@ -19,7 +19,7 @@ TEST_CASE("test_communicate_split_one", "[message]")
{
Communicate* c = OHMMS::Controller;
Communicate* c2 = new Communicate(*c, 1);
auto c2 = std::make_unique<Communicate>(*c, 1);
REQUIRE(c2->size() == c->size());
REQUIRE(c2->rank() == c->rank());

View File

@ -31,8 +31,9 @@ namespace qmcplusplus
* Should not modify the name, since composite types, such as MDRunData, use the name.
*
*/
struct ProjectData : public OhmmsElementBase
class ProjectData : public OhmmsElementBase
{
public:
/// constructor
ProjectData(const char* aname = 0);
@ -56,20 +57,32 @@ struct ProjectData : public OhmmsElementBase
void setCommunicator(Communicate* c);
///returns the name of the project
inline const char* CurrentMainRoot() const { return m_projectmain.c_str(); }
/** returns the title of the project
* <project id="det_qmc_short_sdbatch_vmcbatch_mwalkers" series="0">
   * translates to m_title = "det_qmc_short_sdbatch_vmcbatch_mwalkers"
*/
inline const std::string& getTitle() const { return m_title; }
///returns the name of the project
inline const char* CurrentRoot() const { return m_projectroot.c_str(); }
  /** returns the projectmain of the project; the series id is incremented at every QMC section
   * <project id="det_qmc_short_sdbatch_vmcbatch_mwalkers" series="0">
   * translates to m_projectmain = "det_qmc_short_sdbatch_vmcbatch_mwalkers.s000"
*/
inline const std::string& CurrentMainRoot() const { return m_projectmain; }
///returns the name of the project
inline const char* NextRoot() const { return m_nextroot.c_str(); }
  /** returns the nextroot of the project; the series id is incremented at every QMC section
   * <project id="det_qmc_short_sdbatch_vmcbatch_mwalkers" series="0">
   * translates to m_nextroot = "det_qmc_short_sdbatch_vmcbatch_mwalkers.s001"
*/
inline const std::string& NextRoot() const { return m_nextroot; }
/** return the root of the previous sequence
* @param oldroot is composed by the m_title and m_series
*/
bool PreviousRoot(std::string& oldroot) const;
int getSeriesIndex() const { return m_series; }
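  // Illustration taken from the accessor comments above:
  //   <project id="det_qmc_short_sdbatch_vmcbatch_mwalkers" series="0"> gives
  //   getTitle()        == "det_qmc_short_sdbatch_vmcbatch_mwalkers"
  //   CurrentMainRoot() == "det_qmc_short_sdbatch_vmcbatch_mwalkers.s000"
  //   NextRoot()        == "det_qmc_short_sdbatch_vmcbatch_mwalkers.s001"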
private:
///title of the project
std::string m_title;

View File

@ -233,7 +233,7 @@ void VariableSet::setDefaults(bool optimize_all)
Index[i] = optimize_all ? i : -1;
}
void VariableSet::print(std::ostream& os, int leftPadSpaces, bool printHeader)
void VariableSet::print(std::ostream& os, int leftPadSpaces, bool printHeader) const
{
std::string pad_str = std::string(leftPadSpaces, ' ');
int max_name_len = 0;

View File

@ -356,7 +356,7 @@ struct VariableSet
*/
void setDefaults(bool optimize_all);
void print(std::ostream& os, int leftPadSpaces = 0, bool printHeader = false);
void print(std::ostream& os, int leftPadSpaces = 0, bool printHeader = false) const;
};
} // namespace optimize

View File

@ -140,7 +140,7 @@ public:
}
void dfunc(std::vector<Return_t> RT, std::vector<Return_t>& FG)
void dfunc(const std::vector<Return_t>& RT, std::vector<Return_t>& FG)
{
///To test we simply output the analytic and numeric gradients of the cost function. Make sure they agree.
std::vector<Return_t> Dummy(FG);
@ -163,7 +163,7 @@ public:
else
app_log() << vname << " " << RT[k] << " " << Dummy[k] << " " << FG[k] << " inf" << std::endl;
if (output_param_file_)
param_deriv_file_ << FG[k] << " ";
param_deriv_file_ << std::setprecision(10) << FG[k] << " ";
}
if (output_param_file_)
param_deriv_file_ << std::endl;

View File

@ -21,6 +21,7 @@ SET(PARTICLE
ParticleSet.BC.cpp
DynamicCoordinatesBuilder.cpp
MCWalkerConfiguration.cpp
WalkerConfigurations.cpp
SampleStack.cpp
createDistanceTableAA.cpp
createDistanceTableAB.cpp

View File

@ -38,8 +38,7 @@ TEST_CASE("ewald3d", "[lrhandler]")
ParticleSet ref; // handler needs ref.SK.KLists
ref.Lattice = Lattice; // !!!! crucial for access to Volume
ref.LRBox = Lattice; // !!!! crucial for S(k) update
StructFact* SK = new StructFact(ref, Lattice.LR_kc);
ref.SK = SK;
ref.SK = std::make_unique<StructFact>(ref, Lattice.LR_kc);
EwaldHandler3D handler(ref, Lattice.LR_kc);
// make sure initBreakup changes the default sigma
@ -87,8 +86,7 @@ TEST_CASE("ewald3d df", "[lrhandler]")
ParticleSet ref; // handler needs ref.SK.KLists
ref.Lattice = Lattice; // !!!! crucial for access to Volume
ref.LRBox = Lattice; // !!!! crucial for S(k) update
StructFact* SK = new StructFact(ref, Lattice.LR_kc);
ref.SK = SK;
ref.SK = std::make_unique<StructFact>(ref, Lattice.LR_kc);
EwaldHandler3D handler(ref, Lattice.LR_kc);
// make sure initBreakup changes the default sigma

View File

@ -43,8 +43,7 @@ TEST_CASE("dummy", "[lrhandler]")
ParticleSet ref; // handler needs ref.SK.KLists
ref.Lattice = Lattice; // !!!! crucial for access to Volume
ref.LRBox = Lattice; // !!!! crucial for S(k) update
StructFact* SK = new StructFact(ref, Lattice.LR_kc);
ref.SK = SK;
ref.SK = std::make_unique<StructFact>(ref, Lattice.LR_kc);
DummyLRHandler<CoulombF2> handler(Lattice.LR_kc);
handler.initBreakup(ref);
@ -62,8 +61,8 @@ TEST_CASE("dummy", "[lrhandler]")
// the full Coulomb potential should be retained in kspace
for (int ish = 0; ish < handler.MaxKshell; ish++)
{
int ik = SK->KLists.kshell[ish];
double k2 = SK->KLists.ksq[ik];
int ik = ref.SK->KLists.kshell[ish];
double k2 = ref.SK->KLists.ksq[ik];
double fk_expect = fk(k2);
REQUIRE(handler.Fk_symm[ish] == Approx(norm * fk_expect));
}

View File

@ -49,8 +49,7 @@ TEST_CASE("srcoul", "[lrhandler]")
ParticleSet ref; // handler needs ref.SK.KLists
ref.Lattice = Lattice; // !!!! crucial for access to Volume
ref.LRBox = Lattice; // !!!! crucial for S(k) update
StructFact* SK = new StructFact(ref, Lattice.LR_kc);
ref.SK = SK;
ref.SK = std::make_unique<StructFact>(ref, Lattice.LR_kc);
LRHandlerSRCoulomb<EslerCoulomb3D_ForSRCOUL, LPQHISRCoulombBasis> handler(ref);
handler.initBreakup(ref);
@ -93,8 +92,7 @@ TEST_CASE("srcoul df", "[lrhandler]")
ParticleSet ref; // handler needs ref.SK.KLists
ref.Lattice = Lattice; // !!!! crucial for access to Volume
ref.LRBox = Lattice; // !!!! crucial for S(k) update
StructFact* SK = new StructFact(ref, Lattice.LR_kc);
ref.SK = SK;
ref.SK = std::make_unique<StructFact>(ref, Lattice.LR_kc);
LRHandlerSRCoulomb<EslerCoulomb3D_ForSRCOUL, LPQHISRCoulombBasis> handler(ref);
handler.initBreakup(ref);

View File

@ -50,8 +50,7 @@ TEST_CASE("temp3d", "[lrhandler]")
ParticleSet ref; // handler needs ref.SK.KLists
ref.Lattice = Lattice; // !!!! crucial for access to Volume
ref.LRBox = Lattice; // !!!! crucial for S(k) update
StructFact* SK = new StructFact(ref, Lattice.LR_kc);
ref.SK = SK;
ref.SK = std::make_unique<StructFact>(ref, Lattice.LR_kc);
LRHandlerTemp<EslerCoulomb3D, LPQHIBasis> handler(ref);
handler.initBreakup(ref);

View File

@ -36,6 +36,9 @@ struct MCSample
ParticleSet::ParticleLaplacian_t L;
ParticleSet::RealType LogPsi, Sign, PE, KE;
inline MCSample(const ParticleSet& pset) : R(pset.R), spins(pset.spins) { }
  /// deprecated. Beyond w.R and w.spins, the other members may still be used somewhere but are not intended to be.
inline MCSample(const Walker_t& w) : R(w.R), spins(w.spins), G(w.G), L(w.L)
{
LogPsi = w.Properties(WP::LOGPSI);

View File

@ -47,9 +47,7 @@ MCWalkerConfiguration::MCWalkerConfiguration(const DynamicCoordinateKind kind)
iatList_GPU("iatList_GPU"),
AcceptList_GPU("MCWalkerConfiguration::AcceptList_GPU"),
#endif
OwnWalkers(true),
ReadyForPbyP(false),
GlobalNumWalkers(0),
UpdateMode(Update_Walker),
reptile(0),
Polymer(0)
@ -69,9 +67,7 @@ MCWalkerConfiguration::MCWalkerConfiguration(const MCWalkerConfiguration& mcw)
iatList_GPU("iatList_GPU"),
AcceptList_GPU("MCWalkerConfiguration::AcceptList_GPU"),
#endif
OwnWalkers(true),
ReadyForPbyP(false),
GlobalNumWalkers(mcw.GlobalNumWalkers),
UpdateMode(Update_Walker),
Polymer(0)
{
@ -83,54 +79,17 @@ MCWalkerConfiguration::MCWalkerConfiguration(const MCWalkerConfiguration& mcw)
//initPropertyList();
}
///default destructor
MCWalkerConfiguration::~MCWalkerConfiguration()
{
if (OwnWalkers)
destroyWalkers(WalkerList.begin(), WalkerList.end());
}
void MCWalkerConfiguration::createWalkers(int n)
{
if (WalkerList.empty())
{
while (n)
const int old_nw = getActiveWalkers();
WalkerConfigurations::createWalkers(n, TotalNum);
  // no pre-existing walkers, need to initialize them based on the particle set.
if (old_nw == 0)
for (auto* awalker : WalkerList)
{
Walker_t* awalker = new Walker_t(TotalNum);
awalker->R = R;
awalker->spins = spins;
WalkerList.push_back(awalker);
--n;
awalker->R = R;
awalker->spins = spins;
}
}
else
{
if (WalkerList.size() >= n)
{
int iw = WalkerList.size(); //copy from the back
for (int i = 0; i < n; ++i)
{
WalkerList.push_back(new Walker_t(*WalkerList[--iw]));
}
}
else
{
int nc = n / WalkerList.size();
int nw0 = WalkerList.size();
for (int iw = 0; iw < nw0; ++iw)
{
for (int ic = 0; ic < nc; ++ic)
WalkerList.push_back(new Walker_t(*WalkerList[iw]));
}
n -= nc * nw0;
while (n > 0)
{
WalkerList.push_back(new Walker_t(*WalkerList[--nw0]));
--n;
}
}
}
resizeWalkerHistories();
}
@ -139,115 +98,16 @@ void MCWalkerConfiguration::resize(int numWalkers, int numPtcls)
{
if (TotalNum && WalkerList.size())
app_warning() << "MCWalkerConfiguration::resize cleans up the walker list." << std::endl;
const int old_nw = getActiveWalkers();
ParticleSet::resize(unsigned(numPtcls));
int dn = numWalkers - WalkerList.size();
if (dn > 0)
createWalkers(dn);
if (dn < 0)
{
int nw = -dn;
if (nw < WalkerList.size())
WalkerConfigurations::resize(numWalkers, TotalNum);
  // no pre-existing walkers, need to initialize them based on the particle set.
if (old_nw == 0)
for (auto* awalker : WalkerList)
{
iterator it = WalkerList.begin();
while (nw)
{
delete *it;
++it;
--nw;
}
WalkerList.erase(WalkerList.begin(), WalkerList.begin() - dn);
awalker->R = R;
awalker->spins = spins;
}
}
//iterator it = WalkerList.begin();
//while(it != WalkerList.end()) {
// delete *it++;
//}
//WalkerList.erase(WalkerList.begin(),WalkerList.end());
//R.resize(np);
//TotalNum = np;
//createWalkers(nw);
}
///returns the next valid iterator
MCWalkerConfiguration::iterator MCWalkerConfiguration::destroyWalkers(iterator first, iterator last)
{
if (OwnWalkers)
{
iterator it = first;
while (it != last)
{
delete *it++;
}
}
return WalkerList.erase(first, last);
}
void MCWalkerConfiguration::createWalkers(iterator first, iterator last)
{
destroyWalkers(WalkerList.begin(), WalkerList.end());
OwnWalkers = true;
while (first != last)
{
WalkerList.push_back(new Walker_t(**first));
++first;
}
}
void MCWalkerConfiguration::destroyWalkers(int nw)
{
if (nw > WalkerList.size())
{
app_warning() << " Cannot remove walkers. Current Walkers = " << WalkerList.size() << std::endl;
return;
}
nw = WalkerList.size() - nw;
int iw = nw;
while (iw < WalkerList.size())
{
delete WalkerList[iw++];
}
//iterator it(WalkerList.begin()+nw),it_end(WalkerList.end());
//while(it != it_end)
//{
// delete *it++;
//}
WalkerList.erase(WalkerList.begin() + nw, WalkerList.end());
}
void MCWalkerConfiguration::copyWalkers(iterator first, iterator last, iterator it)
{
while (first != last)
{
(*it++)->makeCopy(**first++);
}
}
void MCWalkerConfiguration::copyWalkerRefs(Walker_t* head, Walker_t* tail)
{
if (OwnWalkers)
//destroy the current walkers
{
destroyWalkers(WalkerList.begin(), WalkerList.end());
WalkerList.clear();
OwnWalkers = false; //set to false to prevent deleting the Walkers
}
if (WalkerList.size() < 2)
{
WalkerList.push_back(0);
WalkerList.push_back(0);
}
WalkerList[0] = head;
WalkerList[1] = tail;
}
void MCWalkerConfiguration::fakeWalkerList(Walker_t* first, Walker_t* second)
{
if (WalkerList.size() != 0)
throw std::runtime_error("This should only be called in tests and only with a fresh MCWC!");
OwnWalkers = false;
WalkerList.push_back(first);
WalkerList.push_back(second);
}
/** Make Metropolis move to the walkers and save in a temporary array.
@ -264,18 +124,6 @@ void MCWalkerConfiguration::sample(iterator it, RealType tauinv)
// R += (*it)->R + (*it)->Drift;
}
void MCWalkerConfiguration::reset()
{
iterator it(WalkerList.begin()), it_end(WalkerList.end());
while (it != it_end)
//(*it)->reset();++it;}
{
(*it)->Weight = 1.0;
(*it)->Multiplicity = 1.0;
++it;
}
}
//void MCWalkerConfiguration::clearAuxDataSet() {
// UpdateMode=Update_Particle;
// int nbytes=128*TotalNum*sizeof(RealType);//could be pagesize

View File

@ -23,10 +23,10 @@
#ifndef QMCPLUSPLUS_MCWALKERCONFIGURATION_H
#define QMCPLUSPLUS_MCWALKERCONFIGURATION_H
#include "Particle/ParticleSet.h"
#include "Particle/WalkerConfigurations.h"
#include "Particle/Walker.h"
#include "Particle/SampleStack.h"
#include "Utilities/IteratorUtility.h"
//#include "Particle/Reptile.h"
#ifdef QMC_CUDA
#include "type_traits/CUDATypes.h"
@ -57,7 +57,7 @@ class Reptile;
*</ul>
*/
class MCWalkerConfiguration : public ParticleSet
class MCWalkerConfiguration : public ParticleSet, public WalkerConfigurations
{
public:
/**enumeration for update*/
@ -68,6 +68,7 @@ public:
Update_Particle ///move a particle by particle
};
using Walker_t = WalkerConfigurations::Walker_t;
///container type of the Properties of a Walker
typedef Walker_t::PropertyContainer_t PropertyContainer_t;
///container type of Walkers
@ -78,16 +79,6 @@ public:
typedef WalkerList_t::const_iterator const_iterator;
typedef std::vector<Reptile*> ReptileList_t;
/** starting index of the walkers in a processor group
*
* WalkerOffsets[0]=0 and WalkerOffsets[WalkerOffsets.size()-1]=total number of walkers in a group
* WalkerOffsets[processorid+1]-WalkerOffsets[processorid] is equal to the number of walkers on a processor,
* i.e., W.getActiveWalkers().
* WalkerOffsets is added to handle parallel I/O with hdf5
*/
std::vector<int> WalkerOffsets;
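  // e.g. (illustrative numbers): three ranks holding 4, 4, and 5 walkers give
  // WalkerOffsets = {0, 4, 8, 13}; conceptually rank r owns the slice
  // [WalkerOffsets[r], WalkerOffsets[r+1]) of the group-wide walker ordering.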
MCDataType<FullPrecRealType> EnsembleProperty;
// Data for GPU-acceleration via CUDA
// These hold a list of pointers to the positions, gradients, and
@ -132,134 +123,28 @@ public:
///default constructor: copy only ParticleSet
MCWalkerConfiguration(const MCWalkerConfiguration& mcw);
///default destructor
~MCWalkerConfiguration();
/** create numWalkers Walkers
*
* Append Walkers to WalkerList.
*/
void createWalkers(int numWalkers);
/** create walkers
* @param first walker iterator
* @param last walker iterator
*/
void createWalkers(iterator first, iterator last);
/** copy walkers
* @param first input walker iterator
* @param last input walker iterator
* @param start first target iterator
*
* No memory allocation is allowed.
*/
void copyWalkers(iterator first, iterator last, iterator start);
/** destroy Walkers from itstart to itend
*@param first starting iterator of the walkers
*@param last ending iterator of the walkers
*/
iterator destroyWalkers(iterator first, iterator last);
/** destroy Walkers
*@param nw number of walkers to be destroyed
*/
void destroyWalkers(int nw);
/** copy the pointers to the Walkers to WalkerList
* @param head pointer to the head walker
* @param tail pointer to the tail walker
*
* \todo I believe this can/should be deleted
* Special function introduced to work with Reptation method.
* Clear the current WalkerList and add two walkers, head and tail.
* OwnWalkers are set to false.
*/
void copyWalkerRefs(Walker_t* head, Walker_t* tail);
/** make fake walker list for testing
*/
void fakeWalkerList(Walker_t* first, Walker_t* second);
///clean up the walker list and make a new list
void resize(int numWalkers, int numPtcls);
///clean up the walker list
using WalkerConfigurations::clear;
///resize Walker::PropertyHistory and Walker::PHindex:
void resizeWalkerHistories();
///make random moves for all the walkers
//void sample(iterator first, iterator last, value_type tauinv);
///make a random move for a walker
void sample(iterator it, RealType tauinv);
///return the number of active walkers
inline size_t getActiveWalkers() const { return WalkerList.size(); }
///return the total number of active walkers among a MPI group
inline size_t getGlobalNumWalkers() const { return GlobalNumWalkers; }
///return the total number of active walkers among a MPI group
inline void setGlobalNumWalkers(size_t nw)
{
GlobalNumWalkers = nw;
EnsembleProperty.NumSamples = nw;
EnsembleProperty.Weight = nw;
}
inline void setWalkerOffsets(const std::vector<int>& o) { WalkerOffsets = o; }
///return the number of particles per walker
inline int getParticleNum() const { return R.size(); }
/// return the first iterator
inline iterator begin() { return WalkerList.begin(); }
/// return the last iterator, [begin(), end())
inline iterator end() { return WalkerList.end(); }
/// return the first const_iterator
inline const_iterator begin() const { return WalkerList.begin(); }
/// return the last const_iterator [begin(), end())
inline const_iterator end() const { return WalkerList.end(); }
/**@}*/
/** clear the WalkerList without destroying them
*
* Provide std::vector::clear interface
*/
inline void clear() { WalkerList.clear(); }
/** insert elements
* @param it locator where the inserting begins
* @param first starting iterator
* @param last ending iterator
*
* Provide std::vector::insert interface
*/
template<class INPUT_ITER>
inline void insert(iterator it, INPUT_ITER first, INPUT_ITER last)
{
WalkerList.insert(it, first, last);
}
/** add Walker_t* at the end
* @param awalker pointer to a walker
*
* Provide std::vector::push_back interface
*/
inline void push_back(Walker_t* awalker) { WalkerList.push_back(awalker); }
///resize Walker::PropertyHistory and Walker::PHindex:
void resizeWalkerHistories();
/** delete the last Walker_t*
*
* Provide std::vector::pop_back interface
*/
inline void pop_back()
{
delete WalkerList.back();
WalkerList.pop_back();
}
inline Walker_t* operator[](int i) { return WalkerList[i]; }
inline const Walker_t* operator[](int i) const { return WalkerList[i]; }
/** set LocalEnergy
* @param e current average Local Energy
*/
@ -273,9 +158,15 @@ public:
inline void setPolymer(MultiChain* chain) { Polymer = chain; }
/** reset the Walkers
*/
void reset();
template<typename ForwardIter>
inline void putConfigurations(ForwardIter target)
{
int ds = OHMMS_DIM * TotalNum;
for (iterator it = WalkerList.begin(); it != WalkerList.end(); ++it, target += ds)
{
copy(get_first_address((*it)->R), get_last_address((*it)->R), target);
}
}
void resetWalkerProperty(int ncopy = 1);
@ -316,24 +207,6 @@ public:
int getMaxSamples() const;
//@}
template<typename ForwardIter>
inline void putConfigurations(ForwardIter target)
{
int ds = OHMMS_DIM * TotalNum;
for (iterator it = WalkerList.begin(); it != WalkerList.end(); ++it, target += ds)
{
copy(get_first_address((*it)->R), get_last_address((*it)->R), target);
}
}
inline void resetWalkerParents()
{
for (iterator it = WalkerList.begin(); it != WalkerList.end(); ++it)
{
(*it)->ParentID = (*it)->ID;
}
}
#ifdef QMC_CUDA
inline void setklinear() { klinear = true; }
@ -413,14 +286,8 @@ public:
#endif
protected:
///boolean for cleanup
bool OwnWalkers;
///true if the buffer is ready for particle-by-particle updates
bool ReadyForPbyP;
///number of walkers on a node
int LocalNumWalkers;
///number of walkers shared by a MPI group
size_t GlobalNumWalkers;
///update-mode index
int UpdateMode;
#ifdef QMC_CUDA
@ -443,8 +310,6 @@ protected:
RealType LocalEnergy;
public:
///a collection of walkers
WalkerList_t WalkerList;
///a collection of reptiles contained in MCWalkerConfiguration.
ReptileList_t ReptileList;
Reptile* reptile;

View File

@ -111,7 +111,7 @@ void ParticleSet::createSK()
else
{
app_log() << "\n Creating Structure Factor for periodic systems " << LRBox.LR_kc << std::endl;
SK = new StructFact(*this, LRBox.LR_kc);
SK = std::make_unique<StructFact>(*this, LRBox.LR_kc);
}
//Lattice.print(app_log());
//This uses the copy constructor to avoid recomputing the data.

View File

@ -64,7 +64,6 @@ ParticleSet::ParticleSet(const DynamicCoordinateKind kind)
SameMass(true),
ThreadID(0),
activePtcl(-1),
SK(0),
Properties(0, 0, 1, WP::MAXPROPERTIES),
myTwist(0.0),
ParentName("0"),
@ -81,7 +80,6 @@ ParticleSet::ParticleSet(const ParticleSet& p)
ThreadID(0),
activePtcl(-1),
mySpecies(p.getSpeciesSet()),
SK(0),
Properties(p.Properties),
myTwist(0.0),
ParentName(p.parentName()),
@ -107,7 +105,7 @@ ParticleSet::ParticleSet(const ParticleSet& p)
if (p.SK)
{
LRBox = p.LRBox; //copy LRBox
SK = new StructFact(*p.SK); //safe to use the copy constructor
SK = std::make_unique<StructFact>(*p.SK); //safe to use the copy constructor
//R.InUnit=p.R.InUnit;
//createSK();
//SK->DoUpdate=p.SK->DoUpdate;
@ -123,8 +121,6 @@ ParticleSet::~ParticleSet()
{
DEBUG_MEMORY("ParticleSet::~ParticleSet");
delete_iter(DistTables.begin(), DistTables.end());
if (SK)
delete SK;
}
void ParticleSet::create(int numPtcl)
@ -517,7 +513,7 @@ void ParticleSet::mw_computeNewPosDistTablesAndSK(const RefVector<ParticleSet>&
P_list[iw].get().DistTables[i]->move(P_list[iw], new_positions[iw], iat, maybe_accept);
}
StructFact* SK = P_list[0].get().SK;
auto& SK = P_list[0].get().SK;
if (SK && SK->DoUpdate)
{
#pragma omp for
@ -695,6 +691,21 @@ void ParticleSet::acceptMove(Index_t iat, bool partial_table_update)
}
}
void ParticleSet::flex_acceptMove(const RefVector<ParticleSet>& P_list, Index_t iat, bool partial_table_update)
{
for (int iw = 0; iw < P_list.size(); iw++)
P_list[iw].get().acceptMove(iat, partial_table_update);
}
void ParticleSet::rejectMove(Index_t iat)
{
#ifndef NDEBUG
if (iat != activePtcl)
throw std::runtime_error("Bug detected by rejectMove! Request electron is not active!");
#endif
activePtcl = -1;
}
void ParticleSet::flex_donePbyP(const RefVector<ParticleSet>& P_list)
{
if (P_list.size() > 1)

View File

@ -36,30 +36,6 @@ class DistanceTableData;
class StructFact;
/** Monte Carlo Data of an ensemble
*
* The quantities are shared by all the nodes in a group
* - NumSamples number of samples
* - Weight total weight of a sample
* - Energy average energy of a sample
* - Variance variance
* - LivingFraction fraction of walkers alive each step.
*/
template<typename T>
struct MCDataType
{
T NumSamples;
T RNSamples;
T Weight;
T Energy;
T AlternateEnergy;
T Variance;
T R2Accepted;
T R2Proposed;
T LivingFraction;
};
/** Specialized paritlce class for atomistic simulations
*
* Derived from QMCTraits, ParticleBase<PtclOnLatticeTraits> and OhmmsElementBase.
@ -88,9 +64,6 @@ public:
quantum_domains quantum_domain;
//@{ public data members
///property of an ensemble represented by this ParticleSet
//MCDataType<FullPrecRealType> EnsembleProperty;
///ParticleLayout
ParticleLayout_t Lattice, PrimitiveLattice;
///Long-range box
@ -157,7 +130,7 @@ public:
SpeciesSet mySpecies;
///Structure factor
StructFact* SK;
std::unique_ptr<StructFact> SK;
///Particle density in G-space for MPC interaction
std::vector<TinyVector<int, OHMMS_DIM>> DensityReducedGvecs;
@ -391,15 +364,12 @@ public:
*/
void acceptMove(Index_t iat, bool partial_table_update = false);
/// batched version of acceptMove
static void flex_acceptMove(const RefVector<ParticleSet>& P_list, Index_t iat, bool partial_table_update = false)
{
for (int iw = 0; iw < P_list.size(); iw++)
P_list[iw].get().acceptMove(iat, partial_table_update);
}
static void flex_acceptMove(const RefVector<ParticleSet>& P_list, Index_t iat, bool partial_table_update = false);
/** reject the move
* @param iat the electron whose proposed move gets rejected.
*/
void rejectMove(Index_t iat) { activePtcl = -1; }
void rejectMove(Index_t iat);
/// batched version of rejectMove
static void flex_rejectMove(const RefVector<ParticleSet>& P_list, Index_t iat)
{

View File

@ -32,7 +32,7 @@
namespace qmcplusplus
{
ParticleSetPool::ParticleSetPool(Communicate* c, const char* aname)
: MPIObjectBase(c), SimulationCell(nullptr), TileMatrix(0)
: MPIObjectBase(c), TileMatrix(0)
{
TileMatrix.diagonal(1);
ClassName = "ParticleSetPool";
@ -41,7 +41,7 @@ ParticleSetPool::ParticleSetPool(Communicate* c, const char* aname)
ParticleSetPool::ParticleSetPool(ParticleSetPool&& other)
: MPIObjectBase(other.myComm),
SimulationCell(other.SimulationCell),
SimulationCell(std::move(other.SimulationCell)),
TileMatrix(other.TileMatrix),
myPool(std::move(other.myPool))
{
@ -49,6 +49,15 @@ ParticleSetPool::ParticleSetPool(ParticleSetPool&& other)
myName = other.myName;
}
ParticleSetPool::~ParticleSetPool()
{
PoolType::const_iterator it(myPool.begin()), it_end(myPool.end());
while (it != it_end)
{
delete (*it).second;
it++;
}
}
ParticleSet* ParticleSetPool::getParticleSet(const std::string& pname)
{
@ -77,13 +86,14 @@ MCWalkerConfiguration* ParticleSetPool::getWalkerSet(const std::string& pname)
return dynamic_cast<MCWalkerConfiguration*>(mc);
}
void ParticleSetPool::addParticleSet(ParticleSet* p)
void ParticleSetPool::addParticleSet(std::unique_ptr<ParticleSet>&& p)
{
PoolType::iterator pit(myPool.find(p->getName()));
if (pit == myPool.end())
{
LOGMSG(" Adding " << p->getName() << " ParticleSet to the pool")
myPool[p->getName()] = p;
auto& pname = p->getName();
LOGMSG(" Adding " << pname << " ParticleSet to the pool")
myPool[pname] = p.release();
}
else
{
@ -105,10 +115,10 @@ bool ParticleSetPool::putLattice(xmlNodePtr cur)
{
ReportEngine PRE("ParticleSetPool", "putLattice");
bool printcell = false;
if (SimulationCell == 0)
if (!SimulationCell)
{
app_debug() << " Creating global supercell " << std::endl;
SimulationCell = new ParticleSet::ParticleLayout_t;
SimulationCell = std::make_unique<ParticleSet::ParticleLayout_t>();
printcell = true;
}
else
@ -371,7 +381,7 @@ ParticleSet* ParticleSetPool::createESParticleSet(xmlNodePtr cur, const std::str
if (SimulationCell == 0)
{
SimulationCell = new ParticleSet::ParticleLayout_t(ions->Lattice);
SimulationCell = std::make_unique<ParticleSet::ParticleLayout_t>(ions->Lattice);
}
if (qp == 0)

View File

@ -39,6 +39,7 @@ public:
* @param aname xml tag
*/
ParticleSetPool(Communicate* c, const char* aname = "particleset");
~ParticleSetPool();
ParticleSetPool(const ParticleSetPool&) = delete;
ParticleSetPool& operator=(const ParticleSetPool&) = delete;
@ -55,13 +56,18 @@ public:
bool putTileMatrix(xmlNodePtr cur);
/** initialize the supercell shared by all the particle sets
*
   * The return value is never checked anywhere.
   * As a side effect, the SimulationCell unique_ptr<ParticleLayout_t> is set to a
   * particle layout created on the heap, which is later assigned directly to the
   * pset member variable Lattice.
*/
bool putLattice(xmlNodePtr cur);
///return true, if the pool is empty
inline bool empty() const { return myPool.empty(); }
///add a ParticleSet* to the pool
void addParticleSet(ParticleSet* p);
  ///add a ParticleSet to the pool; ownership is transferred
void addParticleSet(std::unique_ptr<ParticleSet>&& p);
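  // Caller-side sketch (setup assumed, names illustrative):
  //   std::unique_ptr<ParticleSet> qp = ...; // e.g. produced by a particle-set builder
  //   qp->setName("e");
  //   pool.addParticleSet(std::move(qp));    // the pool takes ownership, keyed by the name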
/** get a named ParticleSet
* @param pname name of the ParticleSet
* @return a MCWalkerConfiguration object with pname
@ -105,15 +111,15 @@ private:
* - <simulationcell> element
* - the first particleset created with ES-HDF
*/
ParticleSet::ParticleLayout_t* SimulationCell;
std::unique_ptr<ParticleSet::ParticleLayout_t> SimulationCell;
/** tiling matrix
*/
Tensor<int, OHMMS_DIM> TileMatrix;
/** List of ParticleSet
/** List of ParticleSet owned
*
* Each ParticleSet has to have a unique name which is used as a key for the map
* Each ParticleSet has to have a unique name which is used as a key for the map.
*/
std::map<std::string, ParticleSet*> myPool;
PoolType myPool;
/** xml node for random initialization.
*
* randomize() process initializations just before starting qmc sections

View File

@ -206,6 +206,14 @@ public:
//static_cast<Matrix<FullPrecRealType>>(Properties) = 0.0;
}
#if defined(QMC_CUDA)
//some member variables in CUDA build cannot be and should not be copied
//use default copy constructor to skip actual data copy
Walker(const Walker& a) = default;
#else
Walker(const Walker& a) : Properties(1, WP::NUMPROPERTIES, 1, WP::MAXPROPERTIES) { makeCopy(a); }
#endif
inline int addPropertyHistory(int leng)
{
int newL = PropertyHistory.size();
@ -252,8 +260,6 @@ public:
return mean;
}
inline ~Walker() {}
///assignment operator
inline Walker& operator=(const Walker& a)
{
@ -304,6 +310,8 @@ public:
//Drift = a.Drift;
Properties.copy(a.Properties);
DataSet = a.DataSet;
block_end = a.block_end;
scalar_end = a.scalar_end;
if (PropertyHistory.size() != a.PropertyHistory.size())
PropertyHistory.resize(a.PropertyHistory.size());
for (int i = 0; i < PropertyHistory.size(); i++)
@ -436,10 +444,14 @@ public:
DataSet.add(ReleasedNodeAge);
DataSet.add(ReleasedNodeWeight);
// vectors
assert(R.size() != 0);
DataSet.add(R.first_address(), R.last_address());
assert(spins.size() != 0);
DataSet.add(spins.first_address(), spins.last_address());
#if !defined(SOA_MEMORY_OPTIMIZED)
assert(G.size() != 0);
DataSet.add(G.first_address(), G.last_address());
assert(L.size() != 0);
DataSet.add(L.first_address(), L.last_address());
#endif
//Don't add the nLocal but the actual allocated size. We want to register once for the life of a
@ -469,13 +481,18 @@ public:
void copyFromBuffer()
{
assert(DataSet.size() != 0);
DataSet.rewind();
DataSet >> ID >> ParentID >> Generation >> Age >> ReleasedNodeAge >> ReleasedNodeWeight;
// vectors
assert(R.size() != 0);
DataSet.get(R.first_address(), R.last_address());
assert(spins.size() != 0);
DataSet.get(spins.first_address(), spins.last_address());
#if !defined(SOA_MEMORY_OPTIMIZED)
assert(G.size() != 0);
DataSet.get(G.first_address(), G.last_address());
assert(L.size() != 0);
DataSet.get(L.first_address(), L.last_address());
#endif
DataSet.get(Properties.data(), Properties.data() + Properties.capacity());
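copyToBuffer and copyFromBuffer above depend on packing and unpacking the walker data in exactly the same order. A minimal sketch of that add/rewind/get discipline, using an illustrative SimpleBuffer rather than the real PooledMemory DataSet:

#include <cassert>
#include <cstddef>
#include <vector>

struct SimpleBuffer
{
  std::vector<double> data;
  std::size_t cursor = 0;
  void rewind() { cursor = 0; }
  void add(const double* first, const double* last) { data.insert(data.end(), first, last); }
  void get(double* first, double* last)
  {
    for (double* p = first; p != last; ++p)
      *p = data[cursor++];
  }
};

int main()
{
  std::vector<double> R = {1.0, 2.0, 3.0}, R_in(3);
  SimpleBuffer buf;
  buf.add(R.data(), R.data() + R.size()); // pack, like DataSet.add(R.first_address(), R.last_address())
  buf.rewind();                           // reads must replay the adds in the same order
  buf.get(R_in.data(), R_in.data() + R_in.size());
  assert(R_in == R);
}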

View File

@ -0,0 +1,158 @@
//////////////////////////////////////////////////////////////////////////////////////
// This file is distributed under the University of Illinois/NCSA Open Source License.
// See LICENSE file in top directory for details.
//
// Copyright (c) 2020 QMCPACK developers.
//
// File developed by: Jordan E. Vincent, University of Illinois at Urbana-Champaign
// Bryan Clark, bclark@Princeton.edu, Princeton University
// Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign
// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
// Cynthia Gu, zg1@ornl.gov, Oak Ridge National Laboratory
// Ye Luo, yeluo@anl.gov, Argonne National Laboratory
// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
//
// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
//////////////////////////////////////////////////////////////////////////////////////
#include "WalkerConfigurations.h"
#include <map>
#include "Utilities/IteratorUtility.h"
namespace qmcplusplus
{
WalkerConfigurations::WalkerConfigurations() : LocalNumWalkers(0), GlobalNumWalkers(0) {}
///default destructor
WalkerConfigurations::~WalkerConfigurations() { destroyWalkers(WalkerList.begin(), WalkerList.end()); }
void WalkerConfigurations::createWalkers(int n, size_t numPtcls)
{
if (WalkerList.empty())
{
while (n)
{
Walker_t* awalker = new Walker_t(numPtcls);
WalkerList.push_back(awalker);
--n;
}
}
else
{
if (WalkerList.size() >= n)
{
int iw = WalkerList.size(); //copy from the back
for (int i = 0; i < n; ++i)
{
WalkerList.push_back(new Walker_t(*WalkerList[--iw]));
}
}
else
{
int nc = n / WalkerList.size();
int nw0 = WalkerList.size();
for (int iw = 0; iw < nw0; ++iw)
{
for (int ic = 0; ic < nc; ++ic)
WalkerList.push_back(new Walker_t(*WalkerList[iw]));
}
n -= nc * nw0;
while (n > 0)
{
WalkerList.push_back(new Walker_t(*WalkerList[--nw0]));
--n;
}
}
}
}
void WalkerConfigurations::resize(int numWalkers, size_t numPtcls)
{
int dn = numWalkers - WalkerList.size();
if (dn > 0)
createWalkers(dn, numPtcls);
if (dn < 0)
{
int nw = -dn;
if (nw < WalkerList.size())
{
iterator it = WalkerList.begin();
while (nw)
{
delete *it;
++it;
--nw;
}
WalkerList.erase(WalkerList.begin(), WalkerList.begin() - dn);
}
}
}
///returns the next valid iterator
WalkerConfigurations::iterator WalkerConfigurations::destroyWalkers(iterator first, iterator last)
{
iterator it = first;
while (it != last)
{
delete *it++;
}
return WalkerList.erase(first, last);
}
void WalkerConfigurations::createWalkers(iterator first, iterator last)
{
destroyWalkers(WalkerList.begin(), WalkerList.end());
while (first != last)
{
WalkerList.push_back(new Walker_t(**first));
++first;
}
}
void WalkerConfigurations::destroyWalkers(int nw)
{
if (nw > WalkerList.size())
{
app_warning() << " Cannot remove walkers. Current Walkers = " << WalkerList.size() << std::endl;
return;
}
nw = WalkerList.size() - nw;
int iw = nw;
while (iw < WalkerList.size())
{
delete WalkerList[iw++];
}
WalkerList.erase(WalkerList.begin() + nw, WalkerList.end());
}
void WalkerConfigurations::copyWalkers(iterator first, iterator last, iterator it)
{
while (first != last)
{
(*it++)->makeCopy(**first++);
}
}
/** Reset the Weight and Multiplicity of all the walkers to one.
 */
void WalkerConfigurations::reset()
{
iterator it(WalkerList.begin()), it_end(WalkerList.end());
while (it != it_end)
//(*it)->reset();++it;}
{
(*it)->Weight = 1.0;
(*it)->Multiplicity = 1.0;
++it;
}
}
} // namespace qmcplusplus
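When createWalkers above is asked to grow a non-empty list, each existing walker is cloned n / size times and the remainder is cloned from the back of the list. A small sketch of that arithmetic with made-up numbers:

#include <cstdio>

int main()
{
  int nw0 = 3;                 // walkers already in WalkerList
  int n   = 8;                 // additional walkers requested
  int nc  = n / nw0;           // each existing walker is cloned nc = 2 times (6 clones)
  int leftover = n - nc * nw0; // the remaining 2 clones come from the back of the list
  std::printf("clones per walker = %d, leftover = %d\n", nc, leftover);
  return 0;
}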

View File

@ -0,0 +1,201 @@
//////////////////////////////////////////////////////////////////////////////////////
// This file is distributed under the University of Illinois/NCSA Open Source License.
// See LICENSE file in top directory for details.
//
// Copyright (c) 2020 QMCPACK developers.
//
// File developed by: Jordan E. Vincent, University of Illinois at Urbana-Champaign
// Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign
// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
// Cynthia Gu, zg1@ornl.gov, Oak Ridge National Laboratory
// Raymond Clay III, j.k.rofling@gmail.com, Lawrence Livermore National Laboratory
// Ye Luo, yeluo@anl.gov, Argonne National Laboratory
// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
//
// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
//////////////////////////////////////////////////////////////////////////////////////
/** @file WalkerConfigurations.h
* @brief Declaration of a WalkerConfigurations
*/
#ifndef QMCPLUSPLUS_WALKERCONFIGURATIONS_H
#define QMCPLUSPLUS_WALKERCONFIGURATIONS_H
#include "Configuration.h"
#include "Particle/Walker.h"
#include "Utilities/IteratorUtility.h"
namespace qmcplusplus
{
/** Monte Carlo Data of an ensemble
*
* The quantities are shared by all the nodes in a group
* - NumSamples number of samples
* - Weight total weight of a sample
* - Energy average energy of a sample
* - Variance variance
* - LivingFraction fraction of walkers alive each step.
*/
template<typename T>
struct MCDataType
{
T NumSamples;
T RNSamples;
T Weight;
T Energy;
T AlternateEnergy;
T Variance;
T R2Accepted;
T R2Proposed;
T LivingFraction;
};
/** A set of lightweight walkers that are carried between driver sections and across restarts
*/
class WalkerConfigurations
{
public:
/// walker type
using Walker_t = Walker<QMCTraits, PtclOnLatticeTraits>;
using FullPrecRealType = QMCTraits::FullPrecRealType;
///container type of Walkers
using WalkerList_t = std::vector<Walker_t*>;
/// FIX: an object's iterator type alias should not refer to just one of the many containers it holds.
using iterator = WalkerList_t::iterator;
///const_iterator of Walker container
using const_iterator = WalkerList_t::const_iterator;
/** starting index of the walkers in a processor group
*
* WalkerOffsets[0]=0 and WalkerOffsets[WalkerOffsets.size()-1]=total number of walkers in a group
* WalkerOffsets[processorid+1]-WalkerOffsets[processorid] is equal to the number of walkers on a processor,
* i.e., W.getActiveWalkers().
* WalkerOffsets is added to handle parallel I/O with hdf5
*/
std::vector<int> WalkerOffsets;
MCDataType<FullPrecRealType> EnsembleProperty;
WalkerConfigurations();
~WalkerConfigurations();
WalkerConfigurations(const WalkerConfigurations&) = delete;
WalkerConfigurations& operator=(const WalkerConfigurations&) = delete;
WalkerConfigurations(WalkerConfigurations&&) = default;
WalkerConfigurations& operator=(WalkerConfigurations&&) = default;
/** create numWalkers Walkers
*
* Append Walkers to WalkerList.
*/
void createWalkers(int numWalkers, size_t numPtcls);
/** create walkers
* @param first walker iterator
* @param last walker iterator
*/
void createWalkers(iterator first, iterator last);
/** copy walkers
* @param first input walker iterator
* @param last input walker iterator
* @param start first target iterator
*
* No memory allocation is allowed.
*/
void copyWalkers(iterator first, iterator last, iterator start);
/** destroy Walkers from first to last
*@param first starting iterator of the walkers
*@param last ending iterator of the walkers
*/
iterator destroyWalkers(iterator first, iterator last);
/** destroy Walkers
*@param nw number of walkers to be destroyed
*/
void destroyWalkers(int nw);
///clean up the walker list and make a new list
void resize(int numWalkers, size_t numPtcls);
///return the number of active walkers
inline size_t getActiveWalkers() const { return WalkerList.size(); }
///return the total number of active walkers among an MPI group
inline size_t getGlobalNumWalkers() const { return GlobalNumWalkers; }
///set the total number of active walkers among an MPI group
inline void setGlobalNumWalkers(size_t nw)
{
GlobalNumWalkers = nw;
EnsembleProperty.NumSamples = nw;
EnsembleProperty.Weight = nw;
}
inline void setWalkerOffsets(const std::vector<int>& o) { WalkerOffsets = o; }
/// return the first iterator
inline iterator begin() { return WalkerList.begin(); }
/// return the last iterator, [begin(), end())
inline iterator end() { return WalkerList.end(); }
/// return the first const_iterator
inline const_iterator begin() const { return WalkerList.begin(); }
/// return the last const_iterator [begin(), end())
inline const_iterator end() const { return WalkerList.end(); }
/**@}*/
/** clear the WalkerList without destroying them
*
* Provide std::vector::clear interface
*/
inline void clear() { WalkerList.clear(); }
/** insert elements
* @param it locator where the inserting begins
* @param first starting iterator
* @param last ending iterator
*
* Provide std::vector::insert interface
*/
template<class INPUT_ITER>
inline void insert(iterator it, INPUT_ITER first, INPUT_ITER last)
{
WalkerList.insert(it, first, last);
}
/** add Walker_t* at the end
* @param awalker pointer to a walker
*
* Provide std::vector::push_back interface
*/
inline void push_back(Walker_t* awalker) { WalkerList.push_back(awalker); }
/** delete the last Walker_t*
*
* Provide std::vector::pop_back interface
*/
inline void pop_back()
{
delete WalkerList.back();
WalkerList.pop_back();
}
inline Walker_t* operator[](int i) { return WalkerList[i]; }
inline const Walker_t* operator[](int i) const { return WalkerList[i]; }
/** reset the Walkers
*/
void reset();
protected:
///number of walkers on a node
int LocalNumWalkers;
///number of walkers shared by a MPI group
size_t GlobalNumWalkers;
public:
///a collection of walkers
WalkerList_t WalkerList;
};
} // namespace qmcplusplus
#endif
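WalkerOffsets, documented above, is the exclusive prefix sum of per-rank walker counts, so offsets[rank+1]-offsets[rank] recovers each rank's walker count and the last entry is the group total. An illustrative computation (the counts are made up):

#include <cstddef>
#include <cstdio>
#include <vector>

int main()
{
  std::vector<int> counts = {4, 5, 3}; // walkers on MPI ranks 0, 1, 2
  std::vector<int> offsets(counts.size() + 1, 0);
  for (std::size_t i = 0; i < counts.size(); ++i)
    offsets[i + 1] = offsets[i] + counts[i]; // offsets = {0, 4, 9, 12}
  std::printf("rank 1 owns %d walkers, group total = %d\n", offsets[2] - offsets[1], offsets.back());
  return 0;
}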

View File

@ -60,9 +60,9 @@ TEST_CASE("ParticleSetPool", "[qmcapp]")
ParticleSet* ws = pp.getWalkerSet("ion0");
REQUIRE(ws != NULL);
ParticleSet* ps2 = new ParticleSet();
auto ps2 = std::make_unique<ParticleSet>();
ps2->setName("particle_set_2");
pp.addParticleSet(ps2);
pp.addParticleSet(std::move(ps2));
// should do nothing, since no random particlesets were specified
pp.randomize();

View File

@ -41,14 +41,14 @@ TEST_CASE("SampleStack", "[particle]")
REQUIRE(samples.getNumSamples() == 0);
using Walker_t = ParticleSet::Walker_t;
using WalkerList_t = std::vector<Walker_t*>;
using WalkerList_t = std::vector<std::unique_ptr<Walker_t>>;
WalkerList_t walker_list;
// Add size one list
walker_list.push_back(new Walker_t(total_num));
walker_list.push_back(std::make_unique<Walker_t>(total_num));
walker_list[0]->R[0][0] = 1.1;
for (auto wi : walker_list)
for (auto& wi : walker_list)
{
samples.appendSample(MCSample(*wi));
}

View File

@ -72,7 +72,9 @@ TEST_CASE("walker HDF read and write", "[particle]")
// This method sets ownership to false so class does not attempt to
// free the walker elements.
W.copyWalkerRefs(&w1, &w2);
W.createWalkers(2);
W[0]->R = w1.R;
W[1]->R = w2.R;
REQUIRE(W.getActiveWalkers() == 2);

View File

@ -88,12 +88,14 @@ namespace BLAS
zaxpy(n, x, a, incx, b, incy);
}
inline static float norm2(int n, const float* a, int incx = 1) { return snrm2(n, a, incx); }
inline static float norm2(int n, const std::complex<float>* a, int incx = 1) { return scnrm2(n, a, incx); }
inline static double norm2(int n, const double* a, int incx = 1) { return dnrm2(n, a, incx); }
inline static double norm2(int n, const std::complex<double>* a, int incx = 1) { return dznrm2(n, a, incx); }
inline static float norm2(int n, const float* a, int incx = 1) { return snrm2(n, a, incx); }
inline static void scal(int n, float alpha, float* x, int incx = 1) { sscal(n, alpha, x, incx); }
inline static void scal(int n, std::complex<float> alpha, std::complex<float>* x, int incx = 1)
@ -112,10 +114,6 @@ namespace BLAS
inline static void scal(int n, float alpha, std::complex<float>* x, int incx = 1) { csscal(n, alpha, x, incx); }
//inline static
//void gemv(char trans, int n, int m, const double* amat, const double* x, double* y) {
// dgemv(trans, n, m, done, amat, n, x, INCX, dzero, y, INCY);
//}
inline static void gemv(int n, int m, const double* restrict amat, const double* restrict x, double* restrict y)
{
dgemv(NOTRANS, m, n, done, amat, m, x, INCX, dzero, y, INCY);
@ -232,190 +230,6 @@ namespace BLAS
cgemv(trans_in, n, m, alpha, amat, lda, x, incx, beta, y, incy);
}
#if defined(HAVE_MKL)
inline static void gemv(char trans_in,
int n,
int m,
const std::complex<double>& alpha,
const double* restrict amat,
int lda,
const std::complex<double>* restrict x,
int incx,
const std::complex<double>& beta,
std::complex<double>* y,
int incy)
{
dzgemv(trans_in, n, m, alpha, amat, lda, x, incx, beta, y, incy);
}
inline static void gemv(char trans_in,
int n,
int m,
const std::complex<float>& alpha,
const float* restrict amat,
int lda,
const std::complex<float>* restrict x,
int incx,
const std::complex<float>& beta,
std::complex<float>* y,
int incy)
{
scgemv(trans_in, n, m, alpha, amat, lda, x, incx, beta, y, incy);
}
inline static void gemm_batch(const CBLAS_LAYOUT Layout,
const CBLAS_TRANSPOSE* transa_array,
const CBLAS_TRANSPOSE* transb_array,
const int* m_array,
const int* n_array,
const int* k_array,
const float* alpha_array,
const void** a_array,
const int* lda_array,
const void** b_array,
const int* ldb_array,
const void* beta_array,
void** c_array,
const int* ldc_array,
const int group_count,
const int* group_size)
{
cblas_sgemm_batch(Layout, transa_array, transb_array, m_array, n_array, k_array, alpha_array, a_array, lda_array,
b_array, ldb_array, beta_array, c_array, ldc_array, group_count, group_size);
}
inline static void gemm_batch(const CBLAS_LAYOUT Layout,
const CBLAS_TRANSPOSE* transa_array,
const CBLAS_TRANSPOSE* transb_array,
const int* m_array,
const int* n_array,
const int* k_array,
const double* alpha_array,
const void** a_array,
const int* lda_array,
const void** b_array,
const int* ldb_array,
const void* beta_array,
void** c_array,
const int* ldc_array,
const int group_count,
const int* group_size)
{
cblas_dgemm_batch(Layout, transa_array, transb_array, m_array, n_array, k_array, alpha_array, a_array, lda_array,
b_array, ldb_array, beta_array, c_array, ldc_array, group_count, group_size);
}
inline static void gemm_batch(const CBLAS_LAYOUT Layout,
const CBLAS_TRANSPOSE* transa_array,
const CBLAS_TRANSPOSE* transb_array,
const int* m_array,
const int* n_array,
const int* k_array,
const std::complex<float>* alpha_array,
const void** a_array,
const int* lda_array,
const void** b_array,
const int* ldb_array,
const void* beta_array,
void** c_array,
const int* ldc_array,
const int group_count,
const int* group_size)
{
cblas_cgemm_batch(Layout, transa_array, transb_array, m_array, n_array, k_array, alpha_array, a_array, lda_array,
b_array, ldb_array, beta_array, c_array, ldc_array, group_count, group_size);
}
inline static void gemm_batch(const CBLAS_LAYOUT Layout,
const CBLAS_TRANSPOSE* transa_array,
const CBLAS_TRANSPOSE* transb_array,
const int* m_array,
const int* n_array,
const int* k_array,
const std::complex<double>* alpha_array,
const void** a_array,
const int* lda_array,
const void** b_array,
const int* ldb_array,
const void* beta_array,
void** c_array,
const int* ldc_array,
const int group_count,
const int* group_size)
{
cblas_zgemm_batch(Layout, transa_array, transb_array, m_array, n_array, k_array, alpha_array, a_array, lda_array,
b_array, ldb_array, beta_array, c_array, ldc_array, group_count, group_size);
}
#endif
template<typename T>
inline static void gemmStridedBatched(char Atrans,
char Btrans,
int M,
int N,
int K,
T alpha,
const T* A,
int lda,
int strideA,
const T* restrict B,
int ldb,
int strideB,
T beta,
T* restrict C,
int ldc,
int strideC,
int batchSize)
{
#ifdef HAVE_MKL
// MKL has batched gemm, but with pointer interface. Translate here
std::vector<const void*> Aptrs(batchSize);
std::vector<const void*> Bptrs(batchSize);
std::vector<const void*> Cptrs(batchSize);
for (int i = 0; i < batchSize; i++)
{
Aptrs[i] = static_cast<const void*>(A + i * strideA);
Bptrs[i] = static_cast<const void*>(B + i * strideB);
Cptrs[i] = static_cast<const void*>(C + i * strideC);
}
// MKL expects arrays of size group_count.
// group_count is 1 in the strided case, so pointers to single values are passed for everything.
gemm_batch(CblasColMajor, &Atrans, &Btrans, &M, &N, &K, &alpha, Aptrs.data(), &lda, Bptrs.data(), &ldb, &beta,
Cptrs.data(), &ldc, 1, &batchSize);
#else
// No batched gemm, :-( gemm loop
for (int i = 0; i < batchSize; i++)
gemm(Atrans, Btrans, M, N, K, alpha, A + i * strideA, lda, B + i * strideB, ldb, beta, C + i * strideC, ldc);
#endif
}
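As the comments above note, MKL only exposes a pointer-array batched gemm, so the strided call is translated by materializing one pointer per batch member. A standalone sketch of just that translation, with made-up sizes and no actual BLAS call:

#include <vector>

int main()
{
  const int batchSize = 4, strideA = 16, strideB = 16, strideC = 16;
  std::vector<double> A(batchSize * strideA), B(batchSize * strideB), C(batchSize * strideC);
  std::vector<const void*> Aptrs(batchSize), Bptrs(batchSize);
  std::vector<void*> Cptrs(batchSize);
  for (int i = 0; i < batchSize; i++)
  {
    Aptrs[i] = A.data() + i * strideA; // the i-th batch member is a window into the strided buffer
    Bptrs[i] = B.data() + i * strideB;
    Cptrs[i] = C.data() + i * strideC;
  }
  // Aptrs/Bptrs/Cptrs would then be handed to cblas_dgemm_batch with group_count = 1.
  return 0;
}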
template<typename T>
inline static void gemmBatched(char Atrans,
char Btrans,
int M,
int N,
int K,
T alpha,
T const** A,
int lda,
T const** B,
int ldb,
T beta,
T** C,
int ldc,
int batchSize)
{
#ifdef HAVE_MKL
gemm_batch(CblasColMajor, &Atrans, &Btrans, &M, &N, &K, &alpha, A, &lda, B, &ldb, &beta, C, &ldc, 1, &batchSize);
#else
// No batched gemm, :-( gemm loop
for (int i = 0; i < batchSize; i++)
gemm(Atrans, Btrans, M, N, K, alpha, A[i], lda, B[i], ldb, beta, C[i], ldc);
#endif
}
inline static void gemm(char Atrans,
char Btrans,
int M,
@ -484,77 +298,6 @@ namespace BLAS
cgemm(Atrans, Btrans, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
}
// inline static
// void symv(char uplo, int n, const double alpha, double* a, int lda,
// double* x, const int incx, const double beta, double* y,
// const int incy) {
// dsymv(&uplo,&n,&alpha,a,&lda,x,&incx,&beta,y,&incy);
// }
// inline static
// void symv(char uplo, int n, const std::complex<double> alpha,
// std::complex<double>* a, int lda, std::complex<double>* x, const int incx,
// const std::complex<double> beta, std::complex<double>* y, const int incy) {
// zsymv(&uplo,&n,&alpha,a,&lda,x,&incx,&beta,y,&incy);
// }
// inline static
// void symv(const char uplo, int n, const float alpha, float* a, int lda,
// float* x, const int incx, const float beta, float* y,
// const int incy) {
// ssymv(&uplo,&n,&alpha,a,&lda,x,&incx,&beta,y,&incy);
// }
// inline static
// void symv(const char uplo, int n, const std::complex<float> alpha,
// std::complex<float>* a, int lda, std::complex<float>* x, const int incx,
// const std::complex<float> beta, std::complex<float>* y, const int incy) {
// csymv(&uplo,&n,&alpha,a,&lda,x,&incx,&beta,y,&incy);
// }
// inline static
// void symv(int n, double alpha, double* a, double* x, double* y) {
// dsymv(&UPLO,&n,&alpha,a,&n,x,&INCX,&dzero,y,&INCY);
// }
// inline static
// void symv(int n, const double* a, const double* x, double* y) {
// dsymv(&UPLO,&n,&done,a,&n,x,&INCX,&dzero,y,&INCY);
// }
// inline static
// void symv(int n, float alpha, float* a, float* x, float* y) {
// ssymv(&UPLO,&n,&alpha,a,&n,x,&INCX,&szero,y,&INCY);
// }
// inline static
// void symv(int n, float* a, float* x, float* y) {
// ssymv(&UPLO,&n,&sone,a,&n,x,&INCX,&szero,y,&INCY);
// }
// inline static void
// symv(int n, std::complex<double> alpha, std::complex<double>* a, std::complex<double>* x,
// std::complex<double>* y) {
// zsymv(&UPLO,&n,&alpha,a,&n,x,&INCX,&zzero,y,&INCY);
// }
// inline static
// void symv(int n, std::complex<double>* a, std::complex<double>* x, std::complex<double>* y) {
// zsymv(&UPLO,&n,&zone,a,&n,x,&INCX,&zzero,y,&INCY);
// }
// inline static void
// symv(int n, std::complex<float> alpha, std::complex<float>* a, std::complex<float>* x,
// std::complex<float>* y) {
// csymv(&UPLO,&n,&alpha,a,&n,x,&INCX,&czero,y,&INCY);
// }
// inline static
// void symv(int n, std::complex<float>* a, std::complex<float>* x, std::complex<float>* y) {
// csymv(&UPLO,&n,&cone,a,&n,x,&INCX,&czero,y,&INCY);
// }
template<typename T>
inline static T dot(int n, const T* restrict a, const T* restrict b)
{

View File

@ -26,6 +26,7 @@
#define zaxpy zaxpy_
#define dnrm2 dnrm2_
#define snrm2 snrm2_
#define scnrm2 scnrm2_
#define dznrm2 dznrm2_
#define dsymv dsymv_
#define ssymv ssymv_
@ -107,29 +108,13 @@
#define dpotrf dpotrf_
#define zpotrf zpotrf_
#if defined(HAVE_MKL)
#define dzgemv dzgemv_
#define scgemv scgemv_
#define dzgemm dzgemm_
#define scgemm scgemm_
typedef enum
{
CblasRowMajor = 101,
CblasColMajor = 102
} CBLAS_LAYOUT;
typedef enum
{
CblasNoTrans = 111,
CblasTrans = 112,
CblasConjTrans = 113
} CBLAS_TRANSPOSE;
#endif
#endif
// declaring Fortran interfaces
extern "C"
{
void saxpy(const int& n, const float& da, const float* dx, const int& incx, float* dy, const int& incy);
void caxpy(const int& n,
const std::complex<float>& da,
const std::complex<float>* dx,
@ -139,8 +124,6 @@ extern "C"
void daxpy(const int& n, const double& da, const double* dx, const int& incx, double* dy, const int& incy);
void saxpy(const int& n, const float& da, const float* dx, const int& incx, float* dy, const int& incy);
void zaxpy(const int& n,
const std::complex<double>& da,
const std::complex<double>* dx,
@ -149,18 +132,18 @@ extern "C"
const int& incy);
double dnrm2(const int& n, const double* dx, const int& incx);
float snrm2(const int& n, const float* dx, const int& incx);
float scnrm2(const int& n, const std::complex<float>* dx, const int& incx);
double dnrm2(const int& n, const double* dx, const int& incx);
double dznrm2(const int& n, const std::complex<double>* dx, const int& incx);
double dscal(const int& n, const double&, double* x, const int&);
double sscal(const int& n, const float&, float* x, const int&);
double cscal(const int& n, const std::complex<float>&, std::complex<float>* x, const int&);
double zscal(const int& n, const std::complex<double>&, std::complex<double>* x, const int&);
double zdscal(const int& n, const double&, std::complex<double>* x, const int&);
double csscal(const int& n, const float&, std::complex<float>* x, const int&);
void sscal(const int& n, const float&, float* x, const int&);
void cscal(const int& n, const std::complex<float>&, std::complex<float>* x, const int&);
void dscal(const int& n, const double&, double* x, const int&);
void zscal(const int& n, const std::complex<double>&, std::complex<double>* x, const int&);
void csscal(const int& n, const float&, std::complex<float>* x, const int&);
void zdscal(const int& n, const double&, std::complex<double>* x, const int&);
void dsymv(const char& uplo,
const int& n,
@ -340,130 +323,6 @@ extern "C"
std::complex<float>* cv,
const int& incy);
#if defined(HAVE_MKL)
void dzgemm(const char&,
const char&,
const int&,
const int&,
const int&,
const std::complex<double>&,
const double*,
const int&,
const std::complex<double>*,
const int&,
const std::complex<double>&,
std::complex<double>*,
const int&);
void scgemm(const char&,
const char&,
const int&,
const int&,
const int&,
const std::complex<float>&,
const float*,
const int&,
const std::complex<float>*,
const int&,
const std::complex<float>&,
std::complex<float>*,
const int&);
void dzgemv(const char& trans,
const int& nr,
const int& nc,
const std::complex<double>& alpha,
const double* amat,
const int& lda,
const std::complex<double>* bv,
const int& incx,
const std::complex<double>& beta,
std::complex<double>* cv,
const int& incy);
void scgemv(const char& trans,
const int& nr,
const int& nc,
const std::complex<float>& alpha,
const float* amat,
const int& lda,
const std::complex<float>* bv,
const int& incx,
const std::complex<float>& beta,
std::complex<float>* cv,
const int& incy);
void cblas_sgemm_batch(const CBLAS_LAYOUT Layout,
const CBLAS_TRANSPOSE* transa_array,
const CBLAS_TRANSPOSE* transb_array,
const int* m_array,
const int* n_array,
const int* k_array,
const void* alpha_array,
const void** a_array,
const int* lda_array,
const void** b_array,
const int* ldb_array,
const void* beta_array,
void** c_array,
const int* ldc_array,
const int group_count,
const int* group_size);
void cblas_cgemm_batch(const CBLAS_LAYOUT Layout,
const CBLAS_TRANSPOSE* transa_array,
const CBLAS_TRANSPOSE* transb_array,
const int* m_array,
const int* n_array,
const int* k_array,
const void* alpha_array,
const void** a_array,
const int* lda_array,
const void** b_array,
const int* ldb_array,
const void* beta_array,
void** c_array,
const int* ldc_array,
const int group_count,
const int* group_size);
void cblas_dgemm_batch(const CBLAS_LAYOUT Layout,
const CBLAS_TRANSPOSE* transa_array,
const CBLAS_TRANSPOSE* transb_array,
const int* m_array,
const int* n_array,
const int* k_array,
const void* alpha_array,
const void** a_array,
const int* lda_array,
const void** b_array,
const int* ldb_array,
const void* beta_array,
void** c_array,
const int* ldc_array,
const int group_count,
const int* group_size);
void cblas_zgemm_batch(const CBLAS_LAYOUT Layout,
const CBLAS_TRANSPOSE* transa_array,
const CBLAS_TRANSPOSE* transb_array,
const int* m_array,
const int* n_array,
const int* k_array,
const void* alpha_array,
const void** a_array,
const int* lda_array,
const void** b_array,
const int* ldb_array,
const void* beta_array,
void** c_array,
const int* ldc_array,
const int group_count,
const int* group_size);
#endif
void dsyrk(const char&,
const char&,
const int&,

View File

@ -12,6 +12,7 @@
SET(CUDA_SRCS
cuBLAS_missing_functions.cu
CUDAfill.cpp
)
CUDA_ADD_LIBRARY(platform_cuda ${CUDA_SRCS})

View File

@ -25,6 +25,7 @@
#include "config.h"
#include "cudaError.h"
#include "allocator_traits.hpp"
#include "CUDAfill.hpp"
#include "CPU/SIMD/alignment.config.h"
namespace qmcplusplus
@ -119,6 +120,7 @@ template<typename T>
struct allocator_traits<CUDAAllocator<T>>
{
const static bool is_host_accessible = false;
static void fill_n(T* ptr, size_t n, const T& value) { CUDAfill_n(ptr, n, value); }
};
/** allocator for CUDA host pinned memory
@ -191,7 +193,8 @@ struct CUDALockedPageAllocator : public ULPHA
{
static_assert(std::is_same<T, value_type>::value, "CUDALockedPageAllocator and ULPHA data types must agree!");
value_type* pt = ULPHA::allocate(n);
cudaErrorCheck(cudaHostRegister(pt, n*sizeof(T), cudaHostRegisterDefault), "cudaHostRegister failed in CUDALockedPageAllocator!");
cudaErrorCheck(cudaHostRegister(pt, n * sizeof(T), cudaHostRegisterDefault),
"cudaHostRegister failed in CUDALockedPageAllocator!");
return pt;
}

View File

@ -0,0 +1,37 @@
//////////////////////////////////////////////////////////////////////////////////////
// This file is distributed under the University of Illinois/NCSA Open Source License.
// See LICENSE file in top directory for details.
//
// Copyright (c) 2021 QMCPACK developers.
//
// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
//
// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
//////////////////////////////////////////////////////////////////////////////////////
#include "CUDAfill.hpp"
#include <stdexcept>
#include <cuda_runtime_api.h>
#include "cudaError.h"
namespace qmcplusplus
{
template<typename T>
void CUDAfill_n(T* ptr, size_t n, const T& value)
{
if (value != T())
throw std::runtime_error("CUDAfill_n doesn't support fill non T() values!");
// setting 0 value on each byte should be 0 for int, float and double.
cudaErrorCheck(cudaMemset(ptr, 0, n * sizeof(T)), "Memset failed in CUDAfill_n!");
}
template void CUDAfill_n<int>(int* ptr, size_t n, const int& value);
template void CUDAfill_n<size_t>(size_t* ptr, size_t n, const size_t& value);
template void CUDAfill_n<float>(float* ptr, size_t n, const float& value);
template void CUDAfill_n<double>(double* ptr, size_t n, const double& value);
template void CUDAfill_n<std::complex<float>>(std::complex<float>* ptr, size_t n, const std::complex<float>& value);
template void CUDAfill_n<std::complex<double>>(std::complex<double>* ptr, size_t n, const std::complex<double>& value);
} // namespace qmcplusplus
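The comment above relies on the fact that an all-zero byte pattern represents the value 0 for the arithmetic types being filled. A tiny host-side illustration of that assumption:

#include <cassert>
#include <cstring>

int main()
{
  int i;
  float f;
  double d;
  std::memset(&i, 0, sizeof(i));
  std::memset(&f, 0, sizeof(f));
  std::memset(&d, 0, sizeof(d));
  assert(i == 0 && f == 0.0f && d == 0.0);
  return 0;
}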

View File

@ -0,0 +1,39 @@
//////////////////////////////////////////////////////////////////////////////////////
// This file is distributed under the University of Illinois/NCSA Open Source License.
// See LICENSE file in top directory for details.
//
// Copyright (c) 2021 QMCPACK developers.
//
// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
//
// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
//////////////////////////////////////////////////////////////////////////////////////
#ifndef QMCPLUSPLUS_CUDAFILL_H
#define QMCPLUSPLUS_CUDAFILL_H
#include <complex>
#include <stdexcept>
#include <cstddef>
namespace qmcplusplus
{
template<typename T>
void CUDAfill_n(T* ptr, size_t n, const T& value);
extern template void CUDAfill_n<int>(int* ptr, size_t n, const int& value);
extern template void CUDAfill_n<size_t>(size_t* ptr, size_t n, const size_t& value);
extern template void CUDAfill_n<float>(float* ptr, size_t n, const float& value);
extern template void CUDAfill_n<double>(double* ptr, size_t n, const double& value);
extern template void CUDAfill_n<std::complex<float>>(std::complex<float>* ptr,
size_t n,
const std::complex<float>& value);
extern template void CUDAfill_n<std::complex<double>>(std::complex<double>* ptr,
size_t n,
const std::complex<double>& value);
} // namespace qmcplusplus
#endif
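CUDAfill.hpp above pairs extern template declarations with the explicit instantiations in CUDAfill.cpp, so only the translation unit compiled against CUDA ever instantiates the template. A minimal single-file sketch of the same pattern, with fill_stub as an illustrative stand-in:

#include <cstddef>

// header-like part: declare the template and suppress implicit instantiation for int
template<typename T>
void fill_stub(T* ptr, std::size_t n, const T& value);
extern template void fill_stub<int>(int* ptr, std::size_t n, const int& value);

// source-like part: define the template and provide the explicit instantiation
template<typename T>
void fill_stub(T* ptr, std::size_t n, const T& value)
{
  for (std::size_t i = 0; i < n; ++i)
    ptr[i] = value;
}
template void fill_stub<int>(int* ptr, std::size_t n, const int& value);

int main()
{
  int a[3];
  fill_stub(a, 3, 7); // uses the explicitly instantiated fill_stub<int>
  return a[0] == 7 ? 0 : 1;
}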

View File

@ -13,6 +13,8 @@
#ifndef QMCPLUSPLUS_ACCESS_TRAITS_H
#define QMCPLUSPLUS_ACCESS_TRAITS_H
#include <algorithm>
namespace qmcplusplus
{
/** template class defines whether the memory allocated by the allocator is host accessible
@ -21,6 +23,11 @@ template<class Allocator>
struct allocator_traits
{
const static bool is_host_accessible = true;
template<typename T>
static void fill_n(T* ptr, size_t n, const T& value)
{
std::fill_n(ptr, n, value);
}
};
template<class Allocator>
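The default allocator_traits above forwards fill_n to std::fill_n, while the CUDAAllocator specialization earlier in this commit routes it to CUDAfill_n for host-inaccessible memory. A compact sketch of that trait-dispatch idea; traits_demo, DeviceAllocDemo, and device_fill_stub are illustrative stand-ins only:

#include <algorithm>
#include <cstddef>
#include <memory>
#include <vector>

template<typename T>
void device_fill_stub(T* ptr, std::size_t n, const T& value) { std::fill_n(ptr, n, value); }

template<class Allocator>
struct traits_demo // plays the role of allocator_traits
{
  static constexpr bool is_host_accessible = true;
  template<typename T>
  static void fill_n(T* ptr, std::size_t n, const T& value) { std::fill_n(ptr, n, value); }
};

struct DeviceAllocDemo {};

template<>
struct traits_demo<DeviceAllocDemo>
{
  static constexpr bool is_host_accessible = false;
  template<typename T>
  static void fill_n(T* ptr, std::size_t n, const T& value) { device_fill_stub(ptr, n, value); }
};

int main()
{
  std::vector<double> v(8);
  traits_demo<std::allocator<double>>::fill_n(v.data(), v.size(), 1.5); // host path
  traits_demo<DeviceAllocDemo>::fill_n(v.data(), v.size(), 2.5);        // would be the device path
  return 0;
}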

View File

@ -72,9 +72,6 @@ void QMCAppBase::saveXml()
if (!XmlDocStack.empty())
{
std::string newxml(myProject.CurrentMainRoot());
//string newxml(myProject.CurrentRoot());
//myProject.PreviousRoot(newxml);
//myProject.rewind();
newxml.append(".cont.xml");
app_log() << "\n========================================================="
<< "\n A new xml input file : " << newxml << std::endl;
@ -82,5 +79,5 @@ void QMCAppBase::saveXml()
}
}
std::string& QMCAppBase::getTitle() { return myProject.m_title; }
const std::string& QMCAppBase::getTitle() const { return myProject.getTitle(); }
} // namespace qmcplusplus

View File

@ -57,7 +57,7 @@ public:
/** execute the main function */
virtual bool execute() = 0;
std::string& getTitle();
const std::string& getTitle() const;
protected:
///stack of xml document

View File

@ -280,7 +280,7 @@ bool QMCMain::execute()
HDFVersion cur_version;
v_str << cur_version[0] << " " << cur_version[1];
xmlNodePtr newmcptr = xmlNewNode(NULL, (const xmlChar*)"mcwalkerset");
xmlNewProp(newmcptr, (const xmlChar*)"fileroot", (const xmlChar*)myProject.CurrentMainRoot());
xmlNewProp(newmcptr, (const xmlChar*)"fileroot", (const xmlChar*)myProject.CurrentMainRoot().c_str());
xmlNewProp(newmcptr, (const xmlChar*)"node", (const xmlChar*)"-1");
xmlNewProp(newmcptr, (const xmlChar*)"nprocs", (const xmlChar*)np_str.str().c_str());
xmlNewProp(newmcptr, (const xmlChar*)"version", (const xmlChar*)v_str.str().c_str());
@ -320,7 +320,7 @@ void QMCMain::executeLoop(xmlNodePtr cur)
if (cname == "qmc")
{
//prevent completed is set
bool success = executeQMCSection(tcur, iter > 0);
bool success = executeQMCSection(tcur, true);
if (!success)
{
app_warning() << " Terminated loop execution. A sub section returns false." << std::endl;
@ -331,6 +331,8 @@ void QMCMain::executeLoop(xmlNodePtr cur)
tcur = tcur->next;
}
}
// Destroy the last driver at the end of the loop; no further reuse of a driver is needed.
last_driver.reset(nullptr);
}
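The loop logic above keeps the previous driver alive in last_driver only while a loop section is running and destroys it once the loop finishes. A minimal sketch of that unique_ptr hand-off, with Driver as an illustrative stand-in for QMCDriverInterface:

#include <memory>

struct Driver
{
  void run() {}
};

int main()
{
  std::unique_ptr<Driver> last_driver;
  const bool reuse = true; // inside a loop section the driver may be reused
  for (int iter = 0; iter < 3; ++iter)
  {
    std::unique_ptr<Driver> driver;
    if (reuse && last_driver)
      driver = std::move(last_driver); // pick up the driver built in the previous iteration
    else
      driver = std::make_unique<Driver>();
    driver->run();
    if (reuse)
      last_driver = std::move(driver); // keep it for the next iteration
  }
  last_driver.reset(nullptr); // destroyed once the loop is done, as in executeLoop above
  return 0;
}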
bool QMCMain::executeQMCSection(xmlNodePtr cur, bool reuse)
@ -558,38 +560,39 @@ bool QMCMain::processPWH(xmlNodePtr cur)
/** prepare for a QMC run
* @param cur qmc element
* @param reuse if true, the current call is from a loop
* @return true, if a valid QMCDriver is set.
*/
bool QMCMain::runQMC(xmlNodePtr cur, bool reuse)
{
std::unique_ptr<QMCDriverInterface> qmc_driver;
std::string prev_config_file = last_driver ? last_driver->get_root_name() : "";
bool append_run = false;
if (!population_)
{
population_.reset(new MCPopulation(myComm->size(), *qmcSystem, ptclPool->getParticleSet("e"), psiPool->getPrimary(),
hamPool->getPrimary(), myComm->rank()));
}
if (reuse)
if (reuse && last_driver)
qmc_driver = std::move(last_driver);
else
{
QMCDriverFactory driver_factory;
QMCDriverFactory::DriverAssemblyState das = driver_factory.readSection(myProject.m_series, cur);
qmc_driver = driver_factory.newQMCDriver(std::move(last_driver), myProject.m_series, cur, das, *qmcSystem,
*ptclPool, *psiPool, *hamPool, *population_, myComm);
QMCDriverFactory driver_factory(myProject);
QMCDriverFactory::DriverAssemblyState das = driver_factory.readSection(cur);
qmc_driver = driver_factory.createQMCDriver(cur, das, *qmcSystem, *ptclPool, *psiPool, *hamPool, myComm);
append_run = das.append_run;
}
if (qmc_driver)
{
if (last_branch_engine_legacy_driver)
{
last_branch_engine_legacy_driver->resetRun(cur);
qmc_driver->setBranchEngine(std::move(last_branch_engine_legacy_driver));
}
//advance the project id
//if it is NOT the first qmc node and qmc/@append!='yes'
if (!FirstQMC && !append_run)
myProject.advance();
qmc_driver->setStatus(myProject.CurrentMainRoot(), prev_config_file, append_run);
qmc_driver->setStatus(myProject.CurrentMainRoot(), "", append_run);
// PD:
// Q: How does m_walkerset_in end up being non empty?
// A: Anytime that we aren't doing a restart.
@ -603,12 +606,13 @@ bool QMCMain::runQMC(xmlNodePtr cur, bool reuse)
infoSummary.flush();
infoLog.flush();
Timer qmcTimer;
NewTimer* t1 = timer_manager.createTimer(qmc_driver->getEngineName(), timer_level_coarse);
t1->start();
qmc_driver->run();
t1->stop();
app_log() << " QMC Execution time = " << std::setprecision(4) << qmcTimer.elapsed() << " secs" << std::endl;
last_driver = std::move(qmc_driver);
// transfer the states of a driver before its destruction
last_branch_engine_legacy_driver = qmc_driver->getBranchEngine();
// save the driver in a driver loop
if (reuse)
last_driver = std::move(qmc_driver);
return true;
}
else

View File

@ -22,6 +22,7 @@
#include "QMCDrivers/QMCDriverFactory.h"
#include "QMCApp/QMCMainState.h"
#include "QMCApp/QMCAppBase.h"
#include "QMCDrivers/SimpleFixedNodeBranch.h"
namespace qmcplusplus
{
@ -47,7 +48,10 @@ private:
///flag to indicate that a qmc is the first QMC
bool FirstQMC;
/// the last driver object. Should be in a loop only.
std::unique_ptr<QMCDriverInterface> last_driver;
/// last branch engine used by legacy drivers
std::unique_ptr<SimpleFixedNodeBranch> last_branch_engine_legacy_driver;
///xml mcwalkerset elements for output
std::vector<xmlNodePtr> m_walkerset;

View File

@ -25,7 +25,6 @@
#include "Message/MPIObjectBase.h"
#include "Particle/ParticleSetPool.h"
#include "QMCDrivers/QMCDriverFactory.h"
#include "QMCDrivers/MCPopulation.h"
#include "type_traits/template_types.hpp"
class Communicate;
@ -66,8 +65,6 @@ struct QMCMainState : public MPIObjectBase
*/
std::unique_ptr<HamiltonianPool> hamPool;
UPtr<MCPopulation> population_;
/** default constructor **/
QMCMainState(Communicate* c);

View File

@ -35,16 +35,16 @@ TEST_CASE("ProjectData", "[ohmmsapp]")
ProjectData proj1;
// If no name given, it gets set to time and date
// and the title is set equal to the name
REQUIRE(std::string(proj1.m_title).size() > 0);
REQUIRE(std::string(proj1.m_title) == proj1.getName());
REQUIRE(proj1.getTitle().size() > 0);
REQUIRE(proj1.getTitle() == proj1.getName());
ProjectData proj2("test");
REQUIRE(proj2.m_series == 0);
REQUIRE(proj2.getSeriesIndex() == 0);
proj2.advance();
REQUIRE(proj2.m_series == 1);
REQUIRE(proj2.getSeriesIndex() == 1);
REQUIRE(proj2.m_title == std::string("asample"));
REQUIRE(proj2.getTitle() == std::string("asample"));
REQUIRE(proj2.getName() == std::string("test"));
proj2.setCommunicator(c);
@ -67,7 +67,7 @@ TEST_CASE("ProjectData::put no series", "[ohmmsapp]")
xmlNodePtr root = doc.getRoot();
proj.put(root);
REQUIRE(proj.m_series == 0);
REQUIRE(proj.getSeriesIndex() == 0);
}
TEST_CASE("ProjectData::put with series", "[ohmmsapp]")
@ -85,7 +85,7 @@ TEST_CASE("ProjectData::put with series", "[ohmmsapp]")
xmlNodePtr root = doc.getRoot();
proj.put(root);
REQUIRE(proj.m_series == 1);
REQUIRE(proj.getSeriesIndex() == 1);
// host and date nodes get added for output to the .cont.xml file
}

View File

@ -258,5 +258,4 @@ void BranchIO<SFNB>::bcast_state()
}
template class BranchIO<SimpleFixedNodeBranch>;
template class BranchIO<SFNBranch>;
} // namespace qmcplusplus

View File

@ -13,9 +13,6 @@
#include "QMCDrivers/SimpleFixedNodeBranch.h"
#include "QMCDrivers/SFNBranch.h"
//#include <boost/archive/text_oarchive.hpp>
#ifndef QMCPLUSPLUS_BRANCHIO_H
#define QMCPLUSPLUS_BRANCHIO_H
@ -46,7 +43,6 @@ public:
};
extern template class BranchIO<SimpleFixedNodeBranch>;
extern template class BranchIO<SFNBranch>;
} // namespace qmcplusplus
#endif

View File

@ -39,6 +39,7 @@ SET(QMCDRIVERS
WFOpt/QMCCorrelatedSamplingLinearOptimize.cpp
WFOpt/QMCFixedSampleLinearOptimize.cpp
WFOpt/QMCFixedSampleLinearOptimizeBatched.cpp
WFOpt/OutputMatrix.cpp
Optimizers/DescentEngine.cpp
Optimizers/HybridEngine.cpp
WFOpt/QMCCostFunctionBase.cpp
@ -67,6 +68,7 @@ SET(QMCDRIVERS
DMC/DMCDriverInput.cpp
DMC/WalkerControlFactory.cpp
DMC/WalkerReconfiguration.cpp
DMC/WalkerControl.cpp
DMC/SODMCUpdatePbyPFast.cpp
RMC/RMC.cpp
RMC/RMCUpdatePbyP.cpp

View File

@ -11,14 +11,13 @@
#include <type_traits>
#include "ContextForSteps.h"
#include "QMCDrivers/MCPopulation.h"
namespace qmcplusplus
{
ContextForSteps::ContextForSteps(int num_walkers,
int num_particles,
std::vector<std::pair<int, int>> particle_group_indexes,
RandomGenerator_t& random_gen)
int num_particles,
std::vector<std::pair<int, int>> particle_group_indexes,
RandomGenerator_t& random_gen)
: particle_group_indexes_(particle_group_indexes), random_gen_(random_gen)
{
/** glambda to create type T with constructor T(int) and put in it unique_ptr

View File

@ -22,8 +22,6 @@
namespace qmcplusplus
{
class MCPopulation;
class DistanceTableData;
/** Thread local context for moving walkers
@ -38,45 +36,42 @@ class ContextForSteps
{
public:
using ParticlePositions = PtclOnLatticeTraits::ParticlePos_t;
using PosType = QMCTraits::PosType;
using MCPWalker = Walker<QMCTraits, PtclOnLatticeTraits>;
using RealType = QMCTraits::RealType;
using PosType = QMCTraits::PosType;
using MCPWalker = Walker<QMCTraits, PtclOnLatticeTraits>;
using RealType = QMCTraits::RealType;
ContextForSteps(int num_walkers,
int num_particles,
std::vector<std::pair<int,int>> particle_group_indexes,
RandomGenerator_t& random_gen);
int num_particles,
std::vector<std::pair<int, int>> particle_group_indexes,
RandomGenerator_t& random_gen);
int get_num_groups() const { return particle_group_indexes_.size(); }
RandomGenerator_t& get_random_gen() { return random_gen_; }
void nextDeltaRs(size_t num_rs) {
void nextDeltaRs(size_t num_rs)
{
// hate to repeat this pattern, this should never resize.
walker_deltas_.resize(num_rs);
makeGaussRandomWithEngine(walker_deltas_, random_gen_);
}
std::vector<PosType>& get_walker_deltas() { return walker_deltas_; }
auto deltaRsBegin() { return walker_deltas_.begin(); };
int getPtclGroupStart(int group) const { return particle_group_indexes_[group].first; }
int getPtclGroupEnd(int group) const { return particle_group_indexes_[group].second; }
protected:
std::vector<PosType> walker_deltas_;
/** indexes of start and stop of each particle group;
*
* Seems like these should be iterators but haven't thought through the implications.
*/
std::vector<std::pair<int,int>> particle_group_indexes_;
std::vector<std::pair<int, int>> particle_group_indexes_;
RandomGenerator_t& random_gen_;
};
}
} // namespace qmcplusplus
#endif

View File

@ -276,7 +276,7 @@ void CSVMC::resetRun()
app_log() << os.str() << std::endl;
CSMovers[ip]->put(qmcNode);
CSMovers[ip]->resetRun(branchEngine, estimatorClones[ip], traceClones[ip], DriftModifier);
CSMovers[ip]->resetRun(branchEngine.get(), estimatorClones[ip], traceClones[ip], DriftModifier);
}
}
#if !defined(REMOVE_TRACEMANAGER)
@ -301,7 +301,7 @@ void CSVMC::resetRun()
{
//int ip=omp_get_thread_num();
CSMovers[ip]->put(qmcNode);
CSMovers[ip]->resetRun(branchEngine, estimatorClones[ip], traceClones[ip], DriftModifier);
CSMovers[ip]->resetRun(branchEngine.get(), estimatorClones[ip], traceClones[ip], DriftModifier);
if (qmc_driver_mode[QMC_UPDATE_MODE])
CSMovers[ip]->initCSWalkersForPbyP(W.begin() + wPerNode[ip], W.begin() + wPerNode[ip + 1], nWarmupSteps > 0);
else

View File

@ -16,7 +16,6 @@ void Crowd::clearWalkers()
{
// We're clearing the refs to the objects not the referred to objects.
mcp_walkers_.clear();
mcp_wfbuffers_.clear();
walker_elecs_.clear();
walker_twfs_.clear();
walker_hamiltonians_.clear();
@ -34,7 +33,6 @@ void Crowd::reserve(int crowd_size)
void Crowd::addWalker(MCPWalker& walker, ParticleSet& elecs, TrialWaveFunction& twf, QMCHamiltonian& hamiltonian)
{
mcp_walkers_.push_back(walker);
mcp_wfbuffers_.push_back(walker.DataSet);
walker_elecs_.push_back(elecs);
walker_twfs_.push_back(twf);
walker_hamiltonians_.push_back(hamiltonian);
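As the comment above stresses, clearing these vectors only drops references; the walkers, particle sets, wavefunctions, and Hamiltonians live elsewhere. A small sketch of the std::reference_wrapper bookkeeping the crowd relies on, with WalkerStub as an illustrative stand-in:

#include <functional>
#include <vector>

struct WalkerStub
{
  double weight = 1.0;
};

int main()
{
  WalkerStub w1, w2; // owned outside the crowd, e.g. by the population
  std::vector<std::reference_wrapper<WalkerStub>> crowd_walkers;
  crowd_walkers.push_back(w1);
  crowd_walkers.push_back(w2);
  crowd_walkers[0].get().weight = 2.0; // mutates w1 through the stored reference
  crowd_walkers.clear();               // drops the references; w1 and w2 are untouched
  return w1.weight == 2.0 ? 0 : 1;
}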

View File

@ -34,7 +34,6 @@ class Crowd
{
public:
using MCPWalker = MCPopulation::MCPWalker;
using WFBuffer = MCPopulation::WFBuffer;
using GradType = QMCTraits::GradType;
using RealType = QMCTraits::RealType;
using FullPrecRealType = QMCTraits::FullPrecRealType;
@ -81,12 +80,11 @@ public:
auto beginElectrons() { return walker_elecs_.begin(); }
auto endElectrons() { return walker_elecs_.end(); }
RefVector<MCPWalker>& get_walkers() { return mcp_walkers_; }
std::vector<std::reference_wrapper<ParticleSet>>& get_walker_elecs() { return walker_elecs_; }
std::vector<std::reference_wrapper<TrialWaveFunction>>& get_walker_twfs() { return walker_twfs_; }
std::vector<std::reference_wrapper<QMCHamiltonian>>& get_walker_hamiltonians() { return walker_hamiltonians_; }
const RefVector<MCPWalker>& get_walkers() const { return mcp_walkers_; }
const RefVector<ParticleSet>& get_walker_elecs() const { return walker_elecs_; }
const RefVector<TrialWaveFunction>& get_walker_twfs() const { return walker_twfs_; }
const RefVector<QMCHamiltonian>& get_walker_hamiltonians() const { return walker_hamiltonians_; }
RefVector<WFBuffer>& get_mcp_wfbuffers() { return mcp_wfbuffers_; }
const EstimatorManagerCrowd& get_estimator_manager_crowd() const { return estimator_manager_crowd_; }
int size() const { return mcp_walkers_.size(); }
@ -104,7 +102,6 @@ private:
* @{
*/
RefVector<MCPWalker> mcp_walkers_;
RefVector<WFBuffer> mcp_wfbuffers_;
RefVector<ParticleSet> walker_elecs_;
RefVector<TrialWaveFunction> walker_twfs_;
RefVector<QMCHamiltonian> walker_hamiltonians_;

View File

@ -41,11 +41,8 @@ typedef int TraceManager;
namespace qmcplusplus
{
/// Constructor.
DMC::DMC(MCWalkerConfiguration& w,
TrialWaveFunction& psi,
QMCHamiltonian& h,
Communicate* comm)
: QMCDriver(w, psi, h, comm, "DMC"),
DMC::DMC(MCWalkerConfiguration& w, TrialWaveFunction& psi, QMCHamiltonian& h, Communicate* comm, bool enable_profiling)
: QMCDriver(w, psi, h, comm, "DMC", enable_profiling),
KillNodeCrossing(0),
BranchInterval(-1),
L2("no"),
@ -60,7 +57,7 @@ DMC::DMC(MCWalkerConfiguration& w,
m_param.add(NonLocalMove, "nonlocalmove", "string");
m_param.add(NonLocalMove, "nonlocalmoves", "string");
m_param.add(mover_MaxAge, "MaxAge", "double");
m_param.add(L2,"L2_diffusion","string");
m_param.add(L2, "L2_diffusion", "string");
}
void DMC::resetUpdateEngines()
@ -97,7 +94,13 @@ void DMC::resetUpdateEngines()
copy(wPerNode.begin(), wPerNode.end(), std::ostream_iterator<int>(o, " "));
o << "\n";
if (qmc_driver_mode[QMC_UPDATE_MODE])
{
o << " Updates by particle-by-particle moves";
if (L2 == "yes")
app_log() << "Using DMCUpdatePbyPL2" << std::endl;
else
app_log() << "Using DMCUpdatePbyPWithRejectionFast" << std::endl;
}
else
o << " Updates by walker moves";
// Appears to be set in constructor reported here and used nowhere
@ -130,7 +133,7 @@ void DMC::resetUpdateEngines()
Movers[ip] = new SODMCUpdatePbyPWithRejectionFast(*wClones[ip], *psiClones[ip], *hClones[ip], *Rng[ip]);
Movers[ip]->setSpinMass(SpinMass);
Movers[ip]->put(qmcNode);
Movers[ip]->resetRun(branchEngine, estimatorClones[ip], traceClones[ip], DriftModifier);
Movers[ip]->resetRun(branchEngine.get(), estimatorClones[ip], traceClones[ip], DriftModifier);
Movers[ip]->initWalkersForPbyP(W.begin() + wPerNode[ip], W.begin() + wPerNode[ip + 1]);
}
else
@ -142,19 +145,13 @@ void DMC::resetUpdateEngines()
{
if (qmc_driver_mode[QMC_UPDATE_MODE])
{
bool do_L2 = L2=="yes";
if(!do_L2)
{
app_log()<<"Using DMCUpdatePbyPWithRejectionFast\n";
Movers[ip] = new DMCUpdatePbyPWithRejectionFast(*wClones[ip], *psiClones[ip], *hClones[ip], *Rng[ip]);
}
else
{
app_log()<<"Using DMCUpdatePbyPL2\n";
if (L2 == "yes")
Movers[ip] = new DMCUpdatePbyPL2(*wClones[ip], *psiClones[ip], *hClones[ip], *Rng[ip]);
}
else
Movers[ip] = new DMCUpdatePbyPWithRejectionFast(*wClones[ip], *psiClones[ip], *hClones[ip], *Rng[ip]);
Movers[ip]->put(qmcNode);
Movers[ip]->resetRun(branchEngine, estimatorClones[ip], traceClones[ip], DriftModifier);
Movers[ip]->resetRun(branchEngine.get(), estimatorClones[ip], traceClones[ip], DriftModifier);
Movers[ip]->initWalkersForPbyP(W.begin() + wPerNode[ip], W.begin() + wPerNode[ip + 1]);
}
else
@ -164,7 +161,7 @@ void DMC::resetUpdateEngines()
else
Movers[ip] = new DMCUpdateAllWithRejection(*wClones[ip], *psiClones[ip], *hClones[ip], *Rng[ip]);
Movers[ip]->put(qmcNode);
Movers[ip]->resetRun(branchEngine, estimatorClones[ip], traceClones[ip], DriftModifier);
Movers[ip]->resetRun(branchEngine.get(), estimatorClones[ip], traceClones[ip], DriftModifier);
Movers[ip]->initWalkers(W.begin() + wPerNode[ip], W.begin() + wPerNode[ip + 1]);
}
}

View File

@ -31,7 +31,7 @@ class DMC : public QMCDriver, public CloneManager
{
public:
/// Constructor.
DMC(MCWalkerConfiguration& w, TrialWaveFunction& psi, QMCHamiltonian& h, Communicate* comm);
DMC(MCWalkerConfiguration& w, TrialWaveFunction& psi, QMCHamiltonian& h, Communicate* comm, bool enable_profiling);
bool run();
bool put(xmlNodePtr cur);

View File

@ -22,6 +22,8 @@
#include "Utilities/RunTimeManager.h"
#include "ParticleBase/RandomSeqGenerator.h"
#include "Utilities/ProgressReportEngine.h"
#include "QMCDrivers/DMC/WalkerControl.h"
#include "QMCDrivers/SFNBranch.h"
namespace qmcplusplus
{
@ -33,13 +35,14 @@ using WP = WalkerProperties::Indexes;
*
* Note you must call the Base constructor before the derived class sets QMCType
*/
DMCBatched::DMCBatched(QMCDriverInput&& qmcdriver_input,
DMCBatched::DMCBatched(const ProjectData& project_info,
QMCDriverInput&& qmcdriver_input,
DMCDriverInput&& input,
MCPopulation& pop,
MCPopulation&& pop,
TrialWaveFunction& psi,
QMCHamiltonian& h,
Communicate* comm)
: QMCDriverNew(std::move(qmcdriver_input), pop, psi, h,
: QMCDriverNew(project_info, std::move(qmcdriver_input), std::move(pop), psi, h,
"DMCBatched::", comm,
"DMCBatched",
std::bind(&DMCBatched::setNonLocalMoveHandler, this, _1)),
@ -49,42 +52,14 @@ DMCBatched::DMCBatched(QMCDriverInput&& qmcdriver_input,
}
// clang-format on
DMCBatched::~DMCBatched() = default;
void DMCBatched::setNonLocalMoveHandler(QMCHamiltonian& golden_hamiltonian)
{
golden_hamiltonian.setNonLocalMoves(dmcdriver_input_.get_non_local_move(), qmcdriver_input_.get_tau(),
dmcdriver_input_.get_alpha(), dmcdriver_input_.get_gamma());
}
void DMCBatched::resetUpdateEngines()
{
ReportEngine PRE("DMC", "resetUpdateEngines");
Timer init_timer;
// Here DMC loads "Ensemble of cloned MCWalkerConfigurations"
// I'd like to do away with this method in DMCBatched.
// false indicates we do not support kill at node crossings.
branch_engine_->initWalkerController(population_, dmcdriver_input_.get_reconfiguration(), false);
estimator_manager_->reset();
RefVector<MCPWalker> walkers(convertUPtrToRefVector(population_.get_walkers()));
branch_engine_->checkParameters(population_.get_num_global_walkers(), walkers);
std::ostringstream o;
if (dmcdriver_input_.get_reconfiguration())
o << " Fixed population using reconfiguration method\n";
else
o << " Fluctuating population\n";
o << " Persistent walkers are killed after " << dmcdriver_input_.get_max_age() << " MC sweeps\n";
o << " BranchInterval = " << dmcdriver_input_.get_branch_interval() << "\n";
o << " Steps per block = " << qmcdriver_input_.get_max_steps() << "\n";
o << " Number of blocks = " << qmcdriver_input_.get_max_blocks() << "\n";
app_log() << o.str() << std::endl;
app_log() << " DMC Engine Initialization = " << init_timer.elapsed() << " secs" << std::endl;
}
void DMCBatched::advanceWalkers(const StateForThread& sft,
Crowd& crowd,
DriverTimers& timers,
@ -92,26 +67,13 @@ void DMCBatched::advanceWalkers(const StateForThread& sft,
ContextForSteps& step_context,
bool recompute)
{
timers.buffer_timer.start();
// We copy positions from the walkers to elec particles sets for all the crowds walkers
// we might have received a few updates over the wire (from MPI)
// ** None of the per walker objects have referential integrity step to step **
crowd.loadWalkers();
assert(QMCDriverNew::checkLogAndGL(crowd));
int nnode_crossing(0);
auto& walker_twfs = crowd.get_walker_twfs();
auto& walkers = crowd.get_walkers();
auto& walker_elecs = crowd.get_walker_elecs();
// Note this resets the identities of all the walker TWFs
auto copyTWFFromBuffer = [](TrialWaveFunction& twf, ParticleSet& pset, MCPWalker& walker) {
twf.copyFromBuffer(pset, walker.DataSet);
};
for (int iw = 0; iw < crowd.size(); ++iw)
copyTWFFromBuffer(walker_twfs[iw], walker_elecs[iw], walkers[iw]);
timers.buffer_timer.stop();
const int num_walkers = crowd.size();
//This generates an entire step's worth of deltas.
step_context.nextDeltaRs(num_walkers * sft.population.get_num_particles());
@ -279,65 +241,56 @@ void DMCBatched::advanceWalkers(const StateForThread& sft,
//To use the flex interfaces we have to build RefVectors for walker that moved and walkers that didn't
auto& walker_hamiltonians = crowd.get_walker_hamiltonians();
auto& walker_mcp_wfbuffers = crowd.get_mcp_wfbuffers();
auto& walker_hamiltonians = crowd.get_walker_hamiltonians();
DMCPerWalkerRefRefs per_walker_ref_refs{walkers,
walker_twfs,
walker_hamiltonians,
walker_elecs,
walker_mcp_wfbuffers,
old_walker_energies,
new_walker_energies,
rr_proposed,
rr_accepted,
gf_acc};
DMCPerWalkerRefRefs per_walker_ref_refs{walkers, walker_twfs, walker_hamiltonians,
walker_elecs, old_walker_energies, new_walker_energies,
rr_proposed, rr_accepted, gf_acc};
MovedStalled these = buildMovedStalled(did_walker_move, per_walker_ref_refs);
handleMovedWalkers(these.moved, sft, timers);
handleStalledWalkers(these.stalled, sft);
handleMovedWalkers(these.moved, sft, timers, recompute);
handleStalledWalkers(these.stalled, sft, recompute);
dmc_timers.tmove_timer.start();
std::vector<int> walker_non_local_moves_accepted(
QMCHamiltonian::flex_makeNonLocalMoves(crowd.get_walker_hamiltonians(), crowd.get_walker_elecs()));
assert(QMCDriverNew::checkLogAndGL(crowd));
//could be premature optimization
int num_moved_nonlocal = 0;
int total_moved_nonlocal = 0;
for (int iw = 0; iw < walkers.size(); ++iw)
{
if (walker_non_local_moves_accepted[iw] > 0)
{
num_moved_nonlocal++;
total_moved_nonlocal += walker_non_local_moves_accepted[iw];
crowd.incNonlocalAccept();
}
}
ScopedTimer tmove_timer(&dmc_timers.tmove_timer);
std::vector<int> walker_non_local_moves_accepted(
QMCHamiltonian::flex_makeNonLocalMoves(crowd.get_walker_hamiltonians(), crowd.get_walker_elecs()));
if (num_moved_nonlocal > 0)
{
DMCPerWalkerRefs moved_nonlocal(num_moved_nonlocal);
for (int iw = 0; iw < these.moved.walkers.size(); ++iw)
//could be premature optimization
int num_moved_nonlocal = 0;
int total_moved_nonlocal = 0;
for (int iw = 0; iw < walkers.size(); ++iw)
{
if (walker_non_local_moves_accepted[iw] > 0)
{
moved_nonlocal.walkers.push_back(walkers[iw]);
moved_nonlocal.walker_twfs.push_back(walker_twfs[iw]);
moved_nonlocal.walker_elecs.push_back(walker_elecs[iw]);
moved_nonlocal.walker_hamiltonians.push_back(walker_hamiltonians[iw]);
moved_nonlocal.walker_mcp_wfbuffers.push_back(walker_mcp_wfbuffers[iw]);
num_moved_nonlocal++;
total_moved_nonlocal += walker_non_local_moves_accepted[iw];
crowd.incNonlocalAccept();
}
}
TrialWaveFunction::flex_updateBuffer(moved_nonlocal.walker_twfs, moved_nonlocal.walker_elecs,
moved_nonlocal.walker_mcp_wfbuffers);
ParticleSet::flex_saveWalker(moved_nonlocal.walker_elecs, moved_nonlocal.walkers);
if (num_moved_nonlocal > 0)
{
DMCPerWalkerRefs moved_nonlocal(num_moved_nonlocal);
for (int iw = 0; iw < these.moved.walkers.size(); ++iw)
{
if (walker_non_local_moves_accepted[iw] > 0)
{
moved_nonlocal.walkers.push_back(walkers[iw]);
moved_nonlocal.walker_twfs.push_back(walker_twfs[iw]);
moved_nonlocal.walker_elecs.push_back(walker_elecs[iw]);
moved_nonlocal.walker_hamiltonians.push_back(walker_hamiltonians[iw]);
}
}
TrialWaveFunction::flex_evaluateGL(moved_nonlocal.walker_twfs, moved_nonlocal.walker_elecs, false);
assert(QMCDriverNew::checkLogAndGL(crowd));
ParticleSet::flex_saveWalker(moved_nonlocal.walker_elecs, moved_nonlocal.walkers);
}
}
dmc_timers.tmove_timer.stop();
setMultiplicities(sft.dmcdrv_input, walkers, step_context.get_random_gen());
}
DMCBatched::MovedStalled DMCBatched::buildMovedStalled(const std::vector<int>& did_walker_move,
@ -361,7 +314,6 @@ DMCBatched::MovedStalled DMCBatched::buildMovedStalled(const std::vector<int>& d
these.moved.walker_twfs.push_back(refs.walker_twfs[iw]);
these.moved.walker_hamiltonians.push_back(refs.walker_hamiltonians[iw]);
these.moved.walker_elecs.push_back(refs.walker_elecs[iw]);
these.moved.walker_mcp_wfbuffers.push_back(refs.walker_mcp_wfbuffers[iw]);
these.moved.old_energies.push_back(refs.old_energies[iw]);
these.moved.new_energies.push_back(refs.new_energies[iw]);
these.moved.rr_proposed.push_back(refs.rr_proposed[iw]);
@ -375,7 +327,6 @@ DMCBatched::MovedStalled DMCBatched::buildMovedStalled(const std::vector<int>& d
these.stalled.walker_twfs.push_back(refs.walker_twfs[iw]);
these.stalled.walker_hamiltonians.push_back(refs.walker_hamiltonians[iw]);
these.stalled.walker_elecs.push_back(refs.walker_elecs[iw]);
these.stalled.walker_mcp_wfbuffers.push_back(refs.walker_mcp_wfbuffers[iw]);
these.stalled.old_energies.push_back(refs.old_energies[iw]);
these.stalled.new_energies.push_back(refs.new_energies[iw]);
these.stalled.rr_proposed.push_back(refs.rr_proposed[iw]);
@ -386,12 +337,15 @@ DMCBatched::MovedStalled DMCBatched::buildMovedStalled(const std::vector<int>& d
return these;
}
void DMCBatched::handleMovedWalkers(DMCPerWalkerRefs& moved, const StateForThread& sft, DriverTimers& timers)
void DMCBatched::handleMovedWalkers(DMCPerWalkerRefs& moved,
const StateForThread& sft,
DriverTimers& timers,
bool recompute)
{
if (moved.walkers.size() > 0)
{
timers.buffer_timer.start();
TrialWaveFunction::flex_updateBuffer(moved.walker_twfs, moved.walker_elecs, moved.walker_mcp_wfbuffers);
TrialWaveFunction::flex_evaluateGL(moved.walker_twfs, moved.walker_elecs, recompute);
std::for_each(moved.walkers.begin(), moved.walkers.end(), [](MCPWalker& walker) { walker.Age = 0; });
ParticleSet::flex_saveWalker(moved.walker_elecs, moved.walkers);
timers.buffer_timer.stop();
@ -432,12 +386,12 @@ void DMCBatched::handleMovedWalkers(DMCPerWalkerRefs& moved, const StateForThrea
}
}
void DMCBatched::handleStalledWalkers(DMCPerWalkerRefs& stalled, const StateForThread& sft)
void DMCBatched::handleStalledWalkers(DMCPerWalkerRefs& stalled, const StateForThread& sft, bool recompute)
{
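// Stalled walkers (no accepted move this step) are also refreshed and saved so their
// stored state stays consistent with the trial wavefunction.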
for (int iw = 0; iw < stalled.walkers.size(); ++iw)
{
std::cout << "A walker has stalled.\n";
TrialWaveFunction::flex_updateBuffer(stalled.walker_twfs, stalled.walker_elecs, stalled.walker_mcp_wfbuffers);
TrialWaveFunction::flex_evaluateGL(stalled.walker_twfs, stalled.walker_elecs, recompute);
std::for_each(stalled.walkers.begin(), stalled.walkers.end(), [](MCPWalker& walker) { walker.Age = 0; });
ParticleSet::flex_saveWalker(stalled.walker_elecs, stalled.walkers);
@ -459,27 +413,6 @@ void DMCBatched::handleStalledWalkers(DMCPerWalkerRefs& stalled, const StateForT
}
}
void DMCBatched::setMultiplicities(const DMCDriverInput& dmcdriver_input,
RefVector<MCPWalker>& walkers,
RandomGenerator_t& rng)
{
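// Multiplicity rule: a walker's multiplicity equals its weight, capped at 1 once the
// walker has aged and at 0.5 past max_age; the added uniform random number effectively
// makes the integer copy count a stochastic rounding of the weight.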
auto setMultiplicity = [&dmcdriver_input, &rng](MCPWalker& walker) {
constexpr FullPrecRealType onehalf(0.5);
constexpr FullPrecRealType cone(1);
walker.Multiplicity = walker.Weight;
if (walker.Age > dmcdriver_input.get_max_age())
walker.Multiplicity = std::min(onehalf, walker.Weight);
else if (walker.Age > 0)
walker.Multiplicity = std::min(cone, walker.Weight);
walker.Multiplicity += rng();
};
for (int iw = 0; iw < walkers.size(); ++iw)
{
setMultiplicity(walkers[iw]);
}
}
void DMCBatched::runDMCStep(int crowd_id,
const StateForThread& sft,
DriverTimers& timers,
@ -492,13 +425,10 @@ void DMCBatched::runDMCStep(int crowd_id,
return;
crowd.setRNGForHamiltonian(context_for_steps[crowd_id]->get_random_gen());
int max_steps = sft.qmcdrv_input.get_max_steps();
// This is migraine inducing here and in the original driver; I believe they are the same in
// VMC(Batched)/DMC(Batched). Needs another check and a unit test.
bool is_recompute_block =
sft.recomputing_blocks ? (1 + sft.block) % sft.qmcdrv_input.get_blocks_between_recompute() == 0 : false;
IndexType step = sft.step;
bool recompute_this_step = (is_recompute_block && (step + 1) == max_steps);
int max_steps = sft.qmcdrv_input.get_max_steps();
IndexType step = sft.step;
// Are we entering the last step of a block to recompute at?
bool recompute_this_step = (sft.is_recomputing_block && (step + 1) == max_steps);
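// Recompute only on the last step of a block flagged for recomputation, so the full
// recompute happens at most once per recompute interval.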
advanceWalkers(sft, crowd, timers, dmc_timers, *context_for_steps[crowd_id], recompute_this_step);
}
@ -517,11 +447,40 @@ void DMCBatched::process(xmlNodePtr node)
{
myComm->barrier_and_abort(ue.what());
}
{
ReportEngine PRE("DMC", "resetUpdateEngines");
Timer init_timer;
// Here DMC loads "Ensemble of cloned MCWalkerConfigurations"
// I'd like to do away with this method in DMCBatched.
app_log() << " Creating the branching engine and walker controler" << std::endl;
branch_engine_ = std::make_unique<SFNBranch>(qmcdriver_input_.get_tau(), population_.get_num_global_walkers());
branch_engine_->put(node);
WalkerController = std::make_unique<WalkerControl>(myComm, Random, dmcdriver_input_.get_reconfiguration());
WalkerController->setMinMax(population_.get_num_global_walkers(), 0);
WalkerController->start();
WalkerController->put(node);
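// As wired here, SFNBranch owns the trial-energy parameters while WalkerControl performs
// the population control; both read their options from the same XML node.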
std::ostringstream o;
if (dmcdriver_input_.get_reconfiguration())
o << " Fixed population using reconfiguration method\n";
else
o << " Fluctuating population\n";
o << " Persistent walkers are killed after " << dmcdriver_input_.get_max_age() << " MC sweeps\n";
o << " BranchInterval = " << dmcdriver_input_.get_branch_interval() << "\n";
o << " Steps per block = " << qmcdriver_input_.get_max_steps() << "\n";
o << " Number of blocks = " << qmcdriver_input_.get_max_blocks() << "\n";
app_log() << o.str() << std::endl;
app_log() << " DMC Engine Initialization = " << init_timer.elapsed() << " secs" << std::endl;
}
}
bool DMCBatched::run()
{
resetUpdateEngines();
IndexType num_blocks = qmcdriver_input_.get_max_blocks();
estimator_manager_->start(num_blocks);
@ -538,6 +497,14 @@ bool DMCBatched::run()
section_start_task(crowds_.size(), initialLogEvaluation, std::ref(crowds_), std::ref(step_contexts_));
}
{
FullPrecRealType energy, variance;
population_.measureGlobalEnergyVariance(*myComm, energy, variance);
// false indicates we do not support kill at node crossings.
branch_engine_->initParam(population_, energy, variance, dmcdriver_input_.get_reconfiguration(), false);
WalkerController->setTrialEnergy(branch_engine_->getEtrial());
}
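// Seed the branch engine with the measured global energy and variance so the trial
// energy is meaningful before the first branching step.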
ParallelExecutor<> crowd_task;
for (int block = 0; block < num_blocks; ++block)
@ -548,7 +515,9 @@ bool DMCBatched::run()
dmc_state.recalculate_properties_period = (qmc_driver_mode_[QMC_UPDATE_MODE])
? qmcdriver_input_.get_recalculate_properties_period()
: (qmcdriver_input_.get_max_blocks() + 1) * qmcdriver_input_.get_max_steps();
dmc_state.recomputing_blocks = qmcdriver_input_.get_blocks_between_recompute();
dmc_state.is_recomputing_block = qmcdriver_input_.get_blocks_between_recompute()
? (1 + block) % qmcdriver_input_.get_blocks_between_recompute() == 0
: false;
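// A block is a recompute block when blocks_between_recompute is nonzero and this block
// index hits the interval; a value of zero disables recomputation entirely.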
for (UPtr<Crowd>& crowd : crowds_)
crowd->startBlock(qmcdriver_input_.get_max_steps());
@ -571,7 +540,13 @@ bool DMCBatched::run()
crowd_ref.accumulate(population_.get_num_global_walkers());
}
branch_engine_->branch(step, population_);
{
int iter = block * qmcdriver_input_.get_max_steps() + step;
const int population_now = WalkerController->branch(iter, population_, iter == 0);
branch_engine_->updateParamAfterPopControl(population_now, WalkerController->get_ensemble_property(),
population_.get_num_particles());
WalkerController->setTrialEnergy(branch_engine_->getEtrial());
}
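// After each step's branching, the population count and ensemble averages from
// WalkerControl feed back into the branch engine, which supplies the updated trial energy.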
for (UPtr<Crowd>& crowd_ptr : crowds_)
crowd_ptr->clearWalkers();
@ -580,6 +555,8 @@ bool DMCBatched::run()
}
endBlock();
}
branch_engine_->printStatus();
return finalize(num_blocks, true);
}
