Merge branch 'develop' into fast_force_switch

This commit is contained in:
rcclay 2022-09-19 11:10:59 -06:00
commit 0463182966
108 changed files with 34493 additions and 57605 deletions

View File

@ -20,10 +20,10 @@ jobs:
fail-fast: false
matrix:
jobname: [
GCC8-NoMPI-MKL-Real-Mixed, # mixed precision
GCC8-NoMPI-MKL-Complex-Mixed,
GCC8-NoMPI-MKL-Real, # full precision
GCC8-NoMPI-MKL-Complex,
GCC9-NoMPI-MKL-Real-Mixed, # mixed precision
GCC9-NoMPI-MKL-Complex-Mixed,
GCC9-NoMPI-MKL-Real, # full precision
GCC9-NoMPI-MKL-Complex,
]
steps:
@ -115,10 +115,10 @@ jobs:
fail-fast: false
matrix:
jobname: [
GCC8-NoMPI-Legacy-CUDA-Real-Mixed, # mixed precision
GCC8-NoMPI-Legacy-CUDA-Complex-Mixed,
GCC8-NoMPI-Legacy-CUDA-Real, # full precision
GCC8-NoMPI-Legacy-CUDA-Complex,
GCC9-NoMPI-Legacy-CUDA-Real-Mixed, # mixed precision
GCC9-NoMPI-Legacy-CUDA-Complex-Mixed,
GCC9-NoMPI-Legacy-CUDA-Real, # full precision
GCC9-NoMPI-Legacy-CUDA-Complex,
Clang15-MPI-CUDA-AFQMC-Offload-Real-Mixed, # auxiliary field, offload
Clang15-MPI-CUDA-AFQMC-Offload-Real,
Clang15-MPI-CUDA-AFQMC-Offload-Complex-Mixed,
@ -317,10 +317,10 @@ jobs:
fail-fast: false
matrix:
jobname: [
GCC8-MPI-CUDA-AFQMC-Real-Mixed, # auxiliary field, requires MPI
GCC8-MPI-CUDA-AFQMC-Complex-Mixed,
GCC8-MPI-CUDA-AFQMC-Real,
GCC8-MPI-CUDA-AFQMC-Complex,
GCC9-MPI-CUDA-AFQMC-Real-Mixed, # auxiliary field, requires MPI
GCC9-MPI-CUDA-AFQMC-Complex-Mixed,
GCC9-MPI-CUDA-AFQMC-Real,
GCC9-MPI-CUDA-AFQMC-Complex,
]
steps:

View File

@ -1,6 +1,6 @@
# Check compiler version
if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.0)
message(FATAL_ERROR "Requires gcc 7.0 or higher ")
if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9.0)
message(FATAL_ERROR "Requires GCC 9.0 or higher ")
endif()
# Enable OpenMP

View File

@ -4,12 +4,18 @@ set(TEST_CXX17_SOURCE ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/try_c
file(
WRITE ${TEST_CXX17_SOURCE}
"// Test for C++17 standard library support
#include <variant>
#include <string>
#include <array>
#include <cstddef>
#include <memory_resource>
int main(int argc, char **argv)
{
std::variant<int, float, std::string> intFloatString;
// allocate memory on the stack
std::array<std::byte, 20000> buf;
// without fallback memory allocation on heap
std::pmr::monotonic_buffer_resource pool{ buf.data(), buf.size(),
std::pmr::null_memory_resource() };
return 0;
}
")
@ -28,17 +34,21 @@ if(NOT CXX17_LIBRARY_OKAY)
set(COMPILE_FAIL_OUTPUT cpp17_compile_fail.txt)
file(WRITE "${CMAKE_BINARY_DIR}/${COMPILE_FAIL_OUTPUT}" "${COMPILE_OUTPUT}")
message(STATUS "C++17 standard library support not found")
message(STATUS "C++17 standard library support not found or incomplete")
message("compiler is ${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}")
if(CMAKE_CXX_COMPILER_ID MATCHES "GNU")
message("Compiler detected is g++.\n Use version 7.0 or newer for C++17 standard library support.")
message("Compiler detected is g++.\n Use version 9.0 or newer for complete C++17 standard library support.")
elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
message(
"Compiler detected is clang++.\n If not using libcxx, ensure a g++ version greater than 7.0 is also on the path so that its C++17 library can be used."
"Compiler detected is clang++.\n If not using libcxx, ensure a GCC toolchain version equal or greater "
"than 9.0 gets picked up. Check with 'clang++ -v'. Or use the --gcc-toolchain compiler option "
"(added to CMAKE_CXX_FLAGS) to point to a newer GCC installation."
)
elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
message(
"Compiler detected is icpc.\n Ensure a gcc version greater than 7.0 is also on the path so that its C++17 library can be used. Or use the -cxxlib switch to point to a newer gcc install."
"Compiler detected is icpc.\n Ensure a GCC version equal or greater than 9.0 is also on the PATH "
"such that its C++17 library can be used. Check with 'icpc -v'. Or use the -cxxlib compiler option "
"(added to CMAKE_CXX_FLAGS) to point to a newer GCC installation."
)
endif()
message(" Output of test compile is in ${COMPILE_FAIL_OUTPUT}")

View File

@ -94,11 +94,15 @@ function(COPY_DIRECTORY_MAYBE_USING_SYMLINK SRC_DIR DST_DIR ${ARGN})
endfunction()
# Symlink or copy an individual file
function(MAYBE_SYMLINK SRC_DIR DST_DIR)
function(MAYBE_SYMLINK SRC_FILE DST_FILE)
if(QMC_SYMLINK_TEST_FILES)
file(CREATE_LINK ${SRC_DIR} ${DST_DIR} SYMBOLIC)
file(CREATE_LINK ${SRC_FILE} ${DST_FILE} SYMBOLIC)
else()
file(COPY ${SRC_DIR} DESTINATION ${DST_DIR})
# file(COPY ...) takes a destination directory and doesn't rename the file.
# cmake_path requires CMake v3.20 and file(COPY_FILE ...) requires CMake v3.21.
# Instead we use configure_file, which takes an input and output filename and
# updates files that change in the source directory or qmc_dir.
configure_file(${SRC_FILE} ${DST_FILE} COPYONLY)
endif()
endfunction()
@ -654,3 +658,16 @@ function(
set_property(TEST ${FULLNAME} APPEND PROPERTY PASS_REGULAR_EXPRESSION "Time limit reached for")
endif()
endfunction()
# Add a test to see if a file exists in the desired location.
# Register a CTest check that FILE_NAME exists (or, when SHOULD_SUCCEED is
# false, does not exist) in the working directory of test TEST_DEP_IN.
# The check is skipped entirely when TEST_DEP_IN is not a known test.
function(add_test_check_file_existence TEST_DEP_IN FILE_NAME SHOULD_SUCCEED)
# Guard: only add the check when the dependent test is actually registered.
if(TEST ${TEST_DEP_IN})
# The file is looked for in the dependent test's working directory.
get_test_property(${TEST_DEP_IN} WORKING_DIRECTORY TEST_DEP_IN_WORK_DIR)
set(TESTNAME ${TEST_DEP_IN}-exists-${FILE_NAME})
# 'ls' exits non-zero when the file is absent, so it doubles as the existence probe.
add_test(NAME ${TESTNAME} COMMAND ls ${TEST_DEP_IN_WORK_DIR}/${FILE_NAME})
# When the file is expected to be absent, invert the pass/fail meaning of 'ls'.
if (NOT SHOULD_SUCCEED)
set_property(TEST ${TESTNAME} PROPERTY WILL_FAIL TRUE)
endif()
# Run only after the test that is supposed to produce (or not produce) the file.
set_tests_properties(${TESTNAME} PROPERTIES DEPENDS ${TEST_DEP_IN})
endif()
endfunction()

View File

@ -3,17 +3,8 @@
int main(int argc, char **argv)
{
// Unfortunately this check doesn't work for compilers <=v7.0 because _GLIBCXX_RELEASE appeared in the GCC 7.1 release.
// It is kept here as an example for the future.
#if ( defined(__INTEL_COMPILER) && ( _GLIBCXX_RELEASE < 7 ) )
#error You are using an Intel compiler. They obtain libstdc++ from a GNU compiler installation. For Intel compilers, you must use a GNU version >= 7. Found version <7.
#endif
// libstdc++ from GCC 8 is bad for Intel 19 in both C++14 and C++17
#if ( ( __INTEL_COMPILER == 1900 ) && ( _GLIBCXX_RELEASE > 7 ) )
#error You are using the Intel compiler v19 which obtains libstdc++ from a GNU compiler installation. You must use GNU version 7 with this Intel compiler. Found version >7. Alternatively (preferred route), use a more recent Intel compiler.
#endif
#if ( ( __INTEL_COMPILER == 1910 ) && ( _GLIBCXX_RELEASE > 9 ) )
#error You are using the Intel compiler v19.1 ("20") which obtains libstdc++ from a GNU compiler installation. Due to incompatibilities, you must use a GNU version <= 9 with this Intel compiler version. Found version >9.
#if ( ( __INTEL_COMPILER == 1910 ) && ( _GLIBCXX_RELEASE < 9 || _GLIBCXX_RELEASE > 9 ) )
#error You are using the Intel compiler v19.1 ("20") which obtains libstdc++ from a GCC installation. Due to incompatibilities, you must use a GCC version 9 with this Intel compiler version. Check with 'icpc -v'.
#endif
return 0;
}

View File

@ -14,12 +14,22 @@ export CRAYPE_LINK_TYPE=dynamic
TYPE=Release
Compiler=Intel
if [[ $# -eq 0 ]]; then
source_folder=`pwd`
elif [[ $# -eq 1 ]]; then
source_folder=$1
else
source_folder=$1
install_folder=$2
fi
CURRENT_FOLDER=`pwd`
for name in real real_MP cplx cplx_MP
do
CMAKE_FLAGS="-D CMAKE_SYSTEM_NAME=CrayLinuxEnvironment -D CMAKE_BUILD_TYPE=$TYPE -D MPIEXEC_EXECUTABLE=/bin/sh -D MPIEXEC_NUMPROC_FLAG=$CURRENT_FOLDER/tests/scripts/aprunhelper.sh"
CMAKE_FLAGS="-D CMAKE_SYSTEM_NAME=CrayLinuxEnvironment -D CMAKE_BUILD_TYPE=$TYPE -D MPIEXEC_EXECUTABLE=/bin/sh -D MPIEXEC_NUMPROC_FLAG=$source_folder/tests/scripts/aprunhelper.sh"
if [[ $name == *"cplx"* ]]; then
CMAKE_FLAGS="$CMAKE_FLAGS -D QMC_COMPLEX=1"
@ -30,6 +40,11 @@ if [[ $name == *"_MP"* ]]; then
fi
folder=build_KNL_${Compiler}_${name}
if [[ -v install_folder ]]; then
CMAKE_FLAGS="$CMAKE_FLAGS -DCMAKE_INSTALL_PREFIX=$install_folder/$folder"
fi
echo "**********************************"
echo "$folder"
echo "$CMAKE_FLAGS"
@ -37,9 +52,13 @@ echo "**********************************"
mkdir $folder
cd $folder
if [ ! -f CMakeCache.txt ] ; then
cmake $CMAKE_FLAGS ..
cmake $CMAKE_FLAGS $source_folder
fi
if [[ -v install_folder ]]; then
make -j16 install && chmod -R -w $install_folder/$folder
else
make -j16
fi
make -j32
cd ..
echo

View File

@ -1,9 +1,11 @@
#!/bin/bash
echo "----------------------- WARNING ------------------------------------"
echo "This is **not** production ready and intended for development only!!"
echo "Use config/build_olcf_summit.sh for production on Summit."
echo "----------------------- WARNING ------------------------------------"
# This recipe is intended for OLCF Summit https://www.olcf.ornl.gov/summit/
# It builds all the variants of QMCPACK in the current directory
# last revision: Aug 29th 2022
#
# How to invoke this script?
# build_olcf_summit_Clang.sh # build all the variants assuming the current directory is the source directory.
# build_olcf_summit_Clang.sh <source_dir> # build all the variants with a given source directory <source_dir>
echo "Purging current module set"
module purge
@ -25,7 +27,7 @@ if [[ ! -d /gpfs/alpine/mat151/world-shared/opt/modules ]] ; then
exit 1
fi
module use /gpfs/alpine/mat151/world-shared/opt/modules
module load llvm/main-20220317-cuda11.0
module load llvm/release-15.0.0-cuda11.0
TYPE=Release
Compiler=Clang
@ -58,11 +60,11 @@ if [[ $name == *"_MP"* ]]; then
fi
if [[ $name == *"offload"* ]]; then
CMAKE_FLAGS="$CMAKE_FLAGS -DENABLE_OFFLOAD=ON -DUSE_OBJECT_TARGET=ON -DOFFLOAD_ARCH=sm_70"
CMAKE_FLAGS="$CMAKE_FLAGS -DENABLE_OFFLOAD=ON -DOFFLOAD_ARCH=sm_70"
fi
if [[ $name == *"cuda"* ]]; then
CMAKE_FLAGS="$CMAKE_FLAGS -DENABLE_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=70 -DCMAKE_CUDA_HOST_COMPILER=`which g++`"
CMAKE_FLAGS="$CMAKE_FLAGS -DENABLE_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=70"
fi
folder=build_summit_${Compiler}_${name}

View File

@ -415,11 +415,11 @@ and is not suitable for production. Additional implementation in QMCPACK as
well as improvements in open-source and vendor compilers is required for production status
to be reached. The following compilers have been verified:
- LLVM Clang 14. Support NVIDIA GPUs.
- LLVM Clang 15. Support NVIDIA GPUs.
::
-D ENABLE_OFFLOAD=ON -D USE_OBJECT_TARGET=ON
-D ENABLE_OFFLOAD=ON
Clang and its downstream compilers support two extra options
@ -452,7 +452,7 @@ For example, using Clang 14 on Summit.
::
-D ENABLE_OFFLOAD=ON -D USE_OBJECT_TARGET=ON -D ENABLE_CUDA=ON -D CMAKE_CUDA_ARCHITECTURES=70
-D ENABLE_OFFLOAD=ON -D ENABLE_CUDA=ON -D CMAKE_CUDA_ARCHITECTURES=70
Similarly, HIP features can be enabled in conjunction with the offload code path to improve performance on AMD GPUs.

View File

@ -545,14 +545,14 @@ The cost function consists of three components: energy, unreweighted variance, a
<cost name="unreweightedvariance"> 0.00 </cost>
<cost name="reweightedvariance"> 0.05 </cost>
Varational parameter selection
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The predominant way of selecting varational paramemters is via ``<wavefunction>`` input.
Variational parameter selection
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The predominant way of selecting variational parameters is via ``<wavefunction>`` input.
``<coefficients>`` entries support ``optimize="yes"/"no"`` to enable/disable variational parameters in the wavefunction optimization.
The secondary way of selecting varational paramemters is via ``variational_subset`` parameter in the ``<qmc>`` driver input.
The secondary way of selecting variational parameters is via ``variational_subset`` parameter in the ``<qmc>`` driver input.
It allows controlling optimization granularity at each optimization step.
If ``variational_subset`` is not provided or empty, all the varational paramemters are selected.
If variational paramemters are set as not optimizable in the predominant way, the secondary way won't be able to set them optimizable even they are selected.
If ``variational_subset`` is not provided or empty, all the variational parameters are selected.
If variational parameters are set as not optimizable in the predominant way, the secondary way won't be able to set them optimizable even if they are selected.
The following example shows optimizing subsets of parameters in stages in a single QMCPACK run.
@ -570,6 +570,28 @@ The following example shows optimizing subsets of parameters in stages in a sing
<parameter name="variational_subset"> uu ud eH CI </parameter>
</qmc>
Variational parameter storage
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
After each optimization step the new wavefunction is stored in a file with an ``.opt.xml`` suffix.
This new wavefunction includes the updated variational parameters.
Writing a new XML wavefunction becomes more complicated if parameters are stored elsewhere (e.g. multideterminant coefficients in an HDF file) and has problems scaling with the number of parameters.
To address these issues the variational parameters are now written to an HDF file.
The new "VP file" has the suffix ``.vp.h5`` and is written in conjunction with the ``.opt.xml`` file.
The wavefunction file connects to the VP file with a tag (``override_variational_parameters``) in the ``.opt.xml`` file that points to the ``.vp.h5`` file.
Should it be necessary to recover the previous behavior without the VP file, this tag can be turned off with an ``output_vp_override`` parameter in the optimizer input block:
``<parameter name="output_vp_override">no</parameter>``
Both schemes for storing variational parameters coexist. Two important points about the VP file:
* The values of the variational parameters in the VP file take precedence over the values in the XML wavefunction.
* When copying an optimized wavefunction, the ``.vp.h5`` file needs to be copied as well.
For users that want to inspect or modify the VP file,
the He_param test (in ``tests/molecules/He_param``) contains a python script (``convert_vp_format.py``) to read and write the VP file. The script converts to and from a simple text representation of the parameters.
Optimizers
~~~~~~~~~~
@ -1252,7 +1274,7 @@ Parameter gradients
~~~~~~~~~~~~~~~~~~~
The gradients of the energy with respect to the variational parameters can be checked and optionally written to a file.
The check compares the analytic derivatives with a finite difference approximation.
These are activated by giving a ``gradient_test`` method in and ``optimize`` block, as follows:
These are activated by giving a ``gradient_test`` method in an ``optimize`` block, as follows:
::
@ -1272,6 +1294,8 @@ It contains one line per loop iteration, to allow using existing tools to comput
+=======================+==============+=============+=============+============================================+
| ``output_param_file`` | text | yes, no | no | Output parameter gradients to a file |
+-----------------------+--------------+-------------+-------------+--------------------------------------------+
| ``finite_diff_delta`` | double | :math:`> 0` | 1e-5 | Finite difference delta |
+-----------------------+--------------+-------------+-------------+--------------------------------------------+
The input would look like the following:
@ -1284,6 +1308,21 @@ The input would look like the following:
... rest of optimizer input ...
The output has columns for the parameter name, value, analytic gradient, numeric gradient, and relative difference (in percent). Following the relative difference, there may be exclamation marks which highlight large differences that likely indicate a problem.
Sample output looks like:
::
Param_Name Value Numeric Analytic Percent
updet_orb_rot_0000_0002 0.000000e+00 -1.8622037512e-02 4.6904958207e-02 3.52e+02 !!!
updet_orb_rot_0001_0002 0.000000e+00 1.6733860519e-03 3.9023863136e-03 -1.33e+02 !!!
downdet_orb_rot_0000_0002 0.000000e+00 -9.3267917833e-03 -8.0747281231e-03 1.34e+01 !!!
downdet_orb_rot_0001_0002 0.000000e+00 -4.3276838557e-03 2.6684235669e-02 7.17e+02 !!!
uu_0 0.000000e+00 -1.2724910770e-02 -1.2724906671e-02 3.22e-05
uu_1 0.000000e+00 2.0305884219e-02 2.0305883999e-02 1.08e-06
uu_2 0.000000e+00 -1.1644597731e-03 -1.1644591818e-03 5.08e-05
Output of intermediate values
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

View File

@ -307,7 +307,7 @@ class Qmcpack(Simulation):
#end for
return jd
#end def process_jastrow
if wavefunction==None:
if wavefunction is None:
qs = input.get('qmcsystem')
qs.wavefunction = optwf.copy()
else:

View File

@ -1887,7 +1887,7 @@ class wavefunction(QIxml):
attributes = ['name','target','id','ref']+['info','type']
# afqmc
parameters = ['filetype','filename','cutoff']
elements = ['sposet_builder','determinantset','jastrow']
elements = ['sposet_builder','determinantset','jastrow','override_variational_parameters']
identifier = 'name','id'
#end class wavefunction
@ -2079,6 +2079,11 @@ jastrow = QIxmlFactory(
typekey = 'type'
)
class override_variational_parameters(QIxml):
attributes = ['href']
#end class override_variational_parameters
class hamiltonian(QIxml):
# rsqmc afqmc
@ -2667,7 +2672,7 @@ classes = [ #standard classes
header,local,force,forwardwalking,observable,record,rmc,pressure,dmccorrection,
nofk,mpc_est,flux,distancetable,cpp,element,spline,setparams,
backflow,transformation,cubicgrid,molecular_orbital_builder,cmc,sk,skall,gofr,
host,date,user,rpa_jastrow,momentum,
host,date,user,rpa_jastrow,momentum,override_variational_parameters,
# afqmc classes
afqmcinfo,walkerset,propagator,execute,back_propagation,onerdm
]

View File

@ -241,7 +241,11 @@ bool DriverFactory::executeAFQMCDriver(std::string title, int m_series, xmlNodeP
else
{
auto initial_guess = WfnFac.getInitialGuess(wfn_name);
wset.resize(nWalkers, initial_guess[0], initial_guess[1]({0, NMO}, {0, NAEB}));
wset.resize(
nWalkers,
initial_guess[0],
initial_guess[1]({0, NMO}, {0, NAEB})
);
wfn0.Energy(wset);
app_log() << " Energy of starting determinant: \n"
<< " - Total energy : " << std::setprecision(12) << wset[0].energy() << "\n"

View File

@ -188,14 +188,14 @@ public:
int nx((walker_type == COLLINEAR) ? 2 : 1);
// 1. check structures
if (Refs.size(0) != wset.size() || Refs.size(1) != nrefs || Refs.size(2) != nrow * ncol)
if (std::get<0>(Refs.sizes()) != wset.size() || std::get<1>(Refs.sizes()) != nrefs || std::get<2>(Refs.sizes()) != nrow * ncol)
Refs = mpi3CTensor({wset.size(), nrefs, nrow * ncol}, Refs.get_allocator());
DeviceBufferManager buffer_manager;
StaticMatrix detR({wset.size(), nrefs * nx},
buffer_manager.get_generator().template get_allocator<ComplexType>());
int n0, n1;
std::tie(n0, n1) = FairDivideBoundary(TG.getLocalTGRank(), int(Refs.size(2)), TG.getNCoresPerTG());
std::tie(n0, n1) = FairDivideBoundary(TG.getLocalTGRank(), int(std::get<2>(Refs.sizes())), TG.getNCoresPerTG());
boost::multi::array_ref<ComplexType, 3> Refs_(to_address(Refs.origin()), Refs.extensions());
// 2. setup back propagated references

View File

@ -58,11 +58,11 @@ public:
{
ScopedTimer local_timer(AFQMCTimers[energy_timer]);
size_t nwalk = wset.size();
if (eloc.size(0) != nwalk || eloc.size(1) != 3)
if (std::get<0>(eloc.sizes()) != nwalk || std::get<1>(eloc.sizes()) != 3)
eloc.reextent({static_cast<boost::multi::size_t>(nwalk), 3});
if (ovlp.size(0) != nwalk)
if (std::get<0>(ovlp.sizes()) != nwalk)
ovlp.reextent(iextensions<1u>(nwalk));
if (wprop.size(0) != 4 || wprop.size(1) != nwalk)
if (std::get<0>(wprop.sizes()) != 4 || std::get<1>(wprop.sizes()) != nwalk)
wprop.reextent({4, static_cast<boost::multi::size_t>(nwalk)});
ComplexType dum, et;

View File

@ -193,7 +193,7 @@ public:
APP_ABORT("Runtime Error: iav out of range in full1rdm::accumulate. \n\n\n");
int nw(wset.size());
int nrefs(Refs.size(1));
int nrefs(std::get<1>(Refs.sizes()));
double LogOverlapFactor(wset.getLogOverlapFactor());
LocalTGBufferManager shm_buffer_manager;
StaticSHM4Tensor G4D({nw, nspins, std::get<0>(Gdims), std::get<1>(Gdims)},

View File

@ -92,9 +92,9 @@ public:
wset.getProperty(WEIGHT, wgt);
int nx((wset.getWalkerType() == COLLINEAR) ? 2 : 1);
if (wDMsum.size(0) != wset.size() || wDMsum.size(2) != nx)
if (std::get<0>(wDMsum.sizes()) != wset.size() || std::get<1>(wDMsum.sizes()) != nx)
wDMsum.reextent({wset.size(), nx});
if (wOvlp.size(0) != wset.size() || wOvlp.size(2) != nx)
if (std::get<0>(wOvlp.sizes()) != wset.size() || std::get<1>(wOvlp.sizes()) != nx)
wOvlp.reextent({wset.size(), nx});
if (!importanceSampling)
@ -126,8 +126,8 @@ public:
denom_average[0] /= block_size;
dump.push("Mixed");
std::string padded_iblock = std::string(n_zero - std::to_string(iblock).length(), '0') + std::to_string(iblock);
boost::multi::array_ref<ComplexType, 1> wOvlp_(wOvlp.origin(), {wOvlp.size(0) * wOvlp.size(1)});
boost::multi::array_ref<ComplexType, 1> wDMsum_(wDMsum.origin(), {wDMsum.size(0) * wDMsum.size(1)});
boost::multi::array_ref<ComplexType, 1> wOvlp_(wOvlp.origin(), {std::get<0>(wOvlp.sizes()) * std::get<1>(wOvlp.sizes())});
boost::multi::array_ref<ComplexType, 1> wDMsum_(wDMsum.origin(), {std::get<0>(wDMsum.sizes()) * std::get<1>(wDMsum.sizes())});
dump.write(DMAverage, "one_rdm_" + padded_iblock);
dump.write(denom_average, "one_rdm_denom_" + padded_iblock);
dump.write(wOvlp_, "one_rdm_walker_overlaps_" + padded_iblock);

View File

@ -241,11 +241,11 @@ public:
using std::copy_n;
using std::fill_n;
// assumes G[nwalk][spin][M][M]
int nw(G.size(0));
assert(G.size(0) == wgt.size(0));
assert(wgt.size(0) == nw);
assert(Xw.size(0) == nw);
assert(ovlp.size(0) >= nw);
int nw(G.size());
assert(G.size() == wgt.size());
assert(wgt.size() == nw);
assert(Xw.size() == nw);
assert(ovlp.size() >= nw);
assert(G.num_elements() == G_host.num_elements());
assert(G.extensions() == G_host.extensions());
@ -258,27 +258,27 @@ public:
// check structure dimensions
if (iref == 0)
{
if (denom.size(0) != nw)
if (denom.size() != nw)
{
denom = mpi3CVector(iextensions<1u>{nw}, shared_allocator<ComplexType>{TG.TG_local()});
}
if (DMWork1D.size(0) != nw || DMWork1D.size(1) != 3 || DMWork1D.size(2) != nsites)
if (std::get<0>(DMWork1D.sizes()) != nw || std::get<1>(DMWork1D.sizes()) != 3 || std::get<2>(DMWork1D.sizes()) != nsites)
{
DMWork1D = mpi3CTensor({nw, 3, nsites}, shared_allocator<ComplexType>{TG.TG_local()});
}
if (DMWork2D.size(0) != nw || DMWork2D.size(1) != 3 || DMWork2D.size(2) != ns2)
if (std::get<0>(DMWork2D.sizes()) != nw || std::get<1>(DMWork2D.sizes()) != 3 || std::get<2>(DMWork2D.sizes()) != ns2)
{
DMWork2D = mpi3CTensor({nw, 3, ns2}, shared_allocator<ComplexType>{TG.TG_local()});
}
if (NwIJ.size(0) != nsp || NwIJ.size(1) != nw || NwIJ.size(2) != nsites || NwIJ.size(3) != nsites)
if (std::get<0>(NwIJ.sizes()) != nsp || std::get<1>(NwIJ.sizes()) != nw || std::get<2>(NwIJ.sizes()) != nsites || std::get<3>(NwIJ.sizes()) != nsites)
{
NwIJ = mpi3C4Tensor({nsp, nw, nsites, nsites}, shared_allocator<ComplexType>{TG.TG_local()});
}
if (NwI.size(0) != nsp || NwI.size(1) != nw || NwI.size(2) != nsites)
if (std::get<0>(NwI.sizes()) != nsp || std::get<1>(NwI.sizes()) != nw || std::get<2>(NwI.sizes()) != nsites)
{
NwI = mpi3CTensor({nsp, nw, nsites}, shared_allocator<ComplexType>{TG.TG_local()});
}
if (shapes.size(0) < 2 * nw * nsites * nsites)
if (shapes.size() < 2 * nw * nsites * nsites)
shapes = IVector(iextensions<1u>{2 * nw * nsites * nsites}, IAllocator{});
fill_n(denom.origin(), denom.num_elements(), ComplexType(0.0, 0.0));
fill_n(DMWork1D.origin(), DMWork1D.num_elements(), ComplexType(0.0, 0.0));
@ -286,12 +286,12 @@ public:
}
else
{
if (denom.size(0) != nw || DMWork1D.size(0) != nw || DMWork1D.size(1) != 3 || DMWork1D.size(2) != nsites ||
DMWork2D.size(0) != nw || DMWork2D.size(1) != 3 || DMWork2D.size(2) != ns2 || NwI.size(0) != nsp ||
NwI.size(1) != nw || NwI.size(2) != nsites || NwIJ.size(0) != nsp || NwIJ.size(1) != nw ||
NwIJ.size(2) != nsites || NwIJ.size(3) != nsites || DMAverage1D.size(0) != nave || DMAverage1D.size(1) != 3 ||
DMAverage1D.size(2) != nsites || DMAverage2D.size(0) != nave || DMAverage2D.size(1) != 3 ||
DMAverage2D.size(2) != ns2)
if (std::get<0>(denom.sizes()) != nw || std::get<0>(DMWork1D.sizes()) != nw || std::get<1>(DMWork1D.sizes()) != 3 || std::get<2>(DMWork1D.sizes()) != nsites ||
std::get<0>(DMWork2D.sizes()) != nw || std::get<1>(DMWork2D.sizes()) != 3 || std::get<2>(DMWork2D.sizes()) != ns2 || std::get<0>(NwI.sizes()) != nsp ||
std::get<1>(NwI.sizes()) != nw || std::get<2>(NwI.sizes()) != nsites || std::get<0>(NwIJ.sizes()) != nsp || std::get<1>(NwIJ.sizes()) != nw ||
std::get<2>(NwIJ.sizes()) != nsites || std::get<3>(NwIJ.sizes()) != nsites || std::get<0>(DMAverage1D.sizes()) != nave || std::get<1>(DMAverage1D.sizes()) != 3 ||
std::get<2>(DMAverage1D.sizes()) != nsites || std::get<0>(DMAverage2D.sizes()) != nave || std::get<1>(DMAverage2D.sizes()) != 3 ||
std::get<2>(DMAverage2D.sizes()) != ns2)
APP_ABORT(" Error: Invalid state in accumulate_reference. \n\n\n");
}
@ -484,7 +484,7 @@ public:
template<class HostCVec>
void accumulate_block(int iav, HostCVec&& wgt, bool impsamp)
{
int nw(denom.size(0));
int nw(denom.size());
TG.TG_local().barrier();
// this is meant to be small, so serializing
if (TG.TG_local().root())

View File

@ -125,22 +125,22 @@ public:
using std::copy_n;
using std::fill_n;
// assumes G[nwalk][spin][M][M]
int nw(G.size(0));
assert(G.size(0) == wgt.size(0));
assert(wgt.size(0) == nw);
assert(Xw.size(0) == nw);
assert(ovlp.size(0) >= nw);
int nw(G.size());
assert(G.size() == wgt.size());
assert(wgt.size() == nw);
assert(Xw.size() == nw);
assert(ovlp.size() >= nw);
assert(G.num_elements() == G_host.num_elements());
assert(G.extensions() == G_host.extensions());
// check structure dimensions
if (iref == 0)
{
if (denom.size(0) != nw)
if (denom.size() != nw)
{
denom = mpi3CVector(iextensions<1u>{nw}, shared_allocator<ComplexType>{TG.TG_local()});
}
if (DMWork.size(0) != nw || DMWork.size(1) != dm_size)
if (std::get<0>(DMWork.sizes()) != nw || std::get<1>(DMWork.sizes()) != dm_size)
{
DMWork = mpi3CMatrix({nw, dm_size}, shared_allocator<ComplexType>{TG.TG_local()});
}
@ -149,8 +149,8 @@ public:
}
else
{
if (denom.size(0) != nw || DMWork.size(0) != nw || DMWork.size(1) != dm_size || DMAverage.size(0) != nave ||
DMAverage.size(1) != dm_size)
if (std::get<0>(denom.sizes()) != nw || std::get<0>(DMWork.sizes()) != nw || std::get<1>(DMWork.sizes()) != dm_size || std::get<0>(DMAverage.sizes()) != nave ||
std::get<1>(DMAverage.sizes()) != dm_size)
APP_ABORT(" Error: Invalid state in accumulate_reference. \n\n\n");
}
@ -211,7 +211,7 @@ public:
template<class HostCVec>
void accumulate_block(int iav, HostCVec&& wgt, bool impsamp)
{
int nw(denom.size(0));
int nw(denom.size());
int i0, iN;
std::tie(i0, iN) = FairDivideBoundary(TG.TG_local().rank(), dm_size, TG.TG_local().size());

View File

@ -131,9 +131,9 @@ public:
stdCMatrix R;
if (!dump.readEntry(R, "RotationMatrix"))
APP_ABORT("Error reading RotationMatrix.\n");
if (R.size(1) != NMO)
if (std::get<1>(R.sizes()) != NMO)
APP_ABORT("Error Wrong dimensions in RotationMatrix.\n");
dim[0] = R.size(0);
dim[0] = R.size();
dim[1] = 0;
// conjugate rotation matrix
std::transform(R.origin(), R.origin() + R.num_elements(), R.origin(),
@ -143,9 +143,9 @@ public:
{
if (!dump.readEntry(I, "Indices"))
APP_ABORT("Error reading Indices.\n");
if (I.size(1) != 2)
if (std::get<1>(I.sizes()) != 2)
APP_ABORT("Error Wrong dimensions in Indices.\n");
dim[1] = I.size(0);
dim[1] = std::get<0>(I.sizes());
}
TG.Node().broadcast_n(dim, 2, 0);
XRot = sharedCMatrix({dim[0], NMO}, make_node_allocator<ComplexType>(TG));
@ -179,9 +179,9 @@ public:
TG.Node().barrier();
if (print_from_list)
dm_size = index_list.size(0);
dm_size = index_list.size();
else
dm_size = XRot.size(0) * XRot.size(0);
dm_size = XRot.size() * XRot.size();
}
else
{
@ -236,22 +236,22 @@ public:
static_assert(std::decay<MatG_host>::type::dimensionality == 4, "Wrong dimensionality");
using std::fill_n;
// assumes G[nwalk][spin][M][M]
int nw(G.size(0));
assert(G.size(0) == wgt.size(0));
assert(wgt.size(0) == nw);
assert(Xw.size(0) == nw);
assert(ovlp.size(0) >= nw);
int nw(G.size());
assert(G.size() == wgt.size());
assert(wgt.size() == nw);
assert(Xw.size() == nw);
assert(ovlp.size() >= nw);
assert(G.num_elements() == G_host.num_elements());
assert(G.extensions() == G_host.extensions());
// check structure dimensions
if (iref == 0)
{
if (denom.size(0) != nw)
if (denom.size() != nw)
{
denom = mpi3CVector(iextensions<1u>{nw}, shared_allocator<ComplexType>{TG.TG_local()});
}
if (DMWork.size(0) != nw || DMWork.size(1) != dm_size)
if (std::get<0>(DMWork.sizes()) != nw || std::get<1>(DMWork.sizes()) != dm_size)
{
DMWork = mpi3CMatrix({nw, dm_size}, shared_allocator<ComplexType>{TG.TG_local()});
}
@ -260,8 +260,8 @@ public:
}
else
{
if (denom.size(0) != nw || DMWork.size(0) != nw || DMWork.size(1) != dm_size || DMAverage.size(0) != nave ||
DMAverage.size(1) != dm_size)
if (std::get<0>(denom.sizes()) != nw || std::get<0>(DMWork.sizes()) != nw || std::get<1>(DMWork.sizes()) != dm_size || std::get<0>(DMAverage.sizes()) != nave ||
std::get<1>(DMAverage.sizes()) != dm_size)
APP_ABORT(" Error: Invalid state in accumulate_reference. \n\n\n");
}
@ -275,7 +275,7 @@ public:
template<class HostCVec>
void accumulate_block(int iav, HostCVec&& wgt, bool impsamp)
{
int nw(denom.size(0));
int nw(denom.size());
int i0, iN;
std::tie(i0, iN) = FairDivideBoundary(TG.TG_local().rank(), dm_size, TG.TG_local().size());
@ -403,7 +403,7 @@ private:
template<class MatG, class CVec>
void acc_no_rotation(MatG&& G, CVec&& Xw)
{
int nw(G.size(0));
int nw(G.size());
assert(G[0].num_elements() == dm_size);
int i0, iN;
@ -427,19 +427,19 @@ private:
template<class MatG, class CVec>
void acc_with_rotation(MatG&& G, CVec&& Xw)
{
int nw(G.size(0));
assert(G.size(2) == G.size(3));
assert(G.size(2) == XRot.size(1));
int nw(G.size());
assert(std::get<2>(G.sizes()) == std::get<3>(G.sizes()));
assert(std::get<2>(G.sizes()) == std::get<1>(XRot.sizes()));
if (walker_type == NONCOLLINEAR)
APP_ABORT("Error: Not yet implemented: acc_with_rotation && noncollinear.\n");
int i0, iN;
std::tie(i0, iN) = FairDivideBoundary(TG.TG_local().rank(), int(XRot.size(0)), TG.TG_local().size());
std::tie(i0, iN) = FairDivideBoundary(TG.TG_local().rank(), int(XRot.size()), TG.TG_local().size());
// can batch in the future if too slow
// Grot = Xc * G * H(Xc)
int nX = XRot.size(0);
int nX = XRot.size();
int npts = (iN - i0) * nX;
DeviceBufferManager buffer_manager;
StaticMatrix T1({(iN - i0), NMO}, buffer_manager.get_generator().template get_allocator<ComplexType>());
@ -451,7 +451,7 @@ private:
int cnt = 0;
for (int iw = 0; iw < nw; iw++)
{
if (i0 == iN || i0 == XRot.size(0))
if (i0 == iN || i0 == XRot.size())
break;
if (TG.TG_local().root())
denom[iw] += Xw[iw];
@ -460,7 +460,7 @@ private:
copy_n(T2.origin(), T2.num_elements(), Grot.origin());
if (print_from_list)
{
for (int i = 0; i < index_list.size(0); i++)
for (int i = 0; i < index_list.size(); i++)
{
if (index_list[i][0] >= i0 && index_list[i][0] < iN)
{
@ -478,7 +478,7 @@ private:
copy_n(T2.origin(), T2.num_elements(), Grot.origin());
if (print_from_list)
{
for (int i = 0, ie = index_list.size(0); i < ie; i++)
for (int i = 0, ie = index_list.size(); i < ie; i++)
{
if (index_list[i][0] >= i0 && index_list[i][0] < iN)
{

View File

@ -124,9 +124,9 @@ public:
stdCMatrix R;
if (!dump.readEntry(R, "RotationMatrix"))
APP_ABORT("Error reading RotationMatrix.\n");
if (R.size(1) != NMO)
if (std::get<1>(R.sizes()) != NMO)
APP_ABORT("Error Wrong dimensions in RotationMatrix.\n");
dim[0] = R.size(0);
dim[0] = R.size();
dim[1] = 0;
// conjugate rotation matrix
std::transform(R.origin(), R.origin() + R.num_elements(), R.origin(),
@ -149,7 +149,7 @@ public:
}
TG.Node().barrier();
dm_size = XRot.size(0) * XRot.size(0) * XRot.size(0) * XRot.size(0);
dm_size = XRot.size() * XRot.size() * XRot.size() * XRot.size();
}
else
{
@ -188,22 +188,22 @@ public:
static_assert(std::decay<MatG_host>::type::dimensionality == 4, "Wrong dimensionality");
using std::fill_n;
// assumes G[nwalk][spin][M][M]
int nw(G.size(0));
assert(G.size(0) == wgt.size(0));
assert(wgt.size(0) == nw);
assert(Xw.size(0) == nw);
assert(ovlp.size(0) >= nw);
int nw(G.size());
assert(G.size() == wgt.size());
assert(wgt.size() == nw);
assert(Xw.size() == nw);
assert(ovlp.size() >= nw);
assert(G.num_elements() == G_host.num_elements());
assert(G.extensions() == G_host.extensions());
// check structure dimensions
if (iref == 0)
{
if (denom.size(0) != nw)
if (denom.size() != nw)
{
denom = mpi3CVector(iextensions<1u>{nw}, shared_allocator<ComplexType>{TG.TG_local()});
}
if (DMWork.size(0) != nw || DMWork.size(1) != dm_size)
if (std::get<0>(DMWork.sizes()) != nw || std::get<1>(DMWork.sizes()) != dm_size)
{
DMWork = mpi3CMatrix({nw, dm_size}, shared_allocator<ComplexType>{TG.TG_local()});
}
@ -212,8 +212,8 @@ public:
}
else
{
if (denom.size(0) != nw || DMWork.size(0) != nw || DMWork.size(1) != dm_size || DMAverage.size(0) != nave ||
DMAverage.size(1) != dm_size)
if (std::get<0>(denom.sizes()) != nw || std::get<0>(DMWork.sizes()) != nw || std::get<1>(DMWork.sizes()) != dm_size || std::get<0>(DMAverage.sizes()) != nave ||
std::get<1>(DMAverage.sizes()) != dm_size)
APP_ABORT(" Error: Invalid state in accumulate_reference. \n\n\n");
}
@ -227,7 +227,7 @@ public:
template<class HostCVec>
void accumulate_block(int iav, HostCVec&& wgt, bool impsamp)
{
int nw(denom.size(0));
int nw(denom.size());
int i0, iN;
std::tie(i0, iN) = FairDivideBoundary(TG.TG_local().rank(), dm_size, TG.TG_local().size());
@ -306,7 +306,7 @@ private:
void acc_no_rotation(MatG&& G, CVec&& Xw)
{
// doing this 1 walker at a time and not worrying about speed
int nw(G.size(0));
int nw(G.size());
int i0, iN;
std::tie(i0, iN) = FairDivideBoundary(TG.TG_local().rank(), NMO * NMO, TG.TG_local().size());

View File

@ -124,11 +124,11 @@ public:
static_assert(std::decay<MatG_host>::type::dimensionality == 4, "Wrong dimensionality");
using std::fill_n;
// assumes G[nwalk][spin][M][M]
int nw(G.size(0));
assert(G.size(0) == wgt.size(0));
assert(wgt.size(0) == nw);
assert(Xw.size(0) == nw);
assert(ovlp.size(0) >= nw);
int nw(G.size());
assert(G.size() == wgt.size());
assert(wgt.size() == nw);
assert(Xw.size() == nw);
assert(ovlp.size() >= nw);
assert(G.num_elements() == G_host.num_elements());
assert(G.extensions() == G_host.extensions());
assert(G[0].num_elements() == dm_size);
@ -136,11 +136,11 @@ public:
// check structure dimensions
if (iref == 0)
{
if (denom.size(0) != nw)
if (denom.size() != nw)
{
denom = mpi3CVector(iextensions<1u>{nw}, shared_allocator<ComplexType>{TG.TG_local()});
}
if (DMWork.size(0) != 3 || DMWork.size(1) != nw || DMWork.size(2) != dm_size)
if (std::get<0>(DMWork.sizes()) != 3 || std::get<1>(DMWork.sizes()) != nw || std::get<2>(DMWork.sizes()) != dm_size)
{
DMWork = mpi3CTensor({3, nw, dm_size}, shared_allocator<ComplexType>{TG.TG_local()});
}
@ -149,8 +149,8 @@ public:
}
else
{
if (denom.size(0) != nw || DMWork.size(0) != 2 || DMWork.size(1) != nw || DMWork.size(2) != dm_size ||
DMAverage.size(0) != 2 || DMAverage.size(1) != nave || DMAverage.size(2) != dm_size)
if (std::get<0>(denom.sizes()) != nw || std::get<0>(DMWork.sizes()) != 2 || std::get<1>(DMWork.sizes()) != nw || std::get<2>(DMWork.sizes()) != dm_size ||
std::get<0>(DMAverage.sizes()) != 2 || std::get<1>(DMAverage.sizes()) != nave || std::get<2>(DMAverage.sizes()) != dm_size)
APP_ABORT(" Error: Invalid state in accumulate_reference. \n\n\n");
}
@ -179,7 +179,7 @@ public:
template<class HostCVec>
void accumulate_block(int iav, HostCVec&& wgt, bool impsamp)
{
int nw(denom.size(0));
int nw(denom.size());
int i0, iN;
std::tie(i0, iN) = FairDivideBoundary(TG.TG_local().rank(), dm_size, TG.TG_local().size());

View File

@ -213,11 +213,11 @@ public:
using std::copy_n;
using std::fill_n;
// assumes G[nwalk][spin][M][M]
int nw(G.size(0));
assert(G.size(0) == wgt.size(0));
assert(wgt.size(0) == nw);
assert(Xw.size(0) == nw);
assert(ovlp.size(0) >= nw);
int nw(G.size());
assert(G.size() == wgt.size());
assert(wgt.size() == nw);
assert(Xw.size() == nw);
assert(ovlp.size() >= nw);
assert(G.num_elements() == G_host.num_elements());
assert(G.extensions() == G_host.extensions());
@ -230,11 +230,11 @@ public:
// check structure dimensions
if (iref == 0)
{
if (denom.size(0) != nw)
if (denom.size() != nw)
{
denom = mpi3CVector(iextensions<1u>{nw}, shared_allocator<ComplexType>{TG.TG_local()});
}
if (DMWork.size(0) != nw || DMWork.size(1) != dm_size)
if (std::get<0>(DMWork.sizes()) != nw || std::get<1>(DMWork.sizes()) != dm_size)
{
DMWork = mpi3CMatrix({nw, dm_size}, shared_allocator<ComplexType>{TG.TG_local()});
}
@ -243,8 +243,8 @@ public:
}
else
{
if (denom.size(0) != nw || DMWork.size(0) != nw || DMWork.size(1) != dm_size || DMAverage.size(0) != nave ||
DMAverage.size(1) != dm_size)
if (std::get<0>(denom.sizes()) != nw || std::get<0>(DMWork.sizes()) != nw || std::get<1>(DMWork.sizes()) != dm_size || std::get<0>(DMAverage.sizes()) != nave ||
std::get<1>(DMAverage.sizes()) != dm_size)
APP_ABORT(" Error: Invalid state in accumulate_reference. \n\n\n");
}
@ -326,7 +326,7 @@ public:
template<class HostCVec>
void accumulate_block(int iav, HostCVec&& wgt, bool impsamp)
{
int nw(denom.size(0));
int nw(denom.size());
int i0, iN;
std::tie(i0, iN) = FairDivideBoundary(TG.TG_local().rank(), dm_size, TG.TG_local().size());
TG.TG_local().barrier();

View File

@ -133,7 +133,7 @@ public:
app_error() << " Error in realspace_correlators: Problems reading orbital: 0 0" << std::endl;
APP_ABORT("");
}
npoints = orb.size(0);
npoints = orb.size();
if (npoints < 1)
{
app_error() << " Error in realspace_correlators: npoints < 1. " << std::endl;
@ -168,7 +168,7 @@ public:
app_error() << " Error in realspace_correlators: Problems reading orbital: " << k << " " << i << std::endl;
APP_ABORT("");
}
if (orb.size(0) != npoints)
if (orb.size() != npoints)
{
app_error() << " Error in realspace_correlators: Inconsistent orbital size: " << k << " " << i << std::endl;
APP_ABORT("");
@ -226,12 +226,12 @@ public:
using std::copy_n;
using std::fill_n;
// assumes G[nwalk][spin][M][M]
int nw(G.size(0));
int npts(Orbitals.size(1));
assert(G.size(0) == wgt.size(0));
assert(wgt.size(0) == nw);
assert(Xw.size(0) == nw);
assert(ovlp.size(0) >= nw);
int nw(G.size());
int npts(std::get<1>(Orbitals.sizes()));
assert(G.size() == wgt.size());
assert(wgt.size() == nw);
assert(Xw.size() == nw);
assert(ovlp.size() >= nw);
assert(G.num_elements() == G_host.num_elements());
assert(G.extensions() == G_host.extensions());
@ -244,15 +244,15 @@ public:
// check structure dimensions
if (iref == 0)
{
if (denom.size(0) != nw)
if (denom.size() != nw)
{
denom = mpi3CVector(iextensions<1u>{nw}, shared_allocator<ComplexType>{TG.TG_local()});
}
if (DMWork.size(0) != nw || DMWork.size(1) != 3 || DMWork.size(2) != dm_size)
if (std::get<0>(DMWork.sizes()) != nw || std::get<1>(DMWork.sizes()) != 3 || std::get<2>(DMWork.sizes()) != dm_size)
{
DMWork = mpi3CTensor({nw, 3, dm_size}, shared_allocator<ComplexType>{TG.TG_local()});
}
if (Gr_host.size(0) != nw || Gr_host.size(1) != nsp || Gr_host.size(2) != npts || Gr_host.size(3) != npts)
if (std::get<0>(Gr_host.sizes()) != nw || std::get<1>(Gr_host.sizes()) != nsp || std::get<2>(Gr_host.sizes()) != npts || std::get<3>(Gr_host.sizes()) != npts)
{
Gr_host = mpi3C4Tensor({nw, nsp, npts, npts}, shared_allocator<ComplexType>{TG.TG_local()});
}
@ -261,9 +261,9 @@ public:
}
else
{
if (denom.size(0) != nw || DMWork.size(0) != nw || DMWork.size(1) != 3 || DMWork.size(2) != dm_size ||
Gr_host.size(0) != nw || Gr_host.size(1) != nsp || Gr_host.size(2) != npts || Gr_host.size(3) != npts ||
DMAverage.size(0) != nave || DMAverage.size(1) != 3 || DMAverage.size(2) != dm_size)
if (std::get<0>(denom.sizes()) != nw || std::get<0>(DMWork.sizes()) != nw || std::get<1>(DMWork.sizes()) != 3 || std::get<2>(DMWork.sizes()) != dm_size ||
std::get<0>(Gr_host.sizes()) != nw || std::get<1>(Gr_host.sizes()) != nsp || std::get<2>(Gr_host.sizes()) != npts || std::get<3>(Gr_host.sizes()) != npts ||
std::get<0>(DMAverage.sizes()) != nave || std::get<1>(DMAverage.sizes()) != 3 || std::get<2>(DMAverage.sizes()) != dm_size)
APP_ABORT(" Error: Invalid state in accumulate_reference. \n\n\n");
}
@ -280,7 +280,7 @@ public:
// T1[iw][ispin][i][r] = sum_j G[iw][ispin][i][j] * Psi(j,r)
int i0, iN;
std::tie(i0, iN) = FairDivideBoundary(TG.TG_local().rank(), int(G2D.size(0)), TG.TG_local().size());
std::tie(i0, iN) = FairDivideBoundary(TG.TG_local().rank(), int(std::get<0>(G2D.sizes())), TG.TG_local().size());
ma::product(G2D.sliced(i0, iN), Orbitals, T.sliced(i0, iN));
TG.TG_local().barrier();
@ -386,7 +386,7 @@ public:
template<class HostCVec>
void accumulate_block(int iav, HostCVec&& wgt, bool impsamp)
{
int nw(denom.size(0));
int nw(denom.size());
TG.TG_local().barrier();
// this is meant to be small, so serializing
if (TG.TG_local().root())

View File

@ -146,9 +146,9 @@ void reduced_density_matrix(boost::mpi3::communicator& world)
WalkerSet wset(TG, doc3.getRoot(), InfoMap["info0"], &rng);
auto initial_guess = WfnFac.getInitialGuess(wfn_name);
REQUIRE(initial_guess.size(0) == 2);
REQUIRE(initial_guess.size(1) == NMO);
REQUIRE(initial_guess.size(2) == NAEA);
REQUIRE(std::get<0>(initial_guess.sizes()) == 2);
REQUIRE(std::get<1>(initial_guess.sizes()) == NMO);
REQUIRE(std::get<2>(initial_guess.sizes()) == NAEA);
wset.resize(nwalk, initial_guess[0], initial_guess[0]);
using EstimPtr = std::shared_ptr<EstimatorBase>;
std::vector<EstimPtr> estimators;

View File

@ -116,7 +116,7 @@ public:
vn0(std::move(vn0_)),
SM_TMats({1, 1}, shared_allocator<SPComplexType>{TG.TG_local()})
{
local_nCV = Likn.size(1);
local_nCV = std::get<1>(Likn.sizes());
TG.Node().barrier();
}
@ -129,7 +129,7 @@ public:
CMatrix getOneBodyPropagatorMatrix(TaskGroup_& TG, boost::multi::array<ComplexType, 1> const& vMF)
{
int NMO = hij.size(0);
int NMO = hij.size();
// in non-collinear case with SO, keep SO matrix here and add it
// for now, stay collinear
CMatrix H1({NMO, NMO});
@ -185,7 +185,7 @@ public:
bool addEJ = true,
bool addEXX = true)
{
assert(E.size(1) >= 3);
assert(std::get<1>(E.sizes()) >= 3);
assert(nd >= 0);
assert(nd < haj.size());
if (walker_type == COLLINEAR)
@ -193,24 +193,24 @@ public:
else
assert(nd < Lank.size());
int nwalk = Gc.size(0);
int nwalk = Gc.size();
int nspin = (walker_type == COLLINEAR ? 2 : 1);
int NMO = hij.size(0);
int NMO = hij.size();
int nel[2];
nel[0] = Lank[nspin * nd].size(0);
nel[1] = ((nspin == 2) ? Lank[nspin * nd + 1].size(0) : 0);
assert(Lank[nspin * nd].size(1) == local_nCV);
assert(Lank[nspin * nd].size(2) == NMO);
nel[0] = Lank[nspin * nd].size();
nel[1] = ((nspin == 2) ? Lank[nspin * nd + 1].size() : 0);
assert(std::get<1>(Lank[nspin * nd].sizes()) == local_nCV);
assert(std::get<2>(Lank[nspin * nd].sizes()) == NMO);
if (nspin == 2)
{
assert(Lank[nspin * nd + 1].size(1) == local_nCV);
assert(Lank[nspin * nd + 1].size(2) == NMO);
assert(std::get<1>(Lank[nspin * nd + 1].sizes()) == local_nCV);
assert(std::get<2>(Lank[nspin * nd + 1].sizes()) == NMO);
}
assert(Gc.num_elements() == nwalk * (nel[0] + nel[1]) * NMO);
int getKr = KEright != nullptr;
int getKl = KEleft != nullptr;
if (E.size(0) != nwalk || E.size(1) < 3)
if (std::get<0>(E.sizes()) != nwalk || std::get<1>(E.sizes()) < 3)
APP_ABORT(
" Error in AFQMC/HamiltonianOperations/Real3IndexFactorization::energy(...). Incorrect matrix dimensions \n");
@ -247,14 +247,14 @@ public:
Knc = local_nCV;
if (getKr)
{
assert(KEright->size(0) == nwalk && KEright->size(1) == local_nCV);
assert(KEright->stride(0) == KEright->size(1));
assert(std::get<0>(KEright->sizes()) == nwalk && std::get<1>(KEright->sizes()) == local_nCV);
assert(KEright->stride(0) == std::get<1>(KEright->sizes()));
}
#if defined(MIXED_PRECISION)
if (getKl)
{
assert(KEleft->size(0) == nwalk && KEleft->size(1) == local_nCV);
assert(KEleft->stride(0) == KEleft->size(1));
assert(std::get<0>(KEleft->sizes()) == nwalk && std::get<1>(KEleft->sizes()) == local_nCV);
assert(KEleft->stride(0) == std::get<1>(KEleft->sizes()));
}
#else
if (getKl)
@ -423,8 +423,8 @@ public:
{
using BType = typename std::decay<MatB>::type::element;
using AType = typename std::decay<MatA>::type::element;
boost::multi::array_ref<BType, 2> v_(to_address(v.origin()), {v.size(0), 1});
boost::multi::array_ref<const AType, 2> X_(to_address(X.origin()), {X.size(0), 1});
boost::multi::array_ref< BType, 2> v_(to_address(v.origin()), {std::get<0>(v.sizes()), 1});
boost::multi::array_ref<const AType, 2> X_(to_address(X.origin()), {std::get<0>(X.sizes()), 1});
return vHS(X_, v_, a, c);
}
@ -436,11 +436,11 @@ public:
{
using XType = typename std::decay_t<typename MatA::element>;
using vType = typename std::decay<MatB>::type::element;
assert(Likn.size(1) == X.size(0));
assert(Likn.size(0) == v.size(0));
assert(X.size(1) == v.size(1));
assert(std::get<1>(Likn.sizes()) == std::get<0>(X.sizes()));
assert(std::get<0>(Likn.sizes()) == std::get<0>(v.sizes()));
assert(std::get<1>(X.sizes()) == std::get<1>(v.sizes()));
long ik0, ikN;
std::tie(ik0, ikN) = FairDivideBoundary(long(TG.TG_local().rank()), long(Likn.size(0)), long(TG.TG_local().size()));
std::tie(ik0, ikN) = FairDivideBoundary(long(TG.TG_local().rank()), long(Likn.size()), long(TG.TG_local().size()));
// setup buffer space if changing precision in X or v
size_t vmem(0), Xmem(0);
if (not std::is_same<XType, SPComplexType>::value)
@ -486,7 +486,7 @@ public:
if (not std::is_same<vType, SPComplexType>::value)
{
copy_n_cast(to_address(vsp[ik0].origin()), vsp.size(1) * (ikN - ik0), to_address(v[ik0].origin()));
copy_n_cast(to_address(vsp[ik0].origin()), std::get<1>(vsp.sizes()) * (ikN - ik0), to_address(v[ik0].origin()));
}
TG.TG_local().barrier();
}
@ -500,8 +500,8 @@ public:
{
using BType = typename std::decay<MatB>::type::element;
using AType = typename std::decay<MatA>::type::element;
boost::multi::array_ref<BType, 2> v_(to_address(v.origin()), {v.size(0), 1});
boost::multi::array_cref<AType, 2> G_(to_address(G.origin()), {G.size(0), 1});
boost::multi::array_ref<BType, 2> v_(to_address(v.origin()), {std::get<0>(v.sizes()), 1});
boost::multi::array_cref<AType, 2> G_(to_address(G.origin()), {std::get<0>(G.sizes()), 1});
return vbias(G_, v_, a, c, k);
}
@ -556,13 +556,13 @@ public:
boost::multi::array_ref<SPComplexType, 2> vsp(vptr, v.extensions());
TG.TG_local().barrier();
if (haj.size(0) == 1)
if (haj.size() == 1)
{
assert(Lakn.size(0) == G.size(0));
assert(Lakn.size(1) == v.size(0));
assert(G.size(1) == v.size(1));
assert(std::get<0>(Lakn.sizes()) == std::get<0>(G.sizes()));
assert(std::get<1>(Lakn.sizes()) == std::get<0>(v.sizes()));
assert(std::get<1>(G.sizes()) == std::get<1>(v.sizes()));
std::tie(ic0, icN) =
FairDivideBoundary(long(TG.TG_local().rank()), long(Lakn.size(1)), long(TG.TG_local().size()));
FairDivideBoundary(long(TG.TG_local().rank()), long(std::get<1>(Lakn.sizes())), long(TG.TG_local().size()));
if (walker_type == CLOSED)
a *= 2.0;
@ -572,11 +572,11 @@ public:
else
{
// multideterminant is not half-rotated, so use Likn
assert(Likn.size(0) == G.size(0));
assert(Likn.size(1) == v.size(0));
assert(G.size(1) == v.size(1));
assert(std::get<0>(Likn.sizes()) == std::get<0>(G.sizes()));
assert(std::get<1>(Likn.sizes()) == std::get<0>(v.sizes()));
assert(std::get<1>(G.sizes()) == std::get<1>(v.sizes()));
std::tie(ic0, icN) =
FairDivideBoundary(long(TG.TG_local().rank()), long(Likn.size(1)), long(TG.TG_local().size()));
FairDivideBoundary(long(TG.TG_local().rank()), long(std::get<1>(Likn.sizes())), long(TG.TG_local().size()));
if (walker_type == CLOSED)
a *= 2.0;
@ -586,7 +586,7 @@ public:
// copy data back if changing precision
if (not std::is_same<vType, SPComplexType>::value)
{
copy_n_cast(to_address(vsp[ic0].origin()), vsp.size(1) * (icN - ic0), to_address(v[ic0].origin()));
copy_n_cast(to_address(vsp[ic0].origin()), std::get<1>(vsp.sizes()) * (icN - ic0), to_address(v[ic0].origin()));
}
TG.TG_local().barrier();
}

View File

@ -123,13 +123,13 @@ public:
Lnak(std::move(move_vector<shmSpC3Tensor>(std::move(vnak)))),
vn0(std::move(vn0_))
{
local_nCV = Likn.size(1);
local_nCV = std::get<1>(Likn.sizes());
size_t lnak(0);
for (auto& v : Lnak)
lnak += v.num_elements();
for (int i = 0; i < hij.size(0); i++)
for (int i = 0; i < std::get<0>(hij.sizes()); i++)
{
for (int j = 0; j < hij.size(1); j++)
for (int j = 0; j < std::get<1>(hij.sizes()); j++)
{
hij_dev[i][j] = ComplexType(hij[i][j]);
}
@ -152,7 +152,7 @@ public:
boost::multi::array<ComplexType, 2> getOneBodyPropagatorMatrix(TaskGroup_& TG,
boost::multi::array<ComplexType, 1> const& vMF)
{
int NMO = hij.size(0);
int NMO = hij.size();
// in non-collinear case with SO, keep SO matrix here and add it
// for now, stay collinear
@ -210,7 +210,7 @@ public:
bool addEJ = true,
bool addEXX = true)
{
assert(E.size(1) >= 3);
assert(std::get<1>(E.sizes()) >= 3);
assert(nd >= 0);
assert(nd < haj.size());
if (walker_type == COLLINEAR)
@ -218,24 +218,24 @@ public:
else
assert(nd < Lnak.size());
int nwalk = Gc.size(0);
int nwalk = Gc.size();
int nspin = (walker_type == COLLINEAR ? 2 : 1);
int NMO = hij.size(0);
int NMO = hij.size();
int nel[2];
nel[0] = Lnak[nspin * nd].size(1);
nel[1] = ((nspin == 2) ? Lnak[nspin * nd + 1].size(1) : 0);
assert(Lnak[nspin * nd].size(0) == local_nCV);
assert(Lnak[nspin * nd].size(2) == NMO);
nel[0] = std::get<1>(Lnak[nspin * nd].sizes());
nel[1] = ((nspin == 2) ? std::get<1>(Lnak[nspin * nd + 1].sizes()) : 0);
assert(std::get<0>(Lnak[nspin * nd].sizes()) == local_nCV);
assert(std::get<2>(Lnak[nspin * nd].sizes()) == NMO);
if (nspin == 2)
{
assert(Lnak[nspin * nd + 1].size(0) == local_nCV);
assert(Lnak[nspin * nd + 1].size(2) == NMO);
assert(std::get<0>(Lnak[nspin * nd + 1].sizes()) == local_nCV);
assert(std::get<2>(Lnak[nspin * nd + 1].sizes()) == NMO);
}
assert(Gc.num_elements() == nwalk * (nel[0] + nel[1]) * NMO);
int getKr = KEright != nullptr;
int getKl = KEleft != nullptr;
if (E.size(0) != nwalk || E.size(1) < 3)
if (std::get<0>(E.sizes()) != nwalk || std::get<1>(E.sizes()) < 3)
APP_ABORT(" Error in AFQMC/HamiltonianOperations/Real3IndexFactorization_batched_v2::energy(...). Incorrect "
"matrix dimensions \n");
@ -255,13 +255,13 @@ public:
Knc = local_nCV;
if (getKr)
{
assert(KEright->size(0) == nwalk && KEright->size(1) == local_nCV);
assert(KEright->stride(0) == KEright->size(1));
assert(std::get<0>(KEright->sizes()) == nwalk && std::get<1>(KEright->sizes()) == local_nCV);
assert(KEright->stride(0) == std::get<1>(KEright->sizes()));
}
if (getKl)
{
assert(KEleft->size(0) == nwalk && KEleft->size(1) == local_nCV);
assert(KEleft->stride(0) == KEleft->size(1));
assert(std::get<0>(KEleft->sizes()) == nwalk && std::get<1>(KEleft->sizes()) == local_nCV);
assert(KEleft->stride(0) == std::get<1>(KEleft->sizes()));
}
}
else if (getKr or getKl)
@ -387,8 +387,8 @@ public:
{
using BType = typename std::decay<MatB>::type::element;
using AType = typename std::decay<MatA>::type::element;
boost::multi::array_ref<BType, 2, decltype(v.origin())> v_(v.origin(), {v.size(0), 1});
boost::multi::array_ref<AType, 2, decltype(X.origin())> X_(X.origin(), {X.size(0), 1});
boost::multi::array_ref<BType, 2, decltype(v.origin())> v_(v.origin(), {std::get<0>(v.sizes()), 1});
boost::multi::array_ref<AType, 2, decltype(X.origin())> X_(X.origin(), {std::get<0>(X.sizes()), 1});
return vHS(X_, v_, a, c);
}
@ -400,9 +400,9 @@ public:
{
using XType = typename std::decay_t<typename MatA::element>;
using vType = typename std::decay<MatB>::type::element;
assert(Likn.size(1) == X.size(0));
assert(Likn.size(0) == v.size(0));
assert(X.size(1) == v.size(1));
assert(std::get<1>(Likn.sizes()) == std::get<0>(X.sizes()));
assert(std::get<0>(Likn.sizes()) == std::get<0>(v.sizes()));
assert(std::get<1>(X.sizes()) == std::get<1>(v.sizes()));
// setup buffer space if changing precision in X or v
size_t vmem(0), Xmem(0);
if (not std::is_same<XType, SPComplexType>::value)
@ -454,8 +454,8 @@ public:
{
using BType = typename std::decay<MatB>::type::element;
using AType = typename std::decay<MatA>::type::element;
boost::multi::array_ref<BType, 2, decltype(v.origin())> v_(v.origin(), {v.size(0), 1});
boost::multi::array_ref<AType const, 2, decltype(G.origin())> G_(G.origin(), {G.size(0), 1});
boost::multi::array_ref<BType, 2, decltype(v.origin())> v_(v.origin(), {v.size(), 1});
boost::multi::array_ref<AType const, 2, decltype(G.origin())> G_(G.origin(), {G.size(), 1});
return vbias(G_, v_, a, c, k);
}
@ -506,25 +506,25 @@ public:
boost::multi::array_cref<SPComplexType const, 2, const_sp_pointer> Gsp(Gptr, G.extensions());
boost::multi::array_ref<SPComplexType, 2, sp_pointer> vsp(vptr, v.extensions());
if (haj.size(0) == 1)
if (haj.size() == 1)
{
int nwalk = v.size(1);
int nwalk = std::get<1>(v.sizes());
if (walker_type == COLLINEAR)
{
assert(G.size(1) == v.size(1));
assert(std::get<1>(G.sizes()) == std::get<1>(v.sizes()));
int NMO, nel[2];
NMO = Lnak[0].size(2);
nel[0] = Lnak[0].size(1);
nel[1] = Lnak[1].size(1);
NMO = std::get<2>(Lnak[0].sizes());
nel[0] = std::get<1>(Lnak[0].sizes());
nel[1] = std::get<1>(Lnak[1].sizes());
double c_[2];
c_[0] = c;
c_[1] = c;
if (std::abs(c) < 1e-8)
c_[1] = 1.0;
assert((nel[0]+nel[1])*NMO == G.size(0));
assert((nel[0]+nel[1])*NMO == std::get<0>(G.sizes()));
for (int ispin = 0, is0 = 0; ispin < 2; ispin++)
{
assert(Lnak[ispin].size(0) == v.size(0));
assert(std::get<0>(Lnak[ispin].sizes()) == std::get<0>(v.sizes()));
SpCMatrix_ref Ln(make_device_ptr(Lnak[ispin].origin()), {local_nCV, nel[ispin] * NMO});
ma::product(SPComplexType(a), Ln, Gsp.sliced(is0, is0 + nel[ispin] * NMO), SPComplexType(c_[ispin]), vsp);
is0 += nel[ispin] * NMO;
@ -532,19 +532,19 @@ public:
}
else
{
assert(G.size(1) == v.size(1));
assert(Lnak[0].size(1) * Lnak[0].size(2) == G.size(0));
assert(Lnak[0].size(0) == v.size(0));
SpCMatrix_ref Ln(make_device_ptr(Lnak[0].origin()), {local_nCV, Lnak[0].size(1) * Lnak[0].size(2)});
assert(std::get<1>(G.sizes()) == std::get<1>(v.sizes()));
assert(std::get<1>(Lnak[0].sizes()) * std::get<2>(Lnak[0].sizes()) == std::get<0>(G.sizes()));
assert(std::get<0>(Lnak[0].sizes()) == std::get<0>(v.sizes()));
SpCMatrix_ref Ln(make_device_ptr(Lnak[0].origin()), {local_nCV, std::get<1>(Lnak[0].sizes()) * std::get<2>(Lnak[0].sizes())});
ma::product(SPComplexType(a), Ln, Gsp, SPComplexType(c), vsp);
}
}
else
{
// multideterminant is not half-rotated, so use Likn
assert(Likn.size(0) == G.size(0));
assert(Likn.size(1) == v.size(0));
assert(G.size(1) == v.size(1));
assert(std::get<0>(Likn.sizes()) == std::get<0>(G.sizes()));
assert(std::get<1>(Likn.sizes()) == std::get<0>(v.sizes()));
assert(std::get<1>(G.sizes()) == std::get<1>(v.sizes()));
ma::product(SPValueType(a), ma::T(Likn), Gsp, SPValueType(c), vsp);
}
@ -557,12 +557,12 @@ public:
template<class Mat, class MatB>
void generalizedFockMatrix(Mat&& G, MatB&& Fp, MatB&& Fm)
{
int nwalk = G.size(0);
int nwalk = G.size();
int nspin = (walker_type == COLLINEAR ? 2 : 1);
int NMO = hij.size(0);
int NMO = hij.size();
int nel[2];
assert(Fp.size(0) == nwalk);
assert(Fm.size(0) == nwalk);
assert(Fp.size() == nwalk);
assert(Fm.size() == nwalk);
assert(G[0].num_elements() == nspin * NMO * NMO);
assert(Fp[0].num_elements() == nspin * NMO * NMO);
assert(Fm[0].num_elements() == nspin * NMO * NMO);

View File

@ -132,7 +132,7 @@ public:
CMatrix getOneBodyPropagatorMatrix(TaskGroup_& TG, CVector const& vMF)
{
int NMO = hij.size(0);
int NMO = hij.size();
// in non-collinear case with SO, keep SO matrix here and add it
// for now, stay collinear
CMatrix H1({NMO, NMO});
@ -188,14 +188,14 @@ public:
bool addEJ = true,
bool addEXX = true)
{
assert(E.size(1) >= 3);
assert(std::get<1>(E.sizes()) >= 3);
assert(k >= 0 && k < haj.size());
assert(k >= 0 && k < Vakbl_view.size());
if (Gcloc.num_elements() < Gc.size(1) * Vakbl_view[k].size(0))
Gcloc.reextent(iextensions<1u>(Vakbl_view[k].size(0) * Gc.size(1)));
boost::multi::array_ref<SPComplexType, 2> buff(Gcloc.data(), {long(Vakbl_view[k].size(0)), long(Gc.size(1))});
if (Gcloc.num_elements() < std::get<1>(Gc.sizes()) * std::get<0>(Vakbl_view[k].sizes()))
Gcloc.reextent(iextensions<1u>(std::get<0>(Vakbl_view[k].sizes()) * std::get<1>(Gc.sizes())));
boost::multi::array_ref<SPComplexType, 2> buff(Gcloc.data(), {long(std::get<0>(Vakbl_view[k].sizes())), long(std::get<1>(Gc.sizes()))});
int nwalk = Gc.size(1);
int nwalk = std::get<1>(Gc.sizes());
int getKr = Kr != nullptr;
int getKl = Kl != nullptr;
if (std::get<0>(E.sizes()) != nwalk || std::get<1>(E.sizes()) < 3)
@ -204,9 +204,9 @@ public:
for (int n = 0; n < nwalk; n++)
std::fill_n(E[n].origin(), 3, ComplexType(0.));
if (addEJ and getKl)
assert(Kl->size(0) == nwalk && Kl->size(1) == SpvnT[k].size(0));
assert(std::get<0>(Kl->sizes()) == nwalk && std::get<1>(Kl->sizes()) == std::get<0>(SpvnT[k].sizes()));
if (addEJ and getKr)
assert(Kr->size(0) == nwalk && Kr->size(1) == SpvnT[k].size(0));
assert(std::get<0>(Kr->sizes()) == nwalk && std::get<1>(Kr->sizes()) == std::get<0>(SpvnT[k].sizes()));
#if defined(MIXED_PRECISION)
size_t mem_needs = Gc.num_elements();
@ -239,17 +239,17 @@ public:
if (separateEJ && addEJ)
{
using ma::T;
if (Gcloc.num_elements() < SpvnT[k].size(0) * Gc.size(1))
Gcloc.reextent(iextensions<1u>(SpvnT[k].size(0) * Gc.size(1)));
assert(SpvnT_view[k].size(1) == Gc.size(0));
if (Gcloc.num_elements() < std::get<0>(SpvnT[k].sizes()) * std::get<1>(Gc.sizes()))
Gcloc.reextent(iextensions<1u>(std::get<0>(SpvnT[k].sizes()) * std::get<1>(Gc.sizes())));
assert(std::get<1>(SpvnT_view[k].sizes()) == std::get<0>(Gc.sizes()));
RealType scl = (walker_type == CLOSED ? 4.0 : 1.0);
// SpvnT*G
boost::multi::array_ref<SPComplexType, 2> v_(Gcloc.origin() + SpvnT_view[k].local_origin()[0] * Gc.size(1),
{long(SpvnT_view[k].size(0)), long(Gc.size(1))});
boost::multi::array_ref<SPComplexType, 2> v_(Gcloc.origin() + SpvnT_view[k].local_origin()[0] * std::get<1>(Gc.sizes()),
{long(std::get<0>(SpvnT_view[k].sizes())), long(std::get<1>(Gc.sizes()))});
ma::product(SpvnT_view[k], Gsp, v_);
if (getKl || getKr)
{
for (int wi = 0; wi < Gc.size(1); wi++)
for (int wi = 0; wi < std::get<1>(Gc.sizes()); wi++)
{
auto _v_ = v_(v_.extension(0), wi);
if (getKl)
@ -266,7 +266,7 @@ public:
}
}
}
for (int wi = 0; wi < Gc.size(1); wi++)
for (int wi = 0; wi < std::get<1>(Gc.sizes()); wi++)
E[wi][2] = 0.5 * scl * static_cast<ComplexType>(ma::dot(v_(v_.extension(0), wi), v_(v_.extension(0), wi)));
}
#if defined(MIXED_PRECISION)
@ -288,8 +288,8 @@ public:
{
using BType = typename std::decay<MatB>::type::element;
using AType = typename std::decay<MatA>::type::element;
boost::multi::array_ref<BType, 2, decltype(v.origin())> v_(v.origin(), {v.size(0), 1});
boost::multi::array_ref<AType, 2, decltype(X.origin())> X_(X.origin(), {X.size(0), 1});
boost::multi::array_ref<BType, 2, decltype(v.origin())> v_(v.origin(), {v.size(), 1});
boost::multi::array_ref<AType, 2, decltype(X.origin())> X_(X.origin(), {X.size(), 1});
return vHS(X_, v_, a, c);
}
@ -301,9 +301,9 @@ public:
{
using vType = typename std::decay<MatB>::type::element;
using XType = typename std::decay_t<typename MatA::element>;
assert(Spvn.size(1) == X.size(0));
assert(Spvn.size(0) == v.size(0));
assert(X.size(1) == v.size(1));
assert(std::get<1>(Spvn.sizes()) == std::get<0>(X.sizes()));
assert(std::get<0>(Spvn.sizes()) == std::get<0>(v.sizes()));
assert(std::get<1>(X.sizes()) == std::get<1>(v.sizes()));
// setup buffer space if changing precision in X or v
size_t vmem(0), Xmem(0);
@ -345,7 +345,7 @@ public:
comm->barrier();
boost::multi::array_ref<SPComplexType, 2> v_(to_address(vsp[Spvn_view.local_origin()[0]].origin()),
{long(Spvn_view.size(0)), long(vsp.size(1))});
{long(std::get<0>(Spvn_view.sizes())), long(std::get<1>(vsp.sizes()))});
ma::product(SPValueType(a), Spvn_view, Xsp, SPValueType(c), v_);
// copy data back if changing precision
@ -365,8 +365,8 @@ public:
{
using BType = typename std::decay<MatB>::type::element;
using AType = typename std::decay<MatA>::type::element;
boost::multi::array_ref<BType, 2, decltype(v.origin())> v_(v.origin(), {v.size(0), 1});
boost::multi::array_cref<AType, 2, decltype(G.origin())> G_(G.origin(), {G.size(0), 1});
boost::multi::array_ref<BType, 2, decltype(v.origin())> v_(v.origin(), {std::get<0>(v.sizes()), 1});
boost::multi::array_cref<AType, 2, decltype(G.origin())> G_(G.origin(), {std::get<0>(G.sizes()), 1});
return vbias(G_, v_, a, c, k);
}
@ -382,9 +382,9 @@ public:
k = 0;
if (walker_type == CLOSED)
a *= 2.0;
assert(SpvnT[k].size(1) == G.size(0));
assert(SpvnT[k].size(0) == v.size(0));
assert(G.size(1) == v.size(1));
assert(std::get<1>(SpvnT[k].sizes()) == std::get<0>(G.sizes()));
assert(std::get<0>(SpvnT[k].sizes()) == std::get<0>(v.sizes()));
assert(std::get<1>(G.sizes()) == std::get<1>(v.sizes()));
// setup buffer space if changing precision in G or v
size_t vmem(0), Gmem(0);
@ -425,7 +425,7 @@ public:
boost::multi::array_ref<SPComplexType, 2> vsp(vptr, v.extensions());
comm->barrier();
boost::multi::array_ref<SPComplexType, 2> v_(to_address(vsp[SpvnT_view[k].local_origin()[0]].origin()),
{long(SpvnT_view[k].size(0)), long(vsp.size(1))});
{long(std::get<0>(SpvnT_view[k].sizes())), long(std::get<1>(vsp.sizes()))});
ma::product(SpT2(a), SpvnT_view[k], Gsp, SpT2(c), v_);
// copy data back if changing precision

View File

@ -133,43 +133,43 @@ public:
vn0(std::move(v0_)),
E0(e0_)
{
gnmu = Luv.size(1);
grotnmu = rotMuv.size(1);
gnmu = std::get<1>(Luv.sizes());
grotnmu = std::get<1>(rotMuv.sizes());
if (haj.size() > 1)
APP_ABORT(" Error: THC not yet implemented for multiple references.\n");
assert(comm);
// current partition over 'u' for L/Piu
assert(Luv.size(0) == Piu.size(1));
assert(Luv.size() == std::get<1>(Piu.sizes()));
for (int i = 0; i < rotcPua.size(); i++)
{
// rot Ps are not yet distributed
assert(rotcPua[i].size(0) == rotPiu.size(1));
assert(rotcPua[i].size() == std::get<1>(rotPiu.sizes()));
if (walker_type == CLOSED)
assert(rotcPua[i].size(1) == nup);
assert(std::get<1>(rotcPua[i].sizes()) == nup);
else if (walker_type == COLLINEAR)
assert(rotcPua[i].size(1) == nup + ndown);
assert(std::get<1>(rotcPua[i].sizes()) == nup + ndown);
else if (walker_type == NONCOLLINEAR)
assert(rotcPua[i].size(1) == nup + ndown);
assert(std::get<1>(rotcPua[i].sizes()) == nup + ndown);
}
for (int i = 0; i < cPua.size(); i++)
{
assert(cPua[i].size(0) == Luv.size(0));
assert(cPua[i].size() == Luv.size());
if (walker_type == CLOSED)
assert(cPua[i].size(1) == nup);
assert(std::get<1>(cPua[i].sizes()) == nup);
else if (walker_type == COLLINEAR)
assert(cPua[i].size(1) == nup + ndown);
assert(std::get<1>(cPua[i].sizes()) == nup + ndown);
else if (walker_type == NONCOLLINEAR)
assert(cPua[i].size(1) == nup + ndown);
assert(std::get<1>(cPua[i].sizes()) == nup + ndown);
}
if (walker_type == NONCOLLINEAR)
{
assert(Piu.size(0) == 2 * NMO);
assert(rotPiu.size(0) == 2 * NMO);
assert(Piu.size() == 2 * NMO);
assert(rotPiu.size() == 2 * NMO);
}
else
{
assert(Piu.size(0) == NMO);
assert(rotPiu.size(0) == NMO);
assert(Piu.size() == NMO);
assert(rotPiu.size() == NMO);
}
}
@ -186,7 +186,7 @@ public:
{
using std::copy_n;
using std::fill_n;
int NMO = hij.size(0);
int NMO = hij.size();
// in non-collinear case with SO, keep SO matrix here and add it
// for now, stay collinear
@ -257,9 +257,9 @@ public:
if (k > 0)
APP_ABORT(" Error: THC not yet implemented for multiple references.\n");
// G[nel][nmo]
assert(E.size(0) == G.size(0));
assert(E.size(1) == 3);
int nwalk = G.size(0);
assert(std::get<0>(E.sizes()) == std::get<0>(G.sizes()));
assert(std::get<1>(E.sizes()) == 3);
int nwalk = G.size();
int getKr = Kr != nullptr;
int getKl = Kl != nullptr;
@ -274,17 +274,17 @@ public:
if (not(addEJ || addEXX))
return;
int nmo_ = rotPiu.size(0);
int nu = rotMuv.size(0);
int nmo_ = rotPiu.size();
int nu = rotMuv.size();
int nu0 = rotnmu0;
int nv = rotMuv.size(1);
int nel_ = rotcPua[0].size(1);
int nv = std::get<1>(rotMuv.sizes());
int nel_ = std::get<1>(rotcPua[0].sizes());
int nspin = (walker_type == COLLINEAR) ? 2 : 1;
assert(G.size(1) == nel_ * nmo_);
assert(std::get<1>(G.sizes()) == nel_ * nmo_);
if (addEJ and getKl)
assert(Kl->size(0) == nwalk && Kl->size(1) == nu);
assert(std::get<0>(Kl->sizes()) == nwalk && std::get<1>(Kl->sizes()) == nu);
if (addEJ and getKr)
assert(Kr->size(0) == nwalk && Kr->size(1) == nu);
assert(std::get<0>(Kr->sizes()) == nwalk && std::get<1>(Kr->sizes()) == nu);
using ma::T;
int u0, uN;
std::tie(u0, uN) = FairDivideBoundary(comm->rank(), nu, comm->size());
@ -667,8 +667,8 @@ public:
{
using XType = typename std::decay_t<typename MatA::element>;
using vType = typename std::decay<MatB>::type::element;
boost::multi::array_ref<vType, 2, decltype(v.origin())> v_(v.origin(), {1, v.size(0)});
boost::multi::array_ref<XType const, 2, decltype(X.origin())> X_(X.origin(), {X.size(0), 1});
boost::multi::array_ref<vType, 2, decltype(v.origin())> v_(v.origin(), {1, v.size()});
boost::multi::array_ref<XType const, 2, decltype(X.origin())> X_(X.origin(), {X.size(), 1});
vHS(X_, v_, a, c);
}
@ -681,18 +681,18 @@ public:
using ma::T;
using XType = typename std::decay_t<typename MatA::element>;
using vType = typename std::decay<MatB>::type::element;
int nwalk = X.size(1);
int nwalk = std::get<1>(X.sizes());
#if defined(QMC_COMPLEX)
int nchol = 2 * Luv.size(1);
int nchol = 2 * std::get<1>(Luv.sizes());
#else
int nchol = Luv.size(1);
int nchol = std::get<1>(Luv.sizes());
#endif
int nmo_ = Piu.size(0);
int nu = Piu.size(1);
assert(Luv.size(0) == nu);
assert(X.size(0) == nchol);
assert(v.size(0) == nwalk);
assert(v.size(1) == nmo_ * nmo_);
int nmo_ = std::get<0>(Piu.sizes());
int nu = std::get<1>(Piu.sizes());
assert(std::get<0>(Luv.sizes()) == nu);
assert(std::get<0>(X.sizes()) == nchol);
assert(std::get<0>(v.sizes()) == nwalk);
assert(std::get<1>(v.sizes()) == nmo_ * nmo_);
size_t memory_needs = nu * nwalk;
if (not std::is_same<XType, SPComplexType>::value)
@ -824,8 +824,8 @@ public:
{
using GType = typename std::decay_t<typename MatA::element>;
using vType = typename std::decay<MatB>::type::element;
boost::multi::array_ref<vType, 2, decltype(v.origin())> v_(v.origin(), {v.size(0), 1});
boost::multi::array_ref<GType const, 2, decltype(G.origin())> G_(G.origin(), {1, G.size(0)});
boost::multi::array_ref<vType, 2, decltype(v.origin())> v_(v.origin(), {std::get<0>(v.sizes()), 1});
boost::multi::array_ref<GType const, 2, decltype(G.origin())> G_(G.origin(), {1, std::get<0>(G.sizes())});
vbias(G_, v_, a, c, k);
}
@ -839,17 +839,17 @@ public:
using vType = typename std::decay<MatB>::type::element;
if (k > 0)
APP_ABORT(" Error: THC not yet implemented for multiple references.\n");
int nwalk = G.size(0);
int nmo_ = Piu.size(0);
int nu = Piu.size(1);
int nel_ = cPua[0].size(1);
int nwalk = std::get<0>(G.sizes());
int nmo_ = std::get<0>(Piu.sizes());
int nu = std::get<1>(Piu.sizes());
int nel_ = std::get<1>(cPua[0].sizes());
#if defined(QMC_COMPLEX)
int nchol = 2 * Luv.size(1);
int nchol = 2 * std::get<1>(Luv.sizes());
#else
int nchol = Luv.size(1);
int nchol = std::get<1>(Luv.sizes());
#endif
assert(v.size(1) == nwalk);
assert(v.size(0) == nchol);
assert(std::get<1>(v.sizes()) == nwalk);
assert(std::get<0>(v.sizes()) == nchol);
using ma::T;
int c0, cN;
std::tie(c0, cN) = FairDivideBoundary(comm->rank(), nchol, comm->size());
@ -902,9 +902,9 @@ public:
#if defined(QMC_COMPLEX)
// reinterpret as RealType matrices with 2x the columns
Array_ref<SPRealType, 2> Luv_R(pointer_cast<SPRealType>(make_device_ptr(Luv.origin())),
{Luv.size(0), 2 * Luv.size(1)});
{std::get<0>(Luv.sizes()), 2 * std::get<1>(Luv.sizes())});
Array_ref<SPRealType, 2> Guu_R(pointer_cast<SPRealType>(Guu.origin()), {nu, 2 * nwalk});
Array_ref<SPRealType, 2> vsp_R(pointer_cast<SPRealType>(vsp.origin()), {vsp.size(0), 2 * vsp.size(1)});
Array_ref<SPRealType, 2> vsp_R(pointer_cast<SPRealType>(vsp.origin()), {std::get<0>(vsp.sizes()), 2 * std::get<1>(vsp.sizes())});
ma::product(SPRealType(a), T(Luv_R(Luv_R.extension(0), {c0, cN})), Guu_R, SPRealType(c), vsp_R.sliced(c0, cN));
#else
ma::product(SPRealType(a), T(Luv(Luv.extension(0), {c0, cN})), Guu, SPRealType(c), vsp.sliced(c0, cN));
@ -919,7 +919,7 @@ public:
Array_ref<SPRealType, 2> Luv_R(pointer_cast<SPRealType>(make_device_ptr(Luv.origin())),
{Luv.size(0), 2 * Luv.size(1)});
Array_ref<SPRealType, 2> Guu_R(pointer_cast<SPRealType>(Guu.origin()), {nu, 2 * nwalk});
Array_ref<SPRealType, 2> vsp_R(pointer_cast<SPRealType>(vsp.origin()), {vsp.size(0), 2 * vsp.size(1)});
Array_ref<SPRealType, 2> vsp_R(pointer_cast<SPRealType>(vsp.origin()), {std::get<0>(vsp.sizes()), 2 * std::get<1>(vsp.sizes())});
ma::product(SPRealType(a), T(Luv_R(Luv_R.extension(0), {c0, cN})), Guu_R, SPRealType(c), vsp_R.sliced(c0, cN));
#else
ma::product(SPRealType(a), T(Luv(Luv.extension(0), {c0, cN})), Guu, SPRealType(c), vsp.sliced(c0, cN));
@ -927,7 +927,7 @@ public:
}
if (not std::is_same<vType, SPComplexType>::value)
{
copy_n_cast(make_device_ptr(vsp[c0].origin()), vsp.size(1) * (cN - c0), make_device_ptr(v[c0].origin()));
copy_n_cast(make_device_ptr(vsp[c0].origin()), std::get<1>(vsp.sizes()) * (cN - c0), make_device_ptr(v[c0].origin()));
}
comm->barrier();
}
@ -939,13 +939,13 @@ public:
}
bool distribution_over_cholesky_vectors() const { return false; }
int number_of_ke_vectors() const { return rotMuv.size(0); }
int number_of_ke_vectors() const { return std::get<0>(rotMuv.sizes()); }
#if defined(QMC_COMPLEX)
int local_number_of_cholesky_vectors() const { return 2 * Luv.size(1); }
int global_number_of_cholesky_vectors() const { return 2 * Luv.size(1); }
int local_number_of_cholesky_vectors() const { return 2 * std::get<1>(Luv.sizes()); }
int global_number_of_cholesky_vectors() const { return 2 * std::get<1>(Luv.sizes()); }
#else
int local_number_of_cholesky_vectors() const { return Luv.size(1); }
int global_number_of_cholesky_vectors() const { return Luv.size(1); }
int local_number_of_cholesky_vectors() const { return std::get<1>(Luv.sizes()); }
int global_number_of_cholesky_vectors() const { return std::get<1>(Luv.sizes()); }
#endif
int global_origin_cholesky_vector() const { return 0; }
@ -964,16 +964,16 @@ protected:
template<class MatA, class MatB>
void Guu_from_compact(MatA const& G, MatB&& Guu)
{
int nmo_ = int(Piu.size(0));
int nu = int(Piu.size(1));
int nel_ = cPua[0].size(1);
int nmo_ = int(std::get<0>(Piu.sizes()));
int nu = int(std::get<1>(Piu.sizes()));
int nel_ = std::get<1>(cPua[0].sizes());
int u0, uN;
std::tie(u0, uN) = FairDivideBoundary(comm->rank(), nu, comm->size());
int nw = G.size(0);
int nw = std::get<0>(G.sizes());
assert(G.size(0) == Guu.size(1));
assert(G.size(1) == nel_ * nmo_);
assert(Guu.size(0) == nu);
assert(std::get<0>(G.sizes()) == std::get<1>(Guu.sizes()));
assert(std::get<1>(G.sizes()) == nel_ * nmo_);
assert(std::get<0>(Guu.sizes()) == nu);
ComplexType a = (walker_type == CLOSED) ? ComplexType(2.0) : ComplexType(1.0);
Array<SPComplexType, 2> T1({(uN - u0), nw * nel_},
@ -1007,15 +1007,15 @@ protected:
void Guu_from_full(MatA const& G, MatB&& Guu)
{
using std::fill_n;
int nmo_ = int(Piu.size(0));
int nu = int(Piu.size(1));
int nmo_ = int(std::get<0>(Piu.sizes()));
int nu = int(std::get<1>(Piu.sizes()));
int u0, uN;
std::tie(u0, uN) = FairDivideBoundary(comm->rank(), nu, comm->size());
int nwalk = G.size(0);
int nwalk = G.size();
assert(G.size(0) == Guu.size(1));
assert(Guu.size(0) == nu);
assert(G.size(1) == nmo_ * nmo_);
assert(std::get<0>(G.sizes()) == std::get<1>(Guu.sizes()));
assert(std::get<0>(Guu.sizes()) == nu);
assert(std::get<1>(G.sizes()) == nmo_ * nmo_);
// calculate how many walkers can be done concurrently
long Bytes = default_buffer_size_in_MB * 1024L * 1024L;
@ -1058,11 +1058,11 @@ protected:
static_assert(std::decay<MatB>::type::dimensionality == 3, "Wrong dimensionality");
static_assert(std::decay<MatC>::type::dimensionality == 2, "Wrong dimensionality");
static_assert(std::decay<MatD>::type::dimensionality == 3, "Wrong dimensionality");
int nmo_ = int(rotPiu.size(0));
int nu = int(rotMuv.size(0)); // potentially distributed over nodes
int nv = int(rotMuv.size(1)); // not distributed over nodes
int nw = int(G.size(0));
assert(rotPiu.size(1) == nv);
int nmo_ = int(std::get<0>(rotPiu.sizes()));
int nu = int(std::get<0>(rotMuv.sizes())); // potentially distributed over nodes
int nv = int(std::get<1>(rotMuv.sizes())); // not distributed over nodes
int nw = int(G.size());
assert(std::get<1>(rotPiu.sizes()) == nv);
int v0, vN;
std::tie(v0, vN) = FairDivideBoundary(comm->rank(), nv, comm->size());
int k0, kN;

View File

@ -302,8 +302,8 @@ inline void writeTHCOps(hdf_archive& dump,
shmCMatrix& vn0,
ValueType E0)
{
size_t gnmu(Luv.size(1));
size_t grotnmu(rotMuv.size(1));
size_t gnmu(std::get<1>(Luv.sizes()));
size_t grotnmu(std::get<1>(rotMuv.sizes()));
if (TGwfn.Global().root())
{
dump.push("HamiltonianOperations");

View File

@ -99,20 +99,20 @@ inline void calculate_energy(EMat&& locV, const MatA& Gc, MatB&& Gcloc, const Sp
// W[nwalk][2][NMO][NAEA]
assert(locV.dimensionality == 2);
assert(Gc.size(1) == Gcloc.size(1));
assert(Vakbl.size(0) == Gcloc.size(0));
assert(Gc.size(0) == Vakbl.size(1));
assert(std::get<1>(Gc.sizes()) == std::get<1>(Gcloc.sizes()));
assert(Vakbl.size(0) == std::get<0>(Gcloc.sizes()));
assert(std::get<0>(Gc.sizes()) == Vakbl.size(1));
using Type = typename std::decay<EMat>::type::element;
const Type half = Type(0.5);
int nwalk = Gc.size(1);
int nwalk = std::get<1>(Gc.sizes());
// Vakbl * Gc(bl,nw) = Gcloc(ak,nw)
ma::product(Vakbl, Gc, std::forward<MatB>(Gcloc));
// E2(nw) = 0.5*Gc(:,nw)*Gcloc(:,nw)
int r0 = Vakbl.local_origin()[0];
for (int i = 0, iend = Gcloc.size(0); i < iend; i++)
for (int i = 0, iend = Gcloc.size(); i < iend; i++)
for (int n = 0; n < nwalk; n++)
#if defined(MIXED_PRECISION)
locV[n][1] += static_cast<Type>(Gc[i + r0][n] * Gcloc[i][n]);

View File

@ -235,7 +235,7 @@ void ham_ops_basic_serial(boost::mpi3::communicator& world)
}
TG.local_barrier();
ComplexType Xsum = 0, Xsum2 = 0;
for (int i = 0; i < X.size(0); i++)
for (int i = 0; i < X.size(); i++)
{
Xsum += X[i][0];
Xsum2 += ComplexType(0.5) * X[i][0] * X[i][0];
@ -260,12 +260,12 @@ void ham_ops_basic_serial(boost::mpi3::communicator& world)
ComplexType Vsum = 0;
if (HOps.transposed_vHS())
{
for (int i = 0; i < vHS.size(1); i++)
for (int i = 0; i < std::get<1>(vHS.sizes()); i++)
Vsum += vHS[0][i];
}
else
{
for (int i = 0; i < vHS.size(0); i++)
for (int i = 0; i < std::get<0>(vHS.sizes()); i++)
Vsum += vHS[i][0];
}
if (std::abs(file_data.Vsum) > 1e-8)

View File

@ -65,10 +65,10 @@ HamiltonianOperations RealDenseHamiltonian::getHamiltonianOperations(bool pureSD
assert(PsiT.size() % 2 == 0);
int nspins = ((type != COLLINEAR) ? 1 : 2);
int ndet = PsiT.size() / nspins;
int nup = PsiT[0].size(0);
int nup = std::get<0>(PsiT[0].sizes());
int ndown = 0;
if (nspins == 2)
ndown = PsiT[1].size(0);
ndown = std::get<0>(PsiT[1].sizes());
int NEL = nup + ndown;
// distribute work over equivalent nodes in TGprop.TG() across TG.Global()
@ -186,11 +186,11 @@ HamiltonianOperations RealDenseHamiltonian::getHamiltonianOperations(bool pureSD
<< " Problems reading /Hamiltonian/DenseFactorized/L. \n";
APP_ABORT("");
}
if (Likn.size(0) != NMO * NMO || Likn.size(1) != local_ncv)
if (std::get<0>(Likn.sizes()) != NMO * NMO || std::get<1>(Likn.sizes()) != local_ncv)
{
app_error() << " Error in RealDenseHamiltonian::getHamiltonianOperations():"
<< " Problems reading /Hamiltonian/DenseFactorized/L. \n"
<< " Unexpected dimensions: " << Likn.size(0) << " " << Likn.size(1) << std::endl;
<< " Unexpected dimensions: " << std::get<0>(Likn.sizes()) << " " << std::get<1>(Likn.sizes()) << std::endl;
APP_ABORT("");
}
dump.pop();

View File

@ -185,11 +185,11 @@ HamiltonianOperations RealDenseHamiltonian_v2::getHamiltonianOperations(bool pur
<< " Problems reading /Hamiltonian/DenseFactorized/L. \n";
APP_ABORT("");
}
if (Likn.size(0) != NMO * NMO || Likn.size(1) != local_ncv)
if (std::get<0>(Likn.sizes()) != NMO * NMO || std::get<1>(Likn.sizes()) != local_ncv)
{
app_error() << " Error in RealDenseHamiltonian_v2::getHamiltonianOperations():"
<< " Problems reading /Hamiltonian/DenseFactorized/L. \n"
<< " Unexpected dimensins: " << Likn.size(0) << " " << Likn.size(1) << std::endl;
<< " Unexpected dimensins: " << std::get<0>(Likn.sizes()) << " " << std::get<1>(Likn.sizes()) << std::endl;
APP_ABORT("");
}
dump.pop();

View File

@ -244,7 +244,7 @@ HamiltonianOperations THCHamiltonian::getHamiltonianOperations(bool pureSD,
auto itT = Tuv.origin();
for (size_t i = 0; i < Muv.num_elements(); ++i, ++itT, ++itM)
*(itT) = ma::conj(*itT) * (*itM);
boost::multi::array<SPValueType, 2> T_({static_cast<boost::multi::size_t>(Tuv.size(1)), NMO});
boost::multi::array<SPValueType, 2> T_({static_cast<boost::multi::size_t>(std::get<1>(Tuv.sizes())), NMO});
ma::product(T(Tuv), H(Piu__), T_);
ma::product(SPValueType(-0.5), T(T_), T(Piu__({0, long(NMO)}, {long(c0), long(cN)})), SPValueType(0.0), v0_);
@ -295,7 +295,7 @@ HamiltonianOperations THCHamiltonian::getHamiltonianOperations(bool pureSD,
auto itT = Tuv.origin();
for (size_t i = 0; i < Muv.num_elements(); ++i, ++itT, ++itM)
*(itT) = ma::conj(*itT) * (*itM);
boost::multi::array<SPValueType, 2> T_({static_cast<boost::multi::size_t>(Tuv.size(1)), NMO});
boost::multi::array<SPValueType, 2> T_({static_cast<boost::multi::size_t>(std::get<1>(Tuv.sizes())), NMO});
ma::product(T(Tuv), H(Piu), T_);
ma::product(SPValueType(-0.5), T(T_), T(Piu({0, long(NMO)}, {long(c0), long(cN)})), SPValueType(0.0), v0_);

View File

@ -342,7 +342,7 @@ inline void rotateHijkl(std::string& type,
{
// Qk[norb*NAEA,nvec]
// Rl[nvec,norb*NAEA]
int n0_, n1_, sz_ = Qk.size(0);
int n0_, n1_, sz_ = Qk.size();
std::tie(n0_, n1_) = FairDivideBoundary(coreid, sz_, ncores);
if (n1_ - n0_ > 0)
ma::transpose(Qk.sliced(n0_, n1_), Rl(Rl.extension(0), {n0_, n1_}));
@ -798,7 +798,7 @@ inline void rotateHijkl_single_node(std::string& type,
{
// Qk[norb*NAEA,nvec]
// Rl[nvec,norb*NAEA]
int n0_, n1_, sz_ = Qk.size(0);
int n0_, n1_, sz_ = std::get<0>(Qk.sizes());
std::tie(n0_, n1_) = FairDivideBoundary(coreid, sz_, ncores);
if (n1_ - n0_ > 0)
ma::transpose(Qk.sliced(n0_, n1_), Rl(Rl.extension(0), {n0_, n1_}));

View File

@ -51,9 +51,9 @@ inline void count_Qk_x_Rl(WALKER_TYPES walker_type,
const SPRealType cut)
{
using Type = typename std::decay<MatTa>::type::element;
assert(Qk.size(0) == Ta.size(0));
assert(Qk.size(1) == Rl.size(0));
assert(Rl.size(1) == Rl.size(1));
assert(std::get<0>(Qk.sizes()) == std::get<0>(Ta.sizes()));
assert(std::get<1>(Qk.sizes()) == std::get<0>(Rl.sizes()));
assert(std::get<1>(Rl.sizes()) == std::get<1>(Rl.sizes()));
int ncores = TG.getTotalCores(), coreid = TG.getCoreID();
bool amIAlpha = true;
@ -61,13 +61,13 @@ inline void count_Qk_x_Rl(WALKER_TYPES walker_type,
amIAlpha = false;
int bl0 = -1, blN = -1;
int nwork = std::min(int(Rl.size(1)), ncores);
int nwork = std::min(int(std::get<1>(Rl.sizes())), ncores);
if (coreid < nwork)
std::tie(bl0, blN) = FairDivideBoundary(coreid, int(Rl.size(1)), nwork);
std::tie(bl0, blN) = FairDivideBoundary(coreid, int(std::get<1>(Rl.sizes())), nwork);
int ka0 = -1, kaN = -1;
nwork = std::min(int(Qk.size(0)), ncores);
nwork = std::min(int(std::get<0>(Qk.sizes())), ncores);
if (coreid < nwork)
std::tie(ka0, kaN) = FairDivideBoundary(coreid, int(Qk.size(0)), nwork);
std::tie(ka0, kaN) = FairDivideBoundary(coreid, int(std::get<0>(Qk.sizes())), nwork);
Type four(4.0);
Type two(2.0);
@ -211,9 +211,9 @@ inline void Qk_x_Rl(WALKER_TYPES walker_type,
const SPRealType cut)
{
using Type = typename std::decay<MatTa>::type::element;
assert(Qk.size(0) == Ta.size(0));
assert(Qk.size(1) == Rl.size(0));
assert(Rl.size(1) == Rl.size(1));
assert(std::get<0>(Qk.sizes()) == std::get<0>(Ta.sizes()));
assert(std::get<1>(Qk.sizes()) == std::get<0>(Rl.sizes()));
assert(std::get<1>(Rl.sizes()) == std::get<1>(Rl.sizes()));
int ncores = TG.getTotalCores(), coreid = TG.getCoreID();
bool amIAlpha = true;
@ -222,12 +222,12 @@ inline void Qk_x_Rl(WALKER_TYPES walker_type,
int bl0 = -1, blN = -1;
int ka0 = -1, kaN = -1;
int nwork = std::min(int(Rl.size(1)), ncores);
int nwork = std::min(int(std::get<1>(Rl.sizes())), ncores);
if (coreid < nwork)
std::tie(bl0, blN) = FairDivideBoundary(coreid, int(Rl.size(1)), nwork);
nwork = std::min(int(Qk.size(0)), ncores);
std::tie(bl0, blN) = FairDivideBoundary(coreid, int(std::get<1>(Rl.sizes())), nwork);
nwork = std::min(int(std::get<0>(Qk.sizes())), ncores);
if (coreid < nwork)
std::tie(ka0, kaN) = FairDivideBoundary(coreid, int(Qk.size(0)), nwork);
std::tie(ka0, kaN) = FairDivideBoundary(coreid, int(std::get<0>(Qk.sizes())), nwork);
Type four(4.0);
Type two(2.0);

View File

@ -166,6 +166,7 @@ public:
return static_cast<size_type>(pointers_end_[i] - pointers_begin_[i]);
}
auto shape() const { return std::array<size_type, 2>{{size(), size2_}}; }
auto sizes() const {return shape();}
template<typename integer_type = size_type>
auto size(integer_type d) const
{
@ -1150,6 +1151,7 @@ protected:
auto num_non_zero_elements() const { return size_type(self_.pointers_end_[i_] - self_.pointers_begin_[i_]); }
auto capacity() const { return self_.capacity(i_); }
auto shape() const { return std::array<size_type, 1>{{self_.size2_}}; }
auto sizes() const {return shape();}
template<typename integer_type = size_type>
auto size(integer_type d) const
{

View File

@ -60,8 +60,8 @@ CSR construct_csr_matrix_single_input(MultiArray2D&& M, double cutoff, char TA,
{
if (TA == 'N')
{
nr = M.size(0);
nc = M.size(1);
nr = std::get<0>(M.sizes());
nc = std::get<1>(M.sizes());
counts.resize(nr);
for (int_type i = 0; i < nr; i++)
for (int_type j = 0; j < nc; j++)
@ -70,11 +70,11 @@ CSR construct_csr_matrix_single_input(MultiArray2D&& M, double cutoff, char TA,
}
else
{
nr = M.size(1);
nc = M.size(0);
nr = std::get<1>(M.sizes());
nc = std::get<0>(M.sizes());
counts.resize(nr);
for (int_type i = 0; i < M.size(0); i++)
for (int_type j = 0; j < M.size(1); j++)
for (int_type i = 0; i < std::get<0>(M.sizes()); i++)
for (int_type j = 0; j < std::get<1>(M.sizes()); j++)
if (std::abs(M[i][j]) > cutoff)
++counts[j];
}
@ -99,15 +99,15 @@ CSR construct_csr_matrix_single_input(MultiArray2D&& M, double cutoff, char TA,
}
else if (TA == 'T')
{
for (int_type i = 0; i < M.size(1); i++)
for (int_type j = 0; j < M.size(0); j++)
for (int_type i = 0; i < std::get<1>(M.sizes()); i++)
for (int_type j = 0; j < std::get<0>(M.sizes()); j++)
if (std::abs(M[j][i]) > cutoff)
csr_mat.emplace_back({i, j}, static_cast<typename CSR::value_type>(M[j][i]));
}
else if (TA == 'H')
{
for (int_type i = 0; i < M.size(1); i++)
for (int_type j = 0; j < M.size(0); j++)
for (int_type i = 0; i < std::get<1>(M.sizes()); i++)
for (int_type j = 0; j < std::get<0>(M.sizes()); j++)
if (std::abs(M[j][i]) > cutoff)
csr_mat.emplace_back({i, j}, static_cast<typename CSR::value_type>(ma::conj(M[j][i])));
}

View File

@ -71,15 +71,15 @@ inline void write_distributed_MA(MultiArray& A,
std::vector<size_t> ndim(4 * nnodes_per_TG);
ndim[0] = offset[0];
ndim[1] = offset[1];
ndim[2] = A.size(0);
ndim[3] = A.size(1);
ndim[2] = std::get<0>(A.sizes());
ndim[3] = std::get<1>(A.sizes());
TG.TG_Cores().all_reduce_in_place_n(ndim.begin(), ndim.size(), std::plus<>());
// write local piece
{
using Mat_ref = boost::multi::array_ref<value_type, 2>;
Mat_ref A_(to_address(A.origin()), A.extensions());
hyperslab_proxy<Mat_ref, 2> slab(A_, gdim, std::array<size_t, 2>{size_t(A.size(0)), size_t(A.size(1))}, offset);
hyperslab_proxy<Mat_ref, 2> slab(A_, gdim, std::array<size_t, 2>{size_t(std::get<0>(A.sizes())), size_t(std::get<1>(A.sizes()))}, offset);
dump.write(slab, name);
}
@ -101,8 +101,8 @@ inline void write_distributed_MA(MultiArray& A,
// all tasks on the TG have a section of the matrix
ndim[4 * TG.TG_Cores().rank()] = offset[0];
ndim[4 * TG.TG_Cores().rank() + 1] = offset[1];
ndim[4 * TG.TG_Cores().rank() + 2] = A.size(0);
ndim[4 * TG.TG_Cores().rank() + 3] = A.size(1);
ndim[4 * TG.TG_Cores().rank() + 2] = std::get<0>(A.sizes());
ndim[4 * TG.TG_Cores().rank() + 3] = std::get<1>(A.sizes());
TG.TG_Cores().all_reduce_in_place_n(ndim.begin(), ndim.size(), std::plus<>());
TG.TG_Cores().send_n(to_address(A.origin()), A.num_elements(), 0, TG.TG_Cores().rank());
}

View File

@ -17,6 +17,8 @@
#include <complex>
#include <type_traits>
#include "multi/array.hpp"
using std::complex;
using std::string;
@ -44,8 +46,8 @@ void verify_approx(M1 const& A, M2 const& B)
// casting in case operator[] returns a fancy reference
using element1 = typename std::decay<M1>::type::element;
using element2 = typename std::decay<M2>::type::element;
REQUIRE(A.size(0) == B.size(0));
for (int i = 0; i < A.size(0); i++)
REQUIRE(std::get<0>(A.sizes()) == std::get<0>(B.sizes()));
for (int i = 0; i < std::get<0>(A.sizes()); i++)
myREQUIRE(element1(A[i]), element2(B[i]));
}
@ -56,8 +58,8 @@ template<class M1,
typename = void>
void verify_approx(M1 const& A, M2 const& B)
{
REQUIRE(A.size(0) == B.size(0));
for (int i = 0; i < A.size(0); i++)
REQUIRE(A.size() == B.size());
for (int i = 0; i < A.size(); i++)
verify_approx(A[i], B[i]);
}

View File

@ -42,19 +42,19 @@ template<class T,
MultiArray1D axpy(char TA, T a, SparseArray1D&& x, MultiArray1D&& y)
{
using ma::conj;
assert(x.size(0) == y.size(0));
assert(std::get<0>(x.sizes()) == std::get<0>(y.sizes()));
auto vals = x.non_zero_values_data();
auto cols = x.non_zero_indices2_data();
if (TA == 'C')
for (std::size_t i = 0, iend = x.num_non_zero_elements(); i < iend; ++i, ++vals, ++cols)
{
assert(*cols >= 0 && *cols < y.size(0));
assert(*cols >= 0 && *cols < y.size());
y[*cols] += ma::conj(*vals) * a;
}
else
for (std::size_t i = 0, iend = x.num_non_zero_elements(); i < iend; ++i, ++vals, ++cols)
{
assert(*cols >= 0 && *cols < y.size(0));
assert(*cols >= 0 && *cols < y.size());
y[*cols] += (*vals) * a;
}
return std::forward<MultiArray1D>(y);
@ -378,8 +378,8 @@ MultiArray2D transpose(csr_matrix&& A, MultiArray2D&& AT)
{
using integer = typename std::decay<csr_matrix>::type::index_type;
using Type = typename std::decay<MultiArray2D>::type::element;
assert(A.size(0) == AT.size(1));
assert(A.size(1) == AT.size(0));
assert(std::get<0>(A.sizes()) == std::get<1>(AT.sizes()));
assert(std::get<1>(A.sizes()) == std::get<0>(AT.sizes()));
auto& comm = *A.getAlloc().commP_;
integer r0, rN, nrows = integer(A.size(0));
integer rank = comm.rank(), size = comm.size();

View File

@ -48,14 +48,14 @@ MultiArray2DY&& copy(MultiArray2DX&& x, MultiArray2DY&& y)
assert(x.stride(1) == 1);
assert(y.stride(1) == 1);
assert(x.size() == y.size());
assert(x.size(1) == y.size(1));
if ((x.stride() == x.size(1)) && (y.stride() == y.size(1)))
assert(std::get<1>(x.sizes()) == std::get<1>(y.sizes()));
if ((x.stride() == std::get<1>(x.sizes())) && (y.stride() == std::get<1>(y.sizes())))
{
copy(x.num_elements(), pointer_dispatch(x.origin()), 1, pointer_dispatch(y.origin()), 1);
}
else
{
copy2D(x.size(), x.size(1), pointer_dispatch(x.origin()), x.stride(), pointer_dispatch(y.origin()), y.stride());
copy2D(x.size(), std::get<1>(x.sizes()), pointer_dispatch(x.origin()), x.stride(), pointer_dispatch(y.origin()), y.stride());
}
return std::forward<MultiArray2DY>(y);
}
@ -70,16 +70,16 @@ MultiArrayNDY&& copy(MultiArrayNDX&& x, MultiArrayNDY&& y)
{
#ifndef NDEBUG
// only on contiguous arrays
long sz(x.size());
for (int i = 1; i < int(std::decay<MultiArrayNDX>::type::dimensionality); ++i)
sz *= x.size(i);
assert(x.num_elements() == sz);
assert(x.stride(std::decay<MultiArrayNDX>::type::dimensionality - 1) == 1);
sz = y.size();
for (int i = 1; i < int(std::decay<MultiArrayNDY>::type::dimensionality); ++i)
sz *= y.size(i);
assert(y.num_elements() == sz);
assert(y.stride(std::decay<MultiArrayNDY>::type::dimensionality - 1) == 1);
// long sz(x.size());
// for (int i = 1; i < int(std::decay<MultiArrayNDX>::type::dimensionality); ++i)
// sz *= x.size(i);
// assert(x.num_elements() == sz);
assert(std::get<std::decay<MultiArrayNDX>::type::dimensionality - 1>(x.strides()) == 1);
// sz = y.size();
// for (int i = 1; i < int(std::decay<MultiArrayNDY>::type::dimensionality); ++i)
// sz *= y.size(i);
// assert(y.num_elements() == sz);
assert(std::get<std::decay<MultiArrayNDY>::type::dimensionality - 1>(y.strides()) == 1);
assert(x.num_elements() == y.num_elements());
#endif
copy(x.num_elements(), pointer_dispatch(x.origin()), 1, pointer_dispatch(y.origin()), 1);
@ -104,9 +104,9 @@ template<class MultiArray2Dx,
typename = void>
typename std::decay<MultiArray2Dx>::type::element dot(MultiArray2Dx&& x, MultiArray2Dy&& y)
{
assert(x.stride() == x.size(1)); // only on contiguous arrays
assert(x.stride() == std::get<1>(x.sizes())); // only on contiguous arrays
assert(x.stride(1) == 1); // only on contiguous arrays
assert(y.stride() == y.size(1)); // only on contiguous arrays
assert(y.stride() == std::get<1>(y.sizes())); // only on contiguous arrays
assert(y.stride(1) == 1); // only on contiguous arrays
assert(x.num_elements() == y.num_elements());
return dot(x.num_elements(), pointer_dispatch(x.origin()), 1, pointer_dispatch(y.origin()), 1);
@ -129,11 +129,11 @@ template<class T,
MultiArrayND&& scal(T a, MultiArrayND&& x)
{
#ifndef NDEBUG
long sz(x.size());
for (int i = 1; i < int(std::decay<MultiArrayND>::type::dimensionality); ++i)
sz *= x.size(i);
assert(x.num_elements() == sz);
assert(x.stride(std::decay<MultiArrayND>::type::dimensionality - 1) == 1); // only on contiguous arrays
// long sz(x.size());
// for (int i = 1; i < int(std::decay<MultiArrayND>::type::dimensionality); ++i)
// sz *= x.size(i);
// assert(x.num_elements() == sz);
assert(std::get<std::decay<MultiArrayND>::type::dimensionality - 1>(x.strides()) == 1); // only on contiguous arrays
#endif
scal(x.num_elements(), a, pointer_dispatch(x.origin()), 1);
return std::forward<MultiArrayND>(x);
@ -183,7 +183,7 @@ MultiArray2DB&& axpy(T x, MultiArray2DA const& a, MultiArray2DB&& b)
assert(a.num_elements() == b.num_elements());
assert(a.stride() == a.size(1)); // only on contiguous arrays
assert(a.stride(1) == 1); // only on contiguous arrays
assert(b.stride() == b.size(1)); // only on contiguous arrays
assert(b.stride() == std::get<1>(b.sizes())); // only on contiguous arrays
assert(b.stride(1) == 1); // only on contiguous arrays
axpy(a.num_elements(), x, pointer_dispatch(a.origin()), 1, pointer_dispatch(b.origin()), 1);
return std::forward<MultiArray2DB>(b);
@ -201,11 +201,11 @@ MultiArray1DY&& gemv(T alpha, MultiArray2DA const& A, MultiArray1DX const& x, T
{
assert((IN == 'N') || (IN == 'T') || (IN == 'C'));
if (IN == 'T' or IN == 'C')
assert(x.size() == A.size(1) and y.size() == A.size());
assert(x.size() == std::get<1>(A.sizes()) and y.size() == A.size());
else if (IN == 'N')
assert(x.size() == A.size() and y.size() == A.size(1));
assert(x.size() == A.size() and y.size() == std::get<1>(A.sizes()));
assert(A.stride(1) == 1); // gemv is not implemented for arrays with non-leading stride != 1
int M = A.size(1);
int M = std::get<1>(A.sizes());
int N = A.size();
gemv(IN, M, N, alpha, pointer_dispatch(A.origin()), A.stride(), pointer_dispatch(x.origin()), x.stride(), beta,
pointer_dispatch(y.origin()), y.stride());
@ -244,31 +244,31 @@ MultiArray2DC&& gemm(T alpha, MultiArray2DA const& a, MultiArray2DB const& b, T
int K = -1;
if (TA == 'N' and TB == 'N')
{
M = a.size(1);
M = std::get<1>(a.sizes());
N = b.size();
K = a.size();
assert(a.size() == b.size(1) and c.size() == b.size() and c.size(1) == a.size(1));
assert(a.size() == std::get<1>(b.sizes()) and c.size() == b.size() and std::get<1>(c.sizes()) == std::get<1>(a.sizes()));
}
if ((TA == 'T' or TA == 'C') and (TB == 'T' or TB == 'C'))
{
M = a.size();
N = b.size(1);
K = a.size(1);
assert(a.size(1) == b.size() and c.size() == b.size(1) and c.size(1) == a.size());
N = std::get<1>(b.sizes());
K = std::get<1>(a.sizes());
assert(std::get<1>(a.sizes()) == b.size() and c.size() == std::get<1>(b.sizes()) and std::get<1>(c.sizes()) == a.size());
}
if ((TA == 'T' or TA == 'C') and TB == 'N')
{
M = a.size();
N = b.size();
K = a.size(1);
assert(a.size(1) == b.size(1) and c.size() == b.size() and c.size(1) == a.size());
K = std::get<1>(a.sizes());
assert(std::get<1>(a.sizes()) == std::get<1>(b.sizes()) and c.size() == b.size() and std::get<1>(c.sizes()) == a.size());
}
if (TA == 'N' and (TB == 'T' or TB == 'C'))
{
M = std::get<1>(a.sizes());
N = std::get<1>(b.sizes());
K = a.size();
assert(a.size() == b.size() and c.size() == b.size(1) and c.size(1) == a.size(1));
assert(a.size() == b.size() and c.size() == std::get<1>(b.sizes()) and std::get<1>(c.sizes()) == std::get<1>(a.sizes()));
}
gemm(TA, TB, M, N, K, alpha, pointer_dispatch(a.origin()), a.stride(), pointer_dispatch(b.origin()), b.stride(),
beta, pointer_dispatch(c.origin()), c.stride());
@ -299,31 +299,31 @@ MultiArray3DC&& gemmStridedBatched(T alpha, MultiArray3DA const& a, MultiArray3D
int K = -1;
if (TA == 'N' and TB == 'N')
{
M = a.size(2);
N = b.size(1);
K = a.size(1);
assert(a.size(1) == b.size(2) and c.size(1) == b.size(1) and c.size(2) == a.size(2));
M = std::get<2>(a.sizes());
N = std::get<1>(b.sizes());
K = std::get<1>(a.sizes());
assert(std::get<1>(a.sizes()) == std::get<2>(b.sizes()) and std::get<1>(c.sizes()) == std::get<1>(b.sizes()) and std::get<2>(c.sizes()) == std::get<2>(a.sizes()));
}
if ((TA == 'T' or TA == 'C') and (TB == 'T' or TB == 'C'))
{
M = a.size(1);
N = b.size(2);
K = a.size(2);
assert(a.size(2) == b.size(1) and c.size(1) == b.size(2) and c.size(2) == a.size(1));
M = std::get<1>(a.sizes());
N = std::get<2>(b.sizes());
K = std::get<2>(a.sizes());
assert(std::get<2>(a.sizes()) == std::get<1>(b.sizes()) and std::get<1>(c.sizes()) == std::get<2>(b.sizes()) and std::get<2>(c.sizes()) == std::get<1>(a.sizes()));
}
if ((TA == 'T' or TA == 'C') and TB == 'N')
{
M = a.size(1);
N = b.size(1);
K = a.size(2);
assert(a.size(2) == b.size(2) and c.size(1) == b.size(1) and c.size(2) == a.size(1));
M = std::get<1>(a.sizes());
N = std::get<1>(b.sizes());
K = std::get<2>(a.sizes());
assert(std::get<2>(a.sizes()) == std::get<2>(b.sizes()) and std::get<1>(c.sizes()) == std::get<1>(b.sizes()) and std::get<2>(c.sizes()) == std::get<1>(a.sizes()));
}
if (TA == 'N' and (TB == 'T' or TB == 'C'))
{
M = a.size(2);
N = b.size(2);
K = a.size(1);
assert(a.size(1) == b.size(1) and c.size(1) == b.size(2) and c.size(2) == a.size(2));
M = std::get<2>(a.sizes());
N = std::get<2>(b.sizes());
K = std::get<1>(a.sizes());
assert(std::get<1>(a.sizes()) == std::get<1>(b.sizes()) and std::get<1>(c.sizes()) == std::get<2>(b.sizes()) and std::get<2>(c.sizes()) == std::get<2>(a.sizes()));
}
gemmStridedBatched(TA, TB, M, N, K, alpha, pointer_dispatch(a.origin()), a.stride(1), a.stride(),
pointer_dispatch(b.origin()), b.stride(1), b.stride(), beta, pointer_dispatch(c.origin()),
@ -355,25 +355,25 @@ MultiArray2DC&& geam(T alpha, MultiArray2DA const& a, T beta, MultiArray2DB cons
assert((TB == 'N') || (TB == 'T') || (TB == 'C'));
if (TA == 'N' and TB == 'N')
{
assert(a.size() == c.size() and a.size(1) == c.size(1));
assert(b.size() == c.size() and b.size(1) == c.size(1));
assert(a.size() == c.size() and std::get<1>(a.sizes()) == std::get<1>(c.sizes()));
assert(b.size() == c.size() and std::get<1>(b.sizes()) == std::get<1>(c.sizes()));
}
if ((TA == 'T' or TA == 'C') and (TB == 'T' or TB == 'C'))
{
assert(a.size(1) == c.size() and a.size() == c.size(1));
assert(b.size(1) == c.size() and b.size() == c.size(1));
assert(std::get<1>(a.sizes()) == c.size() and a.size() == std::get<1>(c.sizes()));
assert(std::get<1>(b.sizes()) == c.size() and b.size() == std::get<1>(c.sizes()));
}
if ((TA == 'T' or TA == 'C') and TB == 'N')
{
assert(a.size(1) == c.size() and a.size() == c.size(1));
assert(b.size() == c.size() and b.size(1) == c.size(1));
assert(std::get<1>(a.sizes()) == c.size() and a.size() == std::get<1>(c.sizes()));
assert(b.size() == c.size() and std::get<1>(b.sizes()) == std::get<1>(c.sizes()));
}
if (TA == 'N' and (TB == 'T' or TB == 'C'))
{
assert(a.size() == c.size() and a.size(1) == c.size(1));
assert(b.size(1) == c.size() and b.size() == c.size(1));
assert(a.size() == c.size() and std::get<1>(a.sizes()) == std::get<1>(c.sizes()));
assert(std::get<1>(b.sizes()) == c.size() and b.size() == std::get<1>(c.sizes()));
}
geam(TA, TB, c.size(1), c.size(), alpha, pointer_dispatch(a.origin()), a.stride(), beta,
geam(TA, TB, std::get<1>(c.sizes()), c.size(), alpha, pointer_dispatch(a.origin()), a.stride(), beta,
pointer_dispatch(b.origin()), b.stride(), pointer_dispatch(c.origin()), c.stride());
return std::forward<MultiArray2DC>(c);
}
@ -391,13 +391,13 @@ MultiArray2DC&& geam(T alpha, MultiArray2DA const& a, MultiArray2DC&& c)
assert((TA == 'N') || (TA == 'T') || (TA == 'C'));
if (TA == 'N')
{
assert(a.size() == c.size() and a.size(1) == c.size(1));
assert(a.size() == c.size() and std::get<1>(a.sizes()) == std::get<1>(c.sizes()));
}
if ((TA == 'T' or TA == 'C'))
{
assert(a.size(1) == c.size() and a.size() == c.size(1));
assert(std::get<1>(a.sizes()) == c.size() and a.size() == std::get<1>(c.sizes()));
}
geam(TA, TA, c.size(1), c.size(), alpha, pointer_dispatch(a.origin()), a.stride(), T(0),
geam(TA, TA, std::get<1>(c.sizes()), c.size(), alpha, pointer_dispatch(a.origin()), a.stride(), T(0),
pointer_dispatch(a.origin()), a.stride(), pointer_dispatch(c.origin()), c.stride());
return std::forward<MultiArray2DC>(c);
}

View File

@ -51,10 +51,10 @@ template<class T,
typename = typename std::enable_if<std::decay<MultiArray1D>::type::dimensionality == 1>::type>
void adotpby(T const alpha, MultiArray2Dx const& x, MultiArray2Dy const& y, Q const beta, MultiArray1D res)
{
if (x.size(0) != y.size(0) || x.size(0) != res.size(0) || x.size(1) != y.size(1) || x.stride(1) != 1 ||
if (std::get<0>(x.sizes()) != std::get<0>(y.sizes()) || std::get<0>(x.sizes()) != std::get<0>(res.sizes()) || std::get<1>(x.sizes()) != std::get<1>(y.sizes()) || x.stride(1) != 1 ||
y.stride(1) != 1)
throw std::runtime_error(" Error: Inconsistent matrix dimensions in adotpby(2D).\n");
strided_adotpby(x.size(0), x.size(1), alpha, pointer_dispatch(x.origin()), x.stride(0), pointer_dispatch(y.origin()),
strided_adotpby(std::get<0>(x.sizes()), std::get<1>(x.sizes()), alpha, pointer_dispatch(x.origin()), x.stride(0), pointer_dispatch(y.origin()),
y.stride(0), beta, to_address(res.origin()), res.stride(0));
}
@ -80,9 +80,9 @@ MultiArray2DB&& axty(T const alpha, MultiArray2DA const& A, MultiArray2DB&& B)
{
assert(A.num_elements() == B.num_elements());
assert(A.stride(1) == 1);
assert(A.stride(0) == A.size(1));
assert(A.stride(0) == std::get<1>(A.sizes()));
assert(B.stride(1) == 1);
assert(B.stride(0) == B.size(1));
assert(B.stride(0) == std::get<1>(B.sizes()));
axty(A.num_elements(), alpha, pointer_dispatch(A.origin()), 1, pointer_dispatch(B.origin()), 1);
return B;
}
@ -99,10 +99,10 @@ template<class T,
MultiArray2DB&& acAxpbB(T const alpha, MultiArray2DA const& A, MultiArray1D const& x, T const beta, MultiArray2DB&& B)
{
assert(A.num_elements() == B.num_elements());
assert(A.size(0) == B.size(0));
assert(A.size(1) == B.size(1));
assert(A.size(1) == x.size(0));
acAxpbB(A.size(1), A.size(0), alpha, pointer_dispatch(A.origin()), A.stride(0), pointer_dispatch(x.origin()),
assert(std::get<0>(A.sizes()) == std::get<0>(B.sizes()));
assert(std::get<1>(A.sizes()) == std::get<1>(B.sizes()));
assert(std::get<1>(A.sizes()) == std::get<0>(x.sizes()));
acAxpbB(std::get<1>(A.sizes()), std::get<0>(A.sizes()), alpha, pointer_dispatch(A.origin()), A.stride(0), pointer_dispatch(x.origin()),
x.stride(0), beta, pointer_dispatch(B.origin()), B.stride(0));
return B;
}
@ -114,8 +114,8 @@ template<class T,
typename = typename std::enable_if<std::decay<MultiArray1Dy>::type::dimensionality == 1>::type>
MultiArray1Dy&& adiagApy(T const alpha, MultiArray2DA const& A, MultiArray1Dy&& y)
{
assert(A.size(0) == A.size(1));
assert(A.size(0) == y.size());
assert(std::get<0>(A.sizes()) == std::get<1>(A.sizes()));
assert(std::get<0>(A.sizes()) == y.size());
adiagApy(y.size(), alpha, pointer_dispatch(A.origin()), A.stride(0), pointer_dispatch(y.origin()), y.stride(0));
return y;
}
@ -134,7 +134,7 @@ auto sum(MultiArray2D const& A)
{
assert(A.stride(1) == 1);
// blas call assumes fortran ordering
return sum(A.size(1), A.size(0), pointer_dispatch(A.origin()), A.stride(0));
return sum(std::get<1>(A.sizes()), std::get<0>(A.sizes()), pointer_dispatch(A.origin()), A.stride(0));
}
template<class MultiArray3D,
@ -144,8 +144,8 @@ template<class MultiArray3D,
auto sum(MultiArray3D const& A)
{
// only arrays and array_refs for now
assert(A.stride(0) == A.size(1) * A.size(2));
assert(A.stride(1) == A.size(2));
assert(A.stride(0) == std::get<1>(A.sizes()) * std::get<2>(A.sizes()));
assert(A.stride(1) == std::get<2>(A.sizes()));
assert(A.stride(2) == 1);
return sum(A.num_elements(), pointer_dispatch(A.origin()), 1);
}
@ -158,9 +158,9 @@ template<class MultiArray4D,
auto sum(MultiArray4D const& A)
{
// only arrays and array_refs for now
assert(A.stride(0) == A.size(1) * A.size(2) * A.size(3));
assert(A.stride(1) == A.size(2) * A.size(3));
assert(A.stride(2) == A.size(3));
assert(A.stride(0) == std::get<1>(A.sizes()) * std::get<2>(A.sizes()) * std::get<3>(A.sizes()));
assert(A.stride(1) == std::get<2>(A.sizes()) * std::get<3>(A.sizes()));
assert(A.stride(2) == std::get<3>(A.sizes()));
assert(A.stride(3) == 1);
return sum(A.num_elements(), pointer_dispatch(A.origin()), 1);
}
@ -170,7 +170,7 @@ template<class T,
typename = typename std::enable_if<std::decay<MultiArray1D>::type::dimensionality == 1>>
MultiArray1D&& setVector(T alpha, MultiArray1D&& a)
{
set1D(a.size(0), alpha, pointer_dispatch(a.origin()), a.stride(0));
set1D(std::get<0>(a.sizes()), alpha, pointer_dispatch(a.origin()), a.stride(0));
return std::forward<MultiArray1D>(a);
}
@ -183,7 +183,7 @@ void zero_complex_part(MultiArray1D&& a)
template<class MultiArray2D, typename = std::enable_if_t<std::decay<MultiArray2D>::type::dimensionality == 2>>
MultiArray2D&& set_identity(MultiArray2D&& m)
{
set_identity(m.size(1), m.size(0), pointer_dispatch(m.origin()), m.stride(0));
set_identity(std::get<1>(m.sizes()), std::get<0>(m.sizes()), pointer_dispatch(m.origin()), m.stride(0));
return std::forward<MultiArray2D>(m);
}
@ -192,7 +192,7 @@ template<class MultiArray3D,
typename = void>
MultiArray3D&& set_identity(MultiArray3D&& m)
{
set_identity_strided(m.size(0), m.stride(0), m.size(2), m.size(1), pointer_dispatch(m.origin()), m.stride(1));
set_identity_strided(std::get<0>(m.sizes()), m.stride(0), std::get<2>(m.sizes()), std::get<1>(m.sizes()), pointer_dispatch(m.origin()), m.stride(1));
return std::forward<MultiArray3D>(m);
}
@ -202,7 +202,7 @@ template<class T,
MultiArray2D&& fill(MultiArray2D&& m, T const& value)
{
using qmcplusplus::afqmc::fill2D;
fill2D(m.size(0), m.size(1), pointer_dispatch(m.origin()), m.stride(0), value);
fill2D(std::get<0>(m.sizes()), std::get<1>(m.sizes()), pointer_dispatch(m.origin()), m.stride(0), value);
return std::forward<MultiArray2D>(m);
}
@ -213,10 +213,10 @@ template<class MultiArray3D,
typename = typename std::enable_if<std::decay<MultiArray2D>::type::dimensionality == 2>>
void get_diagonal_strided(MultiArray3D const& B, MultiArray2D&& A)
{
if (A.size(0) != B.size(0) || A.size(1) != B.size(1) || A.size(1) != B.size(2) || A.stride(1) != 1 ||
if (std::get<0>(A.sizes()) != std::get<0>(B.sizes()) || std::get<1>(A.sizes()) != std::get<1>(B.sizes()) || std::get<1>(A.sizes()) != std::get<2>(B.sizes()) || A.stride(1) != 1 ||
B.stride(2) != 1)
throw std::runtime_error(" Error: Inconsistent matrix dimensions in get_diagonal_strided.\n");
get_diagonal_strided(A.size(0), A.size(1), pointer_dispatch(B.origin()), B.stride(1), B.stride(0),
get_diagonal_strided(std::get<0>(A.sizes()), std::get<1>(A.sizes()), pointer_dispatch(B.origin()), B.stride(1), B.stride(0),
pointer_dispatch(A.origin()), A.stride(0));
}
@ -231,13 +231,13 @@ void Matrix2MA(char TA, CSR const& A, MultiArray2D& M)
assert(TA == 'N' || TA == 'H' || TA == 'T' || TA == 'Z');
if (TA == 'N' || TA == 'Z')
{
if (M.size(0) != A.size(0) or M.size(1) != A.size(1))
if (std::get<0>(M.sizes()) != std::get<0>(A.sizes()) or std::get<1>(M.sizes()) != std::get<1>(A.sizes()))
M.reextent({static_cast<typename MultiArray2D::size_type>(A.size(0)), static_cast<typename MultiArray2D::size_type>(A.size(1))});
}
else if (TA == 'T' || TA == 'H')
{
if (M.size(0) != A.size(1) or M.size(1) != A.size(0))
M.reextent({static_cast<typename MultiArray2D::size_type>(A.size(1)), static_cast<typename MultiArray2D::size_type>(A.size(0))});
if (std::get<0>(M.sizes()) != std::get<1>(A.sizes()) or std::get<1>(M.sizes()) != std::get<0>(A.sizes()))
M.reextent({static_cast<typename MultiArray2D::size_type>(std::get<1>(A.sizes())), static_cast<typename MultiArray2D::size_type>(std::get<0>(A.sizes()))});
}
else
{
@ -290,9 +290,9 @@ void Matrix2MAREF(char TA, CSR const& A, MultiArray2D& M)
using Type = typename MultiArray2D::element;
using int_type = typename CSR::int_type;
assert(TA == 'N' || TA == 'H' || TA == 'T' || TA == 'Z');
if ((TA == 'N' || TA == 'Z') && ((M.size(0) != A.size(0)) || (M.size(1) != A.size(1))))
if ((TA == 'N' || TA == 'Z') && ((std::get<0>(M.sizes()) != std::get<0>(A.sizes())) || (std::get<1>(M.sizes()) != std::get<1>(A.sizes()))))
throw std::runtime_error(" Error: Wrong dimensions in Matrix2MAREF.\n");
else if ((TA == 'T' || TA == 'H') && ((M.size(0) != A.size(1)) || (M.size(1) != A.size(0))))
else if ((TA == 'T' || TA == 'H') && ((std::get<0>(M.sizes()) != std::get<1>(A.sizes())) || (std::get<1>(M.sizes()) != std::get<0>(A.sizes()))))
throw std::runtime_error(" Error: Wrong dimensions in Matrix2MAREF.\n");
using std::fill_n;
fill_n(M.origin(), M.num_elements(), Type(0));
@ -348,13 +348,13 @@ void Matrix2MA(char TA, CSR const& A, MultiArray2D& M, Vector const& occups)
assert(TA == 'N' || TA == 'H' || TA == 'T' || TA == 'Z');
if (TA == 'N' || TA == 'Z')
{
if (M.size(0) != nrows || M.size(1) != A.size(1))
M.reextent({nrows, static_cast<typename MultiArray2D::size_type>(A.size(1))});
if (std::get<0>(M.sizes()) != nrows || std::get<1>(M.sizes()) != std::get<1>(A.sizes()))
M.reextent({nrows, static_cast<typename MultiArray2D::size_type>(std::get<1>(A.sizes()))});
}
else if (TA == 'T' || TA == 'H')
{
if (M.size(1) != nrows || M.size(0) != A.size(1))
M.reextent({static_cast<typename MultiArray2D::size_type>(A.size(1)), nrows});
if (std::get<1>(M.sizes()) != nrows || std::get<0>(M.sizes()) != std::get<1>(A.sizes()))
M.reextent({static_cast<typename MultiArray2D::size_type>(std::get<1>(A.sizes())), nrows});
}
else
throw std::runtime_error(" Error: Unknown operation in Matrix2MA.\n");
@ -369,7 +369,7 @@ void Matrix2MA(char TA, CSR const& A, MultiArray2D& M, Vector const& occups)
{
for (int i = 0; i < nrows; i++)
{
assert(occups[i] >= 0 && occups[i] < A.size(0));
assert(occups[i] >= 0 && occups[i] < A.size());
int ik = occups[i];
for (int ip = pbegin[ik]; ip < pend[ik]; ip++)
M[i][c0[ip - p0]] = static_cast<Type>(v0[ip - p0]);
@ -379,7 +379,7 @@ void Matrix2MA(char TA, CSR const& A, MultiArray2D& M, Vector const& occups)
{
for (int i = 0; i < nrows; i++)
{
assert(occups[i] >= 0 && occups[i] < A.size(0));
assert(occups[i] >= 0 && occups[i] < A.size());
int ik = occups[i];
for (int ip = pbegin[ik]; ip < pend[ik]; ip++)
M[i][c0[ip - p0]] = static_cast<Type>(ma::conj(v0[ip - p0]));
@ -389,7 +389,7 @@ void Matrix2MA(char TA, CSR const& A, MultiArray2D& M, Vector const& occups)
{
for (int i = 0; i < nrows; i++)
{
assert(occups[i] >= 0 && occups[i] < A.size(0));
assert(occups[i] >= 0 && occups[i] < A.size());
int ik = occups[i];
for (int ip = pbegin[ik]; ip < pend[ik]; ip++)
M[c0[ip - p0]][i] = static_cast<Type>(v0[ip - p0]);
@ -399,7 +399,7 @@ void Matrix2MA(char TA, CSR const& A, MultiArray2D& M, Vector const& occups)
{
for (int i = 0; i < nrows; i++)
{
assert(occups[i] >= 0 && occups[i] < A.size(0));
assert(occups[i] >= 0 && occups[i] < A.size());
int ik = occups[i];
for (int ip = pbegin[ik]; ip < pend[ik]; ip++)
M[c0[ip - p0]][i] = static_cast<Type>(ma::conj(v0[ip - p0]));
@ -419,13 +419,13 @@ void Matrix2MA(char TA, MA const& A, MultiArray2D& M)
assert(TA == 'N' || TA == 'H' || TA == 'T' || TA == 'Z');
if (TA == 'N' || TA == 'Z')
{
if (M.size(0) != A.size(0) or M.size(1) != A.size(1))
M.reextent({A.size(0), A.size(1)});
if (std::get<0>(M.sizes()) != std::get<0>(A.sizes()) or std::get<1>(M.sizes()) != std::get<1>(A.sizes()))
M.reextent({std::get<0>(A.sizes()), std::get<1>(A.sizes())});
}
else if (TA == 'T' || TA == 'H')
{
if (M.size(0) != A.size(1) or M.size(1) != A.size(0))
M.reextent({A.size(1), A.size(0)});
if (std::get<0>(M.sizes()) != std::get<1>(A.sizes()) or std::get<1>(M.sizes()) != std::get<0>(A.sizes()))
M.reextent({std::get<1>(A.sizes()), std::get<0>(A.sizes())});
}
else
{
@ -439,34 +439,34 @@ void Matrix2MA(char TA, MA const& A, MultiArray2D& M)
TA = 'C';
if (TA == 'Z')
{
for (int i = 0; i < M.size(0); i++)
for (int j = 0; j < M.size(1); j++)
for (int i = 0; i < std::get<0>(M.sizes()); i++)
for (int j = 0; j < std::get<1>(M.sizes()); j++)
M[i][j] = ma::conj(A[i][j]);
}
else if (not std::is_same<ptrA, ptrM>::value)
{
if (TA == 'N')
{
for (int i = 0; i < M.size(0); i++)
for (int j = 0; j < M.size(1); j++)
for (int i = 0; i < std::get<0>(M.sizes()); i++)
for (int j = 0; j < std::get<1>(M.sizes()); j++)
M[i][j] = A[i][j];
}
else if (TA == 'T')
{
for (int i = 0; i < M.size(0); i++)
for (int j = 0; j < M.size(1); j++)
for (int i = 0; i < std::get<0>(M.sizes()); i++)
for (int j = 0; j < std::get<1>(M.sizes()); j++)
M[i][j] = A[j][i];
}
else if (TA == 'C')
{
for (int i = 0; i < M.size(0); i++)
for (int j = 0; j < M.size(1); j++)
for (int i = 0; i < std::get<0>(M.sizes()); i++)
for (int j = 0; j < std::get<1>(M.sizes()); j++)
M[i][j] = ma::conj(A[j][i]);
}
}
else
{
geam(TA, TA, M.size(1), M.size(0), Type2(1.0), pointer_dispatch(A.origin()), A.stride(0), Type2(0.0),
geam(TA, TA, std::get<1>(M.sizes()), std::get<0>(M.sizes()), Type2(1.0), pointer_dispatch(A.origin()), A.stride(0), Type2(0.0),
pointer_dispatch(A.origin()), A.stride(0), pointer_dispatch(M.origin()), M.stride(0));
}
}
@ -483,12 +483,12 @@ void Matrix2MAREF(char TA, MA const& A, MultiArray2D& M)
assert(TA == 'N' || TA == 'H' || TA == 'T' || TA == 'Z');
if (TA == 'N' || TA == 'Z')
{
if (M.size(0) != A.size(0) or M.size(1) != A.size(1))
if (std::get<0>(M.sizes()) != std::get<0>(A.sizes()) or std::get<1>(M.sizes()) != std::get<1>(A.sizes()))
throw std::runtime_error(" Error: Wrong dimensions in Matrix2MAREF.\n");
}
else if (TA == 'T' || TA == 'H')
{
if (M.size(0) != A.size(1) or M.size(1) != A.size(0))
if (std::get<0>(M.sizes()) != std::get<1>(A.sizes()) or std::get<1>(M.sizes()) != std::get<0>(A.sizes()))
throw std::runtime_error(" Error: Wrong dimensions in Matrix2MAREF.\n");
}
else
@ -504,34 +504,34 @@ void Matrix2MAREF(char TA, MA const& A, MultiArray2D& M)
if (TA == 'Z')
{
// bad i gpu's
for (int i = 0; i < M.size(0); i++)
for (int j = 0; j < M.size(1); j++)
for (int i = 0; i < std::get<0>(M.sizes()); i++)
for (int j = 0; j < std::get<1>(M.sizes()); j++)
M[i][j] = ma::conj(A[i][j]);
}
else if (not std::is_same<ptrA, ptrM>::value)
{
if (TA == 'N')
{
for (int i = 0; i < M.size(0); i++)
for (int j = 0; j < M.size(1); j++)
for (int i = 0; i < std::get<0>(M.sizes()); i++)
for (int j = 0; j < std::get<1>(M.sizes()); j++)
M[i][j] = A[i][j];
}
else if (TA == 'T')
{
for (int i = 0; i < M.size(0); i++)
for (int j = 0; j < M.size(1); j++)
for (int i = 0; i < std::get<0>(M.sizes()); i++)
for (int j = 0; j < std::get<1>(M.sizes()); j++)
M[i][j] = A[j][i];
}
else if (TA == 'C')
{
for (int i = 0; i < M.size(0); i++)
for (int j = 0; j < M.size(1); j++)
for (int i = 0; i < std::get<0>(M.sizes()); i++)
for (int j = 0; j < std::get<1>(M.sizes()); j++)
M[i][j] = ma::conj(A[j][i]);
}
}
else
{
geam(TA, TA, M.size(1), M.size(0), Type2(1.0), pointer_dispatch(A.origin()), A.stride(0), Type2(0.0),
geam(TA, TA, std::get<1>(M.sizes()), std::get<0>(M.sizes()), Type2(1.0), pointer_dispatch(A.origin()), A.stride(0), Type2(0.0),
pointer_dispatch(A.origin()), A.stride(0), pointer_dispatch(M.origin()), M.stride(0));
}
}

View File

@ -32,19 +32,19 @@ int getrf_optimal_workspace_size(MultiArray2D&& A)
assert(A.stride(1) == 1);
int res;
getrf_bufferSize(A.size(1), A.size(0), pointer_dispatch(A.origin()), A.stride(0), res);
getrf_bufferSize(std::get<1>(A.sizes()), std::get<0>(A.sizes()), pointer_dispatch(A.origin()), A.stride(0), res);
return res;
}
template<class MultiArray2D, class Array1D, class Buffer>
MultiArray2D&& getrf(MultiArray2D&& m, Array1D& pivot, Buffer&& WORK)
{
assert(m.stride(0) >= std::max(std::size_t(1), std::size_t(m.size(1))));
assert(m.stride(0) >= std::max(std::size_t(1), std::size_t(std::get<1>(m.sizes()))));
assert(m.stride(1) == 1);
assert(pivot.size() >= std::min(m.size(1), m.size(0) + 1));
assert(pivot.size() >= std::min(std::get<1>(m.sizes()), std::get<0>(m.sizes()) + 1));
int status = -1;
getrf(m.size(1), m.size(0), pointer_dispatch(m.origin()), m.stride(0), pointer_dispatch(pivot.data()), status,
getrf(std::get<1>(m.sizes()), std::get<0>(m.sizes()), pointer_dispatch(m.origin()), m.stride(0), pointer_dispatch(pivot.data()), status,
pointer_dispatch(WORK.data()));
//assert(status==0);
return std::forward<MultiArray2D>(m);
@ -54,9 +54,9 @@ template<class MultiArray2D>
int getri_optimal_workspace_size(MultiArray2D&& A)
{
assert(A.stride(1) == 1);
assert(A.size(0) == A.size(1));
assert(std::get<0>(A.sizes()) ==std::get<1>(A.sizes()));
int lwork = -1;
getri_bufferSize(A.size(0), pointer_dispatch(A.origin()), A.stride(0), lwork);
getri_bufferSize(A.size(), pointer_dispatch(A.origin()), A.stride(), lwork);
return lwork;
}
@ -65,11 +65,11 @@ MultiArray2D&& getri(MultiArray2D&& A, MultiArray1D const& IPIV, Buffer&& WORK)
{
// assert(A.stride(0) > std::max(std::size_t(1), A.size(1)));
assert(A.stride(1) == 1);
assert(IPIV.size() >= size_t(A.size(0)));
assert(WORK.size() >= std::max(std::size_t(1), size_t(A.size(0))));
assert(IPIV.size() >= size_t(A.size()));
assert(WORK.size() >= std::max(std::size_t(1), size_t(A.size())));
int status = -1;
getri(A.size(0), pointer_dispatch(A.origin()), A.stride(0), pointer_dispatch(IPIV.data()),
getri(A.size(), pointer_dispatch(A.origin()), A.stride(), pointer_dispatch(IPIV.data()),
pointer_dispatch(WORK.data()), WORK.size(), status);
assert(status == 0);
return std::forward<MultiArray2D>(A);
@ -82,7 +82,7 @@ int geqrf_optimal_workspace_size(MultiArray2D&& A)
assert(A.stride(1) == 1);
int res;
geqrf_bufferSize(A.size(1), A.size(0), pointer_dispatch(A.origin()), A.stride(0), res);
geqrf_bufferSize(std::get<1>(A.sizes()), std::get<0>(A.sizes()), pointer_dispatch(A.origin()), A.stride(0), res);
return res;
}
@ -93,11 +93,11 @@ MultiArray2D&& geqrf(MultiArray2D&& A, Array1D&& TAU, Buffer&& WORK)
//assert(A.stride(0) > std::max(std::size_t(1), A.size(0)));
assert(A.stride(1) == 1);
assert(TAU.stride(0) == 1);
assert(TAU.size() >= std::max(std::size_t(1), size_t(std::min(A.size(0), A.size(1)))));
assert(WORK.size() >= std::max(std::size_t(1), size_t(A.size(0))));
assert(TAU.size() >= std::max(std::size_t(1), size_t(std::min(std::get<0>(A.sizes()), std::get<1>(A.sizes())))));
assert(WORK.size() >= std::max(std::size_t(1), size_t(A.size())));
int status = -1;
geqrf(A.size(1), A.size(0), pointer_dispatch(A.origin()), A.stride(0), pointer_dispatch(TAU.origin()),
geqrf(std::get<1>(A.sizes()), std::get<0>(A.sizes()), pointer_dispatch(A.origin()), A.stride(0), pointer_dispatch(TAU.origin()),
pointer_dispatch(WORK.data()), WORK.size(), status);
assert(status == 0);
return std::forward<MultiArray2D>(A);
@ -110,7 +110,7 @@ int gelqf_optimal_workspace_size(MultiArray2D&& A)
assert(A.stride(1) == 1);
int res;
gelqf_bufferSize(A.size(1), A.size(0), pointer_dispatch(A.origin()), A.stride(0), res);
gelqf_bufferSize(std::get<1>(A.sizes()), std::get<0>(A.sizes()), pointer_dispatch(A.origin()), A.stride(0), res);
return res;
}
@ -120,11 +120,11 @@ MultiArray2D&& gelqf(MultiArray2D&& A, Array1D&& TAU, Buffer&& WORK)
assert(A.stride(1) > 0);
assert(A.stride(1) == 1);
assert(TAU.stride(0) == 1);
assert(TAU.size() >= std::max(std::size_t(1), size_t(std::min(A.size(0), A.size(1)))));
assert(WORK.size() >= std::max(std::size_t(1), size_t(A.size(1))));
assert(TAU.size() >= std::max(std::size_t(1), size_t(std::min(std::get<0>(A.sizes()), std::get<1>(A.sizes())))));
assert(WORK.size() >= std::max(std::size_t(1), size_t(std::get<1>(A.sizes()))));
int status = -1;
gelqf(A.size(1), A.size(0), pointer_dispatch(A.origin()), A.stride(0), pointer_dispatch(TAU.data()),
gelqf(std::get<1>(A.sizes()), std::get<0>(A.sizes()), pointer_dispatch(A.origin()), A.stride(0), pointer_dispatch(TAU.data()),
pointer_dispatch(WORK.data()), WORK.size(), status);
assert(status == 0);
return std::forward<MultiArray2D>(A);
@ -138,7 +138,7 @@ int gqr_optimal_workspace_size(MultiArray2D&& A)
assert(A.stride(1) == 1);
int res;
gqr_bufferSize(A.size(1), A.size(0), std::max(std::size_t(1), size_t(std::min(A.size(0), A.size(1)))),
gqr_bufferSize(std::get<1>(A.sizes()), std::get<0>(A.sizes()), std::max(std::size_t(1), size_t(std::min(std::get<0>(A.sizes()), std::get<1>(A.sizes())))),
pointer_dispatch(A.origin()), A.stride(0), res);
return res;
}
@ -148,11 +148,11 @@ MultiArray2D&& gqr(MultiArray2D&& A, Array1D&& TAU, Buffer&& WORK)
{
assert(A.stride(1) == 1);
assert(TAU.stride(0) == 1);
assert(TAU.size() >= std::max(std::size_t(1), size_t(std::min(A.size(0), A.size(1)))));
assert(WORK.size() >= std::max(std::size_t(1), size_t(A.size(0))));
assert(TAU.size() >= std::max(std::size_t(1), size_t(std::min(std::get<0>(A.sizes()), std::get<1>(A.sizes())))));
assert(WORK.size() >= std::max(std::size_t(1), size_t(A.size())));
int status = -1;
gqr(A.size(1), A.size(0), std::max(std::size_t(1), size_t(std::min(A.size(0), A.size(1)))),
gqr(std::get<1>(A.sizes()), std::get<0>(A.sizes()), std::max(std::size_t(1), size_t(std::min(std::get<0>(A.sizes()), std::get<1>(A.sizes())))),
pointer_dispatch(A.origin()), A.stride(0), pointer_dispatch(TAU.origin()), pointer_dispatch(WORK.data()),
WORK.size(), status);
assert(status == 0);
@ -166,7 +166,7 @@ int glq_optimal_workspace_size(MultiArray2D&& A)
assert(A.stride(1) == 1);
int res;
glq_bufferSize(A.size(1), A.size(0), std::max(std::size_t(1), size_t(std::min(A.size(0), A.size(1)))),
glq_bufferSize(std::get<1>(A.sizes()), std::get<0>(A.sizes()), std::max(std::size_t(1), size_t(std::min(std::get<0>(A.sizes()), std::get<1>(A.sizes())))),
pointer_dispatch(A.origin()), A.stride(0), res);
return res;
}
@ -176,11 +176,11 @@ MultiArray2D&& glq(MultiArray2D&& A, Array1D&& TAU, Buffer&& WORK)
{
assert(A.stride(1) == 1);
assert(TAU.stride(0) == 1);
assert(TAU.size() >= std::max(std::size_t(1), size_t(std::min(A.size(0), A.size(1)))));
assert(WORK.size() >= std::max(std::size_t(1), size_t(A.size(1))));
assert(TAU.size() >= std::max(std::size_t(1), size_t(std::min(std::get<0>(A.sizes()), std::get<1>(A.sizes())))));
assert(WORK.size() >= std::max(std::size_t(1), size_t(std::get<1>(A.sizes()))));
int status = -1;
glq(A.size(1), A.size(0), std::max(std::size_t(1), size_t(std::min(A.size(0), A.size(1)))),
glq(std::get<1>(A.sizes()), std::get<0>(A.sizes()), std::max(std::size_t(1), size_t(std::min(std::get<0>(A.sizes()), std::get<1>(A.sizes())))),
pointer_dispatch(A.origin()), A.stride(0), pointer_dispatch(TAU.data()), pointer_dispatch(WORK.data()),
WORK.size(), status);
assert(status == 0);
@ -190,9 +190,9 @@ MultiArray2D&& glq(MultiArray2D&& A, Array1D&& TAU, Buffer&& WORK)
template<class MultiArray2D, typename = typename std::enable_if_t<MultiArray2D::dimensionality == 2>>
MultiArray2D&& potrf(MultiArray2D&& A)
{
assert(A.size(0) == A.size(1));
assert(std::get<0>(A.sizes()) == std::get<1>(A.sizes()));
int INFO;
potrf('U', A.size(0), pointer_dispatch(A.origin()), A.stride(0), INFO);
potrf('U', A.size(), pointer_dispatch(A.origin()), A.stride(0), INFO);
if (INFO != 0)
throw std::runtime_error(" error in ma::potrf: Error code != 0");
}
@ -204,7 +204,7 @@ int gesvd_optimal_workspace_size(MultiArray2D&& A)
assert(A.stride(1) == 1);
int res;
gesvd_bufferSize(A.size(1), A.size(0), pointer_dispatch(A.origin()), res);
gesvd_bufferSize(std::get<1>(A.sizes()), std::get<0>(A.sizes()), pointer_dispatch(A.origin()), res);
return res;
}
@ -225,7 +225,7 @@ MultiArray2D&& gesvd(char jobU,
// in F: At = (U * S * VT)t = VTt * S * Ut
// so I need to switch U <--> VT when calling fortran interface
int status = -1;
gesvd(jobVT, jobU, A.size(1), A.size(0), pointer_dispatch(A.origin()), A.stride(0), pointer_dispatch(S.origin()),
gesvd(jobVT, jobU, std::get<1>(A.sizes()), std::get<0>(A.sizes()), pointer_dispatch(A.origin()), A.stride(0), pointer_dispatch(S.origin()),
pointer_dispatch(VT.origin()), VT.stride(0), // !!!
pointer_dispatch(U.origin()), U.stride(0), // !!!
pointer_dispatch(WORK.data()), WORK.size(), pointer_dispatch(RWORK.origin()), status);
@ -243,7 +243,7 @@ std::pair<MultiArray1D, MultiArray2D> symEig(MultiArray2D const& A)
using Type = typename MultiArray2D::element;
using RealType = typename qmcplusplus::afqmc::remove_complex<Type>::value_type;
using extensions = typename boost::multi::layout_t<1u>::extensions_type;
assert(A.size() == A.size(1));
assert(A.size() == std::get<1>(A.sizes()));
assert(A.stride(1) == 1);
assert(A.size() > 0);
int N = A.size();
@ -316,10 +316,10 @@ std::pair<MultiArray1D, MultiArray2D> symEigSelect(MultiArray2DA& A, int neig)
static_assert(std::is_same<Type, TypeA>::value, "Wrong types.");
using RealType = typename qmcplusplus::afqmc::remove_complex<Type>::value_type;
using extensions = typename boost::multi::layout_t<1u>::extensions_type;
assert(A.size(0) == A.size(1));
assert(std::get<0>(A.sizes()) == std::get<1>(A.sizes()));
assert(A.stride(1) == 1);
assert(A.size(0) > 0);
int N = A.size(0);
assert(std::get<0>(A.sizes()) > 0);
int N = std::get<0>(A.sizes());
int LDA = A.stride(0);
MultiArray1D eigVal(extensions{neig});
@ -394,14 +394,14 @@ std::pair<MultiArray1D, MultiArray2D> genEigSelect(MultiArray2DA& A, MultiArray2
static_assert(std::is_same<TypeA, TypeB>::value, "Wrong types.");
using RealType = typename qmcplusplus::afqmc::remove_complex<Type>::value_type;
using extensions = typename boost::multi::layout_t<1u>::extensions_type;
assert(A.size(0) == A.size(1));
assert(A.size(0) == S.size(0));
assert(S.size(0) == S.size(1));
assert(std::get<0>(A.sizes()) == std::get<1>(A.sizes()));
assert(std::get<0>(A.sizes()) == std::get<0>(S.sizes()));
assert(std::get<0>(S.sizes()) == std::get<1>(S.sizes()));
assert(A.stride(1) == 1);
assert(A.size(0) > 0);
assert(std::get<0>(A.sizes()) > 0);
assert(S.stride(1) == 1);
assert(S.size(0) > 0);
int N = A.size(0);
assert(std::get<0>(S.sizes()) > 0);
int N = std::get<0>(A.sizes());
int LDA = A.stride(0);
int LDS = S.stride(0);

View File

@ -43,10 +43,10 @@ template<class MultiArray2D, typename = typename std::enable_if<(MultiArray2D::d
bool is_hermitian(MultiArray2D const& A)
{
using ma::conj;
if (A.size() != A.size(1))
if (A.size() != std::get<1>(A.sizes()))
return false;
for (int i = 0; i != A.size(); ++i)
for (int j = i + 1; j != A.size(1); ++j)
for (int i = 0; i != std::get<0>(A.sizes()); ++i)
for (int j = i + 1; j != std::get<1>(A.sizes()); ++j)
if (std::abs(A[i][j] - ma::conj(A[j][i])) > 1e-12)
return false;
return true;
@ -260,16 +260,16 @@ MultiArray2DC&& product(T alpha, SparseMatrixA const& A, MultiArray2DB const& B,
{
assert(arg(A).size() == std::forward<MultiArray2DC>(C).size());
assert( arg(A).size(1) == arg(B).size() );
assert( arg(B).size(1) == std::get<1>(std::forward<MultiArray2DC>(C).sizes()) );
assert( std::get<1>(arg(B).sizes()) == std::get<1>(std::forward<MultiArray2DC>(C).sizes()) );
}
else
{
assert(arg(A).size() == arg(B).size());
assert(arg(A).size(1) == std::forward<MultiArray2DC>(C).size());
assert(arg(B).size(1) == std::forward<MultiArray2DC>(C).size(1));
assert(std::get<1>(arg(A).sizes()) == std::forward<MultiArray2DC>(C).size());
assert(std::get<1>(arg(B).sizes()) == std::get<1>(std::forward<MultiArray2DC>(C).sizes()));
}
csrmm(op_tag<SparseMatrixA>::value, arg(A).size(), arg(B).size(1), arg(A).size(1), elementA(alpha), "GxxCxx",
csrmm(op_tag<SparseMatrixA>::value, arg(A).size(), std::get<1>(arg(B).sizes()), std::get<1>(arg(A).sizes()), elementA(alpha), "GxxCxx",
pointer_dispatch(arg(A).non_zero_values_data()), pointer_dispatch(arg(A).non_zero_indices2_data()),
pointer_dispatch(arg(A).pointers_begin()), pointer_dispatch(arg(A).pointers_end()),
pointer_dispatch(arg(B).origin()), arg(B).stride(), elementA(beta), pointer_dispatch(C.origin()), C.stride());
@ -428,7 +428,7 @@ void BatchedProduct(char TA,
for (int i = 0; i < nbatch; i++)
{
csrmm(TA, (*A[i]).size(), (*B[i]).size(1), (*A[i]).size(1), elementA(alpha), "GxxCxx",
csrmm(TA, (*A[i]).size(), std::get<1>((*B[i]).sizes()), std::get<1>((*A[i]).sizes()), elementA(alpha), "GxxCxx",
pointer_dispatch((*A[i]).non_zero_values_data()), pointer_dispatch((*A[i]).non_zero_indices2_data()),
pointer_dispatch((*A[i]).pointers_begin()), pointer_dispatch((*A[i]).pointers_end()),
pointer_dispatch((*B[i]).origin()), (*B[i]).stride(), elementA(beta), pointer_dispatch((*C[i]).origin()),

View File

@ -277,7 +277,7 @@ TEST_CASE("Awiu_Biu_Cuw", "[Numerics][batched_operations]")
ComplexType alpha = 0.5;
// C = alpha * numpy.einsum('wnu,nu->uw', A, B)
using ma::Awiu_Biu_Cuw;
Awiu_Biu_Cuw(nu, nw, nn, alpha, A.origin(), B.origin(), B.size(1), C.origin(), C.size(1));
Awiu_Biu_Cuw(nu, nw, nn, alpha, A.origin(), B.origin(), std::get<1>(B.sizes()), C.origin(), std::get<1>(C.sizes()));
Tensor2D<ComplexType> ref({nu, nw}, 4.0, alloc);
ref[1][0] = 3.0;
ref[1][1] = 3.0;
@ -296,7 +296,7 @@ TEST_CASE("Aijk_Bkj_Cik", "[Numerics][batched_operations]")
Tensor2D<ComplexType> C({ni, nk}, 0.0, alloc);
// C = alpha * numpy.einsum('wnu,nu->uw', A, B)
using ma::Aijk_Bkj_Cik;
Aijk_Bkj_Cik(ni, nj, nk, A.origin(), A.size(1), A.stride(0), B.origin(), B.stride(0), C.origin(), C.stride(0));
Aijk_Bkj_Cik(ni, nj, nk, A.origin(), std::get<1>(A.sizes()), A.stride(0), B.origin(), B.stride(0), C.origin(), C.stride(0));
Tensor2D<ComplexType> ref({ni, nk}, 4.0, alloc);
ref[0][0] = 2.0;
ref[1][0] = 2.0;
@ -331,8 +331,8 @@ TEST_CASE("element_wise_Aij_Bjk_Ckij", "[Numerics][batched_operations]")
Tensor2D<ComplexType> A({ni, nj}, 3.0, alloc);
Tensor2D<ComplexType> B({nj, nk}, 2.0, alloc);
Tensor3D<ComplexType> C({nk, ni, nj}, 0.0, alloc);
element_wise_Aij_Bjk_Ckij('N', ni, nj, nk, A.origin(), A.stride(0), B.origin(), B.stride(0), C.origin(), C.size(1),
C.size(2));
element_wise_Aij_Bjk_Ckij('N', ni, nj, nk, A.origin(), A.stride(0), B.origin(), B.stride(0), C.origin(), std::get<1>(C.sizes()),
std::get<2>(C.sizes()));
Tensor3D<ComplexType> ref({nk, ni, nj}, 6.0, alloc);
verify_approx(C, ref);
}
@ -340,8 +340,8 @@ TEST_CASE("element_wise_Aij_Bjk_Ckij", "[Numerics][batched_operations]")
Tensor2D<ComplexType> A({ni, nj}, ComplexType(0.0, -3.0), alloc);
Tensor2D<ComplexType> B({nj, nk}, ComplexType(1.0, 2.0), alloc);
Tensor3D<ComplexType> C({nk, ni, nj}, 0.0, alloc);
element_wise_Aij_Bjk_Ckij('C', ni, nj, nk, A.origin(), A.stride(0), B.origin(), B.stride(0), C.origin(), C.size(1),
C.size(2));
element_wise_Aij_Bjk_Ckij('C', ni, nj, nk, A.origin(), A.stride(0), B.origin(), B.stride(0), C.origin(), std::get<1>(C.sizes()),
std::get<2>(C.sizes()));
Tensor3D<ComplexType> ref({nk, ni, nj}, ComplexType(-6.0, 3.0), alloc);
verify_approx(C, ref);
}
@ -359,7 +359,7 @@ void test_Aij_Bjk_Ckji()
Tensor2D<T1> A({ni, nj}, -3.0, alloc_a);
Tensor2D<T2> B({nj, nk}, T2(1.0, 2.0), alloc_b);
Tensor3D<T2> C({nk, nj, ni}, 0.0, alloc_b);
element_wise_Aij_Bjk_Ckji(ni, nj, nk, A.origin(), A.stride(0), B.origin(), B.stride(0), C.origin(), C.size(2),
element_wise_Aij_Bjk_Ckji(ni, nj, nk, A.origin(), A.stride(0), B.origin(), B.stride(0), C.origin(), std::get<2>(C.sizes()),
C.stride(0));
Tensor3D<T2> ref({nk, nj, ni}, T2(-3.0, -6.0), alloc_b);
verify_approx(C, ref);
@ -383,7 +383,7 @@ TEST_CASE("inplace_product", "[Numerics][batched_operations]")
Tensor3D<ComplexType> A({nb, ni, nj}, ComplexType(1.0, -3.0), alloc);
Tensor2D<double> B({ni, nj}, 2.0, dalloc);
using ma::inplace_product;
inplace_product(nb, ni, nj, B.origin(), B.size(1), A.origin(), A.size(2));
inplace_product(nb, ni, nj, B.origin(), std::get<1>(B.sizes()), A.origin(), std::get<2>(A.sizes()));
Tensor3D<ComplexType> ref({nb, ni, nj}, ComplexType(2.0, -6.0), alloc);
verify_approx(A, ref);
}

View File

@ -321,11 +321,11 @@ void test_dense_matrix_mult()
array<std::complex<double>, 2> A({3, 3});
array<std::complex<double>, 2> B({3, 3});
for (int i = 0, k = 0; i < A.size(0); i++)
for (int j = 0; j < A.size(1); j++, k++)
for (int i = 0, k = 0; i < std::get<0>(A.sizes()); i++)
for (int j = 0; j < std::get<1>(A.sizes()); j++, k++)
A[i][j] = m_a[k];
for (int i = 0, k = 0; i < A.size(0); i++)
for (int j = 0; j < A.size(1); j++, k++)
for (int i = 0, k = 0; i < std::get<0>(A.sizes()); i++)
for (int j = 0; j < std::get<1>(A.sizes()); j++, k++)
B[i][j] = m_b[k];
array<std::complex<double>, 2> C = ma::exp(A);

View File

@ -79,7 +79,7 @@ TEST_CASE("determinant_from_getrf", "[Numerics][determinant]")
double log_factor = 0.0;
double detx = 0.06317052169675352;
using ma::determinant_from_getrf;
double ovlp = determinant_from_getrf(x.size(0), lu.origin(), lu.size(1), pivot.origin(), log_factor);
double ovlp = determinant_from_getrf(std::get<0>(x.sizes()), lu.origin(), std::get<1>(lu.sizes()), pivot.origin(), log_factor);
REQUIRE(ovlp == Approx(detx));
}
@ -104,8 +104,8 @@ TEST_CASE("strided_determinant_from_getrf", "[Numerics][determinant]")
double log_factor = 0.0;
double detx = 0.06317052169675352;
using ma::strided_determinant_from_getrf;
strided_determinant_from_getrf(x.size(0), lus.origin(), lu.size(1), lu.num_elements(), pivot.origin(), pivot.size(1),
log_factor, to_address(ovlps.origin()), lus.size(0));
strided_determinant_from_getrf(std::get<0>(x.sizes()), lus.origin(), std::get<1>(lu.sizes()), lu.num_elements(), pivot.origin(), std::get<1>(pivot.sizes()),
log_factor, to_address(ovlps.origin()), std::get<0>(lus.sizes()));
REQUIRE(ovlps[0] == Approx(detx));
REQUIRE(ovlps[1] == Approx(detx));
REQUIRE(ovlps[2] == Approx(detx));
@ -132,7 +132,7 @@ TEST_CASE("batched_determinant_from_getrf", "[Numerics][determinant]")
double log_factor = 0.0;
double detx = 0.06317052169675352;
using ma::batched_determinant_from_getrf;
batched_determinant_from_getrf(x.size(0), lu_array.data(), lu.size(1), pivot.origin(), pivot.size(1), log_factor,
batched_determinant_from_getrf(std::get<0>(x.sizes()), lu_array.data(), std::get<1>(lu.sizes()), pivot.origin(), std::get<1>(pivot.sizes()), log_factor,
to_address(ovlps.origin()), lu_array.size());
REQUIRE(ovlps[0] == Approx(detx));
REQUIRE(ovlps[1] == Approx(detx));
@ -160,7 +160,7 @@ TEST_CASE("batched_determinant_from_getrf_complex", "[Numerics][determinant]")
std::complex<double> log_factor = 0.0;
std::complex<double> detx = 0.06317052169675352;
using ma::batched_determinant_from_getrf;
batched_determinant_from_getrf(x.size(0), lu_array.data(), lu.size(1), pivot.origin(), pivot.size(1), log_factor,
batched_determinant_from_getrf(std::get<0>(x.sizes()), lu_array.data(), std::get<1>(lu.sizes()), pivot.origin(), std::get<1>(pivot.sizes()), log_factor,
to_address(ovlps.origin()), lu_array.size());
REQUIRE(ovlps[0] == ComplexApprox(detx));
REQUIRE(ovlps[1] == ComplexApprox(detx));

View File

@ -76,13 +76,13 @@ TEST_CASE("axpyBatched", "[Numerics][misc_kernels]")
Tensor2D<std::complex<double>> x({3, 4}, 1.0, alloc);
Tensor1D<std::complex<double>> a(iextensions<1u>{3}, 2.0, alloc);
std::vector<pointer<std::complex<double>>> x_batched, y_batched;
for (int i = 0; i < x.size(0); i++)
for (int i = 0; i < std::get<0>(x.sizes()); i++)
{
x_batched.emplace_back(x[i].origin());
y_batched.emplace_back(y[i].origin());
}
using ma::axpyBatched;
axpyBatched(x.size(1), to_address(a.origin()), x_batched.data(), 1, y_batched.data(), 1, x_batched.size());
axpyBatched(std::get<1>(x.sizes()), to_address(a.origin()), x_batched.data(), 1, y_batched.data(), 1, x_batched.size());
// 1 + 2 = 3.
Tensor2D<std::complex<double>> ref({3, 4}, 3.0, alloc);
verify_approx(y, ref);

View File

@ -56,13 +56,13 @@ void AFQMCBasePropagator::step(int nsteps_, WlkSet& wset, RealType Eshift, RealT
if (transposed_G_)
G_ext = iextensions<2u>{nwalk, Gsize};
if (MFfactor.size(0) != nsteps || MFfactor.size(1) != nwalk)
if (std::get<0>(MFfactor.sizes()) != nsteps || std::get<1>(MFfactor.sizes()) != nwalk)
MFfactor = CMatrix({long(nsteps), long(nwalk)});
if (hybrid_weight.size(0) != nsteps || hybrid_weight.size(1) != nwalk)
if (std::get<0>(hybrid_weight.sizes()) != nsteps || std::get<1>(hybrid_weight.sizes()) != nwalk)
hybrid_weight = CMatrix({long(nsteps), long(nwalk)});
if (new_overlaps.size(0) != nwalk)
if (std::get<0>(new_overlaps.sizes()) != nwalk)
new_overlaps = CVector(iextensions<1u>{nwalk});
if (new_energies.size(0) != nwalk || new_energies.size(1) != 3)
if (std::get<0>(new_energies.sizes()) != nwalk || std::get<1>(new_energies.sizes()) != 3)
new_energies = CMatrix({long(nwalk), 3});
@ -280,15 +280,15 @@ void AFQMCBasePropagator::BackPropagate(int nbpsteps, int nStabalize, WlkSet& ws
C3Tensor_ref vHS3D(make_device_ptr(vHS.origin()), vhs3d_ext);
auto&& Fields(*wset.getFields());
assert(Fields.size(0) >= nbpsteps);
assert(Fields.size(1) == globalnCV);
assert(Fields.size(2) == nwalk);
assert(std::get<0>(Fields.sizes()) >= nbpsteps);
assert(std::get<1>(Fields.sizes()) == globalnCV);
assert(std::get<2>(Fields.sizes()) == nwalk);
int nrow(NMO * npol);
int ncol(NAEA + ((walker_type == CLOSED) ? 0 : NAEB));
assert(Refs.size(0) == nwalk);
int nrefs = Refs.size(1);
assert(Refs.size(2) == nrow * ncol);
assert(Refs.size() == nwalk);
int nrefs = std::get<1>(Refs.sizes());
assert(std::get<2>(Refs.sizes()) == nrow * ncol);
int cv0, cvN;
std::tie(cv0, cvN) = FairDivideBoundary(TG.getLocalTGRank(), globalnCV, TG.getNCoresPerTG());
@ -299,8 +299,8 @@ void AFQMCBasePropagator::BackPropagate(int nbpsteps, int nStabalize, WlkSet& ws
if (walker_type == COLLINEAR)
nx = 2;
assert(detR.size(0) == nwalk);
assert(detR.size(1) == nrefs * nx);
assert(std::get<0>(detR.sizes()) == nwalk);
assert(std::get<1>(detR.sizes()) == nrefs * nx);
std::fill_n(detR.origin(), detR.num_elements(), ComplexType(1.0, 0.0));
// from now on, individual work on each walker/step
@ -467,7 +467,7 @@ void AFQMCBasePropagator::apply_propagators(char TA,
}
else
{
if (local_vHS.size(0) != NMO || local_vHS.size(1) != NMO)
if (std::get<0>(local_vHS.sizes()) != NMO || std::get<1>(local_vHS.sizes()) != NMO)
local_vHS = CMatrix({NMO, NMO});
// vHS3D[M][M][nstep*nwalk]: need temporary buffer in this case
if (walker_type == COLLINEAR)
@ -561,11 +561,11 @@ void AFQMCBasePropagator::apply_propagators_batched(char TA, WSet& wset, int ni,
}
else
{
if (local_vHS.size(0) != nbatch || local_vHS.size(1) != NMO * NMO)
if (std::get<0>(local_vHS.sizes()) != nbatch || std::get<1>(local_vHS.sizes()) != NMO * NMO)
local_vHS = CMatrix({nbatch, NMO * NMO});
// vHS3D[M][M][nstep*nwalk]: need temporary buffer in this case
int N2 = vHS3D.size(0) * vHS3D.size(1);
CMatrix_ref vHS2D(vHS3D.origin(), {N2, vHS3D.size(2)});
int N2 = std::get<0>(vHS3D.sizes()) * std::get<1>(vHS3D.sizes());
CMatrix_ref vHS2D(vHS3D.origin(), {N2, std::get<2>(vHS3D.sizes())});
C3Tensor_ref local3D(local_vHS.origin(), {nbatch, NMO, NMO});
int nt = ni * nwalk;
for (int iw = 0; iw < nwalk; iw += nbatch, nt += nbatch)
@ -711,7 +711,7 @@ void AFQMCBasePropagator::assemble_X(size_t nsteps,
TG.local_barrier();
ComplexType im(0.0, 1.0);
int nCV = int(X.size(0));
int nCV = int(X.size());
// generate random numbers
if (addRAND)
{

View File

@ -60,14 +60,14 @@ void AFQMCDistributedPropagator::step(int nsteps_, WlkSet& wset, RealType Eshift
if (transposed_G_)
G_ext = iextensions<2u>{nwalk, Gsize};
if (MFfactor.size(1) != nsteps || MFfactor.size(2) != nwalk)
if (std::get<1>(MFfactor.sizes()) != nsteps || std::get<2>(MFfactor.sizes()) != nwalk)
MFfactor = C3Tensor({2, nsteps, nwalk});
if (hybrid_weight.size(1) != nsteps || hybrid_weight.size(2) != nwalk)
if (std::get<1>(hybrid_weight.sizes()) != nsteps || std::get<2>(hybrid_weight.sizes()) != nwalk)
hybrid_weight = C3Tensor({2, nsteps, nwalk});
if (new_overlaps.size(0) != nwalk)
if (std::get<0>(new_overlaps.sizes()) != nwalk)
new_overlaps = CVector(iextensions<1u>{nwalk});
if (new_energies.size(0) != nwalk || new_energies.size(1) != 3)
if (std::get<0>(new_energies.sizes()) != nwalk || std::get<1>(new_energies.sizes()) != 3)
new_energies = CMatrix({nwalk, 3});
// Summary of temporary memory usage:

View File

@ -66,13 +66,13 @@ void AFQMCDistributedPropagatorDistCV::step(int nsteps_, WlkSet& wset, RealType
if (transposed_G_)
G_ext = iextensions<2u>{nwalk, Gsize};
if (MFfactor.size(0) != nsteps || MFfactor.size(1) != nwalk)
if (std::get<0>(MFfactor.sizes()) != nsteps || std::get<1>(MFfactor.sizes()) != nwalk)
MFfactor = CMatrix({long(nsteps), long(nwalk)});
if (hybrid_weight.size(0) != nsteps || hybrid_weight.size(1) != nwalk)
if (std::get<0>(hybrid_weight.sizes()) != nsteps || std::get<1>(hybrid_weight.sizes()) != nwalk)
hybrid_weight = CMatrix({long(nsteps), long(nwalk)});
if (new_overlaps.size(0) != nwalk)
if (std::get<0>(new_overlaps.sizes()) != nwalk)
new_overlaps = CVector(iextensions<1u>{nwalk});
if (new_energies.size(0) != nwalk || new_energies.size(1) != 3)
if (std::get<0>(new_energies.sizes()) != nwalk || std::get<1>(new_energies.sizes()) != 3)
new_energies = CMatrix({long(nwalk), 3});
// Temporary memory usage summary:
@ -435,13 +435,13 @@ void AFQMCDistributedPropagatorDistCV::step_collective(int nsteps_, WlkSet& wset
if (transposed_G_)
G_ext = iextensions<2u>{nwalk, Gsize};
if (MFfactor.size(0) != nsteps || MFfactor.size(1) != nwalk)
if (std::get<0>(MFfactor.sizes()) != nsteps || std::get<1>(MFfactor.sizes()) != nwalk)
MFfactor = CMatrix({long(nsteps), long(nwalk)});
if (hybrid_weight.size(0) != nsteps || hybrid_weight.size(1) != nwalk)
if (std::get<0>(hybrid_weight.sizes()) != nsteps || std::get<1>(hybrid_weight.sizes()) != nwalk)
hybrid_weight = CMatrix({long(nsteps), long(nwalk)});
if (new_overlaps.size(0) != nwalk)
if (std::get<0>(new_overlaps.sizes()) != nwalk)
new_overlaps = CVector(iextensions<1u>{nwalk});
if (new_energies.size(0) != nwalk || new_energies.size(1) != 3)
if (std::get<0>(new_energies.sizes()) != nwalk || std::get<1>(new_energies.sizes()) != 3)
new_energies = CMatrix({long(nwalk), 3});
// Temporary memory usage summary:
@ -831,15 +831,15 @@ void AFQMCDistributedPropagatorDistCV::BackPropagate(int nbpsteps,
TG.local_barrier();
auto&& Fields(*wset.getFields());
assert(Fields.size(0) >= nbpsteps);
assert(Fields.size(1) == globalnCV);
assert(Fields.size(2) == nwalk);
assert(std::get<0>(Fields.sizes()) >= nbpsteps);
assert(std::get<1>(Fields.sizes()) == globalnCV);
assert(std::get<2>(Fields.sizes()) == nwalk);
int nrow(NMO * ((walker_type == NONCOLLINEAR) ? 2 : 1));
int ncol(NAEA + ((walker_type == CLOSED) ? 0 : NAEB));
assert(Refs.size(0) == nwalk);
int nrefs = Refs.size(1);
assert(Refs.size(2) == nrow * ncol);
assert(std::get<0>(Refs.sizes()) == nwalk);
int nrefs = std::get<1>(Refs.sizes());
assert(std::get<2>(Refs.sizes()) == nrow * ncol);
int cv0, cvN;
std::tie(cv0, cvN) = FairDivideBoundary(TG.getLocalTGRank(), localnCV, TG.getNCoresPerTG());
@ -852,8 +852,8 @@ void AFQMCDistributedPropagatorDistCV::BackPropagate(int nbpsteps,
if (walker_type == COLLINEAR)
nx = 2;
assert(detR.size(0) == nwalk);
assert(detR.size(1) == nrefs * nx);
assert(std::get<0>(detR.sizes()) == nwalk);
assert(std::get<1>(detR.sizes()) == nrefs * nx);
std::fill_n(detR.origin(), detR.num_elements(), SPComplexType(1.0, 0.0));
// from now on, individual work on each walker/step

View File

@ -39,7 +39,7 @@ void free_projection_walker_update(Wlk&& w,
int nwalk = w.size();
// constexpr if can be used to avoid the memory copy, by comparing the pointer types
// between WMat and Mat/OMat
if (work.size(0) < 7 || work.size(1) < nwalk)
if (std::get<0>(work.sizes()) < 7 || std::get<1>(work.sizes()) < nwalk)
work.reextent({7, nwalk});
w.getProperty(WEIGHT, work[0]);
@ -85,7 +85,7 @@ void hybrid_walker_update(Wlk&& w,
int nwalk = w.size();
// constexpr if can be used to avoid the memory copy, by comparing the pointer types
// between WMat and Mat/OMat
if (work.size(0) < 7 || work.size(1) < nwalk)
if (std::get<0>(work.sizes()) < 7 || std::get<1>(work.sizes()) < nwalk)
work.reextent({7, nwalk});
bool BackProp = (w.getBPPos() >= 0 && w.getBPPos() < w.NumBackProp());
@ -179,7 +179,7 @@ void local_energy_walker_update(Wlk&& w,
int nwalk = w.size();
// constexpr if can be used to avoid the memory copy, by comparing the pointer types
// between WMat and Mat/OMat
if (work.size(0) < 12 || work.size(1) < nwalk)
if (std::get<0>(work.sizes()) < 12 || std::get<1>(work.sizes()) < nwalk)
work.reextent({12, nwalk});
bool BackProp = (w.getBPPos() >= 0 && w.getBPPos() < w.NumBackProp());

View File

@ -46,9 +46,9 @@ P_Type generate1BodyPropagator(TaskGroup_& TG,
bool printP1eV = false)
{
assert(H1.dimensionality == 2);
assert(H1.size(0) == H1.size(1));
assert(std::get<0>(H1.sizes()) == std::get<1>(H1.sizes()));
assert(H1.stride(1) == 1);
int NMO = H1.size(0);
int NMO = H1.size();
if (TG.TG_local().root())
{
boost::multi::array<ComplexType, 2> v({NMO, NMO});
@ -84,13 +84,13 @@ P_Type generate1BodyPropagator(TaskGroup_& TG,
bool printP1eV = false)
{
assert(H1.dimensionality == 2);
assert(H1.size(0) == H1.size(1));
assert(std::get<0>(H1.sizes()) == std::get<1>(H1.sizes()));
assert(H1.stride(1) == 1);
assert(H1ext.dimensionality == 2);
assert(H1ext.size(0) == H1ext.size(1));
assert(std::get<0>(H1ext.sizes()) == std::get<1>(H1ext.sizes()));
assert(H1ext.stride(1) == 1);
assert(H1.size(0) == H1ext.size(1));
int NMO = H1.size(0);
assert(std::get<0>(H1.sizes()) == std::get<1>(H1ext.sizes()));
int NMO = H1.size();
if (TG.TG_local().root())
{
// boost::multi::array<ComplexType,2> v({NMO,NMO});

View File

@ -143,9 +143,9 @@ void propg_fac_shared(boost::mpi3::communicator& world)
WalkerSet wset(TG, doc3.getRoot(), InfoMap["info0"], &rng);
auto initial_guess = WfnFac.getInitialGuess(wfn_name);
REQUIRE(initial_guess.size(0) == 2);
REQUIRE(initial_guess.size(1) == NPOL * NMO);
REQUIRE(initial_guess.size(2) == NAEA);
REQUIRE(std::get<0>(initial_guess.sizes()) == 2);
REQUIRE(std::get<1>(initial_guess.sizes()) == NPOL * NMO);
REQUIRE(std::get<2>(initial_guess.sizes()) == NAEA);
wset.resize(nwalk, initial_guess[0], initial_guess[0]);
// initial_guess[1](XXX.extension(0),{0,NAEB}));
@ -317,9 +317,9 @@ void propg_fac_distributed(boost::mpi3::communicator& world, int ngrp)
WalkerSet wset(TG, doc3.getRoot(), InfoMap["info0"], &rng);
auto initial_guess = WfnFac.getInitialGuess(wfn_name);
REQUIRE(initial_guess.size(0) == 2);
REQUIRE(initial_guess.size(1) == NPOL * NMO);
REQUIRE(initial_guess.size(2) == NAEA);
REQUIRE(std::get<0>(initial_guess.sizes()) == 2);
REQUIRE(std::get<1>(initial_guess.sizes()) == NPOL * NMO);
REQUIRE(std::get<2>(initial_guess.sizes()) == NAEA);
wset.resize(nwalk, initial_guess[0], initial_guess[0]);
const char* propg_xml_block0 = "<Propagator name=\"prop0\"> \

View File

@ -82,8 +82,8 @@ public:
template<class MatA, class MatB, class MatC>
T MixedDensityMatrix(const MatA& hermA, const MatB& B, MatC&& C, T LogOverlapFactor, bool compact, bool herm = true)
{
int NMO = (herm ? hermA.size(1) : hermA.size(0));
int NAEA = (herm ? hermA.size(0) : hermA.size(1));
int NMO = (herm ? std::get<1>(hermA.sizes()) : std::get<0>(hermA.sizes()));
int NAEA = (herm ? std::get<0>(hermA.sizes()) : std::get<1>(hermA.sizes()));
TMatrix TNN({NAEA, NAEA}, buffer_manager.get_generator().template get_allocator<T>());
TMatrix TNM({NAEA, NMO}, buffer_manager.get_generator().template get_allocator<T>());
TVector WORK(iextensions<1u>{work_size}, buffer_manager.get_generator().template get_allocator<T>());
@ -95,8 +95,8 @@ public:
template<class MatA, class MatC>
T MixedDensityMatrix(const MatA& A, MatC&& C, T LogOverlapFactor, bool compact = false)
{
int NMO = A.size(0);
int NAEA = A.size(1);
int NMO = std::get<0>(A.sizes());
int NAEA = std::get<1>(A.sizes());
TMatrix TNN({NAEA, NAEA}, buffer_manager.get_generator().template get_allocator<T>());
TMatrix TNM({NAEA, NMO}, buffer_manager.get_generator().template get_allocator<T>());
TVector WORK(iextensions<1u>{work_size}, buffer_manager.get_generator().template get_allocator<T>());
@ -113,8 +113,8 @@ public:
bool compact = false,
bool useSVD = false)
{
int NMO = A.size(0);
int NAEA = A.size(1);
int NMO = std::get<0>(A.sizes());
int NAEA = std::get<1>(A.sizes());
if (useSVD)
{
TMatrix TNN1({NAEA, NAEA}, buffer_manager.get_generator().template get_allocator<T>());
@ -149,12 +149,12 @@ public:
MatQ&& QQ0,
bool compact = false)
{
int Nact = hermA.size(0);
int NEL = B.size(1);
int NMO = B.size(0);
assert(hermA.size(1) == B.size(0));
assert(QQ0.size(0) == Nact);
assert(QQ0.size(1) == NEL);
int Nact = std::get<0>(hermA.sizes());
int NEL = std::get<1>(B.sizes());
int NMO = std::get<0>(B.sizes());
assert(std::get<1>(hermA.sizes()) == std::get<0>(B.sizes()));
assert(std::get<0>(QQ0.sizes()) == Nact);
assert(std::get<1>(QQ0.sizes()) == NEL);
TMatrix TNN({NEL, NEL}, buffer_manager.get_generator().template get_allocator<T>());
TMatrix TAB({Nact, NEL}, buffer_manager.get_generator().template get_allocator<T>());
TMatrix TNM({NEL, NMO}, buffer_manager.get_generator().template get_allocator<T>());
@ -174,10 +174,10 @@ public:
integer* ref,
bool compact = false)
{
int Nact = hermA.size(0);
int NEL = B.size(1);
int NMO = B.size(0);
assert(hermA.size(1) == B.size(0));
int Nact = std::get<0>(hermA.sizes());
int NEL = std::get<1>(B.sizes());
int NMO = std::get<0>(B.sizes());
assert(std::get<1>(hermA.sizes()) == std::get<0>(B.sizes()));
TMatrix TNN({NEL, NEL}, buffer_manager.get_generator().template get_allocator<T>());
TMatrix TAB({Nact, NEL}, buffer_manager.get_generator().template get_allocator<T>());
TMatrix TNM({NEL, NMO}, buffer_manager.get_generator().template get_allocator<T>());
@ -191,7 +191,7 @@ public:
template<class MatA>
T Overlap(const MatA& A, T LogOverlapFactor)
{
int NAEA = A.size(1);
int NAEA = std::get<1>(A.sizes());
TMatrix TNN({NAEA, NAEA}, buffer_manager.get_generator().template get_allocator<T>());
TMatrix TNN2({NAEA, NAEA}, buffer_manager.get_generator().template get_allocator<T>());
IVector IWORK(iextensions<1u>{NAEA + 1}, buffer_manager.get_generator().template get_allocator<int>());
@ -201,7 +201,7 @@ public:
template<class MatA, class MatB>
T Overlap(const MatA& hermA, const MatB& B, T LogOverlapFactor, bool herm = true)
{
int NAEA = (herm ? hermA.size(0) : hermA.size(1));
int NAEA = (herm ? std::get<0>(hermA.sizes()) : std::get<1>(hermA.sizes()));
TMatrix TNN({NAEA, NAEA}, buffer_manager.get_generator().template get_allocator<T>());
TMatrix TNN2({NAEA, NAEA}, buffer_manager.get_generator().template get_allocator<T>());
IVector IWORK(iextensions<1u>{NAEA + 1}, buffer_manager.get_generator().template get_allocator<int>());
@ -211,7 +211,7 @@ public:
template<class MatA, class MatB>
T Overlap_noHerm(const MatA& A, const MatB& B, T LogOverlapFactor)
{
int NAEA = A.size(1);
int NAEA = std::get<1>(A.sizes());
TMatrix TNN({NAEA, NAEA}, buffer_manager.get_generator().template get_allocator<T>());
TMatrix TNN2({NAEA, NAEA}, buffer_manager.get_generator().template get_allocator<T>());
IVector IWORK(iextensions<1u>{NAEA + 1}, buffer_manager.get_generator().template get_allocator<int>());
@ -222,11 +222,11 @@ public:
template<typename integer, class MatA, class MatB, class MatC>
T OverlapForWoodbury(const MatA& hermA, const MatB& B, T LogOverlapFactor, integer* ref, MatC&& QQ0)
{
int Nact = hermA.size(0);
int NEL = B.size(1);
assert(hermA.size(1) == B.size(0));
assert(QQ0.size(0) == Nact);
assert(QQ0.size(1) == NEL);
int Nact = std::get<0>(hermA.sizes());
int NEL = std::get<1>(B.sizes());
assert(std::get<1>(hermA.sizes()) == std::get<0>(B.sizes()));
assert(std::get<0>(QQ0.sizes()) == Nact);
assert(std::get<1>(QQ0.sizes()) == NEL);
TMatrix TNN({NEL, NEL}, buffer_manager.get_generator().template get_allocator<T>());
TMatrix TMN({Nact, NEL}, buffer_manager.get_generator().template get_allocator<T>());
TVector WORK(iextensions<1u>{work_size}, buffer_manager.get_generator().template get_allocator<T>());
@ -239,14 +239,14 @@ public:
void Propagate(Mat&& A, const MatP1& P1, const MatV& V, int order = 6, char TA = 'N', bool noncollinear = false)
{
int npol = noncollinear ? 2 : 1;
int NMO = A.size(0);
int NAEA = A.size(1);
int NMO = std::get<0>(A.sizes());
int NAEA = std::get<1>(A.sizes());
int M = NMO / npol;
assert(NMO % npol == 0);
assert(P1.size(0) == NMO);
assert(P1.size(1) == NMO);
assert(V.size(0) == M);
assert(V.size(1) == M);
assert(std::get<0>(P1.sizes()) == NMO);
assert(std::get<1>(P1.sizes()) == NMO);
assert(std::get<0>(V.sizes()) == M);
assert(std::get<1>(V.sizes()) == M);
TMatrix TMN({NMO, NAEA}, buffer_manager.get_generator().template get_allocator<T>());
TMatrix T1({M, NAEA}, buffer_manager.get_generator().template get_allocator<T>());
TMatrix T2({M, NAEA}, buffer_manager.get_generator().template get_allocator<T>());
@ -281,8 +281,8 @@ public:
{
#if defined(ENABLE_CUDA) || defined(ENABLE_HIP)
// QR on the transpose
int NMO = A.size(0);
int NAEA = A.size(1);
int NMO = std::get<0>(A.sizes());
int NAEA = std::get<1>(A.sizes());
TMatrix AT({NAEA, NMO}, buffer_manager.get_generator().template get_allocator<T>());
TVector scl(iextensions<1u>{NMO}, buffer_manager.get_generator().template get_allocator<T>());
TVector TAU(iextensions<1u>{NMO}, buffer_manager.get_generator().template get_allocator<T>());
@ -292,10 +292,10 @@ public:
ma::geqrf(AT, TAU, WORK);
using ma::determinant_from_geqrf;
using ma::scale_columns;
T res = determinant_from_geqrf(AT.size(0), AT.origin(), AT.stride(0), scl.origin(), LogOverlapFactor);
T res = determinant_from_geqrf(std::get<0>(AT.sizes()), AT.origin(), AT.stride(0), scl.origin(), LogOverlapFactor);
ma::gqr(AT, TAU, WORK);
ma::transpose(AT, A);
scale_columns(A.size(0), A.size(1), A.origin(), A.stride(0), scl.origin());
scale_columns(std::get<0>(A.sizes()), std::get<1>(A.sizes()), A.origin(), A.stride(0), scl.origin());
#else
int NMO = A.size(0);
TVector TAU(iextensions<1u>{NMO}, buffer_manager.get_generator().template get_allocator<T>());

View File

@ -153,17 +153,17 @@ public:
static_assert(std::decay<MatV>::type::dimensionality == 3, " dimenionality == 3");
if (Ai.size() == 0)
return;
assert(Ai.size() == V.size(0));
assert(Ai.size() == std::get<0>(V.sizes()));
int nbatch = Ai.size();
int npol = noncollinear ? 2 : 1;
int NMO = (*Ai[0]).size(0);
int NAEA = (*Ai[0]).size(1);
int NMO = std::get<0>((*Ai[0]).sizes());
int NAEA = std::get<1>((*Ai[0]).sizes());
int M = NMO / npol;
assert(NMO % npol == 0);
assert(P1.size(0) == NMO);
assert(P1.size(1) == NMO);
assert(V.size(1) == M);
assert(V.size(2) == M);
assert(std::get<0>(P1.sizes()) == NMO);
assert(std::get<1>(P1.sizes()) == NMO);
assert(std::get<1>(V.sizes()) == M);
assert(std::get<2>(V.sizes()) == M);
TTensor TMN({nbatch, NMO, NAEA}, buffer_manager.get_generator().template get_allocator<T>());
TTensor T1({nbatch, NMO, NAEA}, buffer_manager.get_generator().template get_allocator<T>());
TTensor T2({nbatch, NMO, NAEA}, buffer_manager.get_generator().template get_allocator<T>());
@ -233,10 +233,10 @@ public:
static_assert(pointedType<MatB>::dimensionality == 2, "Wrong dimensionality");
static_assert(std::decay<MatC>::type::dimensionality == 3, "Wrong dimensionality");
static_assert(std::decay<TVec>::type::dimensionality == 1, "Wrong dimensionality");
int NMO = (herm ? (*hermA[0]).size(1) : (*hermA[0]).size(0));
int NAEA = (herm ? (*hermA[0]).size(0) : (*hermA[0]).size(1));
int NMO = (herm ? std::get<1>((*hermA[0]).sizes()) : std::get<0>((*hermA[0]).sizes()));
int NAEA = (herm ? std::get<0>((*hermA[0]).sizes()) : std::get<1>((*hermA[0]).sizes()));
int nbatch = Bi.size();
assert(C.size(0) == nbatch);
assert(C.size() == nbatch);
assert(ovlp.size() == nbatch);
int n1 = nbatch, n2 = NAEA, n3 = NMO;
if (compact)
@ -269,8 +269,8 @@ public:
static_assert(pointedType<MatB>::dimensionality == 2, "Wrong dimensionality");
static_assert(pointedType<MatC>::dimensionality == 2, "Wrong dimensionality");
static_assert(std::decay<TVec>::type::dimensionality == 1, "Wrong dimensionality");
int NMO = (herm ? (*Left[0]).size(1) : (*Left[0]).size(0));
int NAEA = (herm ? (*Left[0]).size(0) : (*Left[0]).size(1));
int NMO = (herm ? std::get<1>((*Left[0]).sizes()) : std::get<0>((*Left[0]).sizes()));
int NAEA = (herm ? std::get<0>((*Left[0]).sizes()) : std::get<1>((*Left[0]).sizes()));
int nbatch = Left.size();
assert(Right.size() == nbatch);
assert(G.size() == nbatch);
@ -301,8 +301,8 @@ public:
return;
assert(hermA.size() > 0);
static_assert(std::decay<TVec>::type::dimensionality == 1, "Wrong dimensionality");
int NMO = (herm ? (*hermA[0]).size(1) : (*hermA[0]).size(0));
int NAEA = (herm ? (*hermA[0]).size(0) : (*hermA[0]).size(1));
int NMO = (herm ? std::get<1>((*hermA[0]).sizes()) : std::get<0>((*hermA[0]).sizes()));
int NAEA = (herm ? std::get<0>((*hermA[0]).sizes()) : std::get<1>((*hermA[0]).sizes()));
int nbatch = Bi.size();
assert(ovlp.size() == nbatch);
TTensor TNN3D({nbatch, NAEA, NAEA}, buffer_manager.get_generator().template get_allocator<T>());
@ -319,8 +319,8 @@ public:
// QR on the transpose
if (Ai.size() == 0)
return;
int NMO = (*Ai[0]).size(0);
int NAEA = (*Ai[0]).size(1);
int NMO = std::get<0>((*Ai[0]).sizes());
int NAEA = std::get<1>((*Ai[0]).sizes());
int nbatch = Ai.size();
TTensor AT({nbatch, NAEA, NMO}, buffer_manager.get_generator().template get_allocator<T>());
TMatrix T_({nbatch, NMO}, buffer_manager.get_generator().template get_allocator<T>());
@ -357,8 +357,8 @@ public:
// QR on the transpose
if (Ai.size() == 0)
return;
int NMO = (*Ai[0]).size(0);
int NAEA = (*Ai[0]).size(1);
int NMO = std::get<0>((*Ai[0]).sizes());
int NAEA = std::get<1>((*Ai[0]).sizes());
int nbatch = Ai.size();
TTensor AT({nbatch, NAEA, NMO}, buffer_manager.get_generator().template get_allocator<T>());
TMatrix T_({nbatch, NMO}, buffer_manager.get_generator().template get_allocator<T>());

View File

@ -74,8 +74,8 @@ public:
bool compact = false,
bool herm = true)
{
int NMO = (herm ? hermA.size(1) : hermA.size(0));
int NAEA = (herm ? hermA.size(0) : hermA.size(1));
int NMO = (herm ? std::get<1>(hermA.sizes()) : std::get<0>(hermA.sizes()));
int NAEA = (herm ? std::get<0>(hermA.sizes()) : std::get<1>(hermA.sizes()));
set_shm_buffer(comm, NAEA * (NAEA + NMO));
assert(SM_TMats->num_elements() >= NAEA * (NAEA + NMO));
boost::multi::array_ref<T, 2> TNN(to_address(SM_TMats->origin()), {NAEA, NAEA});
@ -96,12 +96,12 @@ public:
communicator& comm,
bool compact = false)
{
int Nact = hermA.size(0);
int NEL = B.size(1);
int NMO = B.size(0);
assert(hermA.size(1) == B.size(0));
assert(QQ0.size(0) == Nact);
assert(QQ0.size(1) == NEL);
int Nact = std::get<0>(hermA.sizes());
int NEL = std::get<1>(B.sizes());
int NMO = std::get<0>(B.sizes());
assert(std::get<1>(hermA.sizes()) == std::get<0>(B.sizes()));
assert(std::get<0>(QQ0.sizes()) == Nact);
assert(std::get<1>(QQ0.sizes()) == NEL);
set_shm_buffer(comm, NEL * (NEL + Nact + NMO));
assert(SM_TMats->num_elements() >= NEL * (NEL + Nact + NMO));
@ -122,7 +122,7 @@ public:
template<class MatA, class MatB>
T Overlap(const MatA& hermA, const MatB& B, T LogOverlapFactor, communicator& comm, bool herm = true)
{
int NAEA = (herm ? hermA.size(0) : hermA.size(1));
int NAEA = (herm ? std::get<0>(hermA.sizes()) : std::get<1>(hermA.sizes()));
set_shm_buffer(comm, 2 * NAEA * NAEA);
assert(SM_TMats->num_elements() >= 2 * NAEA * NAEA);
boost::multi::array_ref<T, 2> TNN(to_address(SM_TMats->origin()), {NAEA, NAEA});
@ -139,11 +139,11 @@ public:
MatC&& QQ0,
communicator& comm)
{
int Nact = hermA.size(0);
int NEL = B.size(1);
assert(hermA.size(1) == B.size(0));
assert(QQ0.size(0) == Nact);
assert(QQ0.size(1) == NEL);
int Nact = std::get<0>(hermA.sizes());
int NEL = std::get<1>(B.sizes());
assert(std::get<1>(hermA.sizes()) == std::get<0>(B.sizes()));
assert(std::get<0>(QQ0.sizes()) == Nact);
assert(std::get<1>(QQ0.sizes()) == NEL);
set_shm_buffer(comm, NEL * (Nact + NEL));
assert(SM_TMats->num_elements() >= NEL * (Nact + NEL));
boost::multi::array_ref<T, 2> TNN(to_address(SM_TMats->origin()), {NEL, NEL});
@ -164,14 +164,14 @@ public:
bool noncollinear = false)
{
int npol = noncollinear ? 2 : 1;
int NMO = A.size(0);
int NAEA = A.size(1);
int NMO = std::get<0>(A.sizes());
int NAEA = std::get<1>(A.sizes());
int M = NMO / npol;
assert(NMO % npol == 0);
assert(P1.size(0) == NMO);
assert(P1.size(1) == NMO);
assert(V.size(0) == M);
assert(V.size(1) == M);
assert(std::get<0>(P1.sizes()) == NMO);
assert(std::get<1>(P1.sizes()) == NMO);
assert(std::get<0>(V.sizes()) == M);
assert(std::get<1>(V.sizes()) == M);
set_shm_buffer(comm, NAEA * (NMO + 2 * M));
assert(SM_TMats->num_elements() >= NAEA * (NMO + 2 * M));
boost::multi::array_ref<T, 2> T0(to_address(SM_TMats->origin()), {NMO, NAEA});

View File

@ -36,12 +36,12 @@ namespace base
template<class MatA, class MatB, class MatC>
inline void apply_expM(const MatA& V, MatB&& S, MatC& T1, MatC& T2, int order = 6, char TA = 'N')
{
assert(V.size(0) == V.size(1));
assert(V.size(1) == S.size(0));
assert(S.size(0) == T1.size(0));
assert(S.size(1) == T1.size(1));
assert(S.size(0) == T2.size(0));
assert(S.size(1) == T2.size(1));
assert(std::get<0>(V.sizes()) == std::get<1>(V.sizes()));
assert(std::get<1>(V.sizes()) == std::get<0>(S.sizes()));
assert(std::get<0>(S.sizes()) == std::get<0>(T1.sizes()));
assert(std::get<1>(S.sizes()) == std::get<1>(T1.sizes()));
assert(std::get<0>(S.sizes()) == std::get<0>(T2.sizes()));
assert(std::get<1>(S.sizes()) == std::get<1>(T2.sizes()));
using ma::H;
using ma::T;
@ -56,7 +56,7 @@ inline void apply_expM(const MatA& V, MatB&& S, MatC& T1, MatC& T2, int order =
// getting around issue in multi, fix later
//T1 = S;
T1.sliced(0, T1.size(0)) = S;
T1.sliced(0, std::get<0>(T1.sizes())) = S;
for (int n = 1; n <= order; n++)
{
ComplexType fact = im * static_cast<ComplexType>(1.0 / static_cast<double>(n));
@ -82,12 +82,12 @@ namespace shm
template<class MatA, class MatB, class MatC, class communicator>
inline void apply_expM(const MatA& V, MatB&& S, MatC& T1, MatC& T2, communicator& comm, int order = 6, char TA = 'N')
{
assert(V.size(0) == S.size(0));
assert(V.size(1) == S.size(0));
assert(S.size(0) == T1.size(0));
assert(S.size(1) == T1.size(1));
assert(S.size(0) == T2.size(0));
assert(S.size(1) == T2.size(1));
assert(std::get<0>(V.sizes()) == std::get<0>(S.sizes()));
assert(std::get<1>(V.sizes()) == std::get<0>(S.sizes()));
assert(std::get<0>(S.sizes()) == std::get<0>(T1.sizes()));
assert(std::get<1>(S.sizes()) == std::get<1>(T1.sizes()));
assert(std::get<0>(S.sizes()) == std::get<0>(T2.sizes()));
assert(std::get<1>(S.sizes()) == std::get<1>(T2.sizes()));
using ComplexType = typename std::decay<MatB>::type::element;
@ -100,7 +100,7 @@ inline void apply_expM(const MatA& V, MatB&& S, MatC& T1, MatC& T2, communicator
auto pT2(std::addressof(T2));
int M0, Mn;
std::tie(M0, Mn) = FairDivideBoundary(comm.rank(), int(S.size(0)), comm.size());
std::tie(M0, Mn) = FairDivideBoundary(comm.rank(), int(S.size()), comm.size());
assert(M0 <= Mn);
assert(M0 >= 0);
@ -118,7 +118,7 @@ inline void apply_expM(const MatA& V, MatB&& S, MatC& T1, MatC& T2, communicator
ma::product(fact, V.sliced(M0, Mn), *pT1, zero, (*pT2).sliced(M0, Mn));
// overload += ???
for (int i = M0; i < Mn; i++)
for (int j = 0, je = S.size(1); j < je; j++)
for (int j = 0, je = std::get<1>(S.sizes()); j < je; j++)
S[i][j] += (*pT2)[i][j];
comm.barrier();
std::swap(pT1, pT2);
@ -139,22 +139,22 @@ inline void apply_expM(const MatA& V, MatB&& S, MatC& T1, MatC& T2, int order =
static_assert(std::decay<MatA>::type::dimensionality == 3, " batched::apply_expM::dimenionality == 3");
static_assert(std::decay<MatB>::type::dimensionality == 3, " batched::apply_expM::dimenionality == 3");
static_assert(std::decay<MatC>::type::dimensionality == 3, " batched::apply_expM::dimenionality == 3");
assert(V.size(0) == S.size(0));
assert(V.size(0) == T1.size(0));
assert(V.size(0) == T2.size(0));
assert(V.size(1) == V.size(2));
assert(V.size(2) == S.size(1));
assert(S.size(1) == T1.size(1));
assert(S.size(2) == T1.size(2));
assert(S.size(1) == T2.size(1));
assert(S.size(2) == T2.size(2));
assert(std::get<0>(V.sizes()) == std::get<0>(S.sizes()));
assert(std::get<0>(V.sizes()) == std::get<0>(T1.sizes()));
assert(std::get<0>(V.sizes()) == std::get<0>(T2.sizes()));
assert(std::get<1>(V.sizes()) == std::get<2>(V.sizes()));
assert(std::get<2>(V.sizes()) == std::get<1>(S.sizes()));
assert(std::get<1>(S.sizes()) == std::get<1>(T1.sizes()));
assert(std::get<2>(S.sizes()) == std::get<2>(T1.sizes()));
assert(std::get<1>(S.sizes()) == std::get<1>(T2.sizes()));
assert(std::get<2>(S.sizes()) == std::get<2>(T2.sizes()));
// for now limit to continuous
assert(S.stride(0) == S.size(1) * S.size(2));
assert(T1.stride(0) == T1.size(1) * T1.size(2));
assert(T2.stride(0) == T2.size(1) * T2.size(2));
assert(S.stride(1) == S.size(2));
assert(T1.stride(1) == T1.size(2));
assert(T2.stride(1) == T2.size(2));
assert(S.stride(0) == std::get<1>(S.sizes()) * std::get<2>(S.sizes()));
assert(T1.stride(0) == std::get<1>(T1.sizes()) * std::get<2>(T1.sizes()));
assert(T2.stride(0) == std::get<1>(T2.sizes()) * std::get<2>(T2.sizes()));
assert(S.stride(1) == std::get<2>(S.sizes()));
assert(T1.stride(1) == std::get<2>(T1.sizes()));
assert(T2.stride(1) == std::get<2>(T2.sizes()));
assert(S.stride(2) == 1);
assert(T1.stride(2) == 1);
assert(T2.stride(2) == 1);
@ -197,22 +197,22 @@ inline void apply_expM_noncollinear(const MatA& V, MatB&& S, MatC& T1, MatC& T2,
static_assert(std::decay<MatA>::type::dimensionality == 3, " batched::apply_expM::dimenionality == 3");
static_assert(std::decay<MatB>::type::dimensionality == 3, " batched::apply_expM::dimenionality == 3");
static_assert(std::decay<MatC>::type::dimensionality == 3, " batched::apply_expM::dimenionality == 3");
assert(V.size(0) * 2 == S.size(0));
assert(V.size(0) * 2 == T1.size(0));
assert(V.size(0) * 2 == T2.size(0));
assert(V.size(1) == V.size(2));
assert(V.size(2) == S.size(1));
assert(S.size(1) == T1.size(1));
assert(S.size(2) == T1.size(2));
assert(S.size(1) == T2.size(1));
assert(S.size(2) == T2.size(2));
assert(std::get<0>(V.sizes()) * 2 == std::get<0>(S.sizes()));
assert(std::get<0>(V.sizes()) * 2 == std::get<0>(T1.sizes()));
assert(std::get<0>(V.sizes()) * 2 == std::get<0>(T2.sizes()));
assert(std::get<1>(V.sizes()) == std::get<2>(V.sizes()));
assert(std::get<2>(V.sizes()) == std::get<1>(S.sizes()));
assert(std::get<1>(S.sizes()) == std::get<1>(T1.sizes()));
assert(std::get<2>(S.sizes()) == std::get<2>(T1.sizes()));
assert(std::get<1>(S.sizes()) == std::get<1>(T2.sizes()));
assert(std::get<2>(S.sizes()) == std::get<2>(T2.sizes()));
// for now limit to continuous
assert(S.stride(0) == S.size(1) * S.size(2));
assert(T1.stride(0) == T1.size(1) * T1.size(2));
assert(T2.stride(0) == T2.size(1) * T2.size(2));
assert(S.stride(1) == S.size(2));
assert(T1.stride(1) == T1.size(2));
assert(T2.stride(1) == T2.size(2));
assert(S.stride(0) == std::get<1>(S.sizes()) * std::get<2>(S.sizes()));
assert(T1.stride(0) == std::get<1>(T1.sizes()) * std::get<2>(T1.sizes()));
assert(T2.stride(0) == std::get<1>(T2.sizes()) * std::get<2>(T2.sizes()));
assert(S.stride(1) == std::get<2>(S.sizes()));
assert(T1.stride(1) == std::get<2>(T1.sizes()));
assert(T2.stride(1) == std::get<2>(T2.sizes()));
assert(S.stride(2) == 1);
assert(T1.stride(2) == 1);
assert(T2.stride(2) == 1);
@ -228,26 +228,26 @@ inline void apply_expM_noncollinear(const MatA& V, MatB&& S, MatC& T1, MatC& T2,
using pointerA = typename std::decay<MatA>::type::element_const_ptr;
using pointerC = typename std::decay<MatC>::type::element_ptr;
int nbatch = S.size(0);
int nbatch = S.size();
int ldv = V.stride(1);
int M = T2.size(2);
int N = T2.size(1);
int K = T1.size(1);
int M = std::get<2>(T2.sizes());
int N = std::get<1>(T2.sizes());
int K = std::get<1>(T1.sizes());
std::vector<pointerA> Vi;
std::vector<pointerC> T1i;
std::vector<pointerC> T2i;
Vi.reserve(2 * V.size(0));
T1i.reserve(T1.size(0));
T2i.reserve(T2.size(0));
for (int i = 0; i < V.size(0); i++)
Vi.reserve(2 * V.size());
T1i.reserve(T1.size());
T2i.reserve(T2.size());
for (int i = 0; i < V.size(); i++)
{
Vi.emplace_back(ma::pointer_dispatch(V[i].origin()));
Vi.emplace_back(ma::pointer_dispatch(V[i].origin()));
}
for (int i = 0; i < T1.size(0); i++)
for (int i = 0; i < T1.size(); i++)
T1i.emplace_back(ma::pointer_dispatch(T1[i].origin()));
for (int i = 0; i < T2.size(0); i++)
for (int i = 0; i < T2.size(); i++)
T2i.emplace_back(ma::pointer_dispatch(T2[i].origin()));
auto pT1i(std::addressof(T1i));

View File

@ -63,23 +63,23 @@ Tp MixedDensityMatrix(const MatA& hermA,
bool herm = true)
{
// check dimensions are consistent
int NMO = (herm ? hermA.size(1) : hermA.size(0));
int NEL = (herm ? hermA.size(0) : hermA.size(1));
assert(NMO == B.size(0));
assert(NEL == B.size(1));
assert(NEL == T1.size(0));
assert(B.size(1) == T1.size(1));
int NMO = (herm ? std::get<1>(hermA.sizes()) : std::get<0>(hermA.sizes()));
int NEL = (herm ? std::get<0>(hermA.sizes()) : std::get<1>(hermA.sizes()));
assert(NMO == std::get<0>(B.sizes()));
assert(NEL == std::get<1>(B.sizes()));
assert(NEL == T1.size());
assert(std::get<1>(B.sizes()) == std::get<1>(T1.sizes()));
if (compact)
{
assert(C.size(0) == T1.size(1));
assert(C.size(1) == B.size(0));
assert(std::get<0>(C.sizes()) == std::get<1>(T1.sizes()));
assert(std::get<1>(C.sizes()) == std::get<0>(B.sizes()));
}
else
{
assert(T2.size(1) == B.size(0));
assert(T2.size(0) == T1.size(1));
assert(C.size(0) == NMO);
assert(C.size(1) == T2.size(1));
assert(std::get<1>(T2.sizes()) == B.size());
assert(T2.size() == std::get<1>(T1.sizes()));
assert(C.size() == NMO);
assert(std::get<1>(C.sizes()) == std::get<1>(T2.sizes()));
}
using ma::H;
@ -153,25 +153,25 @@ Tp MixedDensityMatrixForWoodbury(const MatA& hermA,
bool compact = true)
{
// check dimensions are consistent
int NEL = B.size(1);
assert(hermA.size(1) == B.size(0));
assert(hermA.size(0) == TAB.size(0));
assert(B.size(1) == TAB.size(1));
assert(B.size(1) == TNN.size(0));
assert(B.size(1) == TNN.size(1));
assert(hermA.size(0) == QQ0.size(0));
assert(B.size(1) == QQ0.size(1));
int NEL = std::get<1>(B.sizes());
assert(std::get<1>(hermA.sizes()) == std::get<0>(B.sizes()));
assert(std::get<0>(hermA.sizes()) == std::get<0>(TAB.sizes()));
assert(std::get<1>(B.sizes()) == std::get<1>(TAB.sizes()));
assert(std::get<1>(B.sizes()) == std::get<0>(TNN.sizes()));
assert(std::get<1>(B.sizes()) == std::get<1>(TNN.sizes()));
assert(std::get<0>(hermA.sizes()) == std::get<0>(QQ0.sizes()));
assert(std::get<1>(B.sizes()) == std::get<1>(QQ0.sizes()));
if (compact)
{
assert(C.size(0) == TNN.size(1));
assert(C.size(1) == B.size(0));
assert(std::get<0>(C.sizes()) == std::get<1>(TNN.sizes()));
assert(std::get<1>(C.sizes()) == std::get<0>(B.sizes()));
}
else
{
assert(TNM.size(1) == B.size(0));
assert(TNM.size(0) == TNN.size(1));
assert(C.size(0) == hermA.size(1));
assert(C.size(1) == TNM.size(1));
assert(std::get<1>(TNM.sizes()) == std::get<0>(B.sizes()));
assert(std::get<0>(TNM.sizes()) == std::get<1>(TNN.sizes()));
assert(std::get<0>(C.sizes()) == std::get<1>(hermA.sizes()));
assert(std::get<1>(C.sizes()) == std::get<1>(TNM.sizes()));
}
using ma::T;
@ -234,23 +234,23 @@ Tp MixedDensityMatrixFromConfiguration(const MatA& hermA,
bool compact = true)
{
// check dimensions are consistent
int NEL = B.size(1);
assert(hermA.size(1) == B.size(0));
assert(hermA.size(0) == TAB.size(0));
assert(B.size(1) == TAB.size(1));
assert(B.size(1) == TNN.size(0));
assert(B.size(1) == TNN.size(1));
int NEL = std::get<1>(B.sizes());
assert(std::get<1>(hermA.sizes()) == std::get<0>(B.sizes()));
assert(std::get<0>(hermA.sizes()) == std::get<0>(TAB.sizes()));
assert(std::get<1>(B.sizes()) == std::get<1>(TAB.sizes()));
assert(std::get<1>(B.sizes()) == std::get<0>(TNN.sizes()));
assert(std::get<1>(B.sizes()) == std::get<1>(TNN.sizes()));
if (compact)
{
assert(C.size(0) == TNN.size(1));
assert(C.size(1) == B.size(0));
assert(std::get<0>(C.sizes()) == std::get<1>(TNN.sizes()));
assert(std::get<1>(C.sizes()) == std::get<0>(B.sizes()));
}
else
{
assert(TNM.size(1) == B.size(0));
assert(TNM.size(0) == TNN.size(1));
assert(C.size(0) == hermA.size(1));
assert(C.size(1) == TNM.size(1));
assert(std::get<1>(TNM.sizes()) == std::get<0>(B.sizes()));
assert(std::get<0>(TNM.sizes()) == std::get<1>(TNN.sizes()));
assert(std::get<0>(C.sizes()) == std::get<1>(hermA.sizes()));
assert(std::get<1>(C.sizes()) == std::get<1>(TNM.sizes()));
}
using ma::T;
@ -317,21 +317,21 @@ Tp MixedDensityMatrix_noHerm(const MatA& A,
bool compact = true)
{
// check dimensions are consistent
assert(A.size(0) == B.size(0));
assert(A.size(1) == B.size(1));
assert(A.size(1) == T1.size(0));
assert(B.size(1) == T1.size(1));
assert(std::get<0>(A.sizes()) == std::get<0>(B.sizes()));
assert(std::get<1>(A.sizes()) == std::get<1>(B.sizes()));
assert(std::get<1>(A.sizes()) == std::get<0>(T1.sizes()));
assert(std::get<1>(B.sizes()) == std::get<1>(T1.sizes()));
if (compact)
{
assert(C.size(0) == T1.size(1));
assert(C.size(1) == B.size(0));
assert(std::get<0>(C.sizes()) == std::get<1>(T1.sizes()));
assert(std::get<1>(C.sizes()) == std::get<0>(B.sizes()));
}
else
{
assert(T2.size(1) == B.size(0));
assert(T2.size(0) == T1.size(1));
assert(C.size(0) == A.size(0));
assert(C.size(1) == T2.size(1));
assert(std::get<1>(T2.sizes()) == std::get<0>(B.sizes()));
assert(std::get<0>(T2.sizes()) == std::get<1>(T1.sizes()));
assert(std::get<0>(C.sizes()) == std::get<0>(A.sizes()));
assert(std::get<1>(C.sizes()) == std::get<1>(T2.sizes()));
}
using ma::H;
@ -381,19 +381,19 @@ Tp MixedDensityMatrix_noHerm_wSVD(const MatA& A,
bool compact = true)
{
// check dimensions are consistent
assert(A.size(0) == B.size(0));
assert(A.size(1) == B.size(1));
assert(A.size(1) == U.size(0)); // [U] = [NxN]
assert(A.size(1) == U.size(1));
assert(A.size(1) == VT.size(0)); // [V] = [NxN]
assert(A.size(1) == VT.size(1));
assert(A.size(1) <= (6 * S.size(0) + 1)); // [S] = [N+1]
assert(A.size(1) == UA.size(0)); // [UA] = [NxM]
assert(A.size(0) == UA.size(1));
assert(std::get<0>(A.sizes()) == std::get<0>(B.sizes()));
assert(std::get<1>(A.sizes()) == std::get<1>(B.sizes()));
assert(std::get<1>(A.sizes()) == std::get<0>(U.sizes())); // [U] = [NxN]
assert(std::get<1>(A.sizes()) == std::get<1>(U.sizes()));
assert(std::get<1>(A.sizes()) == std::get<0>(VT.sizes())); // [V] = [NxN]
assert(std::get<1>(A.sizes()) == std::get<1>(VT.sizes()));
assert(std::get<1>(A.sizes()) <= (6 * S.size() + 1)); // [S] = [N+1]
assert(std::get<1>(A.sizes()) == std::get<0>(UA.sizes())); // [UA] = [NxM]
assert(std::get<0>(A.sizes()) == std::get<1>(UA.sizes()));
if (compact)
{
assert(C.size(0) == B.size(1));
assert(C.size(1) == B.size(0));
assert(std::get<0>(C.sizes()) == std::get<1>(B.sizes()));
assert(std::get<1>(C.sizes()) == std::get<0>(B.sizes()));
}
else
{
@ -409,7 +409,7 @@ Tp MixedDensityMatrix_noHerm_wSVD(const MatA& A,
using ma::T;
using ma::term_by_term_matrix_vector;
int N(U.size(0));
int N(U.size());
// T1 = H(A)*B
ma::product(H(A), B, U);
@ -453,7 +453,7 @@ Tp MixedDensityMatrix_noHerm_wSVD(const MatA& A,
// VT = VT * inv(S), which works since S is diagonal and real
term_by_term_matrix_vector(ma::TOp_DIV, 0, VT.size(0), VT.size(1), ma::pointer_dispatch(VT.origin()), VT.stride(0),
term_by_term_matrix_vector(ma::TOp_DIV, 0, std::get<0>(VT.sizes()), std::get<1>(VT.sizes()), ma::pointer_dispatch(VT.origin()), VT.stride(0),
ma::pointer_dispatch(S.origin()), 1);
// BV = H(VT) * H(U)
@ -505,13 +505,13 @@ Tp Overlap(const MatA& hermA,
Buffer&& WORK,
bool herm = true)
{
int NMO = (herm ? hermA.size(1) : hermA.size(0));
int NEL = (herm ? hermA.size(0) : hermA.size(1));
int NMO = (herm ? std::get<1>(hermA.sizes()) : std::get<0>(hermA.sizes()));
int NEL = (herm ? std::get<0>(hermA.sizes()) : std::get<1>(hermA.sizes()));
// check dimensions are consistent
assert(NMO == B.size(0));
assert(NEL == B.size(1));
assert(NEL == T1.size(0));
assert(B.size(1) == T1.size(1));
assert(NMO == std::get<0>(B.sizes()));
assert(NEL == std::get<1>(B.sizes()));
assert(NEL == std::get<0>(T1.sizes()));
assert(std::get<1>(B.sizes()) == std::get<1>(T1.sizes()));
using ma::H;
using ma::T;
@ -545,14 +545,14 @@ Tp OverlapForWoodbury(const MatA& hermA,
TBuffer& WORK)
{
// check dimensions are consistent
int NEL = B.size(1);
assert(hermA.size(1) == B.size(0));
assert(hermA.size(0) == TMN.size(0));
assert(B.size(1) == TMN.size(1));
assert(B.size(1) == TNN.size(0));
assert(B.size(1) == TNN.size(1));
assert(hermA.size(0) == QQ0.size(0));
assert(B.size(1) == QQ0.size(1));
int NEL = std::get<1>(B.sizes());
assert(std::get<1>(hermA.sizes()) == std::get<0>(B.sizes()));
assert(std::get<0>(hermA.sizes()) == std::get<0>(TMN.sizes()));
assert(std::get<1>(B.sizes()) == std::get<1>(TMN.sizes()));
assert(std::get<1>(B.sizes()) == std::get<0>(TNN.sizes()));
assert(std::get<1>(B.sizes()) == std::get<1>(TNN.sizes()));
assert(std::get<0>(hermA.sizes()) == std::get<0>(QQ0.sizes()));
assert(std::get<1>(B.sizes()) == std::get<1>(QQ0.sizes()));
using ma::T;
@ -593,10 +593,10 @@ template<class Tp, class MatA, class MatB, class Mat, class Buffer, class IBuffe
Tp Overlap_noHerm(const MatA& A, const MatB& B, Tp LogOverlapFactor, Mat&& T1, IBuffer& IWORK, Buffer& WORK)
{
// check dimensions are consistent
assert(A.size(0) == B.size(0));
assert(A.size(1) == B.size(1));
assert(A.size(1) == T1.size(0));
assert(B.size(1) == T1.size(1));
assert(std::get<0>(A.sizes()) == std::get<0>(B.sizes()));
assert(std::get<1>(A.sizes()) == std::get<1>(B.sizes()));
assert(std::get<1>(A.sizes()) == std::get<0>(T1.sizes()));
assert(std::get<1>(B.sizes()) == std::get<1>(T1.sizes()));
using ma::H;
using ma::T;
@ -642,30 +642,30 @@ Tp MixedDensityMatrix(const MatA& hermA,
bool compact = true,
bool herm = true)
{
int NMO = (herm ? hermA.size(1) : hermA.size(0));
int NEL = (herm ? hermA.size(0) : hermA.size(1));
int NMO = (herm ? std::get<1>(hermA.sizes()) : std::get<0>(hermA.sizes()));
int NEL = (herm ? std::get<0>(hermA.sizes()) : std::get<1>(hermA.sizes()));
// check dimensions are consistent
assert(NMO == B.size(0));
assert(NEL == B.size(1));
assert(NEL == T1.size(0));
assert(B.size(1) == T1.size(1));
assert(NMO == std::get<0>(B.sizes()));
assert(NEL == std::get<1>(B.sizes()));
assert(NEL == std::get<0>(T1.sizes()));
assert(std::get<1>(B.sizes()) == std::get<1>(T1.sizes()));
if (compact)
{
assert(C.size(0) == T1.size(1));
assert(C.size(1) == B.size(0));
assert(std::get<0>(C.sizes()) == std::get<1>(T1.sizes()));
assert(std::get<1>(C.sizes()) == std::get<0>(B.sizes()));
}
else
{
assert(T2.size(1) == B.size(0));
assert(T2.size(0) == T1.size(1));
assert(C.size(0) == NMO);
assert(C.size(1) == T2.size(1));
assert(std::get<1>(T2.sizes()) == std::get<0>(B.sizes()));
assert(std::get<0>(T2.sizes()) == std::get<1>(T1.sizes()));
assert(std::get<0>(C.sizes()) == NMO);
assert(std::get<1>(C.sizes()) == std::get<1>(T2.sizes()));
}
using ma::H;
using ma::T;
int N0, Nn, sz = B.size(1);
int N0, Nn, sz = std::get<1>(B.sizes());
std::tie(N0, Nn) = FairDivideBoundary(comm.rank(), sz, comm.size());
// T(B)*conj(A)
@ -718,7 +718,7 @@ Tp MixedDensityMatrix(const MatA& hermA,
comm.barrier();
sz = T2.size(1);
sz = std::get<1>(T2.sizes());
std::tie(N0, Nn) = FairDivideBoundary(comm.rank(), sz, comm.size());
// C = conj(A) * T2
@ -734,7 +734,7 @@ Tp MixedDensityMatrix(const MatA& hermA,
comm.barrier();
sz = T2.size(1);
sz = std::get<1>(T2.sizes());
std::tie(N0, Nn) = FairDivideBoundary(comm.rank(), sz, comm.size());
// C = T( B * T2) = T(T2) * T(B)
@ -768,18 +768,18 @@ Tp Overlap(const MatA& hermA,
communicator& comm,
bool herm = true)
{
int NMO = (herm ? hermA.size(1) : hermA.size(0));
int NEL = (herm ? hermA.size(0) : hermA.size(1));
int NMO = (herm ? std::get<1>(hermA.sizes()) : std::get<0>(hermA.sizes()));
int NEL = (herm ? std::get<0>(hermA.sizes()) : std::get<1>(hermA.sizes()));
// check dimensions are consistent
assert(NMO == B.size(0));
assert(NEL == B.size(1));
assert(NEL == T1.size(0));
assert(B.size(1) == T1.size(1));
assert(NMO == std::get<0>(B.sizes()));
assert(NEL == std::get<1>(B.sizes()));
assert(NEL == std::get<0>(T1.sizes()));
assert(std::get<1>(B.sizes()) == std::get<1>(T1.sizes()));
using ma::H;
using ma::T;
int N0, Nn, sz = B.size(1);
int N0, Nn, sz = std::get<1>(B.sizes());
std::tie(N0, Nn) = FairDivideBoundary(comm.rank(), sz, comm.size());
// T(B)*conj(A)
@ -823,18 +823,18 @@ Tp OverlapForWoodbury(const MatA& hermA,
communicator& comm)
{
// check dimensions are consistent
int NEL = B.size(1);
assert(hermA.size(1) == B.size(0));
assert(hermA.size(0) == TMN.size(0));
assert(B.size(1) == TMN.size(1));
assert(B.size(1) == TNN.size(0));
assert(B.size(1) == TNN.size(1));
assert(hermA.size(0) == QQ0.size(0));
assert(B.size(1) == QQ0.size(1));
int NEL = std::get<1>(B.sizes());
assert(std::get<1>(hermA.sizes()) == std::get<0>(B.sizes()));
assert(std::get<0>(hermA.sizes()) == std::get<0>(TMN.sizes()));
assert(std::get<1>(B.sizes()) == std::get<1>(TMN.sizes()));
assert(std::get<1>(B.sizes()) == std::get<0>(TNN.sizes()));
assert(std::get<1>(B.sizes()) == std::get<1>(TNN.sizes()));
assert(std::get<0>(hermA.sizes()) == std::get<0>(QQ0.sizes()));
assert(std::get<1>(B.sizes()) == std::get<1>(QQ0.sizes()));
using ma::T;
int N0, Nn, sz = B.size(1);
int N0, Nn, sz = std::get<1>(B.sizes());
std::tie(N0, Nn) = FairDivideBoundary(comm.rank(), sz, comm.size());
Tp ovlp;
@ -851,7 +851,7 @@ Tp OverlapForWoodbury(const MatA& hermA,
comm.broadcast_n(&ovlp, 1, 0);
int M0, Mn;
sz = TMN.size(0);
sz = TMN.size();
std::tie(M0, Mn) = FairDivideBoundary(comm.rank(), sz, comm.size());
// QQ0 = TMN * inv(TNN)
@ -888,25 +888,25 @@ Tp MixedDensityMatrixForWoodbury(const MatA& hermA,
bool compact = true)
{
// check dimensions are consistent
int NEL = B.size(1);
assert(hermA.size(1) == B.size(0));
assert(hermA.size(0) == TAB.size(0));
assert(B.size(1) == TAB.size(1));
assert(B.size(1) == TNN.size(0));
assert(B.size(1) == TNN.size(1));
assert(hermA.size(0) == QQ0.size(0));
assert(B.size(1) == QQ0.size(1));
int NEL = std::get<1>(B.sizes());
assert(std::get<1>(hermA.sizes()) == std::get<0>(B.sizes()));
assert(std::get<0>(hermA.sizes()) == std::get<0>(TAB.sizes()));
assert(std::get<1>(B.sizes()) == std::get<1>(TAB.sizes()));
assert(std::get<1>(B.sizes()) == std::get<0>(TNN.sizes()));
assert(std::get<1>(B.sizes()) == std::get<1>(TNN.sizes()));
assert(std::get<0>(hermA.sizes()) == std::get<0>(QQ0.sizes()));
assert(std::get<1>(B.sizes()) == std::get<1>(QQ0.sizes()));
if (compact)
{
assert(C.size(0) == TNN.size(1));
assert(C.size(1) == B.size(0));
assert(std::get<0>(C.sizes()) == std::get<1>(TNN.sizes()));
assert(std::get<1>(C.sizes()) == std::get<0>(B.sizes()));
}
else
{
assert(TNM.size(1) == B.size(0));
assert(TNM.size(0) == TNN.size(1));
assert(C.size(0) == hermA.size(1));
assert(C.size(1) == TNM.size(1));
assert(std::get<1>(TNM.sizes()) == std::get<0>(B.sizes()));
assert(std::get<0>(TNM.sizes()) == std::get<1>(TNN.sizes()));
assert(std::get<0>(C.sizes()) == std::get<1>(hermA.sizes()));
assert(std::get<1>(C.sizes()) == std::get<1>(TNM.sizes()));
}
using ma::T;
@ -933,7 +933,7 @@ Tp MixedDensityMatrixForWoodbury(const MatA& hermA,
comm.broadcast_n(&ovlp, 1, 0);
int P0, Pn;
std::tie(P0, Pn) = FairDivideBoundary(comm.rank(), int(TAB.size(0)), comm.size());
std::tie(P0, Pn) = FairDivideBoundary(comm.rank(), int(std::get<0>(TAB.sizes())), comm.size());
// QQ0 = TAB * inv(TNN)
if (P0 != Pn)
@ -951,7 +951,7 @@ Tp MixedDensityMatrixForWoodbury(const MatA& hermA,
if (N0 != Nn)
ma::product(T(TNN(TNN.extension(0), {N0, Nn})), T(B), TNM.sliced(N0, Nn));
int sz = TNM.size(1);
int sz = std::get<1>(TNM.sizes());
std::tie(N0, Nn) = FairDivideBoundary(comm.rank(), sz, comm.size());
comm.barrier();
@ -993,25 +993,25 @@ void MixedDensityMatrix(std::vector<MatA>& hermA,
using ma::T;
int nbatch = Bi.size();
int NMO = (herm ? (*hermA[0]).size(1) : (*hermA[0]).size(0));
int NEL = (herm ? (*hermA[0]).size(0) : (*hermA[0]).size(1));
int NMO = (herm ? std::get<1>((*hermA[0]).sizes()) : std::get<0>((*hermA[0]).sizes()));
int NEL = (herm ? std::get<0>((*hermA[0]).sizes()) : std::get<1>((*hermA[0]).sizes()));
assert((*Bi[0]).size(0) == NMO);
assert((*Bi[0]).size(1) == NEL);
assert(C.size(0) == nbatch);
assert(C.size(2) == NMO);
assert(std::get<0>((*Bi[0]).sizes()) == NMO);
assert(std::get<1>((*Bi[0]).sizes()) == NEL);
assert(C.size() == nbatch);
assert(std::get<2>(C.sizes()) == NMO);
if (compact)
assert(C.size(1) == NEL);
assert(std::get<1>(C.sizes()) == NEL);
else
assert(C.size(1) == NMO);
assert(std::get<1>(C.sizes()) == NMO);
assert(ovlp.size() == nbatch);
assert(TNN3D.size(1) == NEL);
assert(TNN3D.size(2) == NEL);
assert(std::get<1>(TNN3D.sizes()) == NEL);
assert(std::get<2>(TNN3D.sizes()) == NEL);
if (not compact)
{
assert(TNM3D.size(0) == nbatch);
assert(TNM3D.size(1) == NEL);
assert(TNM3D.size(2) == NMO);
assert(std::get<0>(TNM3D.sizes()) == nbatch);
assert(std::get<1>(TNM3D.sizes()) == NEL);
assert(std::get<2>(TNM3D.sizes()) == NMO);
}
assert(IWORK.num_elements() >= nbatch * (NEL + 1));
assert(TNN3D.stride(1) == NEL); // needed by getriBatched
@ -1149,25 +1149,25 @@ void DensityMatrices(std::vector<MatA> const& Left,
using ma::T;
int nbatch = Right.size();
int NMO = (herm ? (*Left[0]).size(1) : (*Left[0]).size(0));
int NEL = (herm ? (*Left[0]).size(0) : (*Left[0]).size(1));
int NMO = (herm ? std::get<1>((*Left[0]).sizes()) : std::get<0>((*Left[0]).sizes()));
int NEL = (herm ? std::get<0>((*Left[0]).sizes()) : std::get<1>((*Left[0]).sizes()));
assert((*Right[0]).size(0) == NMO);
assert((*Right[0]).size(1) == NEL);
assert(std::get<0>((*Right[0]).sizes()) == NMO);
assert(std::get<1>((*Right[0]).sizes()) == NEL);
assert(G.size() == nbatch);
assert((*G[0]).size(1) == NMO);
assert(std::get<1>((*G[0]).sizes()) == NMO);
if (compact)
assert((*G[0]).size(0) == NEL);
assert((*G[0]).size() == NEL);
else
assert((*G[0]).size(0) == NMO);
assert((*G[0]).size() == NMO);
assert(ovlp.size() == nbatch);
assert(TNN3D.size(1) == NEL);
assert(TNN3D.size(2) == NEL);
assert(std::get<1>(TNN3D.sizes()) == NEL);
assert(std::get<2>(TNN3D.sizes()) == NEL);
if (not compact)
{
assert(TNM3D.size(0) == nbatch);
assert(TNM3D.size(1) == NEL);
assert(TNM3D.size(2) == NMO);
assert(std::get<0>(TNM3D.sizes()) == nbatch);
assert(std::get<1>(TNM3D.sizes()) == NEL);
assert(std::get<2>(TNM3D.sizes()) == NMO);
}
assert(IWORK.num_elements() >= nbatch * (NEL + 1));
@ -1275,14 +1275,14 @@ void Overlap(std::vector<MatA>& hermA,
int nbatch = Bi.size();
assert(hermA.size() >= nbatch);
int NMO = (herm ? (*hermA[0]).size(1) : (*hermA[0]).size(0));
int NEL = (herm ? (*hermA[0]).size(0) : (*hermA[0]).size(1));
int NMO = (herm ? std::get<1>((*hermA[0]).sizes()) : std::get<0>((*hermA[0]).sizes()));
int NEL = (herm ? std::get<0>((*hermA[0]).sizes()) : std::get<1>((*hermA[0]).sizes()));
assert((*Bi[0]).size(0) == NMO);
assert((*Bi[0]).size(1) == NEL);
assert(std::get<0>((*Bi[0]).sizes()) == NMO);
assert(std::get<1>((*Bi[0]).sizes()) == NEL);
assert(ovlp.size() == nbatch);
assert(TNN3D.size(1) == NEL);
assert(TNN3D.size(2) == NEL);
assert(std::get<1>(TNN3D.sizes()) == NEL);
assert(std::get<2>(TNN3D.sizes()) == NEL);
assert(IWORK.num_elements() >= nbatch * (NEL + 1));
using pointer = typename std::decay<Mat>::type::element_ptr;

View File

@ -500,13 +500,13 @@ void halfRotateCholeskyMatrix(WALKER_TYPES type,
int Qdim = NAEA * (kN_alpha - k0_alpha) + NAEB * (kN_beta - k0_beta);
if (transpose)
{
assert(Q.size(0) == nvec);
assert(Q.size(1) == Qdim);
assert(std::get<0>(Q.sizes()) == nvec);
assert(std::get<1>(Q.sizes()) == Qdim);
}
else
{
assert(Q.size(0) == Qdim);
assert(Q.size(1) == nvec);
assert(std::get<0>(Q.sizes()) == Qdim);
assert(std::get<1>(Q.sizes()) == nvec);
}
std::tie(ak0, ak1) = FairDivideBoundary(coreid, Qdim, ncores);

View File

@ -62,10 +62,10 @@ void myREQUIRE(const std::complex<double>& a, const std::complex<double>& b)
template<class M1, class M2>
void check(M1&& A, M2& B)
{
REQUIRE(A.size(0) == B.size(0));
REQUIRE(A.size(1) == B.size(1));
for (int i = 0; i < A.size(0); i++)
for (int j = 0; j < A.size(1); j++)
REQUIRE(std::get<0>(A.sizes()) == std::get<0>(B.sizes()));
REQUIRE(std::get<1>(A.sizes()) == std::get<1>(B.sizes()));
for (int i = 0; i < std::get<0>(A.sizes()); i++)
for (int j = 0; j < std::get<1>(A.sizes()); j++)
myREQUIRE(A[i][j], B[i][j]);
}
@ -663,12 +663,12 @@ TEST_CASE("SDetOps_complex_mpi3", "[sdet_ops]")
array A({NEL, NMO});
array B({NMO, NEL});
for (int i = 0, k = 0; i < A.size(0); i++)
for (int j = 0; j < A.size(1); j++, k++)
for (int i = 0, k = 0; i < std::get<0>(A.sizes()); i++)
for (int j = 0; j < std::get<1>(A.sizes()); j++, k++)
A[i][j] = m_a[k];
for (int i = 0, k = 0; i < B.size(0); i++)
for (int j = 0; j < B.size(1); j++, k++)
for (int i = 0, k = 0; i < std::get<0>(B.sizes()); i++)
for (int j = 0; j < std::get<1>(B.sizes()); j++, k++)
B[i][j] = m_b[k];
array_ref Aref(m_a.data(), {NEL, NMO});
@ -836,12 +836,12 @@ TEST_CASE("SDetOps_complex_csr", "[sdet_ops]")
array A({NMO, NEL}); // Will be transposed when Acsr is built
array B({NMO, NEL});
for (int i = 0, k = 0; i < A.size(0); i++)
for (int j = 0; j < A.size(1); j++, k++)
for (int i = 0, k = 0; i < std::get<0>(A.sizes()); i++)
for (int j = 0; j < std::get<1>(A.sizes()); j++, k++)
A[i][j] = m_a[k];
for (int i = 0, k = 0; i < B.size(0); i++)
for (int j = 0; j < B.size(1); j++, k++)
for (int i = 0, k = 0; i < std::get<0>(B.sizes()); i++)
for (int j = 0; j < std::get<1>(B.sizes()); j++, k++)
B[i][j] = m_b[k];
boost::multi::array_ref<Type, 2> Bref(m_b.data(), {NMO, NEL});

View File

@ -0,0 +1,38 @@
////////////////////////////////////////////////////////////////////////////////
// This file is distributed under the University of Illinois/NCSA Open Source
// License. See LICENSE file in top directory for details.
//
// Copyright (c) 2022 QMCPACK developers.
//
// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
//
// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
////////////////////////////////////////////////////////////////////////////////
#ifndef AFQMC_ARRAY_SIZE_HELP_HPP
#define AFQMC_ARRAY_SIZE_HELP_HPP
#include <array>
namespace qmcplusplus
{
namespace afqmc
{
// Uniform accessor for the dimensions of 2-D array-like objects.
// Returns something usable with std::get<I>(...), matching the
// std::get<I>(A.sizes()) idiom used throughout this changeset.
//
// Fallback overload: any type exposing size(0)/size(1) (e.g. legacy
// multi-array interfaces). Packs both extents into a std::array.
// The trailing decltype makes this SFINAE-out when A.size(int) is
// not well-formed, deferring to the overloads below.
template<class T> auto generic_sizes(T const& A)
->decltype(std::array<std::size_t, 2>{A.size(0), A.size(1)}) {
return std::array<std::size_t, 2>{A.size(0), A.size(1)}; }
// Overload for boost::multi::static_array: forwards the container's own
// sizes() result (a tuple-like of extents) unchanged.
template<typename T, boost::multi::dimensionality_type DIM, class Alloc>
auto generic_sizes(boost::multi::static_array<T, DIM, Alloc> const& A)
->decltype(A.sizes()) {
return A.sizes(); }
// Overload for boost::multi::basic_array — presumably the view/reference
// counterpart of static_array (TODO confirm against boost.multi docs);
// likewise forwards sizes() unchanged.
template<typename T, boost::multi::dimensionality_type DIM, typename ElementPtr>
auto generic_sizes(boost::multi::basic_array<T, DIM, ElementPtr> const& A)
->decltype(A.sizes()) {
return A.sizes(); }
} // namespace afqmc
} // namespace qmcplusplus
#endif

View File

@ -50,7 +50,7 @@ inline int swapWalkersSimple(WlkBucket& wset,
NumContexts = comm.size();
MyContext = comm.rank();
static_assert(std::decay<Mat>::type::dimensionality == 2, "Wrong dimensionality");
if (wlk_size != Wexcess.size(1))
if (wlk_size != std::get<1>(Wexcess.sizes()))
throw std::runtime_error("Array dimension error in swapWalkersSimple().");
if (1 != Wexcess.stride(1))
throw std::runtime_error("Array shape error in swapWalkersSimple().");
@ -78,7 +78,7 @@ inline int swapWalkersSimple(WlkBucket& wset,
int nsend = 0;
if (deltaN <= 0 && wset.size() != CurrNumPerNode[MyContext])
throw std::runtime_error("error in swapWalkersSimple().");
if (deltaN > 0 && (wset.size() != NewNumPerNode[MyContext] || int(Wexcess.size(0)) != deltaN))
if (deltaN > 0 && (wset.size() != NewNumPerNode[MyContext] || int(std::get<0>(Wexcess.sizes())) != deltaN))
throw std::runtime_error("error in swapWalkersSimple().");
std::vector<ComplexType> buff;
if (deltaN < 0)
@ -117,9 +117,9 @@ inline int swapWalkersAsync(WlkBucket& wset,
NumContexts = comm.size();
MyContext = comm.rank();
static_assert(std::decay<Mat>::type::dimensionality == 2, "Wrong dimensionality");
if (wlk_size != Wexcess.size(1))
if (wlk_size != std::get<1>(Wexcess.sizes()))
throw std::runtime_error("Array dimension error in swapWalkersAsync().");
if (1 != Wexcess.stride(1) || (Wexcess.size(0) > 0 && Wexcess.size(1) != Wexcess.stride(0)))
if (1 != Wexcess.stride(1) || (std::get<0>(Wexcess.sizes()) > 0 && std::get<1>(Wexcess.sizes()) != Wexcess.stride(0)))
throw std::runtime_error("Array shape error in swapWalkersAsync().");
if (CurrNumPerNode.size() < NumContexts || NewNumPerNode.size() < NumContexts)
throw std::runtime_error("Array dimension error in swapWalkersAsync().");
@ -146,7 +146,7 @@ inline int swapWalkersAsync(WlkBucket& wset,
int countSend = 1;
if (deltaN <= 0 && wset.size() != CurrNumPerNode[MyContext])
throw std::runtime_error("error(1) in swapWalkersAsync().");
if (deltaN > 0 && (wset.size() != NewNumPerNode[MyContext] || int(Wexcess.size(0)) != deltaN))
if (deltaN > 0 && (wset.size() != NewNumPerNode[MyContext] || int(std::get<0>(Wexcess.sizes())) != deltaN))
throw std::runtime_error("error(2) in swapWalkersAsync().");
std::vector<ComplexType*> buffers;
std::vector<boost::mpi3::request> requests;
@ -161,7 +161,7 @@ inline int swapWalkersAsync(WlkBucket& wset,
}
else
{
requests.emplace_back(comm.isend(Wexcess[nsend].origin(), Wexcess[nsend].origin() + countSend * Wexcess.size(1),
requests.emplace_back(comm.isend(Wexcess[nsend].origin(), Wexcess[nsend].origin() + countSend * std::get<1>(Wexcess.sizes()),
minus[ic], plus[ic] + 1999));
nsend += countSend;
countSend = 1;

View File

@ -378,10 +378,10 @@ bool dumpToHDF5(WalkerSet& wset, hdf_archive& dump)
int NMO, NAEA, NAEB = 0;
{ // to limit the scope
auto w = wset[0];
NMO = (*w.SlaterMatrix(Alpha)).size(0);
NAEA = (*w.SlaterMatrix(Alpha)).size(1);
NMO = std::get<0>((*w.SlaterMatrix(Alpha)).sizes());
NAEA = std::get<1>((*w.SlaterMatrix(Alpha)).sizes());
if (walker_type == COLLINEAR)
NAEB = (*w.SlaterMatrix(Beta)).size(1);
NAEB = std::get<1>((*w.SlaterMatrix(Beta)).sizes());
if (walker_type == NONCOLLINEAR)
NMO /= 2;
}

View File

@ -128,7 +128,7 @@ public:
/*
* Returns the maximum number of walkers in the set that can be stored without reallocation.
*/
int capacity() const { return int(walker_buffer.size(0)); }
int capacity() const { return int(std::get<0>(walker_buffer.sizes())); }
/*
* Returns the maximum number of fields in the set that can be stored without reallocation.
@ -163,7 +163,7 @@ public:
*/
iterator begin()
{
assert(walker_buffer.size(1) == walker_size);
assert(std::get<1>(walker_buffer.sizes()) == walker_size);
return iterator(0, boost::multi::static_array_cast<element, pointer>(walker_buffer), data_displ, wlk_desc);
}
@ -172,7 +172,7 @@ public:
*/
const_iterator begin() const
{
assert(walker_buffer.size(1) == walker_size);
assert(std::get<1>(walker_buffer.sizes()) == walker_size);
return const_iterator(0, boost::multi::static_array_cast<element, pointer>(walker_buffer), data_displ, wlk_desc);
}
@ -182,7 +182,7 @@ public:
*/
iterator end()
{
assert(walker_buffer.size(1) == walker_size);
assert(std::get<1>(walker_buffer.sizes()) == walker_size);
return iterator(tot_num_walkers, boost::multi::static_array_cast<element, pointer>(walker_buffer), data_displ,
wlk_desc);
}
@ -194,7 +194,7 @@ public:
{
if (i < 0 || i > tot_num_walkers)
APP_ABORT("error: index out of bounds.\n");
assert(walker_buffer.size(1) == walker_size);
assert(std::get<1>(walker_buffer.sizes()) == walker_size);
return reference(boost::multi::static_array_cast<element, pointer>(walker_buffer)[i], data_displ, wlk_desc);
}
@ -205,7 +205,7 @@ public:
{
if (i < 0 || i > tot_num_walkers)
APP_ABORT("error: index out of bounds.\n");
assert(walker_buffer.size(1) == walker_size);
assert(std::get<1>(walker_buffer.sizes()) == walker_size);
return const_reference(boost::multi::static_array_cast<element, pointer>(walker_buffer)[i], data_displ, wlk_desc);
}
@ -240,12 +240,12 @@ public:
template<class MatA, class MatB>
void resize(int n, MatA&& A, MatB&& B)
{
assert(A.size(0) == wlk_desc[0]);
assert(A.size(1) == wlk_desc[1]);
assert(std::get<0>(A.sizes()) == wlk_desc[0]);
assert(std::get<1>(A.sizes()) == wlk_desc[1]);
if (walkerType == COLLINEAR)
{
assert(B.size(0) == wlk_desc[0]);
assert(B.size(1) == wlk_desc[2]);
assert(std::get<0>(B.sizes()) == wlk_desc[0]);
assert(std::get<1>(B.sizes()) == wlk_desc[2]);
}
reserve(n);
if (n > tot_num_walkers)
@ -258,7 +258,7 @@ public:
while (pos < n)
{
using std::fill_n;
fill_n(W[pos].origin(), W[pos].size(0), ComplexType(0, 0));
fill_n(W[pos].origin(), W[pos].size(), ComplexType(0, 0));
reference w0(W[pos], data_displ, wlk_desc);
//w0.SlaterMatrix(Alpha) = A;
auto&& SM_(*w0.SlaterMatrix(Alpha));
@ -293,9 +293,9 @@ public:
void resize_bp(int nbp, int nCV, int nref)
{
assert(walker_buffer.size(1) == walker_size);
assert(bp_buffer.size(0) == bp_walker_size);
assert(walker_buffer.size(0) == bp_buffer.size(1));
assert(std::get<1>(walker_buffer.sizes()) == walker_size);
assert(bp_buffer.size() == bp_walker_size);
assert(walker_buffer.size() == std::get<1>(bp_buffer.sizes()));
// wlk_descriptor: {nmo, naea, naeb, nback_prop, nCV, nRefs, nHist}
wlk_desc[3] = nbp;
wlk_desc[4] = nCV;
@ -329,11 +329,11 @@ public:
data_displ[WEIGHT_HISTORY] = cnt;
cnt += wlk_desc[6];
bp_walker_size = cnt;
if (bp_buffer.size(0) != bp_walker_size)
if (std::get<0>(bp_buffer.sizes()) != bp_walker_size)
{
bp_buffer.reextent({bp_walker_size, walker_buffer.size(0)});
bp_buffer.reextent({bp_walker_size, std::get<0>(walker_buffer.sizes())});
using std::fill_n;
fill_n(bp_buffer.origin() + data_displ[WEIGHT_FAC] * bp_buffer.size(1), wlk_desc[6] * bp_buffer.size(1),
fill_n(bp_buffer.origin() + data_displ[WEIGHT_FAC] * std::get<1>(bp_buffer.sizes()), wlk_desc[6] * std::get<1>(bp_buffer.sizes()),
bp_element(1.0));
}
if (nbp > 0 && (data_displ[SMN] < 0 || data_displ[SM_AUX] < 0))
@ -343,7 +343,7 @@ public:
walker_size += nrow * ncol;
data_displ[SM_AUX] = walker_size;
walker_size += nrow * ncol;
CMatrix wb({walker_buffer.size(0), walker_size}, walker_buffer.get_allocator());
CMatrix wb({std::get<0>(walker_buffer.sizes()), walker_size}, walker_buffer.get_allocator());
ma::copy(walker_buffer, wb(wb.extension(0), {0, sz}));
walker_buffer = std::move(wb);
}
@ -360,7 +360,7 @@ public:
int GlobalPopulation() const
{
int res = 0;
assert(walker_buffer.size(1) == walker_size);
assert(std::get<1>(walker_buffer.sizes()) == walker_size);
if (TG.TG_local().root())
res += tot_num_walkers;
return (TG.Global() += res);
@ -369,7 +369,7 @@ public:
RealType GlobalWeight() const
{
RealType res = 0;
assert(walker_buffer.size(1) == walker_size);
assert(std::get<1>(walker_buffer.sizes()) == walker_size);
if (TG.TG_local().root())
{
boost::multi::array<ComplexType, 1> buff(iextensions<1u>{tot_num_walkers});
@ -387,20 +387,20 @@ public:
void push_walkers(Mat&& M)
{
static_assert(std::decay<Mat>::type::dimensionality == 2, "Wrong dimensionality");
if (tot_num_walkers + M.size(0) > capacity())
if (tot_num_walkers + M.size() > capacity())
APP_ABORT("Insufficient capacity");
if (single_walker_size() + single_walker_bp_size() != M.size(1))
if (single_walker_size() + single_walker_bp_size() != std::get<1>(M.sizes()))
APP_ABORT("Incorrect dimensions.");
if (M.stride(1) != 1)
APP_ABORT("Incorrect strides.");
if (!TG.TG_local().root())
{
tot_num_walkers += M.size(0);
tot_num_walkers += M.size();
return;
}
auto&& W(boost::multi::static_array_cast<element, pointer>(walker_buffer));
auto&& BPW(boost::multi::static_array_cast<bp_element, bp_pointer>(bp_buffer));
for (int i = 0; i < M.size(0); i++)
for (int i = 0; i < M.size(); i++)
{
W[tot_num_walkers] = M[i].sliced(0, walker_size);
if (wlk_desc[3] > 0)
@ -413,16 +413,16 @@ public:
void pop_walkers(Mat&& M)
{
static_assert(std::decay<Mat>::type::dimensionality == 2, "Wrong dimensionality");
if (tot_num_walkers < int(M.size(0)))
if (tot_num_walkers < int(M.size()))
APP_ABORT("Insufficient walkers");
if (wlk_desc[3] > 0)
{
if (walker_size + bp_walker_size != int(M.size(1)))
if (walker_size + bp_walker_size != int(std::get<1>(M.sizes())))
APP_ABORT("Incorrect dimensions.");
}
else
{
if (walker_size != int(M.size(1)))
if (walker_size != int(std::get<1>(M.sizes())))
APP_ABORT("Incorrect dimensions.");
}
if (M.stride(1) != 1)
@ -430,12 +430,12 @@ public:
if (!TG.TG_local().root())
{
tot_num_walkers -= int(M.size(0));
tot_num_walkers -= int(M.size());
return;
}
auto W(boost::multi::static_array_cast<element, pointer>(walker_buffer));
auto BPW(boost::multi::static_array_cast<bp_element, bp_pointer>(bp_buffer));
for (int i = 0; i < M.size(0); i++)
for (int i = 0; i < M.size(); i++)
{
M[i].sliced(0, walker_size) = W[tot_num_walkers - 1];
if (wlk_desc[3] > 0)
@ -457,13 +457,13 @@ public:
int nW = 0;
for (auto it = itbegin; it != itend; ++it)
nW += it->second;
if (int(M.size(0)) < std::max(0, nW - targetN_per_TG))
if (int(std::get<0>(M.sizes())) < std::max(0, nW - targetN_per_TG))
{
std::cout << " Error in WalkerSetBase::branch(): Not enough space in excess matrix. \n"
<< M.size(0) << " " << nW << " " << targetN_per_TG << std::endl;
<< std::get<0>(M.sizes()) << " " << nW << " " << targetN_per_TG << std::endl;
APP_ABORT("Error in WalkerSetBase::branch(): Not enough space in excess matrix.\n");
}
if (int(M.size(1)) < walker_size + ((wlk_desc[3] > 0) ? bp_walker_size : 0))
if (int(std::get<1>(M.sizes())) < walker_size + ((wlk_desc[3] > 0) ? bp_walker_size : 0))
APP_ABORT("Error in WalkerSetBase::branch(): Wrong dimensions in excess matrix.\n");
// if all walkers are dead, don't bother with routine, reset tot_num_walkers and return
@ -573,7 +573,7 @@ public:
{
if (!TG.TG_local().root())
return;
assert(walker_buffer.size(1) == walker_size);
assert(std::get<1>(walker_buffer.sizes()) == walker_size);
auto W(boost::multi::static_array_cast<element, pointer>(walker_buffer));
ma::scal(ComplexType(w0), W({0, tot_num_walkers}, data_displ[WEIGHT]));
if (scale_last_history)
@ -629,7 +629,7 @@ public:
{
assert(n < tot_num_walkers);
assert(x.size() >= walkerSizeIO());
assert(walker_buffer.size(1) == walker_size);
assert(std::get<1>(walker_buffer.sizes()) == walker_size);
auto W(boost::multi::static_array_cast<element, pointer>(walker_buffer));
using std::copy_n;
copy_n(W[n].origin(), walkerSizeIO(), x.origin());
@ -640,7 +640,7 @@ public:
{
assert(n < tot_num_walkers);
assert(x.size() >= walkerSizeIO());
assert(walker_buffer.size(1) == walker_size);
assert(std::get<1>(walker_buffer.sizes()) == walker_size);
auto W(boost::multi::static_array_cast<element, pointer>(walker_buffer));
using std::copy_n;
copy_n(x.origin(), walkerSizeIO(), W[n].origin());
@ -682,14 +682,14 @@ public:
{
if (ip < 0 || ip > wlk_desc[3])
APP_ABORT(" Error: index out of bounds in getFields. \n");
int skip = (data_displ[FIELDS] + ip * wlk_desc[4]) * bp_buffer.size(1);
return stdCMatrix_ptr(to_address(bp_buffer.origin()) + skip, {wlk_desc[4], bp_buffer.size(1)});
int skip = (data_displ[FIELDS] + ip * wlk_desc[4]) * std::get<1>(bp_buffer.sizes());
return stdCMatrix_ptr(to_address(bp_buffer.origin()) + skip, {wlk_desc[4], std::get<1>(bp_buffer.sizes())});
}
stdCTensor_ptr getFields()
{
return stdCTensor_ptr(to_address(bp_buffer.origin()) + data_displ[FIELDS] * bp_buffer.size(1),
{wlk_desc[3], wlk_desc[4], bp_buffer.size(1)});
return stdCTensor_ptr(to_address(bp_buffer.origin()) + data_displ[FIELDS] * std::get<1>(bp_buffer.sizes()),
{wlk_desc[3], wlk_desc[4], std::get<1>(bp_buffer.sizes())});
}
template<class Mat>
@ -697,7 +697,7 @@ public:
{
static_assert(std::decay<Mat>::type::dimensionality == 2, "Wrong dimensionality");
auto&& F(*getFields(ip));
if (V.stride(0) == V.size(1))
if (V.stride(0) == std::get<1>(V.sizes()))
{
using std::copy_n;
copy_n(V.origin(), F.num_elements(), F.origin());
@ -708,14 +708,14 @@ public:
stdCMatrix_ptr getWeightFactors()
{
return stdCMatrix_ptr(to_address(bp_buffer.origin()) + data_displ[WEIGHT_FAC] * bp_buffer.size(1),
{wlk_desc[6], bp_buffer.size(1)});
return stdCMatrix_ptr(to_address(bp_buffer.origin()) + data_displ[WEIGHT_FAC] * std::get<1>(bp_buffer.sizes()),
{wlk_desc[6], std::get<1>(bp_buffer.sizes())});
}
stdCMatrix_ptr getWeightHistory()
{
return stdCMatrix_ptr(to_address(bp_buffer.origin()) + data_displ[WEIGHT_HISTORY] * bp_buffer.size(1),
{wlk_desc[6], bp_buffer.size(1)});
return stdCMatrix_ptr(to_address(bp_buffer.origin()) + data_displ[WEIGHT_HISTORY] * std::get<1>(bp_buffer.sizes()),
{wlk_desc[6], std::get<1>(bp_buffer.sizes())});
}
double getLogOverlapFactor() const { return LogOverlapFactor; }
@ -726,7 +726,7 @@ public:
// LogOverlapFactor_new = LogOverlapFactor + f/nx
void adjustLogOverlapFactor(const double f)
{
assert(walker_buffer.size(1) == walker_size);
assert(std::get<1>(walker_buffer.sizes()) == walker_size);
double nx = (walkerType == NONCOLLINEAR ? 1.0 : 2.0);
if (TG.TG_local().root())
{

View File

@ -221,9 +221,9 @@ bool WalkerSetBase<Alloc, Ptr>::clean()
template<class Alloc, typename Ptr>
void WalkerSetBase<Alloc, Ptr>::reserve(int n)
{
if (walker_buffer.size(0) < n || walker_buffer.size(1) != walker_size)
if (std::get<0>(walker_buffer.sizes()) < n || std::get<1>(walker_buffer.sizes()) != walker_size)
walker_buffer.reextent({n, walker_size});
if (bp_buffer.size(1) < n || bp_buffer.size(0) != bp_walker_size)
if (std::get<1>(bp_buffer.sizes()) < n || std::get<0>(bp_buffer.sizes()) != bp_walker_size)
{
bp_buffer.reextent({bp_walker_size, n});
using std::fill_n;

View File

@ -245,15 +245,15 @@ public:
{
if (transposed_G_for_vbias_)
{
assert(G.size(0) == v.size(1));
assert(G.size(1) == size_of_G_for_vbias());
assert(std::get<0>(G.sizes()) == std::get<1>(v.sizes()));
assert(std::get<1>(G.sizes()) == size_of_G_for_vbias());
}
else
{
assert(G.size(0) == size_of_G_for_vbias());
assert(G.size(1) == v.size(1));
assert(std::get<0>(G.sizes()) == size_of_G_for_vbias());
assert(std::get<1>(G.sizes()) == std::get<1>(v.sizes()));
}
assert(v.size(0) == HamOp.local_number_of_cholesky_vectors());
assert(std::get<0>(v.sizes()) == HamOp.local_number_of_cholesky_vectors());
if (ci.size() == 1)
{
// HamOp expects a compact Gc with alpha/beta components
@ -285,11 +285,11 @@ public:
template<class MatX, class MatA>
void vHS(MatX&& X, MatA&& v, double a = 1.0)
{
assert(X.size(0) == HamOp.local_number_of_cholesky_vectors());
assert(std::get<0>(X.sizes()) == HamOp.local_number_of_cholesky_vectors());
if (transposed_vHS_)
assert(X.size(1) == v.size(0));
assert(std::get<1>(X.sizes()) == std::get<0>(v.sizes()));
else
assert(X.size(1) == v.size(1));
assert(std::get<1>(X.sizes()) == std::get<1>(v.sizes()));
HamOp.vHS(std::forward<MatX>(X), std::forward<MatA>(v), a);
TG.local_barrier();
}
@ -507,8 +507,8 @@ public:
{
static_assert(std::decay<Mat>::type::dimensionality == 2, "Wrong dimensionality");
int ndet = number_of_references_for_back_propagation();
assert(A.size(0) == ndet);
if (RefOrbMats.size(0) == 0)
assert(A.size() == ndet);
if (RefOrbMats.size() == 0)
{
TG.Node().barrier(); // for safety
int nrow(NMO * ((walker_type == NONCOLLINEAR) ? 2 : 1));
@ -538,13 +538,13 @@ public:
} // TG.Node().root()
TG.Node().barrier(); // for safety
}
assert(RefOrbMats.size(0) == ndet);
assert(RefOrbMats.size(1) == A.size(1));
assert(std::get<0>(RefOrbMats.sizes()) == ndet);
assert(std::get<1>(RefOrbMats.sizes()) == std::get<1>(A.sizes()));
auto&& RefOrbMats_(boost::multi::static_array_cast<ComplexType, ComplexType*>(RefOrbMats));
auto&& A_(boost::multi::static_array_cast<ComplexType, Ptr>(A));
using std::copy_n;
int n0, n1;
std::tie(n0, n1) = FairDivideBoundary(TG.getLocalTGRank(), int(A.size(1)), TG.getNCoresPerTG());
std::tie(n0, n1) = FairDivideBoundary(TG.getLocalTGRank(), int(std::get<1>(A.sizes())), TG.getNCoresPerTG());
for (int i = 0; i < ndet; i++)
copy_n(RefOrbMats_[i].origin() + n0, n1 - n0, A_[i].origin() + n0);
TG.TG_local().barrier();

View File

@ -253,25 +253,25 @@ public:
template<class MatG, class MatA>
void vbias(const MatG& G, MatA&& v, double a = 1.0)
{
assert(v.size(0) == HamOp.local_number_of_cholesky_vectors());
assert(std::get<0>(v.sizes()) == HamOp.local_number_of_cholesky_vectors());
double scl = (walker_type == COLLINEAR) ? 0.5 : 1.0;
if (transposed_G_for_vbias_)
{
assert(G.size(0) == v.size(1));
assert(G.size(1) == size_of_G_for_vbias());
assert(std::get<0>(G.sizes()) == std::get<1>(v.sizes()));
assert(std::get<1>(G.sizes()) == size_of_G_for_vbias());
HamOp.vbias(G(G.extension(0), {0, long(OrbMats[0].size(0) * NMO)}), std::forward<MatA>(v), scl * a, 0.0);
if (walker_type == COLLINEAR) {
APP_ABORT(" Error in PHMSD::vbias: transposed_G_for_vbias_ should be false. \n");
HamOp.vbias(G(G.extension(0), {long(OrbMats[0].size(0) * NMO), G.size(1)}), std::forward<MatA>(v), scl * a, 1.0);
HamOp.vbias(G(G.extension(0), {long(OrbMats[0].size() * NMO), std::get<1>(G.sizes())}), std::forward<MatA>(v), scl * a, 1.0);
}
}
else
{
assert(G.size(0) == size_of_G_for_vbias());
assert(G.size(1) == v.size(1));
HamOp.vbias(G.sliced(0, OrbMats[0].size(0) * NMO), std::forward<MatA>(v), scl * a, 0.0);
assert(G.size() == size_of_G_for_vbias());
assert(std::get<1>(G.sizes()) == std::get<1>(v.sizes()));
HamOp.vbias(G.sliced(0, OrbMats[0].size() * NMO), std::forward<MatA>(v), scl * a, 0.0);
if (walker_type == COLLINEAR)
HamOp.vbias(G.sliced(OrbMats[0].size(0) * NMO, G.size(0)), std::forward<MatA>(v), scl * a, 1.0);
HamOp.vbias(G.sliced(OrbMats[0].size() * NMO, G.size()), std::forward<MatA>(v), scl * a, 1.0);
}
TG.local_barrier();
}
@ -284,11 +284,11 @@ public:
template<class MatX, class MatA>
void vHS(MatX&& X, MatA&& v, double a = 1.0)
{
assert(X.size(0) == HamOp.local_number_of_cholesky_vectors());
assert(std::get<0>(X.sizes()) == HamOp.local_number_of_cholesky_vectors());
if (transposed_vHS_)
assert(X.size(1) == v.size(0));
assert(std::get<1>(X.sizes()) == std::get<0>(v.sizes()));
else
assert(X.size(1) == v.size(1));
assert(std::get<1>(X.sizes()) == std::get<1>(v.sizes()));
HamOp.vHS(std::forward<MatX>(X), std::forward<MatA>(v), a);
TG.local_barrier();
}
@ -303,7 +303,7 @@ public:
int nw = wset.size();
if (ovlp.num_elements() != nw)
ovlp.reextent(iextensions<1u>{nw});
if (eloc.size(0) != nw || eloc.size(1) != 3)
if (std::get<0>(eloc.sizes()) != nw || std::get<1>(eloc.sizes()) != 3)
eloc.reextent({nw, 3});
Energy(wset, eloc, ovlp);
TG.local_barrier();
@ -487,8 +487,8 @@ public:
{
static_assert(std::decay<Mat>::type::dimensionality == 2, "Wrong dimensionality");
int ndet = number_of_references_for_back_propagation();
assert(A.size(0) == ndet);
if (RefOrbMats.size(0) == 0)
assert(A.size() == ndet);
if (RefOrbMats.size() == 0)
{
TG.Node().barrier(); // for safety
int nrow(NMO * ((walker_type == NONCOLLINEAR) ? 2 : 1));
@ -498,14 +498,14 @@ public:
if (TG.Node().root())
{
boost::multi::array<ComplexType, 2> OA_({
static_cast<boost::multi::size_t>(OrbMats[0].size(1)),
static_cast<boost::multi::size_t>(OrbMats[0].size(0))
static_cast<boost::multi::size_t>(std::get<1>(OrbMats[0].sizes())),
static_cast<boost::multi::size_t>(std::get<0>(OrbMats[0].sizes()))
});
boost::multi::array<ComplexType, 2> OB_({0, 0});
if (OrbMats.size() > 1)
OB_.reextent({
static_cast<boost::multi::size_t>(OrbMats[1].size(1)),
static_cast<boost::multi::size_t>(OrbMats[1].size(0))
static_cast<boost::multi::size_t>(std::get<1>(OrbMats[1].sizes())),
static_cast<boost::multi::size_t>(std::get<0>(OrbMats[1].sizes()))
});
ma::Matrix2MAREF('H', OrbMats[0], OA_);
if (OrbMats.size() > 1)
@ -538,13 +538,13 @@ public:
} // TG.Node().root()
TG.Node().barrier(); // for safety
}
assert(RefOrbMats.size(0) == ndet);
assert(RefOrbMats.size(1) == A.size(1));
assert(std::get<0>(RefOrbMats.sizes()) == ndet);
assert(std::get<1>(RefOrbMats.sizes()) == std::get<1>(A.sizes()));
auto&& RefOrbMats_(boost::multi::static_array_cast<ComplexType, ComplexType*>(RefOrbMats));
auto&& A_(boost::multi::static_array_cast<ComplexType, Ptr>(A));
using std::copy_n;
int n0, n1;
std::tie(n0, n1) = FairDivideBoundary(TG.getLocalTGRank(), int(A.size(1)), TG.getNCoresPerTG());
std::tie(n0, n1) = FairDivideBoundary(TG.getLocalTGRank(), int(std::get<1>(A.sizes())), TG.getNCoresPerTG());
for (int i = 0; i < ndet; i++)
copy_n(RefOrbMats_[i].origin() + n0, n1 - n0, A_[i].origin() + n0);
TG.TG_local().barrier();

View File

@ -43,9 +43,9 @@ void PHMSD::Energy_shared(const WlkSet& wset, Mat&& E, TVec&& Ov)
size_t nkev = HamOp.number_of_ke_vectors();
assert(E.dimensionality == 2);
assert(Ov.dimensionality == 1);
assert(E.size(0) == wset.size());
assert(Ov.size(0) == wset.size());
assert(E.size(1) == 3);
assert(E.size() == wset.size());
assert(std::get<0>(Ov.sizes()) == wset.size());
assert(std::get<1>(E.sizes()) == 3);
ComplexType zero(0.0);
auto Gsize = dm_size(false);
@ -67,7 +67,7 @@ void PHMSD::Energy_shared(const WlkSet& wset, Mat&& E, TVec&& Ov)
opSpinEJ.reextent(iextensions<1u>{nwalk});
if (localGbuff.size() < 2 * Gsize)
localGbuff.reextent(iextensions<1u>{2 * Gsize});
if (eloc2.size(0) != nwalk || eloc2.size(1) != 3)
if (std::get<0>(eloc2.sizes()) != nwalk || std::get<1>(eloc2.sizes()) != 3)
eloc2.reextent({nwalk, 3});
std::fill_n(Ov.origin(), nwalk, zero);
@ -424,17 +424,17 @@ void PHMSD::MixedDensityMatrix(const WlkSet& wset, MatG&& G, TVec&& Ov, bool com
assert(G.stride(1) == 1);
assert(Ov.stride(0) == 1);
if (transpose)
assert(G.size(0) == wset.size() && G.size(1) == size_t(dm_size(not compact)));
assert(std::get<0>(G.sizes()) == wset.size() && std::get<1>(G.sizes()) == size_t(dm_size(not compact)));
else
assert(G.size(1) == wset.size() && G.size(0) == size_t(dm_size(not compact)));
assert(std::get<1>(G.sizes()) == wset.size() && std::get<0>(G.sizes()) == size_t(dm_size(not compact)));
const int nw = wset.size();
auto refc = abij.reference_configuration();
double LogOverlapFactor(wset.getLogOverlapFactor());
assert(Ov.size() >= nw);
std::fill_n(Ov.begin(), nw, 0);
for (int i = 0; i < G.size(0); i++)
for (int i = 0; i < std::get<0>(G.sizes()); i++)
if (i % TG.TG_local().size() == TG.TG_local().rank())
std::fill_n(G[i].origin(), G.size(1), ComplexType(0.0));
std::fill_n(G[i].origin(), std::get<1>(G.sizes()), ComplexType(0.0));
TG.local_barrier();
auto Gsize = dm_size(not compact);
if (compact)
@ -569,8 +569,8 @@ void PHMSD::MixedDensityMatrix(const WlkSet& wset, MatG&& G, TVec&& Ov, bool com
if (compact)
{
ma::product(T(Rb), GB2D0_, GB2D_);
//G({GAdims.first*GAdims.second,G.size(0)},iw) = GB1D_;
ma::copy(GB1D_, G({GAdims.first * GAdims.second, G.size(0)}, iw));
//G({GAdims.first*GAdims.second,std::get<0>(G.sizes())},iw) = GB1D_;
ma::copy(GB1D_, G({GAdims.first * GAdims.second, std::get<0>(G.sizes())}, iw));
}
else
{
@ -578,7 +578,7 @@ void PHMSD::MixedDensityMatrix(const WlkSet& wset, MatG&& G, TVec&& Ov, bool com
ma::product(T(Rb), GB2D0_, GB2D_);
ma::product(T(OrbMats.back()), GB2D_, Gfullb);
//G({Gfulla.num_elements(),G.size(0)},iw) = G1D;
ma::copy(G1D, G({Gfulla.num_elements(), G.size(0)}, iw));
ma::copy(G1D, G({Gfulla.num_elements(), std::get<0>(G.sizes())}, iw));
}
}
}
@ -681,9 +681,9 @@ void PHMSD::MixedDensityMatrix(const WlkSet& wset, MatG&& G, TVec&& Ov, bool com
ma::product(T(Ra), GA2D0_shm(GA2D0_shm.extension(0), {M0, Mn}),
GA2D_(GA2D_.extension(0), {M0, Mn})); // can be local
boost::multi::array_ref<ComplexType, 3> Gw(to_address(G.origin()),
{GAdims.first, GAdims.second, long(G.size(1))});
{GAdims.first, GAdims.second, long(std::get<1>(G.sizes()))});
// copying by hand for now, implement strided copy in ma_blas
for (size_t k = 0; k < GA2D_.size(0); ++k)
for (size_t k = 0; k < std::get<0>(GA2D_.sizes()); ++k)
for (size_t m = M0; m < Mn; ++m)
Gw[k][m][iw] = GA2D_[k][m];
}
@ -694,9 +694,9 @@ void PHMSD::MixedDensityMatrix(const WlkSet& wset, MatG&& G, TVec&& Ov, bool com
ma::product(T(OrbMats[0]), GA2D_(GA2D_.extension(0), {M0, Mn}),
Gfulla(Gfulla.extension(0), {M0, Mn})); // can be local
boost::multi::array_ref<ComplexType, 3> Gw(to_address(G.origin()),
{long(Gfulla.size(0)), long(Gfulla.size(1)), long(G.size(1))});
{long(std::get<0>(Gfulla.sizes())), long(std::get<1>(Gfulla.sizes())), long(std::get<1>(G.sizes()))});
// copying by hand for now, implement strided copy in ma_blas
for (size_t k = 0; k < Gfulla.size(0); ++k)
for (size_t k = 0; k < std::get<0>(Gfulla.sizes()); ++k)
for (size_t m = M0; m < Mn; ++m)
Gw[k][m][iw] = Gfulla[k][m];
}
@ -732,9 +732,9 @@ void PHMSD::MixedDensityMatrix(const WlkSet& wset, MatG&& G, TVec&& Ov, bool com
ma::product(T(Rb), GB2D0_shm(GB2D0_shm.extension(0), {M0, Mn}),
GB2D_(GB2D_.extension(0), {M0, Mn})); // can be local
boost::multi::array_ref<ComplexType, 3> Gw(to_address(G[GAdims.first * GAdims.second].origin()),
{GBdims.first, GBdims.second, long(G.size(1))});
{GBdims.first, GBdims.second, long(std::get<1>(G.sizes()))});
// copying by hand for now, implement strided copy in ma_blas
for (size_t k = 0; k < GB2D_.size(0); ++k)
for (size_t k = 0; k < std::get<0>(GB2D_.sizes()); ++k)
for (size_t m = M0; m < Mn; ++m)
Gw[k][m][iw] = GB2D_[k][m];
}
@ -745,9 +745,9 @@ void PHMSD::MixedDensityMatrix(const WlkSet& wset, MatG&& G, TVec&& Ov, bool com
ma::product(T(OrbMats[0]), GB2D_(GB2D_.extension(0), {M0, Mn}),
Gfullb(Gfullb.extension(0), {M0, Mn})); // can be local
boost::multi::array_ref<ComplexType, 3> Gw(to_address(G[Gfulla.num_elements()].origin()),
{long(Gfullb.size(0)), long(Gfullb.size(1)), long(G.size(1))});
{long(std::get<0>(Gfullb.sizes())), long(std::get<1>(Gfullb.sizes())), long(std::get<1>(G.sizes()))});
// copying by hand for now, implement strided copy in ma_blas
for (size_t k = 0; k < Gfullb.size(0); ++k)
for (size_t k = 0; k < std::get<0>(Gfullb.sizes()); ++k)
for (size_t m = M0; m < Mn; ++m)
Gw[k][m][iw] = Gfullb[k][m];
}
@ -759,7 +759,7 @@ void PHMSD::MixedDensityMatrix(const WlkSet& wset, MatG&& G, TVec&& Ov, bool com
TG.TG_local().all_reduce_in_place_n(to_address(Ov.origin()), nw, std::plus<>());
if (transpose)
{
for (size_t iw = 0; iw < G.size(0); ++iw)
for (size_t iw = 0; iw < G.size(); ++iw)
if (iw % TG.TG_local().size() == TG.TG_local().rank())
{
auto ov_ = ComplexType(1.0, 0.0) / Ov[iw];
@ -769,8 +769,8 @@ void PHMSD::MixedDensityMatrix(const WlkSet& wset, MatG&& G, TVec&& Ov, bool com
else
{
auto Ov_ = Ov.origin();
const size_t nw_ = G.size(1);
for (int ik = 0; ik < G.size(0); ++ik)
const size_t nw_ = std::get<1>(G.sizes());
for (int ik = 0; ik < std::get<0>(G.sizes()); ++ik)
if (ik % TG.TG_local().size() == TG.TG_local().rank())
{
auto Gik = to_address(G[ik].origin());
@ -841,15 +841,15 @@ void PHMSD::DensityMatrix_shared(const WlkSet& wset,
else
{
if (herm)
assert(RefA.size(0) == dm_dims(false, Alpha).first && RefA.size(1) == dm_dims(false, Alpha).second);
assert(std::get<0>(RefA.sizes()) == dm_dims(false, Alpha).first && RefA.size(1) == dm_dims(false, Alpha).second);
else
assert(RefA.size(1) == dm_dims(false, Alpha).first && RefA.size(0) == dm_dims(false, Alpha).second);
assert(std::get<1>(RefA.sizes()) == dm_dims(false, Alpha).first && RefA.size(0) == dm_dims(false, Alpha).second);
if (herm)
assert(RefB.size(0) == dm_dims(false, Beta).first && RefB.size(1) == dm_dims(false, Beta).second);
assert(std::get<0>(RefB.sizes()) == dm_dims(false, Beta).first && RefB.size(1) == dm_dims(false, Beta).second);
else
assert(RefB.size(1) == dm_dims(false, Beta).first && RefB.size(0) == dm_dims(false, Beta).second);
assert(std::get<1>(RefB.sizes()) == dm_dims(false, Beta).first && RefB.size(0) == dm_dims(false, Beta).second);
if (ovlp2.size(0) < 2 * nw)
if (std::get<0>(ovlp2.sizes()) < 2 * nw)
ovlp2.reextent(iextensions<1u>{2 * nw});
fill_n(ovlp2.origin(), 2 * nw, ComplexType(0.0));
auto GAdims = dm_dims(not compact, Alpha);
@ -1200,7 +1200,7 @@ void PHMSD::OrthogonalizeExcited(Mat&& A, SpinTypes spin, double LogOverlapFacto
APP_ABORT(" Error: OrthogonalizeExcited not implemented with NONCOLLINEAR.\n");
if (spin == Alpha)
{
if (extendedMatAlpha.size(0) != NMO || extendedMatAlpha.size(1) != maxOccupExtendedMat.first)
if (std::get<0>(extendedMatAlpha.sizes()) != NMO || std::get<1>(extendedMatAlpha.sizes()) != maxOccupExtendedMat.first)
extendedMatAlpha.reextent({NMO, maxOccupExtendedMat.first});
extendedMatAlpha(extendedMatAlpha.extension(0), {0, NAEA}) = A;
extendedMatAlpha(extendedMatAlpha.extension(0), {NAEA + 1, maxOccupExtendedMat.first}) =
@ -1222,7 +1222,7 @@ void PHMSD::OrthogonalizeExcited(Mat&& A, SpinTypes spin, double LogOverlapFacto
}
else
{
if (extendedMatBeta.size(0) != NMO || extendedMatBeta.size(1) != maxOccupExtendedMat.second)
if (std::get<0>(extendedMatBeta.sizes()) != NMO || std::get<1>(extendedMatBeta.sizes()) != maxOccupExtendedMat.second)
extendedMatBeta.reextent({NMO, maxOccupExtendedMat.second});
extendedMatBeta(extendedMatBeta.extension(0), {0, NAEB}) = A;
extendedMatBeta(extendedMatBeta.extension(0), {NAEB + 1, maxOccupExtendedMat.second}) =

View File

@ -126,9 +126,9 @@ inline void calculate_R(int rank,
std::vector<ComplexType> WORK(abij.maximum_excitation_number()[spin] * abij.maximum_excitation_number()[spin]);
auto confgs = abij.configurations_begin();
auto refc = abij.reference_configuration(spin);
for (int i = 0; i < R.size(0); i++)
std::fill_n(R[i].origin(), R.size(1), ComplexType(0));
int NEL = T.size(1);
for (int i = 0; i < std::get<0>(R.sizes()); i++)
std::fill_n(R[i].origin(), std::get<1>(R.sizes()), ComplexType(0));
int NEL = std::get<1>(T.sizes());
std::vector<int> orbs(NEL);
ComplexType ov_a;
// add reference contribution!!!

View File

@ -247,9 +247,9 @@ void test_phmsd(boost::mpi3::communicator& world)
RandomGenerator rng;
WalkerSet wset(TG, doc3.getRoot(), InfoMap["info0"], &rng);
auto initial_guess = WfnFac.getInitialGuess(wfn_name);
REQUIRE(initial_guess.size(0) == 2);
REQUIRE(initial_guess.size(1) == NMO);
REQUIRE(initial_guess.size(2) == NAEA);
REQUIRE(std::get<0>(initial_guess.sizes()) == 2);
REQUIRE(std::get<1>(initial_guess.sizes()) == NMO);
REQUIRE(std::get<2>(initial_guess.sizes()) == NAEA);
wset.resize(nwalk, initial_guess[0], initial_guess[1](initial_guess.extension(1), {0, NAEB}));
// 1. Test Overlap Explicitly

View File

@ -154,9 +154,9 @@ void wfn_fac(boost::mpi3::communicator& world)
//nwalk=nw;
WalkerSet wset(TG, doc3.getRoot(), InfoMap["info0"], &rng);
auto initial_guess = WfnFac.getInitialGuess(wfn_name);
REQUIRE(initial_guess.size(0) == 2);
REQUIRE(initial_guess.size(1) == NPOL * NMO);
REQUIRE(initial_guess.size(2) == NAEA);
REQUIRE(std::get<0>(initial_guess.sizes()) == 2);
REQUIRE(std::get<1>(initial_guess.sizes()) == NPOL * NMO);
REQUIRE(std::get<2>(initial_guess.sizes()) == NAEA);
if (type == COLLINEAR)
wset.resize(nwalk, initial_guess[0], initial_guess[1](initial_guess.extension(1), {0, NAEB}));
@ -213,7 +213,7 @@ void wfn_fac(boost::mpi3::communicator& world)
for (int n = 0; n < nwalk; n++)
{
Xsum = 0;
for (int i = 0; i < X.size(0); i++)
for (int i = 0; i < X.size(); i++)
Xsum += X[i][n];
REQUIRE(real(Xsum) == Approx(real(file_data.Xsum)));
REQUIRE(imag(Xsum) == Approx(imag(file_data.Xsum)));
@ -223,7 +223,7 @@ void wfn_fac(boost::mpi3::communicator& world)
{
Xsum = 0;
ComplexType Xsum2 = 0;
for (int i = 0; i < X.size(0); i++)
for (int i = 0; i < X.size(); i++)
{
Xsum += X[i][0];
Xsum2 += ComplexType(0.5) * X[i][0] * X[i][0];
@ -247,12 +247,12 @@ void wfn_fac(boost::mpi3::communicator& world)
Vsum = 0;
if (wfn.transposed_vHS())
{
for (int i = 0; i < vHS.size(1); i++)
for (int i = 0; i < std::get<1>(vHS.sizes()); i++)
Vsum += vHS[n][i];
}
else
{
for (int i = 0; i < vHS.size(0); i++)
for (int i = 0; i < std::get<0>(vHS.sizes()); i++)
Vsum += vHS[i][n];
}
REQUIRE(real(Vsum) == Approx(real(file_data.Vsum)));
@ -264,12 +264,12 @@ void wfn_fac(boost::mpi3::communicator& world)
Vsum = 0;
if (wfn.transposed_vHS())
{
for (int i = 0; i < vHS.size(1); i++)
for (int i = 0; i < std::get<1>(vHS.sizes()); i++)
Vsum += vHS[0][i];
}
else
{
for (int i = 0; i < vHS.size(0); i++)
for (int i = 0; i < std::get<0>(vHS.sizes()); i++)
Vsum += vHS[i][0];
}
app_log() << " Vsum: " << setprecision(12) << Vsum << " Time: " << t1 << std::endl;
@ -292,9 +292,9 @@ void wfn_fac(boost::mpi3::communicator& world)
WalkerSet wset2(TG, doc3.getRoot(), InfoMap["info0"], &rng);
//auto initial_guess = WfnFac.getInitialGuess(wfn_name);
REQUIRE(initial_guess.size(0) == 2);
REQUIRE(initial_guess.size(1) == NPOL * NMO);
REQUIRE(initial_guess.size(2) == NAEA);
REQUIRE(std::get<0>(initial_guess.sizes()) == 2);
REQUIRE(std::get<1>(initial_guess.sizes()) == NPOL * NMO);
REQUIRE(std::get<2>(initial_guess.sizes()) == NAEA);
if (type == COLLINEAR)
wset2.resize(nwalk, initial_guess[0], initial_guess[1](initial_guess.extension(1), {0, NAEB}));
@ -335,7 +335,7 @@ void wfn_fac(boost::mpi3::communicator& world)
for (int n = 0; n < nwalk; n++)
{
Xsum = 0;
for (int i = 0; i < X.size(0); i++)
for (int i = 0; i < X.size(); i++)
Xsum += X[i][n];
REQUIRE(real(Xsum) == Approx(real(file_data.Xsum)));
REQUIRE(imag(Xsum) == Approx(imag(file_data.Xsum)));
@ -345,7 +345,7 @@ void wfn_fac(boost::mpi3::communicator& world)
{
Xsum = 0;
ComplexType Xsum2(0.0);
for (int i = 0; i < X.size(0); i++)
for (int i = 0; i < X.size(); i++)
{
Xsum += X[i][0];
Xsum2 += ComplexType(0.5) * X[i][0] * X[i][0];
@ -364,12 +364,12 @@ void wfn_fac(boost::mpi3::communicator& world)
Vsum = 0;
if (wfn.transposed_vHS())
{
for (int i = 0; i < vHS.size(1); i++)
for (int i = 0; i < std::get<1>(vHS.sizes()); i++)
Vsum += vHS[n][i];
}
else
{
for (int i = 0; i < vHS.size(0); i++)
for (int i = 0; i < std::get<0>(vHS.sizes()); i++)
Vsum += vHS[i][n];
}
REQUIRE(real(Vsum) == Approx(real(file_data.Vsum)));
@ -381,12 +381,12 @@ void wfn_fac(boost::mpi3::communicator& world)
Vsum = 0;
if (wfn.transposed_vHS())
{
for (int i = 0; i < vHS.size(1); i++)
for (int i = 0; i < std::get<1>(vHS.sizes()); i++)
Vsum += vHS[0][i];
}
else
{
for (int i = 0; i < vHS.size(0); i++)
for (int i = 0; i < std::get<0>(vHS.sizes()); i++)
Vsum += vHS[i][0];
}
app_log() << " Vsum: " << setprecision(12) << Vsum << std::endl;
@ -490,9 +490,9 @@ void wfn_fac_distributed(boost::mpi3::communicator& world, int ngroups)
WalkerSet wset(TG, doc3.getRoot(), InfoMap["info0"], &rng);
auto initial_guess = WfnFac.getInitialGuess(wfn_name);
REQUIRE(initial_guess.size(0) == 2);
REQUIRE(initial_guess.size(1) == NPOL * NMO);
REQUIRE(initial_guess.size(2) == NAEA);
REQUIRE(std::get<0>(initial_guess.sizes()) == 2);
REQUIRE(std::get<1>(initial_guess.sizes()) == NPOL * NMO);
REQUIRE(std::get<2>(initial_guess.sizes()) == NAEA);
if (type == COLLINEAR)
wset.resize(nwalk, initial_guess[0], initial_guess[1](initial_guess.extension(1), {0, NAEB}));
@ -550,7 +550,7 @@ void wfn_fac_distributed(boost::mpi3::communicator& world, int ngroups)
{
Xsum = 0;
if (TGwfn.TG_local().root())
for (int i = 0; i < X.size(0); i++)
for (int i = 0; i < X.size(); i++)
Xsum += X[i][n];
Xsum = (TGwfn.TG() += Xsum);
REQUIRE(real(Xsum) == Approx(real(file_data.Xsum)));
@ -561,7 +561,7 @@ void wfn_fac_distributed(boost::mpi3::communicator& world, int ngroups)
{
Xsum = 0;
if (TGwfn.TG_local().root())
for (int i = 0; i < X.size(0); i++)
for (int i = 0; i < X.size(); i++)
Xsum += X[i][0];
Xsum = (TGwfn.TG() += Xsum);
app_log() << " Xsum: " << setprecision(12) << Xsum << " Time: " << t1 << std::endl;
@ -598,12 +598,12 @@ void wfn_fac_distributed(boost::mpi3::communicator& world, int ngroups)
{
if (wfn.transposed_vHS())
{
for (int i = 0; i < vHS.size(1); i++)
for (int i = 0; i < std::get<1>(vHS.sizes()); i++)
Vsum += vHS[n][i];
}
else
{
for (int i = 0; i < vHS.size(0); i++)
for (int i = 0; i < std::get<0>(vHS.sizes()); i++)
Vsum += vHS[i][n];
}
}
@ -619,12 +619,12 @@ void wfn_fac_distributed(boost::mpi3::communicator& world, int ngroups)
{
if (wfn.transposed_vHS())
{
for (int i = 0; i < vHS.size(1); i++)
for (int i = 0; i < std::get<1>(vHS.sizes()); i++)
Vsum += vHS[0][i];
}
else
{
for (int i = 0; i < vHS.size(0); i++)
for (int i = 0; i < std::get<0>(vHS.sizes()); i++)
Vsum += vHS[i][0];
}
}
@ -649,9 +649,9 @@ void wfn_fac_distributed(boost::mpi3::communicator& world, int ngroups)
WalkerSet wset2(TG, doc3.getRoot(), InfoMap["info0"], &rng);
//auto initial_guess = WfnFac.getInitialGuess(wfn_name);
REQUIRE(initial_guess.size(0) == 2);
REQUIRE(initial_guess.size(1) == NPOL * NMO);
REQUIRE(initial_guess.size(2) == NAEA);
REQUIRE(std::get<0>(initial_guess.sizes()) == 2);
REQUIRE(std::get<1>(initial_guess.sizes()) == NPOL * NMO);
REQUIRE(std::get<2>(initial_guess.sizes()) == NAEA);
if (type == COLLINEAR)
wset2.resize(nwalk, initial_guess[0], initial_guess[1](initial_guess.extension(1), {0, NAEB}));
@ -695,7 +695,7 @@ void wfn_fac_distributed(boost::mpi3::communicator& world, int ngroups)
{
Xsum = 0;
if (TGwfn.TG_local().root())
for (int i = 0; i < X2.size(0); i++)
for (int i = 0; i < X2.size(); i++)
Xsum += X2[i][n];
Xsum = (TGwfn.TG() += Xsum);
REQUIRE(real(Xsum) == Approx(real(file_data.Xsum)));
@ -706,7 +706,7 @@ void wfn_fac_distributed(boost::mpi3::communicator& world, int ngroups)
{
Xsum = 0;
if (TGwfn.TG_local().root())
for (int i = 0; i < X2.size(0); i++)
for (int i = 0; i < X2.size(); i++)
Xsum += X2[i][0];
Xsum = (TGwfn.TG() += Xsum);
app_log() << " Xsum: " << setprecision(12) << Xsum << std::endl;
@ -738,12 +738,12 @@ void wfn_fac_distributed(boost::mpi3::communicator& world, int ngroups)
{
if (wfn.transposed_vHS())
{
for (int i = 0; i < vHS.size(1); i++)
for (int i = 0; i < std::get<1>(vHS.sizes()); i++)
Vsum += vHS[n][i];
}
else
{
for (int i = 0; i < vHS.size(0); i++)
for (int i = 0; i < std::get<0>(vHS.sizes()); i++)
Vsum += vHS[i][n];
}
}
@ -759,12 +759,12 @@ void wfn_fac_distributed(boost::mpi3::communicator& world, int ngroups)
{
if (wfn.transposed_vHS())
{
for (int i = 0; i < vHS.size(1); i++)
for (int i = 0; i < std::get<1>(vHS.sizes()); i++)
Vsum += vHS[0][i];
}
else
{
for (int i = 0; i < vHS.size(0); i++)
for (int i = 0; i < std::get<0>(vHS.sizes()); i++)
Vsum += vHS[i][0];
}
}

View File

@ -39,6 +39,7 @@ void QMCDriverInput::readXML(xmlNodePtr cur)
std::string serialize_walkers;
std::string debug_checks_str;
std::string measure_imbalance_str;
int Period4CheckPoint{-1};
ParameterSet parameter_set;
parameter_set.add(store_config_period_, "storeconfigs");
@ -80,10 +81,15 @@ void QMCDriverInput::readXML(xmlNodePtr cur)
aAttrib.add(qmc_method_, "method");
aAttrib.add(update_mode_, "move");
aAttrib.add(scoped_profiling_, "profiling");
aAttrib.add(Period4CheckPoint, "checkpoint");
aAttrib.add(k_delay_, "kdelay");
// This does all the parameter parsing setup in the constructor
aAttrib.put(cur);
//set default to match legacy QMCDriver
check_point_period_.stride = Period4CheckPoint;
check_point_period_.period = Period4CheckPoint;
if (cur != NULL)
{
//initialize the parameter set
@ -156,7 +162,7 @@ void QMCDriverInput::readXML(xmlNodePtr cur)
if (check_point_period_.period < 1)
check_point_period_.period = max_blocks_;
dump_config_ = (check_point_period_.period >= 0);
dump_config_ = (Period4CheckPoint >= 0);
}
} // namespace qmcplusplus

View File

@ -47,8 +47,7 @@ void GradientTest::run(QMCCostFunctionBase& costFunc, const std::string& root_na
}
// Numerical gradient
double finite_diff_delta = 1e-5;
costFunc.GradCost(numeric_grad, params, finite_diff_delta);
costFunc.GradCost(numeric_grad, params, input_.get_finite_diff_delta());
// Analytic gradient
costFunc.GradCost(analytic_grad, params);
@ -60,20 +59,63 @@ void GradientTest::run(QMCCostFunctionBase& costFunc, const std::string& root_na
param_deriv_index_++;
}
app_log() << "Param_Name Value Numeric Analytic Percent" << std::endl;
std::string_view param_name_header("Param_Name");
size_t max_name_len = param_name_header.size();
for (int k = 0; k < num_params; k++)
{
std::string vname = costFunc.getParamName(k);
max_name_len = std::max(vname.size(), max_name_len);
}
max_name_len += 2; // add some padding
// clang-format off
app_log() << std::setw(max_name_len) << std::left << param_name_header
<< std::setw(14) << std::right << " Value "
<< std::setw(20) << std::right << " Numeric "
<< std::setw(20) << std::right << " Analytic "
<< std::setw(14) << " Percent" << std::endl;
// clang-format on
for (int k = 0; k < num_params; k++)
{
std::string vname = costFunc.getParamName(k);
std::ostringstream rel_diff_str;
std::string over_threshold;
if (numeric_grad[k] != 0)
app_log() << vname << " " << params[k] << " " << numeric_grad[k] << " " << analytic_grad[k] << " "
<< 100 * (numeric_grad[k] - analytic_grad[k]) / numeric_grad[k] << std::endl;
{
double rel_diff_percent = 100 * (numeric_grad[k] - analytic_grad[k]) / numeric_grad[k];
rel_diff_str << std::scientific << std::setprecision(2) << rel_diff_percent;
// Highlight problematic differences
// The thresholds are arbitrary.
if (std::abs(rel_diff_percent) > 1e-3)
over_threshold = " !";
if (std::abs(rel_diff_percent) > 1e-2)
over_threshold = " !!";
if (std::abs(rel_diff_percent) > 1e-1)
over_threshold = " !!!";
}
else
app_log() << vname << " " << params[k] << " " << numeric_grad[k] << " " << analytic_grad[k] << " inf"
<< std::endl;
{
rel_diff_str << "inf";
over_threshold = " !!!";
}
// clang-format off
app_log() << std::setw(max_name_len) << std::left << vname
<< std::setprecision(6) << std::setw(14) << std::right << params[k]
<< std::setprecision(10) << std::setw(20) << std::right << numeric_grad[k]
<< std::setw(20) << std::right << analytic_grad[k]
<< std::setw(14) << std::right << rel_diff_str.str() << over_threshold << std::endl;
// clang-format on
if (input_.do_param_output())
param_deriv_file_ << std::setprecision(10) << analytic_grad[k] << " ";
}
// Reset precision to the default
app_log() << std::setprecision(6);
if (input_.do_param_output())
param_deriv_file_ << std::endl;

View File

@ -17,11 +17,9 @@ namespace qmcplusplus
void GradientTestInput::readXML(xmlNodePtr xml_input)
{
ParameterSet param;
std::string do_output_file = "no";
param.add(do_output_file, "output_param_file");
param.add(do_param_output_, "output_param_file", {false});
param.add(finite_diff_delta_, "finite_diff_delta");
param.put(xml_input);
do_param_output_ = (do_output_file != "no");
}

View File

@ -25,9 +25,11 @@ public:
protected:
bool do_param_output_ = false;
double finite_diff_delta_ = 1e-5;
public:
bool do_param_output() { return do_param_output_; }
bool do_param_output() const { return do_param_output_; }
double get_finite_diff_delta() const { return finite_diff_delta_; }
};
} // namespace qmcplusplus

View File

@ -53,7 +53,8 @@ QMCCostFunctionBase::QMCCostFunctionBase(ParticleSet& w, TrialWaveFunction& psi,
msg_stream(0),
m_wfPtr(NULL),
m_doc_out(NULL),
debug_stream(0)
debug_stream(0),
do_override_output(true)
{
GEVType = "mixed";
//paramList.resize(10);
@ -310,7 +311,6 @@ bool QMCCostFunctionBase::put(xmlNodePtr q)
std::string includeNonlocalH;
std::string writeXmlPerStep("no");
std::string computeNLPPderiv;
std::string output_override_str("no");
astring variational_subset_str;
ParameterSet m_param;
m_param.add(writeXmlPerStep, "dumpXML");
@ -322,7 +322,7 @@ bool QMCCostFunctionBase::put(xmlNodePtr q)
m_param.add(GEVType, "GEVMethod");
m_param.add(targetExcitedStr, "targetExcited");
m_param.add(omega_shift, "omega");
m_param.add(output_override_str, "output_vp_override", {"no", "yes"});
m_param.add(do_override_output, "output_vp_override", {true});
m_param.add(variational_subset_str, "variational_subset");
m_param.put(q);
@ -337,9 +337,6 @@ bool QMCCostFunctionBase::put(xmlNodePtr q)
targetExcitedStr = lowerCase(targetExcitedStr);
targetExcited = (targetExcitedStr == "yes");
if (output_override_str == "yes")
do_override_output = true;
variational_subset_names = convertStrToVec<std::string>(variational_subset_str.s);
// app_log() << " QMCCostFunctionBase::put " << std::endl;

View File

@ -57,11 +57,10 @@ set(JASTROW_SRCS
Jastrow/CountingJastrowBuilder.cpp
Jastrow/RPAJastrow.cpp
Jastrow/J1OrbitalSoA.cpp
Jastrow/J2OrbitalSoA.cpp
LatticeGaussianProduct.cpp
LatticeGaussianProductBuilder.cpp)
set(JASTROW_OMPTARGET_SRCS
Jastrow/J2OMPTarget.cpp
Jastrow/TwoBodyJastrow.cpp
Jastrow/BsplineFunctor.cpp)
set(FERMION_SRCS ${FERMION_SRCS} ElectronGas/FreeOrbital.cpp ElectronGas/FreeOrbitalBuilder.cpp)
@ -97,7 +96,7 @@ if(OHMMS_DIM MATCHES 3)
BsplineFactory/HybridRepCenterOrbitals.cpp
BandInfo.cpp
BsplineFactory/BsplineReaderBase.cpp)
set(FERMION_OMPTARGET_SRCS Fermion/DiracDeterminantBatched.cpp)
set(FERMION_OMPTARGET_SRCS Fermion/DiracDeterminantBatched.cpp Fermion/MultiDiracDeterminant.2.cpp)
if(QMC_COMPLEX)
set(FERMION_SRCS ${FERMION_SRCS} EinsplineSpinorSetBuilder.cpp BsplineFactory/SplineC2C.cpp)
set(FERMION_OMPTARGET_SRCS ${FERMION_OMPTARGET_SRCS} BsplineFactory/SplineC2COMPTarget.cpp)
@ -127,7 +126,6 @@ set(FERMION_SRCS
${FERMION_SRCS}
Fermion/DiracDeterminant.cpp
Fermion/MultiDiracDeterminant.cpp
Fermion/MultiDiracDeterminant.2.cpp
Fermion/SlaterDet.cpp
Fermion/SlaterDetBuilder.cpp
Fermion/BackflowBuilder.cpp
@ -145,7 +143,7 @@ set(FERMION_OMPTARGET_SRCS ${FERMION_OMPTARGET_SRCS} Fermion/MultiSlaterDetTable
if(QMC_CUDA)
set(FERMION_SRCS ${FERMION_SRCS} EinsplineSetCuda.cpp Fermion/DiracDeterminantCUDA.cpp Fermion/SlaterDetCUDA.cpp
TrialWaveFunction_CUDA.cpp)
set(JASTROW_SRCS ${JASTROW_SRCS} Jastrow/OneBodyJastrowOrbitalBspline.cpp Jastrow/TwoBodyJastrowOrbitalBspline.cpp)
set(JASTROW_SRCS ${JASTROW_SRCS} Jastrow/OneBodyJastrowCUDA.cpp Jastrow/TwoBodyJastrowCUDA.cpp)
endif()
####################################

View File

@ -1,710 +0,0 @@
//////////////////////////////////////////////////////////////////////////////////////
// This file is distributed under the University of Illinois/NCSA Open Source License.
// See LICENSE file in top directory for details.
//
// Copyright (c) 2021 QMCPACK developers.
//
// File developed by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
// Amrita Mathuriya, amrita.mathuriya@intel.com, Intel Corp.
// Ye Luo, yeluo@anl.gov, Argonne National Laboratory
//
// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
//////////////////////////////////////////////////////////////////////////////////////
// -*- C++ -*-
#include "J2OrbitalSoA.h"
#include "CPU/SIMD/algorithm.hpp"
#include "ParticleBase/ParticleAttribOps.h"
namespace qmcplusplus
{
template<typename FT>
void J2OrbitalSoA<FT>::extractOptimizableObjectRefs(UniqueOptObjRefs& opt_obj_refs)
{
for (auto& [key, functor] : J2Unique)
opt_obj_refs.push_back(*functor);
}
template<typename FT>
void J2OrbitalSoA<FT>::checkOutVariables(const opt_variables_type& active)
{
myVars.clear();
for (auto& [key, functor] : J2Unique)
{
functor->myVars.getIndex(active);
myVars.insertFrom(functor->myVars);
}
// Remove inactive variables so the mappings are correct
myVars.removeInactive();
myVars.getIndex(active);
const size_t NumVars = myVars.size();
if (NumVars)
{
OffSet.resize(F.size());
// Find first active variable for the starting offset
int varoffset = -1;
for (int i = 0; i < myVars.size(); i++)
{
varoffset = myVars.Index[i];
if (varoffset != -1)
break;
}
for (int i = 0; i < F.size(); ++i)
{
if (F[i] && F[i]->myVars.Index.size())
{
OffSet[i].first = F[i]->myVars.Index.front() - varoffset;
OffSet[i].second = F[i]->myVars.Index.size() + OffSet[i].first;
}
else
{
OffSet[i].first = OffSet[i].second = -1;
}
}
}
}
template<typename FT>
void J2OrbitalSoA<FT>::evaluateRatios(const VirtualParticleSet& VP, std::vector<ValueType>& ratios)
{
for (int k = 0; k < ratios.size(); ++k)
ratios[k] =
std::exp(Uat[VP.refPtcl] - computeU(VP.refPS, VP.refPtcl, VP.getDistTableAB(my_table_ID_).getDistRow(k)));
}
template<typename FT>
void J2OrbitalSoA<FT>::registerData(ParticleSet& P, WFBufferType& buf)
{
if (Bytes_in_WFBuffer == 0)
{
Bytes_in_WFBuffer = buf.current();
buf.add(Uat.begin(), Uat.end());
buf.add(dUat.data(), dUat.end());
buf.add(d2Uat.begin(), d2Uat.end());
Bytes_in_WFBuffer = buf.current() - Bytes_in_WFBuffer;
// free local space
Uat.free();
dUat.free();
d2Uat.free();
}
else
{
buf.forward(Bytes_in_WFBuffer);
}
}
template<typename FT>
void J2OrbitalSoA<FT>::copyFromBuffer(ParticleSet& P, WFBufferType& buf)
{
Uat.attachReference(buf.lendReference<valT>(N), N);
dUat.attachReference(N, N_padded, buf.lendReference<valT>(N_padded * OHMMS_DIM));
d2Uat.attachReference(buf.lendReference<valT>(N), N);
}
template<typename FT>
typename J2OrbitalSoA<FT>::LogValueType J2OrbitalSoA<FT>::updateBuffer(ParticleSet& P,
WFBufferType& buf,
bool fromscratch)
{
log_value_ = computeGL(P.G, P.L);
buf.forward(Bytes_in_WFBuffer);
return log_value_;
}
template<typename FT>
typename J2OrbitalSoA<FT>::valT J2OrbitalSoA<FT>::computeU(const ParticleSet& P, int iat, const DistRow& dist)
{
valT curUat(0);
const int igt = P.GroupID[iat] * NumGroups;
for (int jg = 0; jg < NumGroups; ++jg)
{
const FuncType& f2(*F[igt + jg]);
int iStart = P.first(jg);
int iEnd = P.last(jg);
curUat += f2.evaluateV(iat, iStart, iEnd, dist.data(), DistCompressed.data());
}
return curUat;
}
template<typename FT>
typename J2OrbitalSoA<FT>::posT J2OrbitalSoA<FT>::accumulateG(const valT* restrict du, const DisplRow& displ) const
{
posT grad;
for (int idim = 0; idim < OHMMS_DIM; ++idim)
{
const valT* restrict dX = displ.data(idim);
valT s = valT();
#pragma omp simd reduction(+ : s) aligned(du, dX : QMC_SIMD_ALIGNMENT)
for (int jat = 0; jat < N; ++jat)
s += du[jat] * dX[jat];
grad[idim] = s;
}
return grad;
}
template<typename FT>
J2OrbitalSoA<FT>::J2OrbitalSoA(const std::string& obj_name, ParticleSet& p)
: WaveFunctionComponent(obj_name),
my_table_ID_(p.addTable(p, DTModes::NEED_TEMP_DATA_ON_HOST | DTModes::NEED_VP_FULL_TABLE_ON_HOST)),
j2_ke_corr_helper(p, F)
{
if (my_name_.empty())
throw std::runtime_error("J2OrbitalSoA object name cannot be empty!");
init(p);
KEcorr = 0.0;
}
template<typename FT>
J2OrbitalSoA<FT>::~J2OrbitalSoA() = default;
template<typename FT>
void J2OrbitalSoA<FT>::init(ParticleSet& p)
{
N = p.getTotalNum();
N_padded = getAlignedSize<valT>(N);
NumGroups = p.groups();
Uat.resize(N);
dUat.resize(N);
d2Uat.resize(N);
cur_u.resize(N);
cur_du.resize(N);
cur_d2u.resize(N);
old_u.resize(N);
old_du.resize(N);
old_d2u.resize(N);
F.resize(NumGroups * NumGroups, nullptr);
DistCompressed.resize(N);
DistIndice.resize(N);
}
template<typename FT>
void J2OrbitalSoA<FT>::addFunc(int ia, int ib, std::unique_ptr<FT> j)
{
assert(ia < NumGroups);
assert(ib < NumGroups);
if (ia == ib)
{
if (ia == 0) //first time, assign everything
{
int ij = 0;
for (int ig = 0; ig < NumGroups; ++ig)
for (int jg = 0; jg < NumGroups; ++jg, ++ij)
if (F[ij] == nullptr)
F[ij] = j.get();
}
else
F[ia * NumGroups + ib] = j.get();
}
else
{
// a very special case, 1 particle of each type (e.g. 1 up + 1 down)
// uu/dd/etc. was prevented by the builder
if (N == NumGroups)
for (int ig = 0; ig < NumGroups; ++ig)
F[ig * NumGroups + ig] = j.get();
// generic case
F[ia * NumGroups + ib] = j.get();
F[ib * NumGroups + ia] = j.get();
}
std::stringstream aname;
aname << ia << ib;
J2Unique[aname.str()] = std::move(j);
}
template<typename FT>
std::unique_ptr<WaveFunctionComponent> J2OrbitalSoA<FT>::makeClone(ParticleSet& tqp) const
{
auto j2copy = std::make_unique<J2OrbitalSoA<FT>>(my_name_, tqp);
std::map<const FT*, FT*> fcmap;
for (int ig = 0; ig < NumGroups; ++ig)
for (int jg = ig; jg < NumGroups; ++jg)
{
int ij = ig * NumGroups + jg;
if (F[ij] == 0)
continue;
typename std::map<const FT*, FT*>::iterator fit = fcmap.find(F[ij]);
if (fit == fcmap.end())
{
auto fc = std::make_unique<FT>(*F[ij]);
fcmap[F[ij]] = fc.get();
j2copy->addFunc(ig, jg, std::move(fc));
}
}
j2copy->KEcorr = KEcorr;
j2copy->myVars.clear();
j2copy->myVars.insertFrom(myVars);
j2copy->OffSet = OffSet;
return j2copy;
}
/** intenal function to compute \f$\sum_j u(r_j), du/dr, d2u/dr2\f$
* @param P particleset
* @param iat particle index
* @param dist starting distance
* @param u starting value
* @param du starting first deriv
* @param d2u starting second deriv
*/
template<typename FT>
void J2OrbitalSoA<FT>::computeU3(const ParticleSet& P,
int iat,
const DistRow& dist,
RealType* restrict u,
RealType* restrict du,
RealType* restrict d2u,
bool triangle)
{
const int jelmax = triangle ? iat : N;
constexpr valT czero(0);
std::fill_n(u, jelmax, czero);
std::fill_n(du, jelmax, czero);
std::fill_n(d2u, jelmax, czero);
const int igt = P.GroupID[iat] * NumGroups;
for (int jg = 0; jg < NumGroups; ++jg)
{
const FuncType& f2(*F[igt + jg]);
int iStart = P.first(jg);
int iEnd = std::min(jelmax, P.last(jg));
f2.evaluateVGL(iat, iStart, iEnd, dist.data(), u, du, d2u, DistCompressed.data(), DistIndice.data());
}
//u[iat]=czero;
//du[iat]=czero;
//d2u[iat]=czero;
}
template<typename FT>
typename J2OrbitalSoA<FT>::PsiValueType J2OrbitalSoA<FT>::ratio(ParticleSet& P, int iat)
{
//only ratio, ready to compute it again
UpdateMode = ORB_PBYP_RATIO;
cur_Uat = computeU(P, iat, P.getDistTableAA(my_table_ID_).getTempDists());
return std::exp(static_cast<PsiValueType>(Uat[iat] - cur_Uat));
}
template<typename FT>
void J2OrbitalSoA<FT>::evaluateRatiosAlltoOne(ParticleSet& P, std::vector<ValueType>& ratios)
{
const auto& d_table = P.getDistTableAA(my_table_ID_);
const auto& dist = d_table.getTempDists();
for (int ig = 0; ig < NumGroups; ++ig)
{
const int igt = ig * NumGroups;
valT sumU(0);
for (int jg = 0; jg < NumGroups; ++jg)
{
const FuncType& f2(*F[igt + jg]);
int iStart = P.first(jg);
int iEnd = P.last(jg);
sumU += f2.evaluateV(-1, iStart, iEnd, dist.data(), DistCompressed.data());
}
for (int i = P.first(ig); i < P.last(ig); ++i)
{
// remove self-interaction
const valT Uself = F[igt + ig]->evaluate(dist[i]);
ratios[i] = std::exp(Uat[i] + Uself - sumU);
}
}
}
template<typename FT>
typename J2OrbitalSoA<FT>::GradType J2OrbitalSoA<FT>::evalGrad(ParticleSet& P, int iat)
{
return GradType(dUat[iat]);
}
template<typename FT>
typename J2OrbitalSoA<FT>::PsiValueType J2OrbitalSoA<FT>::ratioGrad(ParticleSet& P, int iat, GradType& grad_iat)
{
UpdateMode = ORB_PBYP_PARTIAL;
computeU3(P, iat, P.getDistTableAA(my_table_ID_).getTempDists(), cur_u.data(), cur_du.data(), cur_d2u.data());
cur_Uat = simd::accumulate_n(cur_u.data(), N, valT());
DiffVal = Uat[iat] - cur_Uat;
grad_iat += accumulateG(cur_du.data(), P.getDistTableAA(my_table_ID_).getTempDispls());
return std::exp(static_cast<PsiValueType>(DiffVal));
}
template<typename FT>
void J2OrbitalSoA<FT>::acceptMove(ParticleSet& P, int iat, bool safe_to_delay)
{
// get the old u, du, d2u
const auto& d_table = P.getDistTableAA(my_table_ID_);
computeU3(P, iat, d_table.getOldDists(), old_u.data(), old_du.data(), old_d2u.data());
if (UpdateMode == ORB_PBYP_RATIO)
{ //ratio-only during the move; need to compute derivatives
const auto& dist = d_table.getTempDists();
computeU3(P, iat, dist, cur_u.data(), cur_du.data(), cur_d2u.data());
}
valT cur_d2Uat(0);
const auto& new_dr = d_table.getTempDispls();
const auto& old_dr = d_table.getOldDispls();
constexpr valT lapfac = OHMMS_DIM - RealType(1);
#pragma omp simd reduction(+ : cur_d2Uat)
for (int jat = 0; jat < N; jat++)
{
const valT du = cur_u[jat] - old_u[jat];
const valT newl = cur_d2u[jat] + lapfac * cur_du[jat];
const valT dl = old_d2u[jat] + lapfac * old_du[jat] - newl;
Uat[jat] += du;
d2Uat[jat] += dl;
cur_d2Uat -= newl;
}
posT cur_dUat;
for (int idim = 0; idim < OHMMS_DIM; ++idim)
{
const valT* restrict new_dX = new_dr.data(idim);
const valT* restrict old_dX = old_dr.data(idim);
const valT* restrict cur_du_pt = cur_du.data();
const valT* restrict old_du_pt = old_du.data();
valT* restrict save_g = dUat.data(idim);
valT cur_g = cur_dUat[idim];
#pragma omp simd reduction(+ : cur_g) aligned(old_dX, new_dX, save_g, cur_du_pt, old_du_pt : QMC_SIMD_ALIGNMENT)
for (int jat = 0; jat < N; jat++)
{
const valT newg = cur_du_pt[jat] * new_dX[jat];
const valT dg = newg - old_du_pt[jat] * old_dX[jat];
save_g[jat] -= dg;
cur_g += newg;
}
cur_dUat[idim] = cur_g;
}
log_value_ += Uat[iat] - cur_Uat;
Uat[iat] = cur_Uat;
dUat(iat) = cur_dUat;
d2Uat[iat] = cur_d2Uat;
}
template<typename FT>
void J2OrbitalSoA<FT>::recompute(const ParticleSet& P)
{
const auto& d_table = P.getDistTableAA(my_table_ID_);
for (int ig = 0; ig < NumGroups; ++ig)
{
for (int iat = P.first(ig), last = P.last(ig); iat < last; ++iat)
{
computeU3(P, iat, d_table.getDistRow(iat), cur_u.data(), cur_du.data(), cur_d2u.data(), true);
Uat[iat] = simd::accumulate_n(cur_u.data(), iat, valT());
posT grad;
valT lap(0);
const valT* restrict u = cur_u.data();
const valT* restrict du = cur_du.data();
const valT* restrict d2u = cur_d2u.data();
const auto& displ = d_table.getDisplRow(iat);
constexpr valT lapfac = OHMMS_DIM - RealType(1);
#pragma omp simd reduction(+ : lap) aligned(du, d2u : QMC_SIMD_ALIGNMENT)
for (int jat = 0; jat < iat; ++jat)
lap += d2u[jat] + lapfac * du[jat];
for (int idim = 0; idim < OHMMS_DIM; ++idim)
{
const valT* restrict dX = displ.data(idim);
valT s = valT();
#pragma omp simd reduction(+ : s) aligned(du, dX : QMC_SIMD_ALIGNMENT)
for (int jat = 0; jat < iat; ++jat)
s += du[jat] * dX[jat];
grad[idim] = s;
}
dUat(iat) = grad;
d2Uat[iat] = -lap;
// add the contribution from the upper triangle
#pragma omp simd aligned(u, du, d2u : QMC_SIMD_ALIGNMENT)
for (int jat = 0; jat < iat; jat++)
{
Uat[jat] += u[jat];
d2Uat[jat] -= d2u[jat] + lapfac * du[jat];
}
for (int idim = 0; idim < OHMMS_DIM; ++idim)
{
valT* restrict save_g = dUat.data(idim);
const valT* restrict dX = displ.data(idim);
#pragma omp simd aligned(save_g, du, dX : QMC_SIMD_ALIGNMENT)
for (int jat = 0; jat < iat; jat++)
save_g[jat] -= du[jat] * dX[jat];
}
}
}
}
template<typename FT>
typename J2OrbitalSoA<FT>::LogValueType J2OrbitalSoA<FT>::evaluateLog(const ParticleSet& P,
ParticleSet::ParticleGradient& G,
ParticleSet::ParticleLaplacian& L)
{
recompute(P);
return log_value_ = computeGL(G, L);
}
template<typename FT>
typename J2OrbitalSoA<FT>::QTFull::RealType J2OrbitalSoA<FT>::computeGL(ParticleSet::ParticleGradient& G,
ParticleSet::ParticleLaplacian& L) const
{
for (int iat = 0; iat < N; ++iat)
{
G[iat] += dUat[iat];
L[iat] += d2Uat[iat];
}
return -0.5 * simd::accumulate_n(Uat.data(), N, QTFull::RealType());
}
template<typename FT>
WaveFunctionComponent::LogValueType J2OrbitalSoA<FT>::evaluateGL(const ParticleSet& P,
ParticleSet::ParticleGradient& G,
ParticleSet::ParticleLaplacian& L,
bool fromscratch)
{
return log_value_ = computeGL(G, L);
}
template<typename FT>
void J2OrbitalSoA<FT>::evaluateHessian(ParticleSet& P, HessVector& grad_grad_psi)
{
log_value_ = 0.0;
const auto& d_ee(P.getDistTableAA(my_table_ID_));
valT dudr, d2udr2;
Tensor<valT, DIM> ident;
grad_grad_psi = 0.0;
ident.diagonal(1.0);
for (int i = 1; i < N; ++i)
{
const auto& dist = d_ee.getDistRow(i);
const auto& displ = d_ee.getDisplRow(i);
auto ig = P.GroupID[i];
const int igt = ig * NumGroups;
for (int j = 0; j < i; ++j)
{
auto r = dist[j];
auto rinv = 1.0 / r;
auto dr = displ[j];
auto jg = P.GroupID[j];
auto uij = F[igt + jg]->evaluate(r, dudr, d2udr2);
log_value_ -= uij;
auto hess = rinv * rinv * outerProduct(dr, dr) * (d2udr2 - dudr * rinv) + ident * dudr * rinv;
grad_grad_psi[i] -= hess;
grad_grad_psi[j] -= hess;
}
}
}
template<typename FT>
void J2OrbitalSoA<FT>::evaluateDerivatives(ParticleSet& P,
const opt_variables_type& active,
Vector<ValueType>& dlogpsi,
Vector<ValueType>& dhpsioverpsi)
{
if (myVars.size() == 0)
return;
evaluateDerivativesWF(P, active, dlogpsi);
bool recalculate(false);
std::vector<bool> rcsingles(myVars.size(), false);
for (int k = 0; k < myVars.size(); ++k)
{
int kk = myVars.where(k);
if (kk < 0)
continue;
if (active.recompute(kk))
recalculate = true;
rcsingles[k] = true;
}
if (recalculate)
{
for (int k = 0; k < myVars.size(); ++k)
{
int kk = myVars.where(k);
if (kk < 0)
continue;
if (rcsingles[k])
{
dhpsioverpsi[kk] = -RealType(0.5) * ValueType(Sum(lapLogPsi[k])) - ValueType(Dot(P.G, gradLogPsi[k]));
}
}
}
}
template<typename FT>
void J2OrbitalSoA<FT>::evaluateDerivativesWF(ParticleSet& P,
const opt_variables_type& active,
Vector<ValueType>& dlogpsi)
{
if (myVars.size() == 0)
return;
resizeWFOptVectors();
bool recalculate(false);
std::vector<bool> rcsingles(myVars.size(), false);
for (int k = 0; k < myVars.size(); ++k)
{
int kk = myVars.where(k);
if (kk < 0)
continue;
if (active.recompute(kk))
recalculate = true;
rcsingles[k] = true;
}
if (recalculate)
{
///precomputed recalculation switch
std::vector<bool> RecalcSwitch(F.size(), false);
for (int i = 0; i < F.size(); ++i)
{
if (OffSet[i].first < 0)
{
// nothing to optimize
RecalcSwitch[i] = false;
}
else
{
bool recalcFunc(false);
for (int rcs = OffSet[i].first; rcs < OffSet[i].second; rcs++)
if (rcsingles[rcs] == true)
recalcFunc = true;
RecalcSwitch[i] = recalcFunc;
}
}
dLogPsi = 0.0;
const size_t NumVars = myVars.size();
for (int p = 0; p < NumVars; ++p)
{
gradLogPsi[p] = 0.0;
lapLogPsi[p] = 0.0;
}
std::vector<TinyVector<RealType, 3>> derivs(NumVars);
const auto& d_table = P.getDistTableAA(my_table_ID_);
constexpr RealType cone(1);
constexpr RealType lapfac(OHMMS_DIM - cone);
const size_t n = d_table.sources();
const size_t ng = P.groups();
for (size_t i = 1; i < n; ++i)
{
const size_t ig = P.GroupID[i] * ng;
const auto& dist = d_table.getDistRow(i);
const auto& displ = d_table.getDisplRow(i);
for (size_t j = 0; j < i; ++j)
{
const size_t ptype = ig + P.GroupID[j];
if (RecalcSwitch[ptype])
{
std::fill(derivs.begin(), derivs.end(), 0.0);
if (!F[ptype]->evaluateDerivatives(dist[j], derivs))
continue;
RealType rinv(cone / dist[j]);
PosType dr(displ[j]);
for (int p = OffSet[ptype].first, ip = 0; p < OffSet[ptype].second; ++p, ++ip)
{
RealType dudr(rinv * derivs[ip][1]);
RealType lap(derivs[ip][2] + lapfac * dudr);
//RealType lap(derivs[ip][2]+(OHMMS_DIM-1.0)*dudr);
PosType gr(dudr * dr);
dLogPsi[p] -= derivs[ip][0];
gradLogPsi[p][i] += gr;
gradLogPsi[p][j] -= gr;
lapLogPsi[p][i] -= lap;
lapLogPsi[p][j] -= lap;
}
}
}
}
for (int k = 0; k < myVars.size(); ++k)
{
int kk = myVars.where(k);
if (kk < 0)
continue;
if (rcsingles[k])
{
dlogpsi[kk] = dLogPsi[k];
}
//optVars.setDeriv(p,dLogPsi[ip],-0.5*Sum(lapLogPsi[ip])-Dot(P.G,gradLogPsi[ip]));
}
}
}
template<typename FT>
void J2OrbitalSoA<FT>::evaluateDerivRatios(const VirtualParticleSet& VP,
const opt_variables_type& optvars,
std::vector<ValueType>& ratios,
Matrix<ValueType>& dratios)
{
evaluateRatios(VP, ratios);
if (myVars.size() == 0)
return;
bool recalculate(false);
std::vector<bool> rcsingles(myVars.size(), false);
for (int k = 0; k < myVars.size(); ++k)
{
int kk = myVars.where(k);
if (kk < 0)
continue;
if (optvars.recompute(kk))
recalculate = true;
rcsingles[k] = true;
}
if (recalculate)
{
///precomputed recalculation switch
std::vector<bool> RecalcSwitch(F.size(), false);
for (int i = 0; i < F.size(); ++i)
{
if (OffSet[i].first < 0)
{
// nothing to optimize
RecalcSwitch[i] = false;
}
else
{
bool recalcFunc(false);
for (int rcs = OffSet[i].first; rcs < OffSet[i].second; rcs++)
if (rcsingles[rcs] == true)
recalcFunc = true;
RecalcSwitch[i] = recalcFunc;
}
}
const size_t NumVars = myVars.size();
std::vector<RealType> derivs_ref(NumVars);
std::vector<RealType> derivs(NumVars);
const auto& d_table = VP.getDistTableAB(my_table_ID_);
const size_t n = d_table.sources();
const size_t nt = VP.getTotalNum();
for (size_t i = 0; i < n; ++i)
{
if (i == VP.refPtcl)
continue;
const size_t ptype = VP.refPS.GroupID[i] * VP.refPS.groups() + VP.refPS.GroupID[VP.refPtcl];
if (!RecalcSwitch[ptype])
continue;
const auto dist_ref = i < VP.refPtcl ? VP.refPS.getDistTableAA(my_table_ID_).getDistRow(VP.refPtcl)[i]
: VP.refPS.getDistTableAA(my_table_ID_).getDistRow(i)[VP.refPtcl];
//first calculate the old derivatives VP.refPtcl.
std::fill(derivs_ref.begin(), derivs_ref.end(), 0.0);
F[ptype]->evaluateDerivatives(dist_ref, derivs_ref);
for (size_t j = 0; j < nt; ++j)
{
std::fill(derivs.begin(), derivs.end(), 0.0);
F[ptype]->evaluateDerivatives(d_table.getDistRow(j)[i], derivs);
for (int ip = 0, p = F[ptype]->myVars.Index.front(); ip < F[ptype]->myVars.Index.size(); ++ip, ++p)
dratios[j][p] += derivs_ref[ip] - derivs[ip];
}
}
}
}
template class J2OrbitalSoA<BsplineFunctor<QMCTraits::RealType>>;
template class J2OrbitalSoA<PadeFunctor<QMCTraits::RealType>>;
template class J2OrbitalSoA<UserFunctor<QMCTraits::RealType>>;
template class J2OrbitalSoA<FakeFunctor<QMCTraits::RealType>>;
} // namespace qmcplusplus

View File

@ -1,220 +0,0 @@
//////////////////////////////////////////////////////////////////////////////////////
// This file is distributed under the University of Illinois/NCSA Open Source License.
// See LICENSE file in top directory for details.
//
// Copyright (c) 2021 QMCPACK developers.
//
// File developed by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
// Amrita Mathuriya, amrita.mathuriya@intel.com, Intel Corp.
// Ye Luo, yeluo@anl.gov, Argonne National Laboratory
//
// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
//////////////////////////////////////////////////////////////////////////////////////
// -*- C++ -*-
#ifndef QMCPLUSPLUS_TWOBODYJASTROW_OPTIMIZED_SOA_H
#define QMCPLUSPLUS_TWOBODYJASTROW_OPTIMIZED_SOA_H
#include <map>
#include <numeric>
#include "Configuration.h"
#if !defined(QMC_BUILD_SANDBOX_ONLY)
#include "QMCWaveFunctions/WaveFunctionComponent.h"
#endif
#include "Particle/DistanceTable.h"
#include "LongRange/StructFact.h"
#include "CPU/SIMD/aligned_allocator.hpp"
#include "J2KECorrection.h"
#include "BsplineFunctor.h"
#include "PadeFunctors.h"
#include "UserFunctor.h"
#include "FakeFunctor.h"
namespace qmcplusplus
{
/** @ingroup WaveFunctionComponent
* @brief Specialization for two-body Jastrow function using multiple functors
*
* Each pair-type can have distinct function \f$u(r_{ij})\f$.
* For electrons, distinct pair correlation functions are used
* for spins up-up/down-down and up-down/down-up.
*
* Based on J2OrbitalSoA.h with these considerations
* - DistanceTable using SoA containers
* - support mixed precision: FT::real_type != OHMMS_PRECISION
* - loops over the groups: elminated PairID
* - support simd function
* - double the loop counts
* - Memory use is O(N).
*/
template<class FT>
class J2OrbitalSoA : public WaveFunctionComponent
{
public:
///alias FuncType
using FuncType = FT;
///type of each component U, dU, d2U;
using valT = typename FT::real_type;
///element position type
using posT = TinyVector<valT, OHMMS_DIM>;
///use the same container
using DistRow = DistanceTable::DistRow;
using DisplRow = DistanceTable::DisplRow;
using gContainer_type = VectorSoaContainer<valT, OHMMS_DIM>;
using GradDerivVec = ParticleAttrib<QTFull::GradType>;
using ValueDerivVec = ParticleAttrib<QTFull::ValueType>;
protected:
///number of particles
size_t N;
///number of particles + padded
size_t N_padded;
///number of groups of the target particleset
size_t NumGroups;
///diff value
RealType DiffVal;
///Correction
RealType KEcorr;
///\f$Uat[i] = sum_(j) u_{i,j}\f$
Vector<valT> Uat;
///\f$dUat[i] = sum_(j) du_{i,j}\f$
gContainer_type dUat;
///\f$d2Uat[i] = sum_(j) d2u_{i,j}\f$
Vector<valT> d2Uat;
valT cur_Uat;
aligned_vector<valT> cur_u, cur_du, cur_d2u;
aligned_vector<valT> old_u, old_du, old_d2u;
aligned_vector<valT> DistCompressed;
aligned_vector<int> DistIndice;
///Uniquue J2 set for cleanup
std::map<std::string, std::unique_ptr<FT>> J2Unique;
///Container for \f$F[ig*NumGroups+jg]\f$. treat every pointer as a reference.
std::vector<FT*> F;
/// e-e table ID
const int my_table_ID_;
// helper for compute J2 Chiesa KE correction
J2KECorrection<RealType, FT> j2_ke_corr_helper;
/// Map indices from subcomponent variables to component variables
std::vector<std::pair<int, int>> OffSet;
Vector<RealType> dLogPsi;
std::vector<GradDerivVec> gradLogPsi;
std::vector<ValueDerivVec> lapLogPsi;
void resizeWFOptVectors()
{
dLogPsi.resize(myVars.size());
gradLogPsi.resize(myVars.size(), GradDerivVec(N));
lapLogPsi.resize(myVars.size(), ValueDerivVec(N));
}
/// compute G and L from internally stored data
QTFull::RealType computeGL(ParticleSet::ParticleGradient& G, ParticleSet::ParticleLaplacian& L) const;
/*@{ internal compute engines*/
valT computeU(const ParticleSet& P, int iat, const DistRow& dist);
void computeU3(const ParticleSet& P,
int iat,
const DistRow& dist,
RealType* restrict u,
RealType* restrict du,
RealType* restrict d2u,
bool triangle = false);
/** compute gradient
*/
posT accumulateG(const valT* restrict du, const DisplRow& displ) const;
/**@} */
public:
/// construct with an object name (must be non-empty) and the target particle set
J2OrbitalSoA(const std::string& obj_name, ParticleSet& p);
/// non-copyable; use makeClone for per-walker copies
J2OrbitalSoA(const J2OrbitalSoA& rhs) = delete;
~J2OrbitalSoA() override;
/* initialize storage */
void init(ParticleSet& p);
/** add functor for (ia,ib) pair */
void addFunc(int ia, int ib, std::unique_ptr<FT> j);
std::string getClassName() const override { return "J2OrbitalSoA"; }
bool isOptimizable() const override { return true; }
/// expose the unique radial functors (J2Unique) to the optimizer
void extractOptimizableObjectRefs(UniqueOptObjRefs& opt_obj_refs) override;
/** check out optimizable variables
 */
void checkOutVariables(const opt_variables_type& active) override;
/// cache the Chiesa kinetic-energy correction once optimization finishes
inline void finalizeOptimization() override { KEcorr = j2_ke_corr_helper.computeKEcorr(); }
/// deep copy for a new particle set (per-walker cloning)
std::unique_ptr<WaveFunctionComponent> makeClone(ParticleSet& tqp) const override;
/// recompute from scratch and accumulate gradient/laplacian into G and L
LogValueType evaluateLog(const ParticleSet& P,
ParticleSet::ParticleGradient& G,
ParticleSet::ParticleLaplacian& L) override;
void evaluateHessian(ParticleSet& P, HessVector& grad_grad_psi) override;
/** recompute internal data assuming distance table is fully ready */
void recompute(const ParticleSet& P) override;
/// ratio of new to old wavefunction value for a single-particle move of iat
PsiValueType ratio(ParticleSet& P, int iat) override;
void evaluateRatios(const VirtualParticleSet& VP, std::vector<ValueType>& ratios) override;
void evaluateRatiosAlltoOne(ParticleSet& P, std::vector<ValueType>& ratios) override;
GradType evalGrad(ParticleSet& P, int iat) override;
/// ratio plus gradient at the proposed position of particle iat
PsiValueType ratioGrad(ParticleSet& P, int iat, GradType& grad_iat) override;
void acceptMove(ParticleSet& P, int iat, bool safe_to_delay = false) override;
/// rejected moves need no cleanup: per-move scratch is overwritten on the next proposal
inline void restore(int iat) override {}
/** compute G and L after the sweep
 */
LogValueType evaluateGL(const ParticleSet& P,
ParticleSet::ParticleGradient& G,
ParticleSet::ParticleLaplacian& L,
bool fromscratch = false) override;
/// register internal state (Uat/dUat/d2Uat) in the walker buffer
void registerData(ParticleSet& P, WFBufferType& buf) override;
void copyFromBuffer(ParticleSet& P, WFBufferType& buf) override;
LogValueType updateBuffer(ParticleSet& P, WFBufferType& buf, bool fromscratch = false) override;
/// compute, cache and return the Chiesa kinetic-energy correction
inline RealType ChiesaKEcorrection() { return KEcorr = j2_ke_corr_helper.computeKEcorr(); }
inline RealType KECorrection() override { return KEcorr; }
/// non-owning views of the pair functors, indexed \f$F[ig*NumGroups+jg]\f$
const std::vector<FT*>& getPairFunctions() const { return F; }
// Accessors for unit testing
std::pair<int, int> getComponentOffset(int index) { return OffSet.at(index); }
opt_variables_type& getComponentVars() { return myVars; }
void evaluateDerivatives(ParticleSet& P,
const opt_variables_type& active,
Vector<ValueType>& dlogpsi,
Vector<ValueType>& dhpsioverpsi) override;
void evaluateDerivativesWF(ParticleSet& P,
const opt_variables_type& active,
Vector<ValueType>& dlogpsi) override;
void evaluateDerivRatios(const VirtualParticleSet& VP,
const opt_variables_type& optvars,
std::vector<ValueType>& ratios,
Matrix<ValueType>& dratios) override;
};
// Explicit instantiation declarations: the definitions for these functor
// specializations are compiled once in the corresponding .cpp, so including
// this header does not re-instantiate the template.
extern template class J2OrbitalSoA<BsplineFunctor<QMCTraits::RealType>>;
extern template class J2OrbitalSoA<PadeFunctor<QMCTraits::RealType>>;
extern template class J2OrbitalSoA<UserFunctor<QMCTraits::RealType>>;
extern template class J2OrbitalSoA<FakeFunctor<QMCTraits::RealType>>;
} // namespace qmcplusplus
#endif

View File

@ -14,7 +14,7 @@
//////////////////////////////////////////////////////////////////////////////////////
#include "OneBodyJastrowOrbitalBspline.h"
#include "OneBodyJastrowCUDA.h"
#include "CudaSpline.h"
#include "Lattice/ParticleBConds.h"
#include "QMCWaveFunctions/detail/CUDA_legacy/BsplineJastrowCuda.h"
@ -24,15 +24,15 @@
namespace qmcplusplus
{
template<class FT>
void OneBodyJastrowOrbitalBspline<FT>::recompute(MCWalkerConfiguration& W, bool firstTime)
void OneBodyJastrowCUDA<FT>::recompute(MCWalkerConfiguration& W, bool firstTime)
{}
template<class FT>
void OneBodyJastrowOrbitalBspline<FT>::reserve(PointerPool<gpu::device_vector<CTS::RealType>>& pool)
void OneBodyJastrowCUDA<FT>::reserve(PointerPool<gpu::device_vector<CTS::RealType>>& pool)
{}
template<class FT>
void OneBodyJastrowOrbitalBspline<FT>::addFunc(int ig, std::unique_ptr<FT> j, int jg)
void OneBodyJastrowCUDA<FT>::addFunc(int ig, std::unique_ptr<FT> j, int jg)
{
auto newSpline = std::make_unique<CudaSpline<CTS::RealType>>(*j);
GPUSplines[ig] = newSpline.get();
@ -41,7 +41,7 @@ void OneBodyJastrowOrbitalBspline<FT>::addFunc(int ig, std::unique_ptr<FT> j, in
}
template<class FT>
void OneBodyJastrowOrbitalBspline<FT>::addLog(MCWalkerConfiguration& W, std::vector<RealType>& logPsi)
void OneBodyJastrowCUDA<FT>::addLog(MCWalkerConfiguration& W, std::vector<RealType>& logPsi)
{
auto& walkers = W.WalkerList;
if (SumHost.size() < 4 * walkers.size())
@ -105,11 +105,11 @@ void OneBodyJastrowOrbitalBspline<FT>::addLog(MCWalkerConfiguration& W, std::vec
}
template<class FT>
void OneBodyJastrowOrbitalBspline<FT>::update(MCWalkerConfiguration* W,
std::vector<Walker_t*>& walkers,
int iat,
std::vector<bool>* acc,
int k)
void OneBodyJastrowCUDA<FT>::update(MCWalkerConfiguration* W,
std::vector<Walker_t*>& walkers,
int iat,
std::vector<bool>* acc,
int k)
{
// for (int iw=0; iw<walkers.size(); iw++)
// UpdateListHost[iw] = (CTS::RealType*)walkers[iw]->R_GPU.data();
@ -120,11 +120,11 @@ void OneBodyJastrowOrbitalBspline<FT>::update(MCWalkerConfiguration* W,
// #define DEBUG_DELAYED
template<class FT>
void OneBodyJastrowOrbitalBspline<FT>::ratio(MCWalkerConfiguration& W,
int iat,
std::vector<ValueType>& psi_ratios,
std::vector<GradType>& grad,
std::vector<ValueType>& lapl)
void OneBodyJastrowCUDA<FT>::ratio(MCWalkerConfiguration& W,
int iat,
std::vector<ValueType>& psi_ratios,
std::vector<GradType>& grad,
std::vector<ValueType>& lapl)
{
auto& walkers = W.WalkerList;
int N = W.Rnew_GPU.size();
@ -196,11 +196,11 @@ void OneBodyJastrowOrbitalBspline<FT>::ratio(MCWalkerConfiguration& W,
}
template<class FT>
void OneBodyJastrowOrbitalBspline<FT>::calcRatio(MCWalkerConfiguration& W,
int iat,
std::vector<ValueType>& psi_ratios,
std::vector<GradType>& grad,
std::vector<ValueType>& lapl)
void OneBodyJastrowCUDA<FT>::calcRatio(MCWalkerConfiguration& W,
int iat,
std::vector<ValueType>& psi_ratios,
std::vector<GradType>& grad,
std::vector<ValueType>& lapl)
{
int N = W.Rnew_GPU.size();
auto& walkers = W.WalkerList;
@ -244,12 +244,12 @@ void OneBodyJastrowOrbitalBspline<FT>::calcRatio(MCWalkerConfiguration& W,
}
template<class FT>
void OneBodyJastrowOrbitalBspline<FT>::addRatio(MCWalkerConfiguration& W,
int iat,
int k,
std::vector<ValueType>& psi_ratios,
std::vector<GradType>& grad,
std::vector<ValueType>& lapl)
void OneBodyJastrowCUDA<FT>::addRatio(MCWalkerConfiguration& W,
int iat,
int k,
std::vector<ValueType>& psi_ratios,
std::vector<GradType>& grad,
std::vector<ValueType>& lapl)
{
int N = W.Rnew_GPU.size();
auto& walkers = W.WalkerList;
@ -290,10 +290,10 @@ void OneBodyJastrowOrbitalBspline<FT>::addRatio(MCWalkerConfiguration& W,
}
template<class FT>
void OneBodyJastrowOrbitalBspline<FT>::NLratios(MCWalkerConfiguration& W,
std::vector<NLjob>& jobList,
std::vector<PosType>& quadPoints,
std::vector<ValueType>& psi_ratios)
void OneBodyJastrowCUDA<FT>::NLratios(MCWalkerConfiguration& W,
std::vector<NLjob>& jobList,
std::vector<PosType>& quadPoints,
std::vector<ValueType>& psi_ratios)
{
auto& walkers = W.WalkerList;
float sim_cell_radius = W.getLattice().SimulationCellRadius;
@ -357,10 +357,7 @@ void OneBodyJastrowOrbitalBspline<FT>::NLratios(MCWalkerConfiguration& W,
}
template<class FT>
void OneBodyJastrowOrbitalBspline<FT>::calcGradient(MCWalkerConfiguration& W,
int iat,
int k,
std::vector<GradType>& grad)
void OneBodyJastrowCUDA<FT>::calcGradient(MCWalkerConfiguration& W, int iat, int k, std::vector<GradType>& grad)
{
CTS::RealType sim_cell_radius = W.getLattice().SimulationCellRadius;
auto& walkers = W.WalkerList;
@ -393,7 +390,7 @@ void OneBodyJastrowOrbitalBspline<FT>::calcGradient(MCWalkerConfiguration& W,
}
template<class FT>
void OneBodyJastrowOrbitalBspline<FT>::addGradient(MCWalkerConfiguration& W, int iat, std::vector<GradType>& grad)
void OneBodyJastrowCUDA<FT>::addGradient(MCWalkerConfiguration& W, int iat, std::vector<GradType>& grad)
{
auto& walkers = W.WalkerList;
cudaCheck(cudaEventSynchronize(gpu::gradientSyncOneBodyEvent));
@ -418,7 +415,7 @@ void OneBodyJastrowOrbitalBspline<FT>::addGradient(MCWalkerConfiguration& W, int
}
template<class FT>
void OneBodyJastrowOrbitalBspline<FT>::gradLapl(MCWalkerConfiguration& W, GradMatrix& grad, ValueMatrix& lapl)
void OneBodyJastrowCUDA<FT>::gradLapl(MCWalkerConfiguration& W, GradMatrix& grad, ValueMatrix& lapl)
{
auto& walkers = W.WalkerList;
int numGL = 4 * N * walkers.size();
@ -490,7 +487,7 @@ void OneBodyJastrowOrbitalBspline<FT>::gradLapl(MCWalkerConfiguration& W, GradMa
grad(iw, ptcl)[i] += this->GradLaplHost[4 * this->N * iw + 4 * ptcl + i];
if (std::isnan(this->GradLaplHost[4 * this->N * iw + +4 * ptcl + 3]))
{
fprintf(stderr, "NAN in OneBodyJastrowOrbitalBspline<FT> laplacian.\n");
fprintf(stderr, "NAN in OneBodyJastrowCUDA<FT> laplacian.\n");
abort();
}
lapl(iw, ptcl) += this->GradLaplHost[4 * this->N * iw + +4 * ptcl + 3];
@ -499,6 +496,6 @@ void OneBodyJastrowOrbitalBspline<FT>::gradLapl(MCWalkerConfiguration& W, GradMa
}
// explicit instantiations of templates
template class OneBodyJastrowOrbitalBspline<BsplineFunctor<WaveFunctionComponent::RealType>>;
template class OneBodyJastrowCUDA<BsplineFunctor<WaveFunctionComponent::RealType>>;
} // namespace qmcplusplus

View File

@ -27,7 +27,7 @@
namespace qmcplusplus
{
template<class FT>
class OneBodyJastrowOrbitalBspline : public J1OrbitalSoA<FT>
class OneBodyJastrowCUDA : public J1OrbitalSoA<FT>
{
private:
bool UsePBC;
@ -140,7 +140,7 @@ public:
std::vector<PosType>& quadPoints,
std::vector<ValueType>& psi_ratios) override;
OneBodyJastrowOrbitalBspline(const std::string& obj_name, ParticleSet& centers, ParticleSet& elecs, bool use_offload)
OneBodyJastrowCUDA(const std::string& obj_name, ParticleSet& centers, ParticleSet& elecs, bool use_offload)
: J1OrbitalSoA<FT>(obj_name, centers, elecs, use_offload),
ElecRef(elecs),
L(obj_name + "L"),

View File

@ -18,7 +18,7 @@
#include "RPAJastrow.h"
#include "QMCWaveFunctions/WaveFunctionComponentBuilder.h"
#include "QMCWaveFunctions/Jastrow/J2OrbitalSoA.h"
#include "QMCWaveFunctions/Jastrow/TwoBodyJastrow.h"
#include "QMCWaveFunctions/Jastrow/LRBreakupUtilities.h"
#include "QMCWaveFunctions/Jastrow/SplineFunctors.h"
#include "QMCWaveFunctions/Jastrow/BsplineFunctor.h"
@ -174,7 +174,7 @@ void RPAJastrow::makeShortRange()
nfunc = nfunc_uptr.get();
ShortRangePartAdapter<RealType> SRA(myHandler.get());
SRA.setRmax(Rcut);
auto j2 = std::make_unique<J2OrbitalSoA<BsplineFunctor<RealType>>>("RPA", targetPtcl);
auto j2 = std::make_unique<TwoBodyJastrow<BsplineFunctor<RealType>>>("RPA", targetPtcl, false);
size_t nparam = 12; // number of Bspline parameters
size_t npts = 100; // number of 1D grid points for basis functions
RealType cusp = SRA.df(0);
@ -200,7 +200,8 @@ void RPAJastrow::makeShortRange()
Psi.push_back(std::move(j2));
}
void RPAJastrow::checkOutVariables(const opt_variables_type& active) {
void RPAJastrow::checkOutVariables(const opt_variables_type& active)
{
LongRangeRPA->checkOutVariables(active);
ShortRangeRPA->checkOutVariables(active);
}

View File

@ -15,12 +15,11 @@
#include <PlatformSelector.hpp>
#include "QMCWaveFunctions/Jastrow/J1OrbitalSoA.h"
#include "QMCWaveFunctions/Jastrow/J1Spin.h"
#include "QMCWaveFunctions/Jastrow/J2OrbitalSoA.h"
#include "QMCWaveFunctions/Jastrow/J2OMPTarget.h"
#include "QMCWaveFunctions/Jastrow/TwoBodyJastrow.h"
#if defined(QMC_CUDA)
#include "QMCWaveFunctions/Jastrow/OneBodyJastrowOrbitalBspline.h"
#include "QMCWaveFunctions/Jastrow/TwoBodyJastrowOrbitalBspline.h"
#include "QMCWaveFunctions/Jastrow/OneBodyJastrowCUDA.h"
#include "QMCWaveFunctions/Jastrow/TwoBodyJastrowCUDA.h"
#endif
#include "QMCWaveFunctions/Jastrow/RPAJastrow.h"
@ -50,7 +49,7 @@ class JastrowTypeHelper
public:
using J1Type = J1OrbitalSoA<RadFuncType>;
using J1SpinType = J1Spin<RadFuncType>;
using J2Type = J2OrbitalSoA<RadFuncType>;
using J2Type = TwoBodyJastrow<RadFuncType>;
};
#if defined(QMC_CUDA)
@ -59,9 +58,9 @@ class JastrowTypeHelper<BsplineFunctor<RadialJastrowBuilder::RealType>, RadialJa
{
public:
using RadFuncType = BsplineFunctor<RadialJastrowBuilder::RealType>;
using J1Type = OneBodyJastrowOrbitalBspline<RadFuncType>;
using J1Type = OneBodyJastrowCUDA<RadFuncType>;
using J1SpinType = void;
using J2Type = TwoBodyJastrowOrbitalBspline<RadFuncType>;
using J2Type = TwoBodyJastrowCUDA<RadFuncType>;
};
#endif
@ -70,7 +69,7 @@ class JastrowTypeHelper<BsplineFunctor<RadialJastrowBuilder::RealType>, RadialJa
{
public:
using RadFuncType = BsplineFunctor<RadialJastrowBuilder::RealType>;
using J2Type = J2OMPTarget<RadFuncType>;
using J2Type = TwoBodyJastrow<RadFuncType>;
};
RadialJastrowBuilder::RadialJastrowBuilder(Communicate* comm, ParticleSet& target, ParticleSet& source)
@ -158,7 +157,7 @@ std::unique_ptr<WaveFunctionComponent> RadialJastrowBuilder::createJ2(xmlNodePtr
std::string input_name(getXMLAttributeValue(cur, "name"));
std::string j2name = input_name.empty() ? "J2_" + Jastfunction : input_name;
SpeciesSet& species(targetPtcl.getSpeciesSet());
auto J2 = std::make_unique<J2Type>(j2name, targetPtcl);
auto J2 = std::make_unique<J2Type>(j2name, targetPtcl, Implementation == RadialJastrowBuilder::detail::OMPTARGET);
std::string init_mode("0");
{
@ -608,7 +607,7 @@ std::unique_ptr<WaveFunctionComponent> RadialJastrowBuilder::buildComponent(xmlN
if (CPUOMPTargetSelector::selectPlatform(useGPU) == PlatformKind::OMPTARGET)
{
static_assert(std::is_same<JastrowTypeHelper<BsplineFunctor<RealType>, OMPTARGET>::J2Type,
J2OMPTarget<BsplineFunctor<RealType>>>::value,
TwoBodyJastrow<BsplineFunctor<RealType>>>::value,
"check consistent type");
if (targetPtcl.getCoordinates().getKind() != DynamicCoordinateKind::DC_POS_OFFLOAD)
{

View File

@ -13,7 +13,7 @@
// -*- C++ -*-
#include "J2OMPTarget.h"
#include "TwoBodyJastrow.h"
#include "CPU/SIMD/algorithm.hpp"
#include "SoaDistanceTableABOMPTarget.h"
#include "ResourceCollection.h"
@ -23,7 +23,7 @@ namespace qmcplusplus
{
template<typename T>
struct J2OMPTargetMultiWalkerMem : public Resource
struct TwoBodyJastrowMultiWalkerMem : public Resource
{
// fused buffer for fast transfer in mw_accept
Vector<char, OffloadPinnedAllocator<char>> mw_update_buffer;
@ -40,25 +40,25 @@ struct J2OMPTargetMultiWalkerMem : public Resource
/// memory pool for cur_u, cur_du, cur_d2u [3][Nw][N_padded]. 3 is for value, first and second derivatives.
Vector<T, OffloadPinnedAllocator<T>> mw_cur_allu;
J2OMPTargetMultiWalkerMem() : Resource("J2OMPTargetMultiWalkerMem") {}
TwoBodyJastrowMultiWalkerMem() : Resource("TwoBodyJastrowMultiWalkerMem") {}
J2OMPTargetMultiWalkerMem(const J2OMPTargetMultiWalkerMem&) : J2OMPTargetMultiWalkerMem() {}
TwoBodyJastrowMultiWalkerMem(const TwoBodyJastrowMultiWalkerMem&) : TwoBodyJastrowMultiWalkerMem() {}
Resource* makeClone() const override { return new J2OMPTargetMultiWalkerMem(*this); }
Resource* makeClone() const override { return new TwoBodyJastrowMultiWalkerMem(*this); }
};
template<typename FT>
void J2OMPTarget<FT>::createResource(ResourceCollection& collection) const
void TwoBodyJastrow<FT>::createResource(ResourceCollection& collection) const
{
collection.addResource(std::make_unique<J2OMPTargetMultiWalkerMem<RealType>>());
collection.addResource(std::make_unique<TwoBodyJastrowMultiWalkerMem<RealType>>());
}
template<typename FT>
void J2OMPTarget<FT>::acquireResource(ResourceCollection& collection,
const RefVectorWithLeader<WaveFunctionComponent>& wfc_list) const
void TwoBodyJastrow<FT>::acquireResource(ResourceCollection& collection,
const RefVectorWithLeader<WaveFunctionComponent>& wfc_list) const
{
auto& wfc_leader = wfc_list.getCastedLeader<J2OMPTarget<FT>>();
auto res_ptr = dynamic_cast<J2OMPTargetMultiWalkerMem<RealType>*>(collection.lendResource().release());
auto& wfc_leader = wfc_list.getCastedLeader<TwoBodyJastrow<FT>>();
auto res_ptr = dynamic_cast<TwoBodyJastrowMultiWalkerMem<RealType>*>(collection.lendResource().release());
if (!res_ptr)
throw std::runtime_error("VirtualParticleSet::acquireResource dynamic_cast failed");
wfc_leader.mw_mem_.reset(res_ptr);
@ -68,7 +68,7 @@ void J2OMPTarget<FT>::acquireResource(ResourceCollection& collection,
for (size_t iw = 0; iw < nw; iw++)
{
// copy per walker Uat, dUat, d2Uat to shared buffer and attach buffer
auto& wfc = wfc_list.getCastedElement<J2OMPTarget<FT>>(iw);
auto& wfc = wfc_list.getCastedElement<TwoBodyJastrow<FT>>(iw);
Vector<valT, aligned_allocator<valT>> Uat_view(mw_allUat.data() + iw * N_padded, N);
Uat_view = wfc.Uat;
@ -91,16 +91,16 @@ void J2OMPTarget<FT>::acquireResource(ResourceCollection& collection,
}
template<typename FT>
void J2OMPTarget<FT>::releaseResource(ResourceCollection& collection,
const RefVectorWithLeader<WaveFunctionComponent>& wfc_list) const
void TwoBodyJastrow<FT>::releaseResource(ResourceCollection& collection,
const RefVectorWithLeader<WaveFunctionComponent>& wfc_list) const
{
auto& wfc_leader = wfc_list.getCastedLeader<J2OMPTarget<FT>>();
auto& wfc_leader = wfc_list.getCastedLeader<TwoBodyJastrow<FT>>();
const size_t nw = wfc_list.size();
auto& mw_allUat = wfc_leader.mw_mem_->mw_allUat;
for (size_t iw = 0; iw < nw; iw++)
{
// detach buffer and copy per walker Uat, dUat, d2Uat from shared buffer
auto& wfc = wfc_list.getCastedElement<J2OMPTarget<FT>>(iw);
auto& wfc = wfc_list.getCastedElement<TwoBodyJastrow<FT>>(iw);
Vector<valT, aligned_allocator<valT>> Uat_view(mw_allUat.data() + iw * N_padded, N);
wfc.Uat.free();
@ -123,14 +123,14 @@ void J2OMPTarget<FT>::releaseResource(ResourceCollection& collection,
}
template<typename FT>
void J2OMPTarget<FT>::extractOptimizableObjectRefs(UniqueOptObjRefs& opt_obj_refs)
void TwoBodyJastrow<FT>::extractOptimizableObjectRefs(UniqueOptObjRefs& opt_obj_refs)
{
for (auto& [key, functor] : J2Unique)
opt_obj_refs.push_back(*functor);
}
template<typename FT>
void J2OMPTarget<FT>::checkOutVariables(const opt_variables_type& active)
void TwoBodyJastrow<FT>::checkOutVariables(const opt_variables_type& active)
{
myVars.clear();
for (auto& [key, functor] : J2Unique)
@ -169,7 +169,7 @@ void J2OMPTarget<FT>::checkOutVariables(const opt_variables_type& active)
}
template<typename FT>
void J2OMPTarget<FT>::evaluateRatios(const VirtualParticleSet& VP, std::vector<ValueType>& ratios)
void TwoBodyJastrow<FT>::evaluateRatios(const VirtualParticleSet& VP, std::vector<ValueType>& ratios)
{
for (int k = 0; k < ratios.size(); ++k)
ratios[k] =
@ -177,14 +177,20 @@ void J2OMPTarget<FT>::evaluateRatios(const VirtualParticleSet& VP, std::vector<V
}
template<typename FT>
void J2OMPTarget<FT>::mw_evaluateRatios(const RefVectorWithLeader<WaveFunctionComponent>& wfc_list,
const RefVectorWithLeader<const VirtualParticleSet>& vp_list,
std::vector<std::vector<ValueType>>& ratios) const
void TwoBodyJastrow<FT>::mw_evaluateRatios(const RefVectorWithLeader<WaveFunctionComponent>& wfc_list,
const RefVectorWithLeader<const VirtualParticleSet>& vp_list,
std::vector<std::vector<ValueType>>& ratios) const
{
if (!use_offload_)
{
WaveFunctionComponent::mw_evaluateRatios(wfc_list, vp_list, ratios);
return;
}
// add early return to prevent from accessing vp_list[0]
if (wfc_list.size() == 0)
return;
auto& wfc_leader = wfc_list.getCastedLeader<J2OMPTarget<FT>>();
auto& wfc_leader = wfc_list.getCastedLeader<TwoBodyJastrow<FT>>();
auto& vp_leader = vp_list.getLeader();
const auto& mw_refPctls = vp_leader.getMultiWalkerRefPctls();
auto& mw_vals = wfc_leader.mw_mem_->mw_vals;
@ -206,7 +212,7 @@ void J2OMPTarget<FT>::mw_evaluateRatios(const RefVectorWithLeader<WaveFunctionCo
for (int iw = 0; iw < nw; ++iw)
{
const VirtualParticleSet& vp = vp_list[iw];
const auto& wfc = wfc_list.getCastedElement<J2OMPTarget<FT>>(iw);
const auto& wfc = wfc_list.getCastedElement<TwoBodyJastrow<FT>>(iw);
for (int k = 0; k < vp.getTotalNum(); ++k, ivp++)
ratios[iw][k] = std::exp(wfc.Uat[mw_refPctls[ivp]] - mw_vals[ivp]);
}
@ -214,7 +220,7 @@ void J2OMPTarget<FT>::mw_evaluateRatios(const RefVectorWithLeader<WaveFunctionCo
}
template<typename FT>
void J2OMPTarget<FT>::registerData(ParticleSet& P, WFBufferType& buf)
void TwoBodyJastrow<FT>::registerData(ParticleSet& P, WFBufferType& buf)
{
if (Bytes_in_WFBuffer == 0)
{
@ -235,7 +241,7 @@ void J2OMPTarget<FT>::registerData(ParticleSet& P, WFBufferType& buf)
}
template<typename FT>
void J2OMPTarget<FT>::copyFromBuffer(ParticleSet& P, WFBufferType& buf)
void TwoBodyJastrow<FT>::copyFromBuffer(ParticleSet& P, WFBufferType& buf)
{
Uat.attachReference(buf.lendReference<valT>(N), N);
dUat.attachReference(N, N_padded, buf.lendReference<valT>(N_padded * DIM));
@ -243,9 +249,9 @@ void J2OMPTarget<FT>::copyFromBuffer(ParticleSet& P, WFBufferType& buf)
}
template<typename FT>
typename J2OMPTarget<FT>::LogValueType J2OMPTarget<FT>::updateBuffer(ParticleSet& P,
WFBufferType& buf,
bool fromscratch)
typename TwoBodyJastrow<FT>::LogValueType TwoBodyJastrow<FT>::updateBuffer(ParticleSet& P,
WFBufferType& buf,
bool fromscratch)
{
log_value_ = computeGL(P.G, P.L);
buf.forward(Bytes_in_WFBuffer);
@ -253,7 +259,7 @@ typename J2OMPTarget<FT>::LogValueType J2OMPTarget<FT>::updateBuffer(ParticleSet
}
template<typename FT>
typename J2OMPTarget<FT>::valT J2OMPTarget<FT>::computeU(const ParticleSet& P, int iat, const DistRow& dist)
typename TwoBodyJastrow<FT>::valT TwoBodyJastrow<FT>::computeU(const ParticleSet& P, int iat, const DistRow& dist)
{
valT curUat(0);
const int igt = P.GroupID[iat] * NumGroups;
@ -268,7 +274,7 @@ typename J2OMPTarget<FT>::valT J2OMPTarget<FT>::computeU(const ParticleSet& P, i
}
template<typename FT>
typename J2OMPTarget<FT>::posT J2OMPTarget<FT>::accumulateG(const valT* restrict du, const DisplRow& displ) const
typename TwoBodyJastrow<FT>::posT TwoBodyJastrow<FT>::accumulateG(const valT* restrict du, const DisplRow& displ) const
{
posT grad;
for (int idim = 0; idim < DIM; ++idim)
@ -285,16 +291,17 @@ typename J2OMPTarget<FT>::posT J2OMPTarget<FT>::accumulateG(const valT* restrict
}
template<typename FT>
J2OMPTarget<FT>::J2OMPTarget(const std::string& obj_name, ParticleSet& p)
TwoBodyJastrow<FT>::TwoBodyJastrow(const std::string& obj_name, ParticleSet& p, bool use_offload)
: WaveFunctionComponent(obj_name),
N(p.getTotalNum()),
N_padded(getAlignedSize<valT>(N)),
NumGroups(p.groups()),
use_offload_(use_offload),
N_padded(getAlignedSize<valT>(N)),
my_table_ID_(p.addTable(p)),
j2_ke_corr_helper(p, F)
{
if (my_name_.empty())
throw std::runtime_error("J2OMPTarget object name cannot be empty!");
throw std::runtime_error("TwoBodyJastrow object name cannot be empty!");
F.resize(NumGroups * NumGroups, nullptr);
@ -313,10 +320,10 @@ J2OMPTarget<FT>::J2OMPTarget(const std::string& obj_name, ParticleSet& p)
}
template<typename FT>
J2OMPTarget<FT>::~J2OMPTarget() = default;
TwoBodyJastrow<FT>::~TwoBodyJastrow() = default;
template<typename FT>
void J2OMPTarget<FT>::resizeInternalStorage()
void TwoBodyJastrow<FT>::resizeInternalStorage()
{
Uat.resize(N);
dUat.resize(N);
@ -333,7 +340,7 @@ void J2OMPTarget<FT>::resizeInternalStorage()
}
template<typename FT>
void J2OMPTarget<FT>::addFunc(int ia, int ib, std::unique_ptr<FT> j)
void TwoBodyJastrow<FT>::addFunc(int ia, int ib, std::unique_ptr<FT> j)
{
assert(ia < NumGroups);
assert(ib < NumGroups);
@ -367,9 +374,9 @@ void J2OMPTarget<FT>::addFunc(int ia, int ib, std::unique_ptr<FT> j)
}
template<typename FT>
std::unique_ptr<WaveFunctionComponent> J2OMPTarget<FT>::makeClone(ParticleSet& tqp) const
std::unique_ptr<WaveFunctionComponent> TwoBodyJastrow<FT>::makeClone(ParticleSet& tqp) const
{
auto j2copy = std::make_unique<J2OMPTarget<FT>>(my_name_, tqp);
auto j2copy = std::make_unique<TwoBodyJastrow<FT>>(my_name_, tqp, use_offload_);
std::map<const FT*, FT*> fcmap;
for (int ig = 0; ig < NumGroups; ++ig)
for (int jg = ig; jg < NumGroups; ++jg)
@ -403,13 +410,13 @@ std::unique_ptr<WaveFunctionComponent> J2OMPTarget<FT>::makeClone(ParticleSet& t
* @param d2u starting second deriv
*/
template<typename FT>
void J2OMPTarget<FT>::computeU3(const ParticleSet& P,
int iat,
const DistRow& dist,
RealType* restrict u,
RealType* restrict du,
RealType* restrict d2u,
bool triangle)
void TwoBodyJastrow<FT>::computeU3(const ParticleSet& P,
int iat,
const DistRow& dist,
RealType* restrict u,
RealType* restrict du,
RealType* restrict d2u,
bool triangle)
{
const int jelmax = triangle ? iat : N;
constexpr valT czero(0);
@ -431,7 +438,7 @@ void J2OMPTarget<FT>::computeU3(const ParticleSet& P,
}
template<typename FT>
typename J2OMPTarget<FT>::PsiValueType J2OMPTarget<FT>::ratio(ParticleSet& P, int iat)
typename TwoBodyJastrow<FT>::PsiValueType TwoBodyJastrow<FT>::ratio(ParticleSet& P, int iat)
{
//only ratio, ready to compute it again
UpdateMode = ORB_PBYP_RATIO;
@ -440,14 +447,20 @@ typename J2OMPTarget<FT>::PsiValueType J2OMPTarget<FT>::ratio(ParticleSet& P, in
}
template<typename FT>
void J2OMPTarget<FT>::mw_calcRatio(const RefVectorWithLeader<WaveFunctionComponent>& wfc_list,
const RefVectorWithLeader<ParticleSet>& p_list,
int iat,
std::vector<PsiValueType>& ratios) const
void TwoBodyJastrow<FT>::mw_calcRatio(const RefVectorWithLeader<WaveFunctionComponent>& wfc_list,
const RefVectorWithLeader<ParticleSet>& p_list,
int iat,
std::vector<PsiValueType>& ratios) const
{
if (!use_offload_)
{
WaveFunctionComponent::mw_calcRatio(wfc_list, p_list, iat, ratios);
return;
}
//right now. Directly use FT::mw_evaluateVGL implementation.
assert(this == &wfc_list.getLeader());
auto& wfc_leader = wfc_list.getCastedLeader<J2OMPTarget<FT>>();
auto& wfc_leader = wfc_list.getCastedLeader<TwoBodyJastrow<FT>>();
auto& p_leader = p_list.getLeader();
const auto& dt_leader = p_leader.getDistTableAA(my_table_ID_);
const int nw = wfc_list.size();
@ -464,7 +477,7 @@ void J2OMPTarget<FT>::mw_calcRatio(const RefVectorWithLeader<WaveFunctionCompone
for (int iw = 0; iw < nw; iw++)
{
auto& wfc = wfc_list.getCastedElement<J2OMPTarget<FT>>(iw);
auto& wfc = wfc_list.getCastedElement<TwoBodyJastrow<FT>>(iw);
wfc.cur_Uat = mw_vgl[iw][0];
ratios[iw] = std::exp(static_cast<PsiValueType>(wfc.Uat[iat] - wfc.cur_Uat));
}
@ -472,7 +485,7 @@ void J2OMPTarget<FT>::mw_calcRatio(const RefVectorWithLeader<WaveFunctionCompone
template<typename FT>
void J2OMPTarget<FT>::evaluateRatiosAlltoOne(ParticleSet& P, std::vector<ValueType>& ratios)
void TwoBodyJastrow<FT>::evaluateRatiosAlltoOne(ParticleSet& P, std::vector<ValueType>& ratios)
{
const auto& d_table = P.getDistTableAA(my_table_ID_);
const auto& dist = d_table.getTempDists();
@ -499,13 +512,13 @@ void J2OMPTarget<FT>::evaluateRatiosAlltoOne(ParticleSet& P, std::vector<ValueTy
}
template<typename FT>
typename J2OMPTarget<FT>::GradType J2OMPTarget<FT>::evalGrad(ParticleSet& P, int iat)
typename TwoBodyJastrow<FT>::GradType TwoBodyJastrow<FT>::evalGrad(ParticleSet& P, int iat)
{
return GradType(dUat[iat]);
}
template<typename FT>
typename J2OMPTarget<FT>::PsiValueType J2OMPTarget<FT>::ratioGrad(ParticleSet& P, int iat, GradType& grad_iat)
typename TwoBodyJastrow<FT>::PsiValueType TwoBodyJastrow<FT>::ratioGrad(ParticleSet& P, int iat, GradType& grad_iat)
{
UpdateMode = ORB_PBYP_PARTIAL;
@ -517,14 +530,20 @@ typename J2OMPTarget<FT>::PsiValueType J2OMPTarget<FT>::ratioGrad(ParticleSet& P
}
template<typename FT>
void J2OMPTarget<FT>::mw_ratioGrad(const RefVectorWithLeader<WaveFunctionComponent>& wfc_list,
const RefVectorWithLeader<ParticleSet>& p_list,
int iat,
std::vector<PsiValueType>& ratios,
std::vector<GradType>& grad_new) const
void TwoBodyJastrow<FT>::mw_ratioGrad(const RefVectorWithLeader<WaveFunctionComponent>& wfc_list,
const RefVectorWithLeader<ParticleSet>& p_list,
int iat,
std::vector<PsiValueType>& ratios,
std::vector<GradType>& grad_new) const
{
if (!use_offload_)
{
WaveFunctionComponent::mw_ratioGrad(wfc_list, p_list, iat, ratios, grad_new);
return;
}
assert(this == &wfc_list.getLeader());
auto& wfc_leader = wfc_list.getCastedLeader<J2OMPTarget<FT>>();
auto& wfc_leader = wfc_list.getCastedLeader<TwoBodyJastrow<FT>>();
auto& p_leader = p_list.getLeader();
const auto& dt_leader = p_leader.getDistTableAA(my_table_ID_);
const int nw = wfc_list.size();
@ -541,7 +560,7 @@ void J2OMPTarget<FT>::mw_ratioGrad(const RefVectorWithLeader<WaveFunctionCompone
for (int iw = 0; iw < nw; iw++)
{
auto& wfc = wfc_list.getCastedElement<J2OMPTarget<FT>>(iw);
auto& wfc = wfc_list.getCastedElement<TwoBodyJastrow<FT>>(iw);
wfc.cur_Uat = mw_vgl[iw][0];
ratios[iw] = std::exp(static_cast<PsiValueType>(wfc.Uat[iat] - wfc.cur_Uat));
for (int idim = 0; idim < DIM; idim++)
@ -550,7 +569,7 @@ void J2OMPTarget<FT>::mw_ratioGrad(const RefVectorWithLeader<WaveFunctionCompone
}
template<typename FT>
void J2OMPTarget<FT>::acceptMove(ParticleSet& P, int iat, bool safe_to_delay)
void TwoBodyJastrow<FT>::acceptMove(ParticleSet& P, int iat, bool safe_to_delay)
{
// get the old u, du, d2u
const auto& d_table = P.getDistTableAA(my_table_ID_);
@ -601,14 +620,20 @@ void J2OMPTarget<FT>::acceptMove(ParticleSet& P, int iat, bool safe_to_delay)
}
template<typename FT>
void J2OMPTarget<FT>::mw_accept_rejectMove(const RefVectorWithLeader<WaveFunctionComponent>& wfc_list,
const RefVectorWithLeader<ParticleSet>& p_list,
int iat,
const std::vector<bool>& isAccepted,
bool safe_to_delay) const
void TwoBodyJastrow<FT>::mw_accept_rejectMove(const RefVectorWithLeader<WaveFunctionComponent>& wfc_list,
const RefVectorWithLeader<ParticleSet>& p_list,
int iat,
const std::vector<bool>& isAccepted,
bool safe_to_delay) const
{
if (!use_offload_)
{
WaveFunctionComponent::mw_accept_rejectMove(wfc_list, p_list, iat, isAccepted, safe_to_delay);
return;
}
assert(this == &wfc_list.getLeader());
auto& wfc_leader = wfc_list.getCastedLeader<J2OMPTarget<FT>>();
auto& wfc_leader = wfc_list.getCastedLeader<TwoBodyJastrow<FT>>();
auto& p_leader = p_list.getLeader();
const auto& dt_leader = p_leader.getDistTableAA(my_table_ID_);
const int nw = wfc_list.size();
@ -620,7 +645,7 @@ void J2OMPTarget<FT>::mw_accept_rejectMove(const RefVectorWithLeader<WaveFunctio
for (int iw = 0; iw < nw; iw++)
{
auto& wfc = wfc_list.getCastedElement<J2OMPTarget<FT>>(iw);
auto& wfc = wfc_list.getCastedElement<TwoBodyJastrow<FT>>(iw);
wfc.log_value_ += wfc.Uat[iat] - mw_vgl[iw][0];
}
@ -631,7 +656,7 @@ void J2OMPTarget<FT>::mw_accept_rejectMove(const RefVectorWithLeader<WaveFunctio
}
template<typename FT>
void J2OMPTarget<FT>::recompute(const ParticleSet& P)
void TwoBodyJastrow<FT>::recompute(const ParticleSet& P)
{
const auto& d_table = P.getDistTableAA(my_table_ID_);
for (int ig = 0; ig < NumGroups; ++ig)
@ -681,11 +706,17 @@ void J2OMPTarget<FT>::recompute(const ParticleSet& P)
}
template<typename FT>
void J2OMPTarget<FT>::mw_recompute(const RefVectorWithLeader<WaveFunctionComponent>& wfc_list,
const RefVectorWithLeader<ParticleSet>& p_list,
const std::vector<bool>& recompute) const
void TwoBodyJastrow<FT>::mw_recompute(const RefVectorWithLeader<WaveFunctionComponent>& wfc_list,
const RefVectorWithLeader<ParticleSet>& p_list,
const std::vector<bool>& recompute) const
{
auto& wfc_leader = wfc_list.getCastedLeader<J2OMPTarget<FT>>();
if (!use_offload_)
{
WaveFunctionComponent::mw_recompute(wfc_list, p_list, recompute);
return;
}
auto& wfc_leader = wfc_list.getCastedLeader<TwoBodyJastrow<FT>>();
assert(this == &wfc_leader);
for (int iw = 0; iw < wfc_list.size(); iw++)
if (recompute[iw])
@ -694,36 +725,42 @@ void J2OMPTarget<FT>::mw_recompute(const RefVectorWithLeader<WaveFunctionCompone
}
template<typename FT>
typename J2OMPTarget<FT>::LogValueType J2OMPTarget<FT>::evaluateLog(const ParticleSet& P,
ParticleSet::ParticleGradient& G,
ParticleSet::ParticleLaplacian& L)
typename TwoBodyJastrow<FT>::LogValueType TwoBodyJastrow<FT>::evaluateLog(const ParticleSet& P,
ParticleSet::ParticleGradient& G,
ParticleSet::ParticleLaplacian& L)
{
recompute(P);
return log_value_ = computeGL(G, L);
}
template<typename FT>
void J2OMPTarget<FT>::mw_evaluateLog(const RefVectorWithLeader<WaveFunctionComponent>& wfc_list,
const RefVectorWithLeader<ParticleSet>& p_list,
const RefVector<ParticleSet::ParticleGradient>& G_list,
const RefVector<ParticleSet::ParticleLaplacian>& L_list) const
void TwoBodyJastrow<FT>::mw_evaluateLog(const RefVectorWithLeader<WaveFunctionComponent>& wfc_list,
const RefVectorWithLeader<ParticleSet>& p_list,
const RefVector<ParticleSet::ParticleGradient>& G_list,
const RefVector<ParticleSet::ParticleLaplacian>& L_list) const
{
if (!use_offload_)
{
WaveFunctionComponent::mw_evaluateLog(wfc_list, p_list, G_list, L_list);
return;
}
assert(this == &wfc_list.getLeader());
const std::vector<bool> recompute_all(wfc_list.size(), true);
mw_recompute(wfc_list, p_list, recompute_all);
for (int iw = 0; iw < wfc_list.size(); iw++)
{
auto& wfc = wfc_list.getCastedElement<J2OMPTarget<FT>>(iw);
auto& wfc = wfc_list.getCastedElement<TwoBodyJastrow<FT>>(iw);
wfc.log_value_ = wfc.computeGL(G_list[iw], L_list[iw]);
}
}
template<typename FT>
typename J2OMPTarget<FT>::QTFull::RealType J2OMPTarget<FT>::computeGL(ParticleSet::ParticleGradient& G,
ParticleSet::ParticleLaplacian& L) const
typename TwoBodyJastrow<FT>::QTFull::RealType TwoBodyJastrow<FT>::computeGL(ParticleSet::ParticleGradient& G,
ParticleSet::ParticleLaplacian& L) const
{
for (int iat = 0; iat < N; ++iat)
{
@ -734,31 +771,37 @@ typename J2OMPTarget<FT>::QTFull::RealType J2OMPTarget<FT>::computeGL(ParticleSe
}
template<typename FT>
WaveFunctionComponent::LogValueType J2OMPTarget<FT>::evaluateGL(const ParticleSet& P,
ParticleSet::ParticleGradient& G,
ParticleSet::ParticleLaplacian& L,
bool fromscratch)
WaveFunctionComponent::LogValueType TwoBodyJastrow<FT>::evaluateGL(const ParticleSet& P,
ParticleSet::ParticleGradient& G,
ParticleSet::ParticleLaplacian& L,
bool fromscratch)
{
return log_value_ = computeGL(G, L);
}
template<typename FT>
void J2OMPTarget<FT>::mw_evaluateGL(const RefVectorWithLeader<WaveFunctionComponent>& wfc_list,
const RefVectorWithLeader<ParticleSet>& p_list,
const RefVector<ParticleSet::ParticleGradient>& G_list,
const RefVector<ParticleSet::ParticleLaplacian>& L_list,
bool fromscratch) const
void TwoBodyJastrow<FT>::mw_evaluateGL(const RefVectorWithLeader<WaveFunctionComponent>& wfc_list,
const RefVectorWithLeader<ParticleSet>& p_list,
const RefVector<ParticleSet::ParticleGradient>& G_list,
const RefVector<ParticleSet::ParticleLaplacian>& L_list,
bool fromscratch) const
{
if (!use_offload_)
{
WaveFunctionComponent::mw_evaluateGL(wfc_list, p_list, G_list, L_list, fromscratch);
return;
}
assert(this == &wfc_list.getLeader());
for (int iw = 0; iw < wfc_list.size(); iw++)
{
auto& wfc = wfc_list.getCastedElement<J2OMPTarget<FT>>(iw);
auto& wfc = wfc_list.getCastedElement<TwoBodyJastrow<FT>>(iw);
wfc.log_value_ = wfc.computeGL(G_list[iw], L_list[iw]);
}
}
template<typename FT>
void J2OMPTarget<FT>::evaluateHessian(ParticleSet& P, HessVector& grad_grad_psi)
void TwoBodyJastrow<FT>::evaluateHessian(ParticleSet& P, HessVector& grad_grad_psi)
{
log_value_ = 0.0;
const auto& d_ee(P.getDistTableAA(my_table_ID_));
@ -790,10 +833,10 @@ void J2OMPTarget<FT>::evaluateHessian(ParticleSet& P, HessVector& grad_grad_psi)
}
template<typename FT>
void J2OMPTarget<FT>::evaluateDerivatives(ParticleSet& P,
const opt_variables_type& active,
Vector<ValueType>& dlogpsi,
Vector<ValueType>& dhpsioverpsi)
void TwoBodyJastrow<FT>::evaluateDerivatives(ParticleSet& P,
const opt_variables_type& active,
Vector<ValueType>& dlogpsi,
Vector<ValueType>& dhpsioverpsi)
{
if (myVars.size() == 0)
return;
@ -826,9 +869,9 @@ void J2OMPTarget<FT>::evaluateDerivatives(ParticleSet& P,
}
template<typename FT>
void J2OMPTarget<FT>::evaluateDerivativesWF(ParticleSet& P,
const opt_variables_type& active,
Vector<ValueType>& dlogpsi)
void TwoBodyJastrow<FT>::evaluateDerivativesWF(ParticleSet& P,
const opt_variables_type& active,
Vector<ValueType>& dlogpsi)
{
if (myVars.size() == 0)
return;
@ -924,10 +967,10 @@ void J2OMPTarget<FT>::evaluateDerivativesWF(ParticleSet& P,
}
template<typename FT>
void J2OMPTarget<FT>::evaluateDerivRatios(const VirtualParticleSet& VP,
const opt_variables_type& optvars,
std::vector<ValueType>& ratios,
Matrix<ValueType>& dratios)
void TwoBodyJastrow<FT>::evaluateDerivRatios(const VirtualParticleSet& VP,
const opt_variables_type& optvars,
std::vector<ValueType>& ratios,
Matrix<ValueType>& dratios)
{
evaluateRatios(VP, ratios);
if (myVars.size() == 0)
@ -994,7 +1037,8 @@ void J2OMPTarget<FT>::evaluateDerivRatios(const VirtualParticleSet& VP,
}
}
template class J2OMPTarget<BsplineFunctor<QMCTraits::RealType>>;
template class J2OMPTarget<PadeFunctor<QMCTraits::RealType>>;
template class J2OMPTarget<UserFunctor<QMCTraits::RealType>>;
template class TwoBodyJastrow<BsplineFunctor<QMCTraits::RealType>>;
template class TwoBodyJastrow<PadeFunctor<QMCTraits::RealType>>;
template class TwoBodyJastrow<UserFunctor<QMCTraits::RealType>>;
template class TwoBodyJastrow<FakeFunctor<QMCTraits::RealType>>;
} // namespace qmcplusplus

View File

@ -28,12 +28,13 @@
#include "BsplineFunctor.h"
#include "PadeFunctors.h"
#include "UserFunctor.h"
#include "FakeFunctor.h"
namespace qmcplusplus
{
template<typename T>
struct J2OMPTargetMultiWalkerMem;
struct TwoBodyJastrowMultiWalkerMem;
/** @ingroup WaveFunctionComponent
* @brief Specialization for two-body Jastrow function using multiple functors
@ -42,7 +43,7 @@ struct J2OMPTargetMultiWalkerMem;
* For electrons, distinct pair correlation functions are used
* for spins up-up/down-down and up-down/down-up.
*
* Based on J2OMPTarget.h with these considerations
* Based on TwoBodyJastrow.h with these considerations
* - DistanceTable using SoA containers
* - support mixed precision: FT::real_type != OHMMS_PRECISION
* - loops over the groups: elminated PairID
@ -51,7 +52,7 @@ struct J2OMPTargetMultiWalkerMem;
* - Memory use is O(N).
*/
template<class FT>
class J2OMPTarget : public WaveFunctionComponent
class TwoBodyJastrow : public WaveFunctionComponent
{
public:
///alias FuncType
@ -67,16 +68,21 @@ public:
using GradDerivVec = ParticleAttrib<QTFull::GradType>;
using ValueDerivVec = ParticleAttrib<QTFull::ValueType>;
protected:
///number of particles
const size_t N;
///number of groups of the target particleset
const size_t NumGroups;
private:
/// if true use offload
const bool use_offload_;
/** initialize storage Uat,dUat, d2Uat */
void resizeInternalStorage();
///number of particles
const size_t N;
///number of particles + padded
const size_t N_padded;
///number of groups of the target particleset
const size_t NumGroups;
/// the group_id of each particle
Vector<int, OffloadPinnedAllocator<int>> grp_ids;
///diff value
@ -110,7 +116,7 @@ private:
std::vector<GradDerivVec> gradLogPsi;
std::vector<ValueDerivVec> lapLogPsi;
std::unique_ptr<J2OMPTargetMultiWalkerMem<RealType>> mw_mem_;
std::unique_ptr<TwoBodyJastrowMultiWalkerMem<RealType>> mw_mem_;
void resizeWFOptVectors()
{
@ -139,9 +145,9 @@ private:
/**@} */
public:
J2OMPTarget(const std::string& obj_name, ParticleSet& p);
J2OMPTarget(const J2OMPTarget& rhs) = delete;
~J2OMPTarget() override;
TwoBodyJastrow(const std::string& obj_name, ParticleSet& p, bool use_offload);
TwoBodyJastrow(const TwoBodyJastrow& rhs) = delete;
~TwoBodyJastrow() override;
/** add functor for (ia,ib) pair */
void addFunc(int ia, int ib, std::unique_ptr<FT> j);
@ -154,7 +160,7 @@ public:
void releaseResource(ResourceCollection& collection,
const RefVectorWithLeader<WaveFunctionComponent>& wfc_list) const override;
std::string getClassName() const override { return "J2OMPTarget"; }
std::string getClassName() const override { return "TwoBodyJastrow"; }
bool isOptimizable() const override { return true; }
void extractOptimizableObjectRefs(UniqueOptObjRefs& opt_obj_refs) override;
/** check out optimizable variables
@ -236,14 +242,17 @@ public:
const std::vector<FT*>& getPairFunctions() const { return F; }
// Accessors for unit testing
std::pair<int, int> getComponentOffset(int index) { return OffSet.at(index); }
opt_variables_type& getComponentVars() { return myVars; }
void evaluateDerivatives(ParticleSet& P,
const opt_variables_type& active,
Vector<ValueType>& dlogpsi,
Vector<ValueType>& dhpsioverpsi) override;
void evaluateDerivativesWF(ParticleSet& P,
const opt_variables_type& active,
Vector<ValueType>& dlogpsi) override;
void evaluateDerivativesWF(ParticleSet& P, const opt_variables_type& active, Vector<ValueType>& dlogpsi) override;
void evaluateDerivRatios(const VirtualParticleSet& VP,
const opt_variables_type& optvars,
@ -251,8 +260,9 @@ public:
Matrix<ValueType>& dratios) override;
};
extern template class J2OMPTarget<BsplineFunctor<QMCTraits::RealType>>;
extern template class J2OMPTarget<PadeFunctor<QMCTraits::RealType>>;
extern template class J2OMPTarget<UserFunctor<QMCTraits::RealType>>;
extern template class TwoBodyJastrow<BsplineFunctor<QMCTraits::RealType>>;
extern template class TwoBodyJastrow<PadeFunctor<QMCTraits::RealType>>;
extern template class TwoBodyJastrow<UserFunctor<QMCTraits::RealType>>;
extern template class TwoBodyJastrow<FakeFunctor<QMCTraits::RealType>>;
} // namespace qmcplusplus
#endif

View File

@ -14,7 +14,7 @@
//////////////////////////////////////////////////////////////////////////////////////
#include "TwoBodyJastrowOrbitalBspline.h"
#include "TwoBodyJastrowCUDA.h"
#include "CudaSpline.h"
#include "Lattice/ParticleBConds.h"
#include "QMCWaveFunctions/detail/CUDA_legacy/BsplineJastrowCuda.h"
@ -25,7 +25,7 @@
namespace qmcplusplus
{
template<class FT>
void TwoBodyJastrowOrbitalBspline<FT>::freeGPUmem()
void TwoBodyJastrowCUDA<FT>::freeGPUmem()
{
UpdateListGPU.clear();
SumGPU.clear();
@ -43,18 +43,18 @@ void TwoBodyJastrowOrbitalBspline<FT>::freeGPUmem()
};
template<class FT>
void TwoBodyJastrowOrbitalBspline<FT>::recompute(MCWalkerConfiguration& W, bool firstTime)
void TwoBodyJastrowCUDA<FT>::recompute(MCWalkerConfiguration& W, bool firstTime)
{}
template<class FT>
void TwoBodyJastrowOrbitalBspline<FT>::reserve(PointerPool<gpu::device_vector<CTS::RealType>>& pool)
void TwoBodyJastrowCUDA<FT>::reserve(PointerPool<gpu::device_vector<CTS::RealType>>& pool)
{}
template<class FT>
void TwoBodyJastrowOrbitalBspline<FT>::addFunc(int ia, int ib, std::unique_ptr<FT> j)
void TwoBodyJastrowCUDA<FT>::addFunc(int ia, int ib, std::unique_ptr<FT> j)
{
CudaSpline<CTS::RealType>* newSpline = new CudaSpline<CTS::RealType>(*j);
J2OrbitalSoA<BsplineFunctor<WaveFunctionComponent::RealType>>::addFunc(ia, ib, std::move(j));
JBase::addFunc(ia, ib, std::move(j));
UniqueSplines.push_back(newSpline);
if (ia == ib)
{
@ -83,7 +83,7 @@ void TwoBodyJastrowOrbitalBspline<FT>::addFunc(int ia, int ib, std::unique_ptr<F
}
template<class FT>
void TwoBodyJastrowOrbitalBspline<FT>::addLog(MCWalkerConfiguration& W, std::vector<RealType>& logPsi)
void TwoBodyJastrowCUDA<FT>::addLog(MCWalkerConfiguration& W, std::vector<RealType>& logPsi)
{
auto& walkers = W.WalkerList;
if (SumGPU.size() < 4 * walkers.size())
@ -144,11 +144,11 @@ void TwoBodyJastrowOrbitalBspline<FT>::addLog(MCWalkerConfiguration& W, std::vec
}
template<class FT>
void TwoBodyJastrowOrbitalBspline<FT>::update(MCWalkerConfiguration* W,
std::vector<Walker_t*>& walkers,
int iat,
std::vector<bool>* acc,
int k)
void TwoBodyJastrowCUDA<FT>::update(MCWalkerConfiguration* W,
std::vector<Walker_t*>& walkers,
int iat,
std::vector<bool>* acc,
int k)
{
// for (int iw=0; iw<walkers.size(); iw++)
// UpdateListHost[iw] = (CTS::RealType*)walkers[iw]->R_GPU.data();
@ -160,11 +160,11 @@ void TwoBodyJastrowOrbitalBspline<FT>::update(MCWalkerConfiguration* W,
// This currently does not actually compute the gradient or laplacian
template<class FT>
void TwoBodyJastrowOrbitalBspline<FT>::ratio(MCWalkerConfiguration& W,
int iat,
std::vector<ValueType>& psi_ratios,
std::vector<GradType>& grad,
std::vector<ValueType>& lapl)
void TwoBodyJastrowCUDA<FT>::ratio(MCWalkerConfiguration& W,
int iat,
std::vector<ValueType>& psi_ratios,
std::vector<GradType>& grad,
std::vector<ValueType>& lapl)
{
auto& walkers = W.WalkerList;
int N = W.Rnew_GPU.size();
@ -245,11 +245,11 @@ void TwoBodyJastrowOrbitalBspline<FT>::ratio(MCWalkerConfiguration& W,
// This currently does not actually compute the gradient or laplacian
template<class FT>
void TwoBodyJastrowOrbitalBspline<FT>::calcRatio(MCWalkerConfiguration& W,
int iat,
std::vector<ValueType>& psi_ratios,
std::vector<GradType>& grad,
std::vector<ValueType>& lapl)
void TwoBodyJastrowCUDA<FT>::calcRatio(MCWalkerConfiguration& W,
int iat,
std::vector<ValueType>& psi_ratios,
std::vector<GradType>& grad,
std::vector<ValueType>& lapl)
{
auto& walkers = W.WalkerList;
int N = W.Rnew_GPU.size();
@ -317,12 +317,12 @@ void TwoBodyJastrowOrbitalBspline<FT>::calcRatio(MCWalkerConfiguration& W,
}
template<class FT>
void TwoBodyJastrowOrbitalBspline<FT>::addRatio(MCWalkerConfiguration& W,
int iat,
int k,
std::vector<ValueType>& psi_ratios,
std::vector<GradType>& grad,
std::vector<ValueType>& lapl)
void TwoBodyJastrowCUDA<FT>::addRatio(MCWalkerConfiguration& W,
int iat,
int k,
std::vector<ValueType>& psi_ratios,
std::vector<GradType>& grad,
std::vector<ValueType>& lapl)
{
#ifndef CPU_RATIO
auto& walkers = W.WalkerList;
@ -345,10 +345,10 @@ void TwoBodyJastrowOrbitalBspline<FT>::addRatio(MCWalkerConfiguration& W,
}
template<class FT>
void TwoBodyJastrowOrbitalBspline<FT>::NLratios(MCWalkerConfiguration& W,
std::vector<NLjob>& jobList,
std::vector<PosType>& quadPoints,
std::vector<ValueType>& psi_ratios)
void TwoBodyJastrowCUDA<FT>::NLratios(MCWalkerConfiguration& W,
std::vector<NLjob>& jobList,
std::vector<PosType>& quadPoints,
std::vector<ValueType>& psi_ratios)
{
CTS::RealType sim_cell_radius = W.getLattice().SimulationCellRadius;
auto& walkers = W.WalkerList;
@ -419,10 +419,7 @@ void TwoBodyJastrowOrbitalBspline<FT>::NLratios(MCWalkerConfiguration& W,
}
template<class FT>
void TwoBodyJastrowOrbitalBspline<FT>::calcGradient(MCWalkerConfiguration& W,
int iat,
int k,
std::vector<GradType>& grad)
void TwoBodyJastrowCUDA<FT>::calcGradient(MCWalkerConfiguration& W, int iat, int k, std::vector<GradType>& grad)
{
CTS::RealType sim_cell_radius = W.getLattice().SimulationCellRadius;
auto& walkers = W.WalkerList;
@ -451,7 +448,7 @@ void TwoBodyJastrowOrbitalBspline<FT>::calcGradient(MCWalkerConfiguration& W,
}
template<class FT>
void TwoBodyJastrowOrbitalBspline<FT>::addGradient(MCWalkerConfiguration& W, int iat, std::vector<GradType>& grad)
void TwoBodyJastrowCUDA<FT>::addGradient(MCWalkerConfiguration& W, int iat, std::vector<GradType>& grad)
{
auto& walkers = W.WalkerList;
cudaCheck(cudaEventSynchronize(gpu::gradientSyncTwoBodyEvent));
@ -479,7 +476,7 @@ void TwoBodyJastrowOrbitalBspline<FT>::addGradient(MCWalkerConfiguration& W, int
}
template<class FT>
void TwoBodyJastrowOrbitalBspline<FT>::gradLapl(MCWalkerConfiguration& W, GradMatrix& grad, ValueMatrix& lapl)
void TwoBodyJastrowCUDA<FT>::gradLapl(MCWalkerConfiguration& W, GradMatrix& grad, ValueMatrix& lapl)
{
CTS::RealType sim_cell_radius = W.getLattice().SimulationCellRadius;
auto& walkers = W.WalkerList;
@ -573,7 +570,7 @@ void TwoBodyJastrowOrbitalBspline<FT>::gradLapl(MCWalkerConfiguration& W, GradMa
{
char buff[500];
gethostname(buff, 500);
fprintf(stderr, "NAN in TwoBodyJastrowOrbitalBspline laplacian. Host=%s\n", buff);
fprintf(stderr, "NAN in TwoBodyJastrowCUDA laplacian. Host=%s\n", buff);
abort();
}
lapl(iw, ptcl) += GradLaplHost[4 * this->N * iw + +4 * ptcl + 3];
@ -582,7 +579,7 @@ void TwoBodyJastrowOrbitalBspline<FT>::gradLapl(MCWalkerConfiguration& W, GradMa
}
// explicit instantiations of templates
template class TwoBodyJastrowOrbitalBspline<BsplineFunctor<WaveFunctionComponent::RealType>>;
template class TwoBodyJastrowCUDA<BsplineFunctor<WaveFunctionComponent::RealType>>;
} // namespace qmcplusplus

View File

@ -17,7 +17,7 @@
#define TWO_BODY_JASTROW_ORBITAL_BSPLINE_H
#include "Particle/DistanceTable.h"
#include "QMCWaveFunctions/Jastrow/J2OrbitalSoA.h"
#include "QMCWaveFunctions/Jastrow/TwoBodyJastrow.h"
#include "QMCWaveFunctions/Jastrow/BsplineFunctor.h"
#include "Configuration.h"
#include "QMCWaveFunctions/Jastrow/CudaSpline.h"
@ -26,7 +26,7 @@
namespace qmcplusplus
{
template<class FT>
class TwoBodyJastrowOrbitalBspline : public J2OrbitalSoA<FT>
class TwoBodyJastrowCUDA : public TwoBodyJastrow<FT>
{
private:
bool UsePBC;
@ -37,7 +37,7 @@ private:
// The following is so we can refer to type aliases(defs) below the
// templated base class in the object hierarchy
// Mostly QMCTraits here
using JBase = J2OrbitalSoA<FT>;
using JBase = TwoBodyJastrow<FT>;
// Duplication that should be removed
using RealType = typename JBase::RealType;
using ValueType = typename JBase::ValueType;
@ -143,8 +143,8 @@ public:
std::vector<PosType>& quadPoints,
std::vector<ValueType>& psi_ratios) override;
TwoBodyJastrowOrbitalBspline(const std::string& obj_name, ParticleSet& pset)
: J2OrbitalSoA<FT>(obj_name, pset),
TwoBodyJastrowCUDA(const std::string& obj_name, ParticleSet& pset, bool use_offload)
: TwoBodyJastrow<FT>(obj_name, pset, use_offload),
PtclRef(pset),
L(obj_name + "L"),
Linv(obj_name + "Linv"),
@ -164,8 +164,7 @@ public:
{
UsePBC = pset.getLattice().SuperCellEnum;
app_log() << "UsePBC = " << UsePBC << std::endl;
int nsp = this->NumGroups = pset.groups();
GPUSplines.resize(nsp * nsp, 0);
GPUSplines.resize(this->NumGroups * this->NumGroups, 0);
if (UsePBC)
{
gpu::host_vector<CTS::RealType> LHost(OHMMS_DIM * OHMMS_DIM), LinvHost(OHMMS_DIM * OHMMS_DIM);

View File

@ -11,9 +11,22 @@
//////////////////////////////////////////////////////////////////////////////////////
#include "SpinorSet.h"
#include "Utilities/ResourceCollection.h"
#include "Platforms/OMPTarget/OMPTargetMath.hpp"
namespace qmcplusplus
{
struct SpinorSet::SpinorSetMultiWalkerResource : public Resource
{
SpinorSetMultiWalkerResource() : Resource("SpinorSet") {}
SpinorSetMultiWalkerResource(const SpinorSetMultiWalkerResource&) : SpinorSetMultiWalkerResource() {}
Resource* makeClone() const override { return new SpinorSetMultiWalkerResource(*this); }
OffloadMWVGLArray up_phi_vgl_v, dn_phi_vgl_v;
std::vector<ValueType> up_ratios, dn_ratios;
std::vector<GradType> up_grads, dn_grads;
std::vector<RealType> spins;
};
SpinorSet::SpinorSet(const std::string& my_name) : SPOSet(my_name), spo_up(nullptr), spo_dn(nullptr) {}
SpinorSet::~SpinorSet() = default;
@ -139,13 +152,10 @@ void SpinorSet::mw_evaluateVGLWithSpin(const RefVectorWithLeader<SPOSet>& spo_li
auto& P_leader = P_list.getLeader();
assert(this == &spo_leader);
IndexType nw = spo_list.size();
SPOSet& up_spo_leader = *(spo_leader.spo_up);
SPOSet& dn_spo_leader = *(spo_leader.spo_dn);
RefVectorWithLeader<SPOSet> up_spo_list(up_spo_leader);
RefVectorWithLeader<SPOSet> dn_spo_list(dn_spo_leader);
up_spo_list.reserve(nw);
dn_spo_list.reserve(nw);
IndexType nw = spo_list.size();
auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list);
auto& up_spo_leader = up_spo_list.getLeader();
auto& dn_spo_leader = dn_spo_list.getLeader();
std::vector<ValueVector> mw_up_psi_work, mw_dn_psi_work;
std::vector<GradVector> mw_up_dpsi_work, mw_dn_dpsi_work;
@ -171,10 +181,6 @@ void SpinorSet::mw_evaluateVGLWithSpin(const RefVectorWithLeader<SPOSet>& spo_li
GradVector tmp_grad_vec(OrbitalSetSize);
for (int iw = 0; iw < nw; iw++)
{
SpinorSet& spinor = spo_list.getCastedElement<SpinorSet>(iw);
up_spo_list.emplace_back(*(spinor.spo_up));
dn_spo_list.emplace_back(*(spinor.spo_dn));
mw_up_psi_work.emplace_back(tmp_val_vec);
up_psi_v_list.emplace_back(mw_up_psi_work.back());
mw_dn_psi_work.emplace_back(tmp_val_vec);
@ -228,42 +234,35 @@ void SpinorSet::mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader
const size_t nw = spo_list.size();
const size_t norb_requested = phi_vgl_v.size(2);
OffloadMWVGLArray up_phi_vgl_v, dn_phi_vgl_v;
auto& mw_res = *spo_leader.mw_res_;
auto& up_phi_vgl_v = mw_res.up_phi_vgl_v;
auto& dn_phi_vgl_v = mw_res.dn_phi_vgl_v;
auto& up_ratios = mw_res.up_ratios;
auto& dn_ratios = mw_res.dn_ratios;
auto& up_grads = mw_res.up_grads;
auto& dn_grads = mw_res.dn_grads;
auto& spins = mw_res.spins;
up_phi_vgl_v.resize(DIM_VGL, nw, norb_requested);
dn_phi_vgl_v.resize(DIM_VGL, nw, norb_requested);
std::vector<ValueType> up_ratios(nw), dn_ratios(nw);
std::vector<GradType> up_grads(nw), dn_grads(nw);
up_ratios.resize(nw);
dn_ratios.resize(nw);
up_grads.resize(nw);
dn_grads.resize(nw);
spins.resize(nw);
SPOSet& up_spo_leader = *(spo_leader.spo_up);
SPOSet& dn_spo_leader = *(spo_leader.spo_dn);
RefVectorWithLeader<SPOSet> up_spo_list(up_spo_leader);
RefVectorWithLeader<SPOSet> dn_spo_list(dn_spo_leader);
up_spo_list.reserve(nw);
dn_spo_list.reserve(nw);
for (int iw = 0; iw < nw; iw++)
{
SpinorSet& spinor = spo_list.getCastedElement<SpinorSet>(iw);
up_spo_list.emplace_back(*(spinor.spo_up));
dn_spo_list.emplace_back(*(spinor.spo_dn));
}
auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list);
auto& up_spo_leader = up_spo_list.getLeader();
auto& dn_spo_leader = dn_spo_list.getLeader();
up_spo_leader.mw_evaluateVGLandDetRatioGrads(up_spo_list, P_list, iat, invRow_ptr_list, up_phi_vgl_v, up_ratios,
up_grads);
if (up_spo_leader.isOMPoffload())
up_phi_vgl_v.updateFrom();
dn_spo_leader.mw_evaluateVGLandDetRatioGrads(dn_spo_list, P_list, iat, invRow_ptr_list, dn_phi_vgl_v, dn_ratios,
dn_grads);
if (dn_spo_leader.isOMPoffload())
dn_phi_vgl_v.updateFrom();
//To do: this is not optimized. Right now, we are building the spinors on the CPU and then updating the data to the GPU at the end
//with the updateTo(). Need to eventually rework this to be all on the GPU to avoid these data transfers, but this should work as an
//initial implementation
for (int iw = 0; iw < nw; iw++)
{
ParticleSet::Scalar_t s = P_list[iw].activeSpin(iat);
spins[iw] = s;
RealType coss = std::cos(s);
RealType sins = std::sin(s);
@ -274,20 +273,27 @@ void SpinorSet::mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader
ratios[iw] = eis * up_ratios[iw] + emis * dn_ratios[iw];
grads[iw] = (eis * up_grads[iw] * up_ratios[iw] + emis * dn_grads[iw] * dn_ratios[iw]) / ratios[iw];
spingrads[iw] = eye * (eis * up_ratios[iw] - emis * dn_ratios[iw]) / ratios[iw];
//loop over vgl to construct spinor vgl
for (int idim = 0; idim < DIM_VGL; idim++)
{
ValueType* phi_v = phi_vgl_v.data_at(idim, iw, 0);
ValueType* up_phi_v = up_phi_vgl_v.data_at(idim, iw, 0);
ValueType* dn_phi_v = dn_phi_vgl_v.data_at(idim, iw, 0);
for (int iorb = 0; iorb < norb_requested; iorb++)
phi_v[iorb] = eis * up_phi_v[iorb] + emis * dn_phi_v[iorb];
}
}
//Now update data to device
phi_vgl_v.updateTo();
auto* spins_ptr = spins.data();
//This data lives on the device
auto* phi_vgl_ptr = phi_vgl_v.data();
auto* up_phi_vgl_ptr = up_phi_vgl_v.data();
auto* dn_phi_vgl_ptr = dn_phi_vgl_v.data();
PRAGMA_OFFLOAD("omp target teams distribute map(to:spins_ptr[0:nw])")
for (int iw = 0; iw < nw; iw++)
{
RealType c, s;
omptarget::sincos(spins_ptr[iw], &s, &c);
ValueType eis(c, s), emis(c, -s);
PRAGMA_OFFLOAD("omp parallel for collapse(2)")
for (int idim = 0; idim < DIM_VGL; idim++)
for (int iorb = 0; iorb < norb_requested; iorb++)
{
auto offset = idim * nw * norb_requested + iw * norb_requested + iorb;
phi_vgl_ptr[offset] = eis * up_phi_vgl_ptr[offset] + emis * dn_phi_vgl_ptr[offset];
}
}
}
void SpinorSet::evaluate_notranspose(const ParticleSet& P,
@ -348,12 +354,9 @@ void SpinorSet::mw_evaluate_notranspose(const RefVectorWithLeader<SPOSet>& spo_l
IndexType nw = spo_list.size();
IndexType nelec = P_leader.getTotalNum();
SPOSet& up_spo_leader = *(spo_leader.spo_up);
SPOSet& dn_spo_leader = *(spo_leader.spo_dn);
RefVectorWithLeader<SPOSet> up_spo_list(up_spo_leader);
RefVectorWithLeader<SPOSet> dn_spo_list(dn_spo_leader);
up_spo_list.reserve(nw);
dn_spo_list.reserve(nw);
auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list);
auto& up_spo_leader = up_spo_list.getLeader();
auto& dn_spo_leader = dn_spo_list.getLeader();
std::vector<ValueMatrix> mw_up_logdet, mw_dn_logdet;
std::vector<GradMatrix> mw_up_dlogdet, mw_dn_dlogdet;
@ -379,10 +382,6 @@ void SpinorSet::mw_evaluate_notranspose(const RefVectorWithLeader<SPOSet>& spo_l
GradMatrix tmp_grad_mat(nelec, OrbitalSetSize);
for (int iw = 0; iw < nw; iw++)
{
SpinorSet& spinor = spo_list.getCastedElement<SpinorSet>(iw);
up_spo_list.emplace_back(*(spinor.spo_up));
dn_spo_list.emplace_back(*(spinor.spo_dn));
mw_up_logdet.emplace_back(tmp_val_mat);
up_logdet_list.emplace_back(mw_up_logdet.back());
mw_dn_logdet.emplace_back(tmp_val_mat);
@ -530,4 +529,56 @@ std::unique_ptr<SPOSet> SpinorSet::makeClone() const
return myclone;
}
void SpinorSet::createResource(ResourceCollection& collection) const
{
spo_up->createResource(collection);
spo_dn->createResource(collection);
auto index = collection.addResource(std::make_unique<SpinorSetMultiWalkerResource>());
}
void SpinorSet::acquireResource(ResourceCollection& collection, const RefVectorWithLeader<SPOSet>& spo_list) const
{
auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list);
auto& spo_leader = spo_list.getCastedLeader<SpinorSet>();
auto& up_spo_leader = up_spo_list.getLeader();
auto& dn_spo_leader = dn_spo_list.getLeader();
up_spo_leader.acquireResource(collection, up_spo_list);
dn_spo_leader.acquireResource(collection, dn_spo_list);
auto res_ptr = dynamic_cast<SpinorSetMultiWalkerResource*>(collection.lendResource().release());
if (!res_ptr)
throw std::runtime_error("SpinorSet::acquireResource dynamic_cast failed");
spo_leader.mw_res_.reset(res_ptr);
}
void SpinorSet::releaseResource(ResourceCollection& collection, const RefVectorWithLeader<SPOSet>& spo_list) const
{
auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list);
auto& spo_leader = spo_list.getCastedLeader<SpinorSet>();
auto& up_spo_leader = up_spo_list.getLeader();
auto& dn_spo_leader = dn_spo_list.getLeader();
up_spo_leader.releaseResource(collection, up_spo_list);
dn_spo_leader.releaseResource(collection, dn_spo_list);
collection.takebackResource(std::move(spo_leader.mw_res_));
}
std::pair<RefVectorWithLeader<SPOSet>, RefVectorWithLeader<SPOSet>> SpinorSet::extractSpinComponentRefList(
const RefVectorWithLeader<SPOSet>& spo_list) const
{
auto& spo_leader = spo_list.getCastedLeader<SpinorSet>();
IndexType nw = spo_list.size();
SPOSet& up_spo_leader = *(spo_leader.spo_up);
SPOSet& dn_spo_leader = *(spo_leader.spo_dn);
RefVectorWithLeader<SPOSet> up_spo_list(up_spo_leader);
RefVectorWithLeader<SPOSet> dn_spo_list(dn_spo_leader);
up_spo_list.reserve(nw);
dn_spo_list.reserve(nw);
for (int iw = 0; iw < nw; iw++)
{
SpinorSet& spinor = spo_list.getCastedElement<SpinorSet>(iw);
up_spo_list.emplace_back(*(spinor.spo_up));
dn_spo_list.emplace_back(*(spinor.spo_dn));
}
return std::make_pair(up_spo_list, dn_spo_list);
}
} // namespace qmcplusplus

View File

@ -167,7 +167,22 @@ public:
std::unique_ptr<SPOSet> makeClone() const override;
void createResource(ResourceCollection& collection) const override;
void acquireResource(ResourceCollection& collection, const RefVectorWithLeader<SPOSet>& spo_list) const override;
void releaseResource(ResourceCollection& collection, const RefVectorWithLeader<SPOSet>& spo_list) const override;
/// check if the multi walker resource is owned. For testing only.
bool isResourceOwned() const { return bool(mw_res_); }
private:
struct SpinorSetMultiWalkerResource;
std::unique_ptr<SpinorSetMultiWalkerResource> mw_res_;
std::pair<RefVectorWithLeader<SPOSet>, RefVectorWithLeader<SPOSet>> extractSpinComponentRefList(
const RefVectorWithLeader<SPOSet>& spo_list) const;
//Sposet for the up and down channels of our spinors.
std::unique_ptr<SPOSet> spo_up;
std::unique_ptr<SPOSet> spo_dn;

View File

@ -19,7 +19,7 @@
#include "QMCWaveFunctions/Jastrow/BsplineFunctor.h"
#include "QMCWaveFunctions/Jastrow/RadialJastrowBuilder.h"
#include "ParticleBase/ParticleAttribOps.h"
#include "QMCWaveFunctions/Jastrow/J2OrbitalSoA.h"
#include "QMCWaveFunctions/Jastrow/TwoBodyJastrow.h"
#include <cstdio>
#include <string>
@ -84,7 +84,7 @@ TEST_CASE("BSpline builder Jastrow J2", "[wavefunction]")
RadialJastrowBuilder jastrow(c, elec_);
using J2Type = J2OrbitalSoA<BsplineFunctor<RealType>>;
using J2Type = TwoBodyJastrow<BsplineFunctor<RealType>>;
auto j2_uptr = jastrow.buildComponent(jas1);
J2Type* j2 = dynamic_cast<J2Type*>(j2_uptr.get());
REQUIRE(j2);

View File

@ -12,7 +12,7 @@
#include "catch.hpp"
#include <memory>
#include "Jastrow/J2OrbitalSoA.h"
#include "Jastrow/TwoBodyJastrow.h"
#include "Jastrow/FakeFunctor.h"
namespace qmcplusplus
@ -20,25 +20,25 @@ namespace qmcplusplus
using FakeJasFunctor = FakeFunctor<OHMMS_PRECISION>;
TEST_CASE("J2OrbitalSoA simple", "[wavefunction]")
TEST_CASE("TwoBodyJastrow simple", "[wavefunction]")
{
const SimulationCell simulation_cell;
ParticleSet elec(simulation_cell);
elec.setName("e");
elec.create({1, 1});
J2OrbitalSoA<FakeJasFunctor> jorb("J2_fake", elec);
TwoBodyJastrow<FakeJasFunctor> jorb("J2_fake", elec, false);
opt_variables_type active;
jorb.checkOutVariables(active);
}
TEST_CASE("J2OrbitalSoA one species and two variables", "[wavefunction]")
TEST_CASE("TwoBodyJastrow one species and two variables", "[wavefunction]")
{
const SimulationCell simulation_cell;
ParticleSet elec(simulation_cell);
elec.setName("e");
elec.create({1, 1});
J2OrbitalSoA<FakeJasFunctor> jorb("J2_fake", elec);
TwoBodyJastrow<FakeJasFunctor> jorb("J2_fake", elec, false);
auto j2_uptr = std::make_unique<FakeJasFunctor>("test_fake");
auto& j2 = *j2_uptr;
@ -79,12 +79,12 @@ ParticleSet get_two_species_particleset(const SimulationCell& simulation_cell)
}
// Two variables, both active
TEST_CASE("J2OrbitalSoA two variables", "[wavefunction]")
TEST_CASE("TwoBodyJastrow two variables", "[wavefunction]")
{
const SimulationCell simulation_cell;
ParticleSet elec = get_two_species_particleset(simulation_cell);
J2OrbitalSoA<FakeJasFunctor> jorb("J2_fake", elec);
TwoBodyJastrow<FakeJasFunctor> jorb("J2_fake", elec, false);
auto j2a_uptr = std::make_unique<FakeJasFunctor>("test_fake_a");
auto& j2a = *j2a_uptr;
@ -146,12 +146,12 @@ TEST_CASE("J2OrbitalSoA two variables", "[wavefunction]")
// Reproduce 2814. If the variational parameter for the first Jastrow factor
// is not active, the offsets are not correct.
// "First" means the first in the function list F, which has species indices 0,0
TEST_CASE("J2OrbitalSoA variables fail", "[wavefunction]")
TEST_CASE("TwoBodyJastrow variables fail", "[wavefunction]")
{
const SimulationCell simulation_cell;
ParticleSet elec = get_two_species_particleset(simulation_cell);
J2OrbitalSoA<FakeJasFunctor> jorb("J2_fake", elec);
TwoBodyJastrow<FakeJasFunctor> jorb("J2_fake", elec, false);
auto j2a_uptr = std::make_unique<FakeJasFunctor>("test_fake_a");
auto& j2a = *j2a_uptr;
@ -212,12 +212,12 @@ TEST_CASE("J2OrbitalSoA variables fail", "[wavefunction]")
// Other variational parameters in the wavefunction (e.g. one-body Jastrow)
TEST_CASE("J2OrbitalSoA other variables", "[wavefunction]")
TEST_CASE("TwoBodyJastrow other variables", "[wavefunction]")
{
const SimulationCell simulation_cell;
ParticleSet elec = get_two_species_particleset(simulation_cell);
J2OrbitalSoA<FakeJasFunctor> jorb("J2_fake", elec);
TwoBodyJastrow<FakeJasFunctor> jorb("J2_fake", elec, false);
auto j2a_uptr = std::make_unique<FakeJasFunctor>("test_fake_a");
auto j2a = *j2a_uptr;
@ -285,7 +285,7 @@ TEST_CASE("J2OrbitalSoA other variables", "[wavefunction]")
// Reproduce 3137. If the number of particle types equals the number of particles
// the two body jastrow is not constructed correctly (except in the case of two
// particles).
TEST_CASE("J2OrbitalSoA Jastrow three particles of three types", "[wavefunction]")
TEST_CASE("TwoBodyJastrow Jastrow three particles of three types", "[wavefunction]")
{
const SimulationCell simulation_cell;
ParticleSet ions(simulation_cell);
@ -309,8 +309,7 @@ TEST_CASE("J2OrbitalSoA Jastrow three particles of three types", "[wavefunction]
elec.R[2][1] = 0.9679;
elec.R[2][2] = 0.0128914;
J2OrbitalSoA<FakeJasFunctor> jorb("J2_fake", elec);
TwoBodyJastrow<FakeJasFunctor> jorb("J2_fake", elec, false);
// 0 uu (0,0)
// 1 ud (0,1)

View File

@ -19,6 +19,7 @@
#include "Particle/DistanceTable.h"
#include "QMCWaveFunctions/SPOSetBuilderFactory.h"
#include "Utilities/ResourceCollection.h"
#include "QMCWaveFunctions/SpinorSet.h"
namespace qmcplusplus
{
@ -72,7 +73,7 @@ void test_lcao_spinor()
<basisset transform="yes"/>
<sposet name="myspo" size="1"/>
</sposet_builder>
</tmp>)XML";
</tmp>)XML";
Libxml2Document doc;
bool okay = doc.parseFromString(particles);
@ -253,6 +254,15 @@ void test_lcao_spinor()
spo_list.push_back(*spo);
spo_list.push_back(*spo_2);
//test resource APIs
//First resource is created, and then passed to the colleciton so it should be null
ResourceCollection spo_res("test_spo_res");
spo->createResource(spo_res);
SpinorSet& spinor = spo_list.getCastedLeader<SpinorSet>();
REQUIRE(!spinor.isResourceOwned());
ResourceCollectionTeamLock<SPOSet> mw_spo_lock(spo_res, spo_list);
REQUIRE(spinor.isResourceOwned());
SPOSet::ValueMatrix psiM_2(elec_.R.size(), spo->getOrbitalSetSize());
SPOSet::GradMatrix dpsiM_2(elec_.R.size(), spo->getOrbitalSetSize());
SPOSet::ValueMatrix d2psiM_2(elec_.R.size(), spo->getOrbitalSetSize());
@ -724,7 +734,7 @@ void test_lcao_spinor_ion_derivs()
<basisset transform="yes"/>
<sposet name="myspo" size="1"/>
</sposet_builder>
</tmp>)XML";
</tmp>)XML";
Libxml2Document doc;
bool okay = doc.parseFromString(particles);

View File

@ -154,49 +154,52 @@ TEST_CASE("TrialWaveFunction flex_evaluateParameterDerivatives", "[wavefunction]
elec.createResource(pset_res);
psi.createResource(twf_res);
RefVectorWithLeader<TrialWaveFunction> wf_list(psi, {psi});
RefVectorWithLeader<ParticleSet> p_list(elec, {elec});
{
// Test list with one wavefunction
RefVectorWithLeader<TrialWaveFunction> wf_list(psi, {psi});
RefVectorWithLeader<ParticleSet> p_list(elec, {elec});
ResourceCollectionTeamLock<ParticleSet> mw_pset_lock(pset_res, p_list);
ResourceCollectionTeamLock<TrialWaveFunction> mw_twf_lock(twf_res, wf_list);
ResourceCollectionTeamLock<ParticleSet> mw_pset_lock(pset_res, p_list);
ResourceCollectionTeamLock<TrialWaveFunction> mw_twf_lock(twf_res, wf_list);
// Test list with one wavefunction
int nentry = 1;
RecordArray<ValueType> dlogpsi_list(nentry, nparam);
RecordArray<ValueType> dhpsi_over_psi_list(nentry, nparam);
const int nentry = 1;
RecordArray<ValueType> dlogpsi_list(nentry, nparam);
RecordArray<ValueType> dhpsi_over_psi_list(nentry, nparam);
TrialWaveFunction::mw_evaluateParameterDerivatives(wf_list, p_list, var_param, dlogpsi_list, dhpsi_over_psi_list);
TrialWaveFunction::mw_evaluateParameterDerivatives(wf_list, p_list, var_param, dlogpsi_list, dhpsi_over_psi_list);
CHECK(dlogpsi[0] == ValueApprox(dlogpsi_list[0][0]));
CHECK(dhpsioverpsi[0] == ValueApprox(dhpsi_over_psi_list[0][0]));
CHECK(dlogpsi[0] == ValueApprox(dlogpsi_list[0][0]));
CHECK(dhpsioverpsi[0] == ValueApprox(dhpsi_over_psi_list[0][0]));
}
// Test list with two wavefunctions
{ // Test list with two wavefunctions
const int nentry = 2;
RecordArray<ValueType> dlogpsi_list(nentry, nparam);
RecordArray<ValueType> dhpsi_over_psi_list(nentry, nparam);
nentry = 2;
dlogpsi_list.resize(nentry, nparam);
dhpsi_over_psi_list.resize(nentry, nparam);
ParticleSet elec2(elec);
elec2.R[0][0] = 0.9;
elec2.update();
ParticleSet elec2(elec);
elec2.R[0][0] = 0.9;
elec2.update();
RefVectorWithLeader<TrialWaveFunction> wf_list(psi, {psi, psi});
RefVectorWithLeader<ParticleSet> p_list(elec, {elec, elec2});
ResourceCollectionTeamLock<ParticleSet> mw_pset_lock(pset_res, p_list);
ResourceCollectionTeamLock<TrialWaveFunction> mw_twf_lock(twf_res, wf_list);
// Will re-using the same TrialWaveFunction work, or should a new one be created.
// If a new one is needed, what is the easiest way to copy?
wf_list.push_back(psi);
p_list.push_back(elec2);
TrialWaveFunction::mw_evaluateParameterDerivatives(wf_list, p_list, var_param, dlogpsi_list, dhpsi_over_psi_list);
TrialWaveFunction::mw_evaluateParameterDerivatives(wf_list, p_list, var_param, dlogpsi_list, dhpsi_over_psi_list);
Vector<ValueType> dlogpsi2(nparam);
Vector<ValueType> dhpsioverpsi2(nparam);
Vector<ValueType> dlogpsi2(nparam);
Vector<ValueType> dhpsioverpsi2(nparam);
psi.evaluateDerivatives(elec2, var_param, dlogpsi2, dhpsioverpsi2);
psi.evaluateDerivatives(elec2, var_param, dlogpsi2, dhpsioverpsi2);
CHECK(dlogpsi[0] == ValueApprox(dlogpsi_list[0][0]));
CHECK(dhpsioverpsi[0] == ValueApprox(dhpsi_over_psi_list[0][0]));
CHECK(dlogpsi[0] == ValueApprox(dlogpsi_list[0][0]));
CHECK(dhpsioverpsi[0] == ValueApprox(dhpsi_over_psi_list[0][0]));
CHECK(dlogpsi2[0] == ValueApprox(dlogpsi_list[1][0]));
CHECK(dhpsioverpsi2[0] == ValueApprox(dhpsi_over_psi_list[1][0]));
CHECK(dlogpsi2[0] == ValueApprox(dlogpsi_list[1][0]));
CHECK(dhpsioverpsi2[0] == ValueApprox(dhpsi_over_psi_list[1][0]));
}
}
@ -258,24 +261,6 @@ TEST_CASE("TrialWaveFunction flex_evaluateDeltaLogSetup", "[wavefunction]")
psi2.addComponent(std::move(orb2));
// Prepare to compare using list with one wavefunction and particleset
int nentry = 1;
int nelec = 2;
RefVectorWithLeader<ParticleSet> p_list(elec1b, {elec1b});
RefVectorWithLeader<TrialWaveFunction> wf_list(psi, {psi});
// Evaluate new flex_evaluateDeltaLogSetup
std::vector<RealType> logpsi_fixed_list(nentry);
std::vector<RealType> logpsi_opt_list(nentry);
auto fixedG_list_ptr = create_particle_gradient(nelec, nentry);
auto fixedL_list_ptr = create_particle_laplacian(nelec, nentry);
auto fixedG_list = convertUPtrToRefVector(fixedG_list_ptr);
auto fixedL_list = convertUPtrToRefVector(fixedL_list_ptr);
// testing batched interfaces
ResourceCollection pset_res("test_pset_res");
ResourceCollection twf_res("test_twf_res");
@ -283,16 +268,7 @@ TEST_CASE("TrialWaveFunction flex_evaluateDeltaLogSetup", "[wavefunction]")
elec1b.createResource(pset_res);
psi.createResource(twf_res);
ResourceCollectionTeamLock<ParticleSet> mw_pset_lock(pset_res, p_list);
ResourceCollectionTeamLock<TrialWaveFunction> mw_twf_lock(twf_res, wf_list);
TrialWaveFunction::mw_evaluateDeltaLogSetup(wf_list, p_list, logpsi_fixed_list, logpsi_opt_list, fixedG_list,
fixedL_list);
// Evaluate old (single item) evaluateDeltaLog
const int nelec = 2;
RealType logpsi_fixed_r1;
RealType logpsi_opt_r1;
ParticleSet::ParticleGradient fixedG1;
@ -300,153 +276,176 @@ TEST_CASE("TrialWaveFunction flex_evaluateDeltaLogSetup", "[wavefunction]")
fixedG1.resize(nelec);
fixedL1.resize(nelec);
psi.evaluateDeltaLogSetup(elec1, logpsi_fixed_r1, logpsi_opt_r1, fixedG1, fixedL1);
{ // Prepare to compare using list with one wavefunction and particleset
const int nentry = 1;
CHECK(logpsi_fixed_r1 == Approx(logpsi_fixed_list[0]));
CHECK(logpsi_opt_r1 == Approx(logpsi_opt_list[0]));
RefVectorWithLeader<ParticleSet> p_list(elec1b, {elec1b});
RefVectorWithLeader<TrialWaveFunction> wf_list(psi, {psi});
CHECK(fixedG1[0][0] == ValueApprox(fixedG_list[0].get()[0][0]));
CHECK(fixedG1[0][1] == ValueApprox(fixedG_list[0].get()[0][1]));
CHECK(fixedG1[0][2] == ValueApprox(fixedG_list[0].get()[0][2]));
CHECK(fixedG1[1][0] == ValueApprox(fixedG_list[0].get()[1][0]));
CHECK(fixedG1[1][1] == ValueApprox(fixedG_list[0].get()[1][1]));
CHECK(fixedG1[1][2] == ValueApprox(fixedG_list[0].get()[1][2]));
// Evaluate new flex_evaluateDeltaLogSetup
auto fixedG_list_ptr = create_particle_gradient(nelec, nentry);
auto fixedL_list_ptr = create_particle_laplacian(nelec, nentry);
auto fixedG_list = convertUPtrToRefVector(fixedG_list_ptr);
auto fixedL_list = convertUPtrToRefVector(fixedL_list_ptr);
CHECK(fixedL1[0] == ValueApprox(fixedL_list[0].get()[0]));
CHECK(fixedL1[1] == ValueApprox(fixedL_list[0].get()[1]));
std::vector<RealType> logpsi_fixed_list(nentry);
std::vector<RealType> logpsi_opt_list(nentry);
// Compare the ParticleSet gradient and laplacian storage
// This should be temporary until these get removed from ParticleSet
CHECK(elec1b.L[0] == ValueApprox(elec1.L[0]));
CHECK(elec1b.L[1] == ValueApprox(elec1.L[1]));
ResourceCollectionTeamLock<ParticleSet> mw_pset_lock(pset_res, p_list);
ResourceCollectionTeamLock<TrialWaveFunction> mw_twf_lock(twf_res, wf_list);
CHECK(elec1b.G[0][0] == ValueApprox(elec1.G[0][0]));
CHECK(elec1b.G[1][1] == ValueApprox(elec1.G[1][1]));
// Prepare to compare using list with two wavefunctions and particlesets
nentry = 2;
wf_list.push_back(psi2);
p_list.push_back(elec2b);
ParticleSet::ParticleGradient G2;
ParticleSet::ParticleLaplacian L2;
G2.resize(nelec);
L2.resize(nelec);
fixedG_list.push_back(G2);
fixedL_list.push_back(L2);
std::vector<RealType> logpsi_fixed_list2(nentry);
std::vector<RealType> logpsi_opt_list2(nentry);
RealType logpsi_fixed_r1b;
RealType logpsi_opt_r1b;
psi2.evaluateDeltaLogSetup(elec1, logpsi_fixed_r1b, logpsi_opt_r1b, fixedG1, fixedL1);
CHECK(logpsi_fixed_r1 == Approx(logpsi_fixed_r1b));
CHECK(logpsi_opt_r1 == Approx(logpsi_opt_r1b));
auto fixedG_list2_ptr = create_particle_gradient(nelec, nentry);
auto fixedL_list2_ptr = create_particle_laplacian(nelec, nentry);
auto fixedG_list2 = convertUPtrToRefVector(fixedG_list2_ptr);
auto fixedL_list2 = convertUPtrToRefVector(fixedL_list2_ptr);
TrialWaveFunction::mw_evaluateDeltaLogSetup(wf_list, p_list, logpsi_fixed_list2, logpsi_opt_list2, fixedG_list2,
fixedL_list2);
// Evaluate old (single item) evaluateDeltaLog corresponding to the second wavefunction/particleset
RealType logpsi_fixed_r2;
RealType logpsi_opt_r2;
ParticleSet::ParticleGradient fixedG2;
ParticleSet::ParticleLaplacian fixedL2;
fixedG2.resize(nelec);
fixedL2.resize(nelec);
psi2.setLogPsi(0.0);
psi2.evaluateDeltaLogSetup(elec2, logpsi_fixed_r2, logpsi_opt_r2, fixedG2, fixedL2);
TrialWaveFunction::mw_evaluateDeltaLogSetup(wf_list, p_list, logpsi_fixed_list, logpsi_opt_list, fixedG_list,
fixedL_list);
CHECK(logpsi_fixed_r1 == Approx(logpsi_fixed_r1b));
// Evaluate old (single item) evaluateDeltaLog
psi.evaluateDeltaLogSetup(elec1, logpsi_fixed_r1, logpsi_opt_r1, fixedG1, fixedL1);
CHECK(logpsi_fixed_list[0] == Approx(logpsi_fixed_list2[0]));
CHECK(logpsi_fixed_r1 == Approx(logpsi_fixed_list[0]));
CHECK(logpsi_opt_r1 == Approx(logpsi_opt_list[0]));
CHECK(logpsi_fixed_r1 == Approx(logpsi_fixed_list2[0]));
CHECK(logpsi_opt_r1 == Approx(logpsi_opt_list2[0]));
CHECK(fixedG1[0][0] == ValueApprox(fixedG_list[0].get()[0][0]));
CHECK(fixedG1[0][1] == ValueApprox(fixedG_list[0].get()[0][1]));
CHECK(fixedG1[0][2] == ValueApprox(fixedG_list[0].get()[0][2]));
CHECK(fixedG1[1][0] == ValueApprox(fixedG_list[0].get()[1][0]));
CHECK(fixedG1[1][1] == ValueApprox(fixedG_list[0].get()[1][1]));
CHECK(fixedG1[1][2] == ValueApprox(fixedG_list[0].get()[1][2]));
CHECK(logpsi_fixed_r2 == Approx(logpsi_fixed_list2[1]));
CHECK(logpsi_opt_r2 == Approx(logpsi_opt_list2[1]));
CHECK(fixedL1[0] == ValueApprox(fixedL_list[0].get()[0]));
CHECK(fixedL1[1] == ValueApprox(fixedL_list[0].get()[1]));
// Laplacian for first entry in the wavefunction/particleset list
CHECK(fixedL1[0] == ValueApprox(fixedL_list2[0].get()[0]));
CHECK(fixedL1[1] == ValueApprox(fixedL_list2[0].get()[1]));
// Laplacian for second entry in the wavefunction/particleset list
CHECK(fixedL2[0] == ValueApprox(fixedL_list2[1].get()[0]));
CHECK(fixedL2[1] == ValueApprox(fixedL_list2[1].get()[1]));
// Compare the ParticleSet gradient and laplacian storage
// This should be temporary until these get removed from ParticleSet
CHECK(elec1b.L[0] == ValueApprox(elec1.L[0]));
CHECK(elec1b.L[1] == ValueApprox(elec1.L[1]));
CHECK(elec1b.G[0][0] == ValueApprox(elec1.G[0][0]));
CHECK(elec1b.G[1][1] == ValueApprox(elec1.G[1][1]));
}
{ // Prepare to compare using list with two wavefunctions and particlesets
const int nentry = 2;
RefVectorWithLeader<ParticleSet> p_list(elec1b, {elec1b, elec2b});
RefVectorWithLeader<TrialWaveFunction> wf_list(psi, {psi, psi2});
ResourceCollectionTeamLock<ParticleSet> mw_pset_lock(pset_res, p_list);
ResourceCollectionTeamLock<TrialWaveFunction> mw_twf_lock(twf_res, wf_list);
auto fixedG_list_ptr = create_particle_gradient(nelec, nentry);
auto fixedL_list_ptr = create_particle_laplacian(nelec, nentry);
auto fixedG_list = convertUPtrToRefVector(fixedG_list_ptr);
auto fixedL_list = convertUPtrToRefVector(fixedL_list_ptr);
std::vector<RealType> logpsi_fixed_list2(nentry);
std::vector<RealType> logpsi_opt_list2(nentry);
RealType logpsi_fixed_r1b;
RealType logpsi_opt_r1b;
psi2.evaluateDeltaLogSetup(elec1, logpsi_fixed_r1b, logpsi_opt_r1b, fixedG1, fixedL1);
CHECK(logpsi_fixed_r1 == Approx(logpsi_fixed_r1b));
CHECK(logpsi_opt_r1 == Approx(logpsi_opt_r1b));
auto fixedG_list2_ptr = create_particle_gradient(nelec, nentry);
auto fixedL_list2_ptr = create_particle_laplacian(nelec, nentry);
auto fixedG_list2 = convertUPtrToRefVector(fixedG_list2_ptr);
auto fixedL_list2 = convertUPtrToRefVector(fixedL_list2_ptr);
TrialWaveFunction::mw_evaluateDeltaLogSetup(wf_list, p_list, logpsi_fixed_list2, logpsi_opt_list2, fixedG_list2,
fixedL_list2);
// Evaluate old (single item) evaluateDeltaLog corresponding to the second wavefunction/particleset
RealType logpsi_fixed_r2;
RealType logpsi_opt_r2;
ParticleSet::ParticleGradient fixedG2;
ParticleSet::ParticleLaplacian fixedL2;
fixedG2.resize(nelec);
fixedL2.resize(nelec);
psi2.setLogPsi(0.0);
psi2.evaluateDeltaLogSetup(elec2, logpsi_fixed_r2, logpsi_opt_r2, fixedG2, fixedL2);
CHECK(logpsi_fixed_r1 == Approx(logpsi_fixed_r1b));
CHECK(logpsi_opt_r1 == Approx(logpsi_opt_r1b));
CHECK(logpsi_fixed_r1 == Approx(logpsi_fixed_list2[0]));
CHECK(logpsi_opt_r1 == Approx(logpsi_opt_list2[0]));
CHECK(logpsi_fixed_r2 == Approx(logpsi_fixed_list2[1]));
CHECK(logpsi_opt_r2 == Approx(logpsi_opt_list2[1]));
// Laplacian for first entry in the wavefunction/particleset list
CHECK(fixedL1[0] == ValueApprox(fixedL_list2[0].get()[0]));
CHECK(fixedL1[1] == ValueApprox(fixedL_list2[0].get()[1]));
// Laplacian for second entry in the wavefunction/particleset list
CHECK(fixedL2[0] == ValueApprox(fixedL_list2[1].get()[0]));
CHECK(fixedL2[1] == ValueApprox(fixedL_list2[1].get()[1]));
// First entry wavefunction/particleset list
// Gradient for first electron
CHECK(fixedG1[0][0] == ValueApprox(fixedG_list2[0].get()[0][0]));
CHECK(fixedG1[0][1] == ValueApprox(fixedG_list2[0].get()[0][1]));
CHECK(fixedG1[0][2] == ValueApprox(fixedG_list2[0].get()[0][2]));
// Gradient for second electron
CHECK(fixedG1[1][0] == ValueApprox(fixedG_list2[0].get()[1][0]));
CHECK(fixedG1[1][1] == ValueApprox(fixedG_list2[0].get()[1][1]));
CHECK(fixedG1[1][2] == ValueApprox(fixedG_list2[0].get()[1][2]));
// First entry wavefunction/particleset list
// Gradient for first electron
CHECK(fixedG1[0][0] == ValueApprox(fixedG_list2[0].get()[0][0]));
CHECK(fixedG1[0][1] == ValueApprox(fixedG_list2[0].get()[0][1]));
CHECK(fixedG1[0][2] == ValueApprox(fixedG_list2[0].get()[0][2]));
// Gradient for second electron
CHECK(fixedG1[1][0] == ValueApprox(fixedG_list2[0].get()[1][0]));
CHECK(fixedG1[1][1] == ValueApprox(fixedG_list2[0].get()[1][1]));
CHECK(fixedG1[1][2] == ValueApprox(fixedG_list2[0].get()[1][2]));
// Second entry wavefunction/particleset list
// Gradient for first electron
CHECK(fixedG2[0][0] == ValueApprox(fixedG_list2[1].get()[0][0]));
CHECK(fixedG2[0][1] == ValueApprox(fixedG_list2[1].get()[0][1]));
CHECK(fixedG2[0][2] == ValueApprox(fixedG_list2[1].get()[0][2]));
// Gradient for second electron
CHECK(fixedG2[1][0] == ValueApprox(fixedG_list2[1].get()[1][0]));
CHECK(fixedG2[1][1] == ValueApprox(fixedG_list2[1].get()[1][1]));
CHECK(fixedG2[1][2] == ValueApprox(fixedG_list2[1].get()[1][2]));
// Second entry wavefunction/particleset list
// Gradient for first electron
CHECK(fixedG2[0][0] == ValueApprox(fixedG_list2[1].get()[0][0]));
CHECK(fixedG2[0][1] == ValueApprox(fixedG_list2[1].get()[0][1]));
CHECK(fixedG2[0][2] == ValueApprox(fixedG_list2[1].get()[0][2]));
// Gradient for second electron
CHECK(fixedG2[1][0] == ValueApprox(fixedG_list2[1].get()[1][0]));
CHECK(fixedG2[1][1] == ValueApprox(fixedG_list2[1].get()[1][1]));
CHECK(fixedG2[1][2] == ValueApprox(fixedG_list2[1].get()[1][2]));
// Compare the ParticleSet gradient and laplacian storage
// This should be temporary until these get removed from ParticleSet
CHECK(elec1b.L[0] == ValueApprox(elec1.L[0]));
CHECK(elec1b.L[1] == ValueApprox(elec1.L[1]));
CHECK(elec2b.L[0] == ValueApprox(elec2.L[0]));
CHECK(elec2b.L[1] == ValueApprox(elec2.L[1]));
// Compare the ParticleSet gradient and laplacian storage
// This should be temporary until these get removed from ParticleSet
CHECK(elec1b.L[0] == ValueApprox(elec1.L[0]));
CHECK(elec1b.L[1] == ValueApprox(elec1.L[1]));
CHECK(elec2b.L[0] == ValueApprox(elec2.L[0]));
CHECK(elec2b.L[1] == ValueApprox(elec2.L[1]));
CHECK(elec2b.G[0][0] == ValueApprox(elec2.G[0][0]));
CHECK(elec2b.G[1][1] == ValueApprox(elec2.G[1][1]));
CHECK(elec2b.G[0][0] == ValueApprox(elec2.G[0][0]));
CHECK(elec2b.G[1][1] == ValueApprox(elec2.G[1][1]));
// these lists not used if 'recompute' is false
RefVector<ParticleSet::ParticleGradient> dummyG_list;
RefVector<ParticleSet::ParticleLaplacian> dummyL_list;
// these lists not used if 'recompute' is false
RefVector<ParticleSet::ParticleGradient> dummyG_list;
RefVector<ParticleSet::ParticleLaplacian> dummyL_list;
std::vector<RealType> logpsi_variable_list(nentry);
TrialWaveFunction::mw_evaluateDeltaLog(wf_list, p_list, logpsi_variable_list, dummyG_list, dummyL_list, false);
std::vector<RealType> logpsi_variable_list(nentry);
TrialWaveFunction::mw_evaluateDeltaLog(wf_list, p_list, logpsi_variable_list, dummyG_list, dummyL_list, false);
RealType logpsi1 = psi.evaluateDeltaLog(p_list[0], false);
CHECK(logpsi1 == Approx(logpsi_variable_list[0]));
RealType logpsi1 = psi.evaluateDeltaLog(p_list[0], false);
CHECK(logpsi1 == Approx(logpsi_variable_list[0]));
RealType logpsi2 = psi2.evaluateDeltaLog(p_list[1], false);
CHECK(logpsi2 == Approx(logpsi_variable_list[1]));
RealType logpsi2 = psi2.evaluateDeltaLog(p_list[1], false);
CHECK(logpsi2 == Approx(logpsi_variable_list[1]));
// Now check with 'recompute = true'
auto dummyG_list2_ptr = create_particle_gradient(nelec, nentry);
auto dummyL_list2_ptr = create_particle_laplacian(nelec, nentry);
auto dummyG_list2 = convertUPtrToRefVector(dummyG_list2_ptr);
auto dummyL_list2 = convertUPtrToRefVector(dummyL_list2_ptr);
// Now check with 'recompute = true'
auto dummyG_list2_ptr = create_particle_gradient(nelec, nentry);
auto dummyL_list2_ptr = create_particle_laplacian(nelec, nentry);
auto dummyG_list2 = convertUPtrToRefVector(dummyG_list2_ptr);
auto dummyL_list2 = convertUPtrToRefVector(dummyL_list2_ptr);
std::vector<RealType> logpsi_variable_list2(nentry);
std::vector<RealType> logpsi_variable_list2(nentry);
TrialWaveFunction::mw_evaluateDeltaLog(wf_list, p_list, logpsi_variable_list2, dummyG_list2, dummyL_list2, true);
TrialWaveFunction::mw_evaluateDeltaLog(wf_list, p_list, logpsi_variable_list2, dummyG_list2, dummyL_list2, true);
RealType logpsi1b = psi.evaluateDeltaLog(p_list[0], true);
CHECK(logpsi1b == Approx(logpsi_variable_list2[0]));
RealType logpsi1b = psi.evaluateDeltaLog(p_list[0], true);
CHECK(logpsi1b == Approx(logpsi_variable_list2[0]));
RealType logpsi2b = psi2.evaluateDeltaLog(p_list[1], true);
CHECK(logpsi2b == Approx(logpsi_variable_list2[1]));
RealType logpsi2b = psi2.evaluateDeltaLog(p_list[1], true);
CHECK(logpsi2b == Approx(logpsi_variable_list2[1]));
}
}
#endif

View File

@ -20,6 +20,7 @@
#include "QMCWaveFunctions/WaveFunctionComponent.h"
#include "QMCWaveFunctions/SPOSetBuilderFactory.h"
#include "Utilities/ResourceCollection.h"
#include "QMCWaveFunctions/SpinorSet.h"
#include <stdio.h>
#include <string>
@ -477,6 +478,16 @@ TEST_CASE("Einspline SpinorSet from HDF", "[wavefunction]")
spo_list.push_back(*spo);
spo_list.push_back(*spo_2);
//test resource APIs
//First resource is created, and then passed to the collection so it should be null
ResourceCollection spo_res("test_spo_res");
spo->createResource(spo_res);
SpinorSet& spinor = spo_list.getCastedLeader<SpinorSet>();
REQUIRE(!spinor.isResourceOwned());
//team lock calls the acquireResource, so now the leader's resource shouldn't be null
ResourceCollectionTeamLock<SPOSet> mw_spo_lock(spo_res, spo_list);
REQUIRE(spinor.isResourceOwned());
SPOSet::ValueMatrix psiM_2(elec_.R.size(), spo->getOrbitalSetSize());
SPOSet::GradMatrix dpsiM_2(elec_.R.size(), spo->getOrbitalSetSize());
SPOSet::ValueMatrix d2psiM_2(elec_.R.size(), spo->getOrbitalSetSize());

View File

@ -63,8 +63,8 @@ struct h5data_proxy<boost::multi::array<T, 2, Alloc>> : public h5_space_type<T,
inline h5data_proxy(const data_type& a)
{
dims[0] = a.size(0);
dims[1] = a.size(1);
dims[0] = std::get<0>(a.sizes());
dims[1] = std::get<1>(a.sizes());
}
inline bool read(data_type& ref, hid_t grp, const std::string& aname, hid_t xfer_plist = H5P_DEFAULT)
@ -97,7 +97,7 @@ struct h5data_proxy<boost::multi::array_ref<T, 1, Ptr>> : public h5_space_type<T
if (dims[0] > 0)
{
std::cerr << " Error: multi::array_ref can't be resized in h5data_proxy<>::read." << std::endl;
std::cerr << dims[0] << " " << ref.size(0) << std::endl;
std::cerr << dims[0] << " " << std::get<0>(ref.sizes()) << std::endl;
}
return false;
}
@ -120,8 +120,8 @@ struct h5data_proxy<boost::multi::array_ref<T, 2, Ptr>> : public h5_space_type<T
inline h5data_proxy(const data_type& a)
{
dims[0] = a.size(0);
dims[1] = a.size(1);
dims[0] = std::get<0>(a.sizes());
dims[1] = std::get<1>(a.sizes());
}
inline bool read(data_type& ref, hid_t grp, const std::string& aname, hid_t xfer_plist = H5P_DEFAULT)
@ -131,7 +131,7 @@ struct h5data_proxy<boost::multi::array_ref<T, 2, Ptr>> : public h5_space_type<T
if (dims[0] * dims[1] > 0)
{
std::cerr << " Error: multi::array_ref can't be resized in h5data_proxy<>::read." << std::endl;
std::cerr << dims[0] << " " << dims[1] << " " << ref.size(0) << " " << ref.size(1) << std::endl;
std::cerr << dims[0] << " " << dims[1] << " " << std::get<0>(ref.sizes()) << " " << std::get<1>(ref.sizes()) << std::endl;
}
return false;
}
@ -257,8 +257,8 @@ struct h5data_proxy<boost::multi::array_ref<T, 2, device::device_pointer<T>>> :
inline h5data_proxy(const data_type& a)
{
dims[0] = a.size(0);
dims[1] = a.size(1);
dims[0] = std::get<0>(a.sizes());
dims[1] = std::get<1>(a.sizes());
}
inline bool read(data_type& ref, hid_t grp, const std::string& aname, hid_t xfer_plist = H5P_DEFAULT)
@ -268,7 +268,7 @@ struct h5data_proxy<boost::multi::array_ref<T, 2, device::device_pointer<T>>> :
if (dims[0] * dims[1] > 0)
{
std::cerr << " Error: multi::array_ref can't be resized in h5data_proxy<>::read." << std::endl;
std::cerr << dims[0] << " " << dims[1] << " " << ref.size(0) << " " << ref.size(1) << std::endl;
std::cerr << dims[0] << " " << dims[1] << " " << std::get<0>(ref.sizes()) << " " << std::get<1>(ref.sizes()) << std::endl;
}
return false;
}

View File

@ -107,6 +107,12 @@ if(NOT QMC_CUDA)
2
check.sh
false)
add_test_check_file_existence(deterministic-restart_batch-8-2 qmc_short_batch.s000.config.h5 TRUE)
add_test_check_file_existence(deterministic-restart_batch-8-2 qmc_short_batch.s000.random.h5 TRUE)
add_test_check_file_existence(deterministic-restart_batch-8-2 qmc_short_batch.s001.config.h5 FALSE)
add_test_check_file_existence(deterministic-restart_batch-8-2 qmc_short_batch.s001.random.h5 FALSE)
run_restart_and_check(
deterministic-restart_batch
"${qmcpack_SOURCE_DIR}/tests/io/restart_batch"

View File

@ -135,15 +135,15 @@ qmc_run_and_check(
)
# Excited state
list(APPEND DIAMOND_EXCITED_SCALARS "totenergy" "-10.28759285 0.001023")
list(APPEND DIAMOND_EXCITED_SCALARS "kinetic" "11.41778633 0.066764")
list(APPEND DIAMOND_EXCITED_SCALARS "potential" "-21.70537918 0.008371")
list(APPEND DIAMOND_EXCITED_SCALARS "eeenergy" "-2.78731301 0.002097")
list(APPEND DIAMOND_EXCITED_SCALARS "ionion" "-12.77566741 0.000001")
list(APPEND DIAMOND_EXCITED_SCALARS "localecp" "-6.74480856 0.009475")
list(APPEND DIAMOND_EXCITED_SCALARS "nonlocalecp" "0.60240980 0.002763")
list(APPEND DIAMOND_EXCITED_SCALARS "totenergy" "-10.28757413 0.001966")
list(APPEND DIAMOND_EXCITED_SCALARS "kinetic" "11.41769866 0.01674")
list(APPEND DIAMOND_EXCITED_SCALARS "potential" "-21.70527280 0.016756")
list(APPEND DIAMOND_EXCITED_SCALARS "eeenergy" "-2.78729624 0.004153")
list(APPEND DIAMOND_EXCITED_SCALARS "ionion" "-12.77566741 0.00001")
list(APPEND DIAMOND_EXCITED_SCALARS "localecp" "-6.74474055 0.018968")
list(APPEND DIAMOND_EXCITED_SCALARS "nonlocalecp" "0.60243140 0.005574")
list(APPEND DIAMOND_EXCITED_SCALARS "samples" "128000 0.0")
list(APPEND DIAMOND_EXCITED_SCALARS "mpc" "-2.52743302 0.00222")
list(APPEND DIAMOND_EXCITED_SCALARS "mpc" "-2.52740814 0.004392")
qmc_run_and_check(
short-diamondC_1x1x1_pp-vmc_sdj_excited
@ -157,13 +157,13 @@ qmc_run_and_check(
DIAMOND_EXCITED_SCALARS # VMC
)
list(APPEND DIAMOND_DMC_EXCITED_SCALARS "totenergy" "-10.33095572 0.002735")
list(APPEND DIAMOND_DMC_EXCITED_SCALARS "kinetic" "11.48677399 0.027304")
list(APPEND DIAMOND_DMC_EXCITED_SCALARS "potential" "-21.81772971 0.028082")
list(APPEND DIAMOND_DMC_EXCITED_SCALARS "localecp" "-6.87597125 0.03417")
list(APPEND DIAMOND_DMC_EXCITED_SCALARS "nonlocalecp" "0.63559539 0.008290")
list(APPEND DIAMOND_DMC_EXCITED_SCALARS "eeenergy" "-2.80168644 0.00774")
list(APPEND DIAMOND_DMC_EXCITED_SCALARS "mpc" "-2.55373919 0.008215")
list(APPEND DIAMOND_DMC_EXCITED_SCALARS "totenergy" "-10.33047547 0.002606")
list(APPEND DIAMOND_DMC_EXCITED_SCALARS "kinetic" "11.48348626 0.026185")
list(APPEND DIAMOND_DMC_EXCITED_SCALARS "potential" "-21.81396173 0.02679")
list(APPEND DIAMOND_DMC_EXCITED_SCALARS "localecp" "-6.87151058 0.033201")
list(APPEND DIAMOND_DMC_EXCITED_SCALARS "nonlocalecp" "0.63492715 0.008002")
list(APPEND DIAMOND_DMC_EXCITED_SCALARS "eeenergy" "-2.80171089 0.007489")
list(APPEND DIAMOND_DMC_EXCITED_SCALARS "mpc" "-2.55372811 0.00795")
qmc_run_and_check(
short-diamondC_1x1x1_pp-dmc_sdj_excited
@ -321,9 +321,9 @@ qmc_run_and_check(
)
# Excited state
list(APPEND LONG_DIAMOND_EXCITED_SCALARS "totenergy" "-10.28759285 0.000033")
list(APPEND LONG_DIAMOND_EXCITED_SCALARS "totenergy" "-10.28757413 0.000067")
list(APPEND LONG_DIAMOND_EXCITED_SCALARS "samples" "122880000 0.0")
list(APPEND LONG_DIAMOND_EXCITED_SCALARS "flux" "-0.01469770 0.025542")
list(APPEND LONG_DIAMOND_EXCITED_SCALARS "flux" "-0.01538267 0.03786")
qmc_run_and_check(
long-diamondC_1x1x1_pp-vmc_sdj_excited
@ -337,7 +337,7 @@ qmc_run_and_check(
LONG_DIAMOND_EXCITED_SCALARS # VMC
)
list(APPEND LONG_DIAMOND_DMC_EXCITED_SCALARS "totenergy" "-10.33095572 0.000865")
list(APPEND LONG_DIAMOND_DMC_EXCITED_SCALARS "totenergy" "-10.33047547 0.00086")
qmc_run_and_check(
long-diamondC_1x1x1_pp-dmc_sdj_excited
@ -1156,11 +1156,11 @@ if(NOT QMC_CUDA)
list(APPEND DET_DIAMOND_EXCITED_SCALARS "potential" "-24.81924106 0.00001931")
list(APPEND DET_DIAMOND_EXCITED_SCALARS "eeenergy" "-3.54361899 0.00001190")
list(APPEND DET_DIAMOND_EXCITED_SCALARS "ionion" "-12.77567050 0.000001")
list(APPEND DET_DIAMOND_EXCITED_SCALARS "localecp" "-8.35089725 0.00001516")
list(APPEND DET_DIAMOND_EXCITED_SCALARS "localecp" "-8.35089725 0.00005")
list(APPEND DET_DIAMOND_EXCITED_SCALARS "nonlocalecp" "-0.14905431 0.00002")
list(APPEND DET_DIAMOND_EXCITED_SCALARS "mpc" "-3.24606471 0.00001149")
list(APPEND DET_DIAMOND_EXCITED_SCALARS "samples" "9 0.0")
list(APPEND DET_DIAMOND_EXCITED_SCALARS "flux" "-3.81746292 0.00004665")
list(APPEND DET_DIAMOND_EXCITED_SCALARS "flux" "-3.81746292 0.0001")
else()
list(APPEND DET_DIAMOND_EXCITED_SCALARS "totenergy" "-10.14872875 0.000001")
list(APPEND DET_DIAMOND_EXCITED_SCALARS "kinetic" "14.67051169 0.000001")

View File

@ -86,7 +86,7 @@
<qmc method="vmc" move="pbyp">
<estimator name="LocalEnergy" hdf5="no"/>
<parameter name="walkers" > 16 </parameter>
<parameter name="blocks" > 10000 </parameter>
<parameter name="blocks" > 10000 </parameter>
<parameter name="steps" > 7680.0 </parameter>
<parameter name="subSteps" > 2 </parameter>
<parameter name="timestep" > 0.3 </parameter>

Some files were not shown because too many files have changed in this diff Show More