mirror of https://github.com/QMCPACK/qmcpack.git
Merge branch 'develop' into nx_batched_drivers
This commit is contained in:
commit
c31a404b42
|
@ -1126,8 +1126,8 @@ public:
|
|||
void vHS(MatA& Xw, MatB&& v, double a = 1., double c = 0.)
|
||||
{
|
||||
int nkpts = nopk.size();
|
||||
int nwalk = Xw.size(1);
|
||||
assert(v.size(0) == nwalk);
|
||||
int nwalk = std::get<1>(Xw.sizes());
|
||||
assert(v.size() == nwalk);
|
||||
int nspin = (walker_type == COLLINEAR ? 2 : 1);
|
||||
int nmo_tot = std::accumulate(nopk.begin(), nopk.end(), 0);
|
||||
int nmo_max = *std::max_element(nopk.begin(), nopk.end());
|
||||
|
@ -1139,7 +1139,7 @@ public:
|
|||
SPComplexType halfa(0.5 * a, 0.0);
|
||||
size_t local_memory_needs = nmo_max * nmo_max * nwalk;
|
||||
if (TMats.num_elements() < local_memory_needs)
|
||||
TMats.reextent({local_memory_needs, 1});
|
||||
TMats.reextent({static_cast<ptrdiff_t>(local_memory_needs), 1});
|
||||
|
||||
using vType = typename std::decay<MatB>::type::element;
|
||||
boost::multi::array_ref<vType, 3> v3D(to_address(v.origin()), {nwalk, nmo_tot, nmo_tot});
|
||||
|
|
|
@ -434,16 +434,6 @@ void NOMSD<devPsiT>::MixedDensityMatrix_for_E_from_SM(const MatSM& SM,
|
|||
TG.local_barrier();
|
||||
}
|
||||
|
||||
template<class T, class... As>
|
||||
auto Sizes(boost::multi::array<T, 2, As...> const& m)
|
||||
->decltype(m.sizes()) {
|
||||
return m.sizes(); }
|
||||
|
||||
template<class Matrix>
|
||||
auto Sizes(Matrix const& m)
|
||||
->decltype(std::array<long, 2>{static_cast<long>(std::get<0>(m.sizes())), static_cast<long>(std::get<1>(m.sizes()))}) {
|
||||
return std::array<long, 2>{static_cast<long>(std::get<0>(m.sizes())), static_cast<long>(std::get<1>(m.sizes()))}; }
|
||||
|
||||
/*
|
||||
* Computes the density matrix for a given reference.
|
||||
* G and Ov are expected to be in shared memory.
|
||||
|
@ -480,9 +470,9 @@ void NOMSD<devPsiT>::DensityMatrix_shared(const WlkSet& wset,
|
|||
if (walker_type != COLLINEAR)
|
||||
{
|
||||
if (herm) {
|
||||
assert(std::get<0>(Sizes(RefA)) == dm_dims(false, Alpha).first && std::get<1>(Sizes(RefA)) == dm_dims(false, Alpha).second);
|
||||
assert(std::get<0>(RefA.sizes()) == dm_dims(false, Alpha).first && std::get<1>(RefA.sizes()) == dm_dims(false, Alpha).second);
|
||||
} else {
|
||||
assert(std::get<1>(Sizes(RefA)) == dm_dims(false, Alpha).first && std::get<0>(Sizes(RefA)) == dm_dims(false, Alpha).second);
|
||||
assert(std::get<1>(RefA.sizes()) == dm_dims(false, Alpha).first && std::get<0>(RefA.sizes()) == dm_dims(false, Alpha).second);
|
||||
}
|
||||
|
||||
auto Gdims = dm_dims(not compact, Alpha);
|
||||
|
@ -507,14 +497,14 @@ void NOMSD<devPsiT>::DensityMatrix_shared(const WlkSet& wset,
|
|||
else
|
||||
{
|
||||
if (herm) {
|
||||
assert(std::get<0>(Sizes(RefA)) == dm_dims(false, Alpha).first && std::get<1>(Sizes(RefA)) == dm_dims(false, Alpha).second);
|
||||
assert(std::get<0>(RefA.sizes()) == dm_dims(false, Alpha).first && std::get<1>(RefA.sizes()) == dm_dims(false, Alpha).second);
|
||||
} else {
|
||||
assert(std::get<1>(Sizes(RefA)) == dm_dims(false, Alpha).first && std::get<0>(Sizes(RefA)) == dm_dims(false, Alpha).second);
|
||||
assert(std::get<1>(RefA.sizes()) == dm_dims(false, Alpha).first && std::get<0>(RefA.sizes()) == dm_dims(false, Alpha).second);
|
||||
}
|
||||
if (herm) {
|
||||
assert(std::get<0>(Sizes(RefB)) == dm_dims(false, Beta).first && std::get<1>(Sizes(RefB)) == dm_dims(false, Beta).second);
|
||||
assert(std::get<0>(RefB.sizes()) == dm_dims(false, Beta).first && std::get<1>(RefB.sizes()) == dm_dims(false, Beta).second);
|
||||
} else {
|
||||
assert(std::get<1>(Sizes(RefB)) == dm_dims(false, Beta).first && std::get<0>(Sizes(RefB)) == dm_dims(false, Beta).second);
|
||||
assert(std::get<1>(RefB.sizes()) == dm_dims(false, Beta).first && std::get<0>(RefB.sizes()) == dm_dims(false, Beta).second);
|
||||
}
|
||||
StaticVector ovlp2(iextensions<1u>{2 * nw}, buffer_manager.get_generator().template get_allocator<ComplexType>());
|
||||
fill_n(ovlp2.origin(), 2 * nw, ComplexType(0.0));
|
||||
|
@ -618,9 +608,9 @@ void NOMSD<devPsiT>::DensityMatrix_batched(const WlkSet& wset,
|
|||
if (walker_type != COLLINEAR)
|
||||
{
|
||||
if (herm) {
|
||||
assert(std::get<0>(Sizes(RefA)) == dm_dims(false, Alpha).first && std::get<1>(Sizes(RefA)) == dm_dims(false, Alpha).second);
|
||||
assert(std::get<0>(RefA.sizes()) == dm_dims(false, Alpha).first && std::get<1>(RefA.sizes()) == dm_dims(false, Alpha).second);
|
||||
} else {
|
||||
assert(std::get<1>(Sizes(RefA)) == dm_dims(false, Alpha).first && std::get<0>(Sizes(RefA)) == dm_dims(false, Alpha).second);
|
||||
assert(std::get<1>(RefA.sizes()) == dm_dims(false, Alpha).first && std::get<0>(RefA.sizes()) == dm_dims(false, Alpha).second);
|
||||
}
|
||||
Static3Tensor G3D_({nbatch__, GAdims.first, GAdims.second},
|
||||
buffer_manager.get_generator().template get_allocator<ComplexType>());
|
||||
|
@ -658,14 +648,14 @@ void NOMSD<devPsiT>::DensityMatrix_batched(const WlkSet& wset,
|
|||
else
|
||||
{
|
||||
if (herm) {
|
||||
assert(std::get<0>(Sizes(RefA)) == dm_dims(false, Alpha).first && std::get<1>(Sizes(RefA)) == dm_dims(false, Alpha).second);
|
||||
assert(std::get<0>(RefA.sizes()) == dm_dims(false, Alpha).first && std::get<1>(RefA.sizes()) == dm_dims(false, Alpha).second);
|
||||
} else {
|
||||
assert(std::get<1>(Sizes(RefA)) == dm_dims(false, Alpha).first && std::get<0>(Sizes(RefA)) == dm_dims(false, Alpha).second);
|
||||
assert(std::get<1>(RefA.sizes()) == dm_dims(false, Alpha).first && std::get<0>(RefA.sizes()) == dm_dims(false, Alpha).second);
|
||||
}
|
||||
if (herm) {
|
||||
assert(std::get<0>(Sizes(RefB)) == dm_dims(false, Beta).first && std::get<1>(Sizes(RefB)) == dm_dims(false, Beta).second);
|
||||
assert(std::get<0>(RefB.sizes()) == dm_dims(false, Beta).first && std::get<1>(RefB.sizes()) == dm_dims(false, Beta).second);
|
||||
} else {
|
||||
assert(std::get<1>(Sizes(RefB)) == dm_dims(false, Beta).first && std::get<0>(Sizes(RefB)) == dm_dims(false, Beta).second);
|
||||
assert(std::get<1>(RefB.sizes()) == dm_dims(false, Beta).first && std::get<0>(RefB.sizes()) == dm_dims(false, Beta).second);
|
||||
}
|
||||
|
||||
auto GBdims = dm_dims(not compact, Beta);
|
||||
|
|
|
@ -60,18 +60,19 @@ bool SplineR2R<ST>::write_splines(hdf_archive& h5f)
|
|||
spl_coefs has a complicated layout depending on dimensionality of splines.
|
||||
Luckily, for our purposes, we can think of spl_coefs as pointing to a
|
||||
matrix of size BasisSetSize x (OrbitalSetSize + padding), with the spline
|
||||
index adjacent in memory. NB: The orbital index is SIMD aligned and
|
||||
therefore may include padding.
|
||||
index adjacent in memory. The orbital index is SIMD aligned and therefore
|
||||
may include padding.
|
||||
|
||||
In other words, due to SIMD alignment, Nsplines may be larger than the
|
||||
actual number of splined orbitals, which means that in practice rot_mat
|
||||
may be smaller than the number of 'columns' in the coefs array.
|
||||
Therefore, we put rot_mat inside "tmpU". The padding of the splines
|
||||
is at the end, so if we put rot_mat at top left corner of tmpU, then
|
||||
we can apply tmpU to the coefs safely regardless of padding.
|
||||
As a result, due to SIMD alignment, Nsplines may be larger than the
|
||||
actual number of splined orbitals. This means that in practice rot_mat
|
||||
may be smaller than the number of 'columns' in the coefs array!
|
||||
To fix this problem, we put rot_mat inside "tmpU", which is guaranteed to have
|
||||
the 'right' size to match spl_coefs. The padding of the splines is at the end,
|
||||
so if we put rot_mat at top left corner of tmpU, then we can apply tmpU to the
|
||||
coefs safely regardless of padding.
|
||||
|
||||
Typically, BasisSetSize >> OrbitalSetSize, so the spl_coefs "matrix"
|
||||
is very tall and skinny.
|
||||
NB: For splines (typically) BasisSetSize >> OrbitalSetSize, so the spl_coefs
|
||||
"matrix" is very tall and skinny.
|
||||
*/
|
||||
template<typename ST>
|
||||
void SplineR2R<ST>::applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy)
|
||||
|
@ -99,6 +100,7 @@ void SplineR2R<ST>::applyRotation(const ValueMatrix& rot_mat, bool use_stored_co
|
|||
}
|
||||
|
||||
// Apply rotation the dumb way b/c I can't get BLAS::gemm to work...
|
||||
std::vector<RealType> new_coefs(coefs_tot_size, 0);
|
||||
for (auto i = 0; i < BasisSetSize; i++)
|
||||
{
|
||||
for (auto j = 0; j < Nsplines; j++)
|
||||
|
@ -110,10 +112,13 @@ void SplineR2R<ST>::applyRotation(const ValueMatrix& rot_mat, bool use_stored_co
|
|||
const auto index = i * Nsplines + k;
|
||||
newval += *(spl_coefs + index) * tmpU[k][j];
|
||||
}
|
||||
*(spl_coefs + cur_elem) = newval;
|
||||
new_coefs[cur_elem] = newval;
|
||||
}
|
||||
}
|
||||
|
||||
// Update the coefs
|
||||
std::copy(new_coefs.begin(), new_coefs.end(), spl_coefs);
|
||||
|
||||
/*
|
||||
// Here is my attempt to use gemm but it doesn't work...
|
||||
int smaller_BasisSetSize = static_cast<int>(BasisSetSize);
|
||||
|
|
Loading…
Reference in New Issue