mirror of https://github.com/QMCPACK/qmcpack.git
Merge pull request #2193 from ye-luo/fix-ALIGN
Remove ALIGN as independent template parameter when memory allocator is available
This commit is contained in:
commit
0d77ef7d7b
|
@ -15,8 +15,6 @@
|
|||
#ifndef QMCPLUSPLUS_SOA_FAST_PARTICLE_OPERATORS_H
|
||||
#define QMCPLUSPLUS_SOA_FAST_PARTICLE_OPERATORS_H
|
||||
|
||||
#include <simd/blas1.hpp>
|
||||
|
||||
namespace qmcplusplus
|
||||
{
|
||||
//Need to reorg
|
||||
|
|
|
@ -29,7 +29,7 @@ namespace qmcplusplus
|
|||
* @tparam T data type, float, double, complex<float>, complex<double>
|
||||
* @tparam Alloc memory allocator
|
||||
*/
|
||||
template<typename T, unsigned D, size_t ALIGN = QMC_CLINE, typename Alloc = Mallocator<T, ALIGN>>
|
||||
template<typename T, unsigned D, typename Alloc = aligned_allocator<T>>
|
||||
struct VectorSoaContainer
|
||||
{
|
||||
using AoSElement_t = TinyVector<T, D>;
|
||||
|
@ -109,7 +109,7 @@ struct VectorSoaContainer
|
|||
if (isRefAttached())
|
||||
throw std::runtime_error("Resize not allowed on VectorSoaContainer constructed by initialized memory.");
|
||||
|
||||
size_t n_padded = getAlignedSize<T, ALIGN>(n);
|
||||
size_t n_padded = getAlignedSize<T, Alloc::alignment>(n);
|
||||
|
||||
if (n_padded * D > nAllocated)
|
||||
{
|
||||
|
|
|
@ -29,11 +29,11 @@ using PinnedAllocator = CUDALockedPageAllocator<T>;
|
|||
using PinnedAllocator = std::allocator<T>;
|
||||
#endif
|
||||
|
||||
template<typename T>
|
||||
template<typename T, size_t ALIGN = QMC_CLINE>
|
||||
#ifdef ENABLE_CUDA
|
||||
using PinnedAlignedAllocator = CUDALockedPageAllocator<T, aligned_allocator<T>>;
|
||||
using PinnedAlignedAllocator = CUDALockedPageAllocator<T, aligned_allocator<T, ALIGN>>;
|
||||
#else
|
||||
using PinnedAlignedAllocator = aligned_allocator<T>;
|
||||
using PinnedAlignedAllocator = aligned_allocator<T, ALIGN>;
|
||||
#endif
|
||||
|
||||
} // namespace qmcplusplus
|
||||
|
|
|
@ -38,7 +38,6 @@ template<typename ST>
|
|||
class SplineC2ROMP : public BsplineSet
|
||||
{
|
||||
public:
|
||||
static const int ALIGN = QMC_CLINE;
|
||||
template<typename DT>
|
||||
using OffloadAllocator = OMPallocator<DT, aligned_allocator<DT>>;
|
||||
template<typename DT>
|
||||
|
@ -69,7 +68,7 @@ private:
|
|||
///number of complex bands
|
||||
int nComplexBands;
|
||||
///multi bspline set
|
||||
std::shared_ptr<MultiBspline<ST, ALIGN, OffloadAllocator<ST>>> SplineInst;
|
||||
std::shared_ptr<MultiBspline<ST, OffloadAllocator<ST>>> SplineInst;
|
||||
|
||||
vContainer_type mKK;
|
||||
VectorSoaContainer<ST, 3> myKcart;
|
||||
|
@ -159,7 +158,7 @@ public:
|
|||
void create_spline(GT& xyz_g, BCT& xyz_bc)
|
||||
{
|
||||
resize_kpoints();
|
||||
SplineInst = std::make_shared<MultiBspline<ST, ALIGN, OffloadAllocator<ST>>>();
|
||||
SplineInst = std::make_shared<MultiBspline<ST, OffloadAllocator<ST>>>();
|
||||
SplineInst->create(xyz_g, xyz_bc, myV.size());
|
||||
|
||||
app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20) << " MB allocated "
|
||||
|
|
|
@ -31,7 +31,7 @@ namespace qmcplusplus
|
|||
* The scalar part works as PooledData, all the values are static_cast to T_scalar.
|
||||
*/
|
||||
#define DEFAULT_PAGE_SIZE 4096
|
||||
template<typename T_scalar = OHMMS_PRECISION_FULL, typename Alloc = Mallocator<char, DEFAULT_PAGE_SIZE>>
|
||||
template<typename T_scalar = OHMMS_PRECISION_FULL, typename Alloc = aligned_allocator<char, DEFAULT_PAGE_SIZE>>
|
||||
struct PooledMemory
|
||||
{
|
||||
typedef char T;
|
||||
|
|
|
@ -29,6 +29,8 @@ struct Mallocator
|
|||
typedef T* pointer;
|
||||
typedef const T* const_pointer;
|
||||
|
||||
static constexpr size_t alignment = ALIGN;
|
||||
|
||||
Mallocator() = default;
|
||||
template<class U>
|
||||
Mallocator(const Mallocator<U, ALIGN>&)
|
||||
|
|
|
@ -21,8 +21,8 @@
|
|||
|
||||
namespace qmcplusplus
|
||||
{
|
||||
template<class T>
|
||||
using aligned_allocator = Mallocator<T, QMC_CLINE>;
|
||||
template<class T, size_t ALIGN = QMC_CLINE>
|
||||
using aligned_allocator = Mallocator<T, ALIGN>;
|
||||
template<class T>
|
||||
using aligned_vector = std::vector<T, aligned_allocator<T>>;
|
||||
|
||||
|
|
|
@ -1,131 +0,0 @@
|
|||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
// This file is distributed under the University of Illinois/NCSA Open Source License.
|
||||
// See LICENSE file in top directory for details.
|
||||
//
|
||||
// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
|
||||
//
|
||||
// File developed by:
|
||||
//
|
||||
// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
// -*- C++ -*-
|
||||
/** @file blas1.hpp
|
||||
*/
|
||||
#ifndef QMCPLUSPLUS_SIMD_BLAS1_HPP
|
||||
#define QMCPLUSPLUS_SIMD_BLAS1_HPP
|
||||
|
||||
#include <complex>
|
||||
|
||||
namespace qmcplusplus
|
||||
{
|
||||
//going to replace simd
|
||||
namespace blas
|
||||
{
|
||||
/** Element-wise copy from one array to another.
 *
 * The copy is delegated to std::copy, so every element is converted from T1 to T2
 * by assignment. Both pointers are declared restrict.
 * @param source starting address of the source
 * @param target starting address of the target
 * @param n number of elements to copy
 */
template<typename T1, typename T2>
inline void copy(const T1* restrict source, T2* restrict target, size_t n)
{
  const T1* const source_end = source + n;
  std::copy(source, source_end, target);
}
|
||||
|
||||
/** Split an array of complex numbers into separate real and imaginary arrays.
 *
 * @param source starting address of the complex source
 * @param target_r receives the real part of every element, converted to T2
 * @param target_i receives the imaginary part of every element, converted to T2
 * @param n number of complex elements to split
 */
template<typename T1, typename T2>
inline void copy(const std::complex<T1>* restrict source, T2* restrict target_r, T2* restrict target_i, size_t n)
{
  // View the complex array as interleaved (re, im) scalars. The cast keeps the const
  // qualifier: reinterpret_cast is not allowed to cast constness away.
  const T1* restrict iptr = reinterpret_cast<const T1*>(source);
  ASSUMED_ALIGNED(source);
  ASSUMED_ALIGNED(iptr);
  // Index with size_t like n itself: no signed/unsigned comparison, no int overflow for large n.
  for (size_t i = 0; i < n; ++i)
  {
    target_r[i] = static_cast<T2>(iptr[2 * i]);
    target_i[i] = static_cast<T2>(iptr[2 * i + 1]);
  }
}
|
||||
|
||||
/** Dot product of two real arrays, accumulated on top of an initial value.
 *
 * @param a first operand
 * @param b second operand
 * @param n number of elements in a and b
 * @param res initial value of the sum
 * @return res plus the sum of a[k] * b[k] for k in [0, n)
 */
template<typename T, typename TSUM>
inline TSUM dot(const T* restrict a, const T* restrict b, int n, TSUM res)
{
  ASSUMED_ALIGNED(a);
  ASSUMED_ALIGNED(b);
#pragma omp simd reduction(+ : res)
  for (int k = 0; k < n; ++k)
  {
    res += a[k] * b[k];
  }
  return res;
}
|
||||
|
||||
/** Dot product of a complex array with a real array, accumulated on top of an initial value.
 *
 * Real and imaginary parts are summed separately in TSUM precision.
 * @param a complex operand
 * @param b real operand
 * @param n number of elements in a and b
 * @param res initial value of the sum
 * @return res plus the sum of a[i] * b[i] for i in [0, n), in TSUM precision
 */
template<typename T, typename TSUM>
inline std::complex<TSUM> dot(const std::complex<T>* restrict a,
                              const T* restrict b,
                              int n,
                              const std::complex<TSUM>& res)
{
  TSUM res_r = res.real();
  TSUM res_i = res.imag();
  // View the complex array as interleaved (re, im) scalars. The cast keeps the const
  // qualifier: reinterpret_cast is not allowed to cast constness away.
  const T* restrict iptr = reinterpret_cast<const T*>(a);
  ASSUMED_ALIGNED(iptr);
  ASSUMED_ALIGNED(b);
  for (int i = 0, i2 = 0; i < n; i++, i2 += 2)
  {
    res_r += b[i] * iptr[i2];
    res_i += b[i] * iptr[i2 + 1];
  }
  // Build the result in the accumulator type; going through std::complex<T> would
  // truncate the sum whenever TSUM is wider than T.
  return std::complex<TSUM>(res_r, res_i);
}
|
||||
|
||||
/** Dot product of a real array with a complex array.
 *
 * Calls dot with the two array arguments swapped, so the complex operand comes first.
 * @param a real operand
 * @param b complex operand
 * @param n number of elements in a and b
 * @param res initial value of the sum
 * @return whatever the swapped dot(b, a, n, res) call returns
 */
template<typename T, typename TSUM>
inline std::complex<TSUM> dot(const T* restrict a,
                              const std::complex<T>* restrict b,
                              int n,
                              const std::complex<TSUM>& res)
{
  const std::complex<TSUM> swapped_result = dot(b, a, n, res);
  return swapped_result;
}
|
||||
|
||||
/** BLAS level-1 axpy: out[i] += alpha * in[i] for i in [0, n).
 *
 * @param alpha scaling factor applied to in
 * @param in array that is scaled and added (read only)
 * @param out array that is updated in place
 * @param n number of elements in in and out
 */
template<typename T>
inline void axpy(T alpha, const T* restrict in, T* restrict out, int n)
{
#pragma omp simd aligned(in, out)
  for (int i = 0; i < n; ++i)
    out[i] += alpha * in[i]; // accumulate into out; plain assignment would be a scaled copy, not axpy
}
|
||||
|
||||
/** Scale an array in place: inout[k] is multiplied by alpha for k in [0, n).
 *
 * @param alpha scaling factor
 * @param inout array that is scaled in place
 * @param n number of elements in inout
 */
template<typename T>
inline void scal(T alpha, T* restrict inout, int n)
{
#pragma omp simd aligned(inout)
  for (int k = 0; k < n; ++k)
  {
    inout[k] = inout[k] * alpha;
  }
}
|
||||
|
||||
/** Accumulate second moments of the element-wise complex products x_[i] * y_[i].
 *
 * For each i in [0, n), with tr and ti the real and imaginary parts of x_[i] * y_[i],
 * the function adds tr*tr to rN, ti*ti to iN and tr*ti to riN. The three outputs are
 * not reset; results are added to the values they hold on entry.
 * @param n number of complex elements in x_ and y_
 * @param x_ first complex operand
 * @param y_ second complex operand
 * @param rN running sum of tr * tr
 * @param iN running sum of ti * ti
 * @param riN running sum of tr * ti
 */
template<typename T>
inline void accumulate_phases(const int& n,
                              const std::complex<T>* restrict x_,
                              const std::complex<T>* restrict y_,
                              T& rN,
                              T& iN,
                              T& riN)
{
  // View both complex arrays as interleaved (re, im) scalars. The casts keep the const
  // qualifier: reinterpret_cast is not allowed to cast constness away.
  const T* restrict x = reinterpret_cast<const T*>(x_);
  const T* restrict y = reinterpret_cast<const T*>(y_);
  for (int i = 0, i2 = 0; i < n; ++i, i2 += 2)
  {
    // x[i2] / x[i2 + 1] are the real / imaginary parts of x_[i], likewise for y.
    const T tr = x[i2] * y[i2] - x[i2 + 1] * y[i2 + 1];
    const T ti = x[i2] * y[i2 + 1] + x[i2 + 1] * y[i2];
    rN += tr * tr;
    iN += ti * ti;
    riN += tr * ti;
  }
}
|
||||
} // namespace blas
|
||||
|
||||
} // namespace qmcplusplus
|
||||
#endif
|
|
@ -15,7 +15,6 @@
|
|||
#ifndef QMCPLUSPLUS_EINSPLINE_BSPLINE_ALLOCATOR_H
|
||||
#define QMCPLUSPLUS_EINSPLINE_BSPLINE_ALLOCATOR_H
|
||||
|
||||
#include <simd/Mallocator.hpp>
|
||||
#include <simd/simd.hpp>
|
||||
#include <spline2/bspline_traits.hpp>
|
||||
#include "simd/allocator.hpp"
|
||||
|
@ -43,7 +42,7 @@ inline void find_coefs_1d(Ugrid grid, BCtype_s bc, float* data, intptr_t dstride
|
|||
}
|
||||
} // namespace einspline
|
||||
|
||||
template<typename T, size_t ALIGN = QMC_CLINE, typename ALLOC = Mallocator<T, ALIGN>>
|
||||
template<typename T, typename ALLOC = aligned_allocator<T>>
|
||||
class BsplineAllocator
|
||||
{
|
||||
using SplineType = typename bspline_traits<T, 3>::SplineType;
|
||||
|
@ -100,8 +99,8 @@ public:
|
|||
void copy(UBT* single, MBT* multi, int i, const int* offset, const int* N);
|
||||
};
|
||||
|
||||
template<typename T, size_t ALIGN, typename ALLOC>
|
||||
typename BsplineAllocator<T, ALIGN, ALLOC>::SplineType* BsplineAllocator<T, ALIGN, ALLOC>::allocateMultiBspline(
|
||||
template<typename T, typename ALLOC>
|
||||
typename BsplineAllocator<T, ALLOC>::SplineType* BsplineAllocator<T, ALLOC>::allocateMultiBspline(
|
||||
Ugrid x_grid,
|
||||
Ugrid y_grid,
|
||||
Ugrid z_grid,
|
||||
|
@ -150,7 +149,7 @@ typename BsplineAllocator<T, ALIGN, ALLOC>::SplineType* BsplineAllocator<T, ALIG
|
|||
z_grid.delta_inv = 1.0 / z_grid.delta;
|
||||
spline->z_grid = z_grid;
|
||||
|
||||
const int N = getAlignedSize<real_type, ALIGN>(num_splines);
|
||||
const int N = getAlignedSize<real_type, ALLOC::alignment>(num_splines);
|
||||
|
||||
spline->x_stride = (size_t)Ny * (size_t)Nz * (size_t)N;
|
||||
spline->y_stride = Nz * N;
|
||||
|
@ -162,8 +161,8 @@ typename BsplineAllocator<T, ALIGN, ALLOC>::SplineType* BsplineAllocator<T, ALIG
|
|||
return spline;
|
||||
}
|
||||
|
||||
template<typename T, size_t ALIGN, typename ALLOC>
|
||||
typename BsplineAllocator<T, ALIGN, ALLOC>::SingleSplineType* BsplineAllocator<T, ALIGN, ALLOC>::allocateUBspline(
|
||||
template<typename T, typename ALLOC>
|
||||
typename BsplineAllocator<T, ALLOC>::SingleSplineType* BsplineAllocator<T, ALLOC>::allocateUBspline(
|
||||
Ugrid x_grid,
|
||||
Ugrid y_grid,
|
||||
Ugrid z_grid,
|
||||
|
@ -252,9 +251,9 @@ typename BsplineAllocator<T, ALIGN, ALLOC>::SingleSplineType* BsplineAllocator<T
|
|||
return spline;
|
||||
}
|
||||
|
||||
template<typename T, size_t ALIGN, typename ALLOC>
|
||||
template<typename T, typename ALLOC>
|
||||
template<typename UBT, typename MBT>
|
||||
void BsplineAllocator<T, ALIGN, ALLOC>::copy(UBT* single, MBT* multi, int i, const int* offset, const int* N)
|
||||
void BsplineAllocator<T, ALLOC>::copy(UBT* single, MBT* multi, int i, const int* offset, const int* N)
|
||||
{
|
||||
typedef typename bspline_type<MBT>::value_type out_type;
|
||||
typedef typename bspline_type<UBT>::value_type in_type;
|
||||
|
|
|
@ -28,12 +28,11 @@ namespace qmcplusplus
|
|||
{
|
||||
/** container class to hold a 3D multi spline pointer and BsplineAllocator
|
||||
* @tparam T the precision of splines
|
||||
* @tparam ALIGN the alignment of the orbital dimension
|
||||
* @tparam ALLOC memory allocator
|
||||
*
|
||||
* This class contains a pointer to a C object, copy and assign of this class is forbidden.
|
||||
*/
|
||||
template<typename T, size_t ALIGN = QMC_CLINE, typename ALLOC = Mallocator<T, ALIGN>>
|
||||
template<typename T, typename ALLOC = aligned_allocator<T>>
|
||||
class MultiBspline
|
||||
{
|
||||
private:
|
||||
|
@ -44,7 +43,7 @@ private:
|
|||
///actual einspline multi-bspline object
|
||||
SplineType* spline_m;
|
||||
///use allocator
|
||||
BsplineAllocator<T, ALIGN, ALLOC> myAllocator;
|
||||
BsplineAllocator<T, ALLOC> myAllocator;
|
||||
|
||||
public:
|
||||
MultiBspline() : spline_m(nullptr) {}
|
||||
|
@ -70,7 +69,7 @@ public:
|
|||
void create(GT& grid, BCT& bc, int num_splines)
|
||||
{
|
||||
static_assert(std::is_same<T, typename ALLOC::value_type>::value, "MultiBspline and ALLOC data types must agree!");
|
||||
if (getAlignedSize<T, ALIGN>(num_splines) != num_splines)
|
||||
if (getAlignedSize<T, ALLOC::alignment>(num_splines) != num_splines)
|
||||
throw std::runtime_error("When creating the data space of MultiBspline, num_splines must be padded!\n");
|
||||
if (spline_m == nullptr)
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue