Merge pull request #2193 from ye-luo/fix-ALIGN

Remove ALIGN as independent template parameter when memory allocator is available
This commit is contained in:
Ye Luo 2020-01-10 10:51:57 +08:00 committed by GitHub
commit 0d77ef7d7b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 23 additions and 157 deletions

View File

@ -15,8 +15,6 @@
#ifndef QMCPLUSPLUS_SOA_FAST_PARTICLE_OPERATORS_H
#define QMCPLUSPLUS_SOA_FAST_PARTICLE_OPERATORS_H
#include <simd/blas1.hpp>
namespace qmcplusplus
{
//Need to reorg

View File

@ -29,7 +29,7 @@ namespace qmcplusplus
* @tparam T data type, float, double, complex<float>, complex<double>
* @tparam Alloc memory allocator
*/
template<typename T, unsigned D, size_t ALIGN = QMC_CLINE, typename Alloc = Mallocator<T, ALIGN>>
template<typename T, unsigned D, typename Alloc = aligned_allocator<T>>
struct VectorSoaContainer
{
using AoSElement_t = TinyVector<T, D>;
@ -109,7 +109,7 @@ struct VectorSoaContainer
if (isRefAttached())
throw std::runtime_error("Resize not allowed on VectorSoaContainer constructed by initialized memory.");
size_t n_padded = getAlignedSize<T, ALIGN>(n);
size_t n_padded = getAlignedSize<T, Alloc::alignment>(n);
if (n_padded * D > nAllocated)
{

View File

@ -29,11 +29,11 @@ using PinnedAllocator = CUDALockedPageAllocator<T>;
using PinnedAllocator = std::allocator<T>;
#endif
template<typename T>
template<typename T, size_t ALIGN = QMC_CLINE>
#ifdef ENABLE_CUDA
using PinnedAlignedAllocator = CUDALockedPageAllocator<T, aligned_allocator<T>>;
using PinnedAlignedAllocator = CUDALockedPageAllocator<T, aligned_allocator<T, ALIGN>>;
#else
using PinnedAlignedAllocator = aligned_allocator<T>;
using PinnedAlignedAllocator = aligned_allocator<T, ALIGN>;
#endif
} // namespace qmcplusplus

View File

@ -38,7 +38,6 @@ template<typename ST>
class SplineC2ROMP : public BsplineSet
{
public:
static const int ALIGN = QMC_CLINE;
template<typename DT>
using OffloadAllocator = OMPallocator<DT, aligned_allocator<DT>>;
template<typename DT>
@ -69,7 +68,7 @@ private:
///number of complex bands
int nComplexBands;
///multi bspline set
std::shared_ptr<MultiBspline<ST, ALIGN, OffloadAllocator<ST>>> SplineInst;
std::shared_ptr<MultiBspline<ST, OffloadAllocator<ST>>> SplineInst;
vContainer_type mKK;
VectorSoaContainer<ST, 3> myKcart;
@ -159,7 +158,7 @@ public:
void create_spline(GT& xyz_g, BCT& xyz_bc)
{
resize_kpoints();
SplineInst = std::make_shared<MultiBspline<ST, ALIGN, OffloadAllocator<ST>>>();
SplineInst = std::make_shared<MultiBspline<ST, OffloadAllocator<ST>>>();
SplineInst->create(xyz_g, xyz_bc, myV.size());
app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20) << " MB allocated "

View File

@ -31,7 +31,7 @@ namespace qmcplusplus
* The scalar part works as PooledData, all the values are static_cast to T_scalar.
*/
#define DEFAULT_PAGE_SIZE 4096
template<typename T_scalar = OHMMS_PRECISION_FULL, typename Alloc = Mallocator<char, DEFAULT_PAGE_SIZE>>
template<typename T_scalar = OHMMS_PRECISION_FULL, typename Alloc = aligned_allocator<char, DEFAULT_PAGE_SIZE>>
struct PooledMemory
{
typedef char T;

View File

@ -29,6 +29,8 @@ struct Mallocator
typedef T* pointer;
typedef const T* const_pointer;
static constexpr size_t alignment = ALIGN;
Mallocator() = default;
template<class U>
Mallocator(const Mallocator<U, ALIGN>&)

View File

@ -21,8 +21,8 @@
namespace qmcplusplus
{
template<class T>
using aligned_allocator = Mallocator<T, QMC_CLINE>;
template<class T, size_t ALIGN = QMC_CLINE>
using aligned_allocator = Mallocator<T, ALIGN>;
template<class T>
using aligned_vector = std::vector<T, aligned_allocator<T>>;

View File

@ -1,131 +0,0 @@
//////////////////////////////////////////////////////////////////////////////////////
// This file is distributed under the University of Illinois/NCSA Open Source License.
// See LICENSE file in top directory for details.
//
// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
//
// File developed by:
//
// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
//////////////////////////////////////////////////////////////////////////////////////
// -*- C++ -*-
/** @file blas1.hpp
*/
#ifndef QMCPLUSPLUS_SIMD_BLAS1_HPP
#define QMCPLUSPLUS_SIMD_BLAS1_HPP
#include <complex>
namespace qmcplusplus
{
//going to replace simd
namespace blas
{
/** element-wise copy of a contiguous range, converting T1 to T2 on assignment
 * @param source starting address of the source
 * @param target starting address of the target
 * @param n number of elements to copy
 */
template<typename T1, typename T2>
inline void copy(const T1* restrict source, T2* restrict target, size_t n)
{
  // Count-based form of the same element-by-element assignment.
  std::copy_n(source, n, target);
}
/** split an array of complex values into separate real and imaginary arrays
 * @param source starting address of the complex input
 * @param target_r starting address of the output receiving the real parts
 * @param target_i starting address of the output receiving the imaginary parts
 * @param n number of complex elements to convert
 */
template<typename T1, typename T2>
inline void copy(const std::complex<T1>* restrict source, T2* restrict target_r, T2* restrict target_i, size_t n)
{
  // View the complex array as interleaved (real, imag) scalars. The cast must
  // keep the const qualifier: reinterpret_cast is not allowed to drop it.
  const T1* restrict iptr = reinterpret_cast<const T1*>(source);
  ASSUMED_ALIGNED(source);
  ASSUMED_ALIGNED(iptr);
  // size_t indices match the type of n and avoid a signed/unsigned comparison.
  for (size_t i = 0, i2 = 0; i < n; ++i, i2 += 2)
  {
    target_r[i] = static_cast<T2>(iptr[i2 + 0]);
    target_i[i] = static_cast<T2>(iptr[i2 + 1]);
  }
}
/** dot product of two arrays, accumulated on top of an initial value
 * @param a starting address of the first array
 * @param b starting address of the second array
 * @param n number of elements in each array
 * @param res initial value of the accumulator
 * @return res plus the sum of a[i] * b[i] over i in [0, n)
 */
template<typename T, typename TSUM>
inline TSUM dot(const T* restrict a, const T* restrict b, int n, TSUM res)
{
  ASSUMED_ALIGNED(a);
  ASSUMED_ALIGNED(b);
#pragma omp simd reduction(+ : res)
  for (int idx = 0; idx < n; ++idx)
  {
    res += a[idx] * b[idx];
  }
  return res;
}
/** dot product of a complex array with a real array
 * @param a starting address of the complex array
 * @param b starting address of the real array
 * @param n number of elements in each array
 * @param res initial value of the accumulator
 * @return res plus the sum of a[i] * b[i] over i in [0, n), in TSUM precision
 */
template<typename T, typename TSUM>
inline std::complex<TSUM> dot(const std::complex<T>* restrict a,
                              const T* restrict b,
                              int n,
                              const std::complex<TSUM>& res)
{
  TSUM res_r = res.real(), res_i = res.imag();
  // View the complex array as interleaved (real, imag) scalars. The cast must
  // keep the const qualifier: reinterpret_cast is not allowed to drop it.
  const T* restrict iptr = reinterpret_cast<const T*>(a);
  ASSUMED_ALIGNED(iptr);
  ASSUMED_ALIGNED(b);
  for (int i = 0, i2 = 0; i < n; i++, i2 += 2)
  {
    res_r += b[i] * iptr[i2];
    res_i += b[i] * iptr[i2 + 1];
  }
  // Build the result in TSUM, matching the declared return type, so the
  // accumulated sums are not routed through a std::complex<T> temporary.
  return std::complex<TSUM>(res_r, res_i);
}
/** dot product of a real array with a complex array
 *
 * Same contract as the (complex, real) overload with the first two arguments
 * swapped; the work is delegated to that overload.
 * @param a starting address of the real array
 * @param b starting address of the complex array
 * @param n number of elements in each array
 * @param res initial value of the accumulator
 */
template<typename T, typename TSUM>
inline std::complex<TSUM> dot(const T* restrict a,
                              const std::complex<T>* restrict b,
                              int n,
                              const std::complex<TSUM>& res)
{
  const std::complex<TSUM> total = dot(b, a, n, res);
  return total;
}
/** BLAS level-1 axpy: out[i] += alpha * in[i]
 * @param alpha scalar multiplier applied to every element of in
 * @param in starting address of the input array
 * @param out starting address of the array updated in place
 * @param n number of elements
 */
template<typename T>
inline void axpy(T alpha, const T* restrict in, T* restrict out, int n)
{
#pragma omp simd aligned(in, out)
  for (int i = 0; i < n; ++i)
    out[i] += alpha * in[i]; // accumulate: plain assignment would discard the "+ y" term of axpy
}
/** scale an array in place: inout[i] *= alpha
 * @param alpha scalar multiplier
 * @param inout starting address of the array scaled in place
 * @param n number of elements
 */
template<typename T>
inline void scal(T alpha, T* restrict inout, int n)
{
#pragma omp simd aligned(inout)
  for (int idx = 0; idx < n; ++idx)
  {
    inout[idx] *= alpha;
  }
}
/** accumulate second moments of the element-wise complex products x_[i] * y_[i]
 *
 * With tr and ti the real and imaginary parts of x_[i] * y_[i], each element
 * adds tr*tr to rN, ti*ti to iN and tr*ti to riN. The three outputs are added
 * to, not reset, so the caller chooses their starting values.
 * @param n number of complex elements
 * @param x_ starting address of the first complex array
 * @param y_ starting address of the second complex array
 * @param rN accumulator for the squared real parts
 * @param iN accumulator for the squared imaginary parts
 * @param riN accumulator for the real-times-imaginary cross terms
 */
template<typename T>
inline void accumulate_phases(const int& n,
                              const std::complex<T>* restrict x_,
                              const std::complex<T>* restrict y_,
                              T& rN,
                              T& iN,
                              T& riN)
{
  // View both complex arrays as interleaved (real, imag) scalars. The casts
  // must keep the const qualifier: reinterpret_cast is not allowed to drop it.
  const T* restrict x = reinterpret_cast<const T*>(x_);
  const T* restrict y = reinterpret_cast<const T*>(y_);
  for (int i = 0, i2 = 0; i < n; ++i, i2 += 2)
  {
    // Equivalent to:
    //   tr = x_[i].real() * y_[i].real() - x_[i].imag() * y_[i].imag();
    //   ti = x_[i].real() * y_[i].imag() + x_[i].imag() * y_[i].real();
    T tr = x[i2] * y[i2] - x[i2 + 1] * y[i2 + 1];
    T ti = x[i2] * y[i2 + 1] + x[i2 + 1] * y[i2];
    rN += tr * tr;
    iN += ti * ti;
    riN += tr * ti;
  }
}
} // namespace blas
} // namespace qmcplusplus
#endif

View File

@ -15,7 +15,6 @@
#ifndef QMCPLUSPLUS_EINSPLINE_BSPLINE_ALLOCATOR_H
#define QMCPLUSPLUS_EINSPLINE_BSPLINE_ALLOCATOR_H
#include <simd/Mallocator.hpp>
#include <simd/simd.hpp>
#include <spline2/bspline_traits.hpp>
#include "simd/allocator.hpp"
@ -43,7 +42,7 @@ inline void find_coefs_1d(Ugrid grid, BCtype_s bc, float* data, intptr_t dstride
}
} // namespace einspline
template<typename T, size_t ALIGN = QMC_CLINE, typename ALLOC = Mallocator<T, ALIGN>>
template<typename T, typename ALLOC = aligned_allocator<T>>
class BsplineAllocator
{
using SplineType = typename bspline_traits<T, 3>::SplineType;
@ -100,8 +99,8 @@ public:
void copy(UBT* single, MBT* multi, int i, const int* offset, const int* N);
};
template<typename T, size_t ALIGN, typename ALLOC>
typename BsplineAllocator<T, ALIGN, ALLOC>::SplineType* BsplineAllocator<T, ALIGN, ALLOC>::allocateMultiBspline(
template<typename T, typename ALLOC>
typename BsplineAllocator<T, ALLOC>::SplineType* BsplineAllocator<T, ALLOC>::allocateMultiBspline(
Ugrid x_grid,
Ugrid y_grid,
Ugrid z_grid,
@ -150,7 +149,7 @@ typename BsplineAllocator<T, ALIGN, ALLOC>::SplineType* BsplineAllocator<T, ALIG
z_grid.delta_inv = 1.0 / z_grid.delta;
spline->z_grid = z_grid;
const int N = getAlignedSize<real_type, ALIGN>(num_splines);
const int N = getAlignedSize<real_type, ALLOC::alignment>(num_splines);
spline->x_stride = (size_t)Ny * (size_t)Nz * (size_t)N;
spline->y_stride = Nz * N;
@ -162,8 +161,8 @@ typename BsplineAllocator<T, ALIGN, ALLOC>::SplineType* BsplineAllocator<T, ALIG
return spline;
}
template<typename T, size_t ALIGN, typename ALLOC>
typename BsplineAllocator<T, ALIGN, ALLOC>::SingleSplineType* BsplineAllocator<T, ALIGN, ALLOC>::allocateUBspline(
template<typename T, typename ALLOC>
typename BsplineAllocator<T, ALLOC>::SingleSplineType* BsplineAllocator<T, ALLOC>::allocateUBspline(
Ugrid x_grid,
Ugrid y_grid,
Ugrid z_grid,
@ -252,9 +251,9 @@ typename BsplineAllocator<T, ALIGN, ALLOC>::SingleSplineType* BsplineAllocator<T
return spline;
}
template<typename T, size_t ALIGN, typename ALLOC>
template<typename T, typename ALLOC>
template<typename UBT, typename MBT>
void BsplineAllocator<T, ALIGN, ALLOC>::copy(UBT* single, MBT* multi, int i, const int* offset, const int* N)
void BsplineAllocator<T, ALLOC>::copy(UBT* single, MBT* multi, int i, const int* offset, const int* N)
{
typedef typename bspline_type<MBT>::value_type out_type;
typedef typename bspline_type<UBT>::value_type in_type;

View File

@ -28,12 +28,11 @@ namespace qmcplusplus
{
/** container class to hold a 3D multi spline pointer and BsplineAllocator
* @tparam T the precision of splines
* @tparam ALIGN the alignment of the orbital dimension
* @tparam ALLOC memory allocator
*
* This class contains a pointer to a C object, copy and assign of this class is forbidden.
*/
template<typename T, size_t ALIGN = QMC_CLINE, typename ALLOC = Mallocator<T, ALIGN>>
template<typename T, typename ALLOC = aligned_allocator<T>>
class MultiBspline
{
private:
@ -44,7 +43,7 @@ private:
///actual einspline multi-bspline object
SplineType* spline_m;
///use allocator
BsplineAllocator<T, ALIGN, ALLOC> myAllocator;
BsplineAllocator<T, ALLOC> myAllocator;
public:
MultiBspline() : spline_m(nullptr) {}
@ -70,7 +69,7 @@ public:
void create(GT& grid, BCT& bc, int num_splines)
{
static_assert(std::is_same<T, typename ALLOC::value_type>::value, "MultiBspline and ALLOC data types must agree!");
if (getAlignedSize<T, ALIGN>(num_splines) != num_splines)
if (getAlignedSize<T, ALLOC::alignment>(num_splines) != num_splines)
throw std::runtime_error("When creating the data space of MultiBspline, num_splines must be padded!\n");
if (spline_m == nullptr)
{