Merge branch 'develop' into spinorset_resources

2022-09-09 07:27:53 -06:00 · 2022-09-09 07:27:53 -06:00 · 682f442a9f
parent eab8931d99 353654b72c
commit 682f442a9f
14 changed files with 469 additions and 1355 deletions
--- a/src/QMCWaveFunctions/CMakeLists.txt
+++ b/src/QMCWaveFunctions/CMakeLists.txt
@ -57,11 +57,10 @@ set(JASTROW_SRCS
    Jastrow/CountingJastrowBuilder.cpp
    Jastrow/RPAJastrow.cpp
    Jastrow/J1OrbitalSoA.cpp
-    Jastrow/J2OrbitalSoA.cpp
    LatticeGaussianProduct.cpp
    LatticeGaussianProductBuilder.cpp)
 set(JASTROW_OMPTARGET_SRCS
-    Jastrow/J2OMPTarget.cpp
+    Jastrow/TwoBodyJastrow.cpp
    Jastrow/BsplineFunctor.cpp)
 set(FERMION_SRCS ${FERMION_SRCS} ElectronGas/FreeOrbital.cpp ElectronGas/FreeOrbitalBuilder.cpp)

@ -144,7 +143,7 @@ set(FERMION_OMPTARGET_SRCS ${FERMION_OMPTARGET_SRCS} Fermion/MultiSlaterDetTable
 if(QMC_CUDA)
  set(FERMION_SRCS ${FERMION_SRCS} EinsplineSetCuda.cpp Fermion/DiracDeterminantCUDA.cpp Fermion/SlaterDetCUDA.cpp
                   TrialWaveFunction_CUDA.cpp)
-  set(JASTROW_SRCS ${JASTROW_SRCS} Jastrow/OneBodyJastrowOrbitalBspline.cpp Jastrow/TwoBodyJastrowOrbitalBspline.cpp)
+  set(JASTROW_SRCS ${JASTROW_SRCS} Jastrow/OneBodyJastrowCUDA.cpp Jastrow/TwoBodyJastrowCUDA.cpp)
 endif()

 ####################################
--- a/src/QMCWaveFunctions/Jastrow/J2OrbitalSoA.cpp
+++ b/src/QMCWaveFunctions/Jastrow/J2OrbitalSoA.cpp
@ -1,710 +0,0 @@
-//////////////////////////////////////////////////////////////////////////////////////
-// This file is distributed under the University of Illinois/NCSA Open Source License.
-// See LICENSE file in top directory for details.
-//
-// Copyright (c) 2021 QMCPACK developers.
-//
-// File developed by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
-//                    Amrita Mathuriya, amrita.mathuriya@intel.com, Intel Corp.
-//                    Ye Luo, yeluo@anl.gov, Argonne National Laboratory
-//
-// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
-//////////////////////////////////////////////////////////////////////////////////////
-// -*- C++ -*-
-
-
-#include "J2OrbitalSoA.h"
-#include "CPU/SIMD/algorithm.hpp"
-#include "ParticleBase/ParticleAttribOps.h"
-
-namespace qmcplusplus
-{
-template<typename FT>
-void J2OrbitalSoA<FT>::extractOptimizableObjectRefs(UniqueOptObjRefs& opt_obj_refs)
-{
-  for (auto& [key, functor] : J2Unique)
-    opt_obj_refs.push_back(*functor);
-}
-
-template<typename FT>
-void J2OrbitalSoA<FT>::checkOutVariables(const opt_variables_type& active)
-{
-  myVars.clear();
-  for (auto& [key, functor] : J2Unique)
-  {
-    functor->myVars.getIndex(active);
-    myVars.insertFrom(functor->myVars);
-  }
-  // Remove inactive variables so the mappings are correct
-  myVars.removeInactive();
-
-  myVars.getIndex(active);
-
-  const size_t NumVars = myVars.size();
-  if (NumVars)
-  {
-    OffSet.resize(F.size());
-    // Find first active variable for the starting offset
-    int varoffset = -1;
-    for (int i = 0; i < myVars.size(); i++)
-    {
-      varoffset = myVars.Index[i];
-      if (varoffset != -1)
-        break;
-    }
-
-    for (int i = 0; i < F.size(); ++i)
-    {
-      if (F[i] && F[i]->myVars.Index.size())
-      {
-        OffSet[i].first  = F[i]->myVars.Index.front() - varoffset;
-        OffSet[i].second = F[i]->myVars.Index.size() + OffSet[i].first;
-      }
-      else
-      {
-        OffSet[i].first = OffSet[i].second = -1;
-      }
-    }
-  }
-}
-
-template<typename FT>
-void J2OrbitalSoA<FT>::evaluateRatios(const VirtualParticleSet& VP, std::vector<ValueType>& ratios)
-{
-  for (int k = 0; k < ratios.size(); ++k)
-    ratios[k] =
-        std::exp(Uat[VP.refPtcl] - computeU(VP.refPS, VP.refPtcl, VP.getDistTableAB(my_table_ID_).getDistRow(k)));
-}
-
-template<typename FT>
-void J2OrbitalSoA<FT>::registerData(ParticleSet& P, WFBufferType& buf)
-{
-  if (Bytes_in_WFBuffer == 0)
-  {
-    Bytes_in_WFBuffer = buf.current();
-    buf.add(Uat.begin(), Uat.end());
-    buf.add(dUat.data(), dUat.end());
-    buf.add(d2Uat.begin(), d2Uat.end());
-    Bytes_in_WFBuffer = buf.current() - Bytes_in_WFBuffer;
-    // free local space
-    Uat.free();
-    dUat.free();
-    d2Uat.free();
-  }
-  else
-  {
-    buf.forward(Bytes_in_WFBuffer);
-  }
-}
-
-template<typename FT>
-void J2OrbitalSoA<FT>::copyFromBuffer(ParticleSet& P, WFBufferType& buf)
-{
-  Uat.attachReference(buf.lendReference<valT>(N), N);
-  dUat.attachReference(N, N_padded, buf.lendReference<valT>(N_padded * OHMMS_DIM));
-  d2Uat.attachReference(buf.lendReference<valT>(N), N);
-}
-
-template<typename FT>
-typename J2OrbitalSoA<FT>::LogValueType J2OrbitalSoA<FT>::updateBuffer(ParticleSet& P,
-                                                                       WFBufferType& buf,
-                                                                       bool fromscratch)
-{
-  log_value_ = computeGL(P.G, P.L);
-  buf.forward(Bytes_in_WFBuffer);
-  return log_value_;
-}
-
-template<typename FT>
-typename J2OrbitalSoA<FT>::valT J2OrbitalSoA<FT>::computeU(const ParticleSet& P, int iat, const DistRow& dist)
-{
-  valT curUat(0);
-  const int igt = P.GroupID[iat] * NumGroups;
-  for (int jg = 0; jg < NumGroups; ++jg)
-  {
-    const FuncType& f2(*F[igt + jg]);
-    int iStart = P.first(jg);
-    int iEnd   = P.last(jg);
-    curUat += f2.evaluateV(iat, iStart, iEnd, dist.data(), DistCompressed.data());
-  }
-  return curUat;
-}
-
-template<typename FT>
-typename J2OrbitalSoA<FT>::posT J2OrbitalSoA<FT>::accumulateG(const valT* restrict du, const DisplRow& displ) const
-{
-  posT grad;
-  for (int idim = 0; idim < OHMMS_DIM; ++idim)
-  {
-    const valT* restrict dX = displ.data(idim);
-    valT s                  = valT();
-
-#pragma omp simd reduction(+ : s) aligned(du, dX : QMC_SIMD_ALIGNMENT)
-    for (int jat = 0; jat < N; ++jat)
-      s += du[jat] * dX[jat];
-    grad[idim] = s;
-  }
-  return grad;
-}
-
-template<typename FT>
-J2OrbitalSoA<FT>::J2OrbitalSoA(const std::string& obj_name, ParticleSet& p)
-    : WaveFunctionComponent(obj_name),
-      my_table_ID_(p.addTable(p, DTModes::NEED_TEMP_DATA_ON_HOST | DTModes::NEED_VP_FULL_TABLE_ON_HOST)),
-      j2_ke_corr_helper(p, F)
-{
-  if (my_name_.empty())
-    throw std::runtime_error("J2OrbitalSoA object name cannot be empty!");
-  init(p);
-  KEcorr = 0.0;
-}
-
-template<typename FT>
-J2OrbitalSoA<FT>::~J2OrbitalSoA() = default;
-
-template<typename FT>
-void J2OrbitalSoA<FT>::init(ParticleSet& p)
-{
-  N         = p.getTotalNum();
-  N_padded  = getAlignedSize<valT>(N);
-  NumGroups = p.groups();
-
-  Uat.resize(N);
-  dUat.resize(N);
-  d2Uat.resize(N);
-  cur_u.resize(N);
-  cur_du.resize(N);
-  cur_d2u.resize(N);
-  old_u.resize(N);
-  old_du.resize(N);
-  old_d2u.resize(N);
-  F.resize(NumGroups * NumGroups, nullptr);
-  DistCompressed.resize(N);
-  DistIndice.resize(N);
-}
-
-template<typename FT>
-void J2OrbitalSoA<FT>::addFunc(int ia, int ib, std::unique_ptr<FT> j)
-{
-  assert(ia < NumGroups);
-  assert(ib < NumGroups);
-  if (ia == ib)
-  {
-    if (ia == 0) //first time, assign everything
-    {
-      int ij = 0;
-      for (int ig = 0; ig < NumGroups; ++ig)
-        for (int jg = 0; jg < NumGroups; ++jg, ++ij)
-          if (F[ij] == nullptr)
-            F[ij] = j.get();
-    }
-    else
-      F[ia * NumGroups + ib] = j.get();
-  }
-  else
-  {
-    // a very special case, 1 particle of each type (e.g. 1 up + 1 down)
-    // uu/dd/etc. was prevented by the builder
-    if (N == NumGroups)
-      for (int ig = 0; ig < NumGroups; ++ig)
-        F[ig * NumGroups + ig] = j.get();
-    // generic case
-    F[ia * NumGroups + ib] = j.get();
-    F[ib * NumGroups + ia] = j.get();
-  }
-  std::stringstream aname;
-  aname << ia << ib;
-  J2Unique[aname.str()] = std::move(j);
-}
-
-template<typename FT>
-std::unique_ptr<WaveFunctionComponent> J2OrbitalSoA<FT>::makeClone(ParticleSet& tqp) const
-{
-  auto j2copy = std::make_unique<J2OrbitalSoA<FT>>(my_name_, tqp);
-  std::map<const FT*, FT*> fcmap;
-  for (int ig = 0; ig < NumGroups; ++ig)
-    for (int jg = ig; jg < NumGroups; ++jg)
-    {
-      int ij = ig * NumGroups + jg;
-      if (F[ij] == 0)
-        continue;
-      typename std::map<const FT*, FT*>::iterator fit = fcmap.find(F[ij]);
-      if (fit == fcmap.end())
-      {
-        auto fc      = std::make_unique<FT>(*F[ij]);
-        fcmap[F[ij]] = fc.get();
-        j2copy->addFunc(ig, jg, std::move(fc));
-      }
-    }
-  j2copy->KEcorr = KEcorr;
-
-  j2copy->myVars.clear();
-  j2copy->myVars.insertFrom(myVars);
-  j2copy->OffSet = OffSet;
-
-  return j2copy;
-}
-
-/** intenal function to compute \f$\sum_j u(r_j), du/dr, d2u/dr2\f$
- * @param P particleset
- * @param iat particle index
- * @param dist starting distance
- * @param u starting value
- * @param du starting first deriv
- * @param d2u starting second deriv
- */
-template<typename FT>
-void J2OrbitalSoA<FT>::computeU3(const ParticleSet& P,
-                                 int iat,
-                                 const DistRow& dist,
-                                 RealType* restrict u,
-                                 RealType* restrict du,
-                                 RealType* restrict d2u,
-                                 bool triangle)
-{
-  const int jelmax = triangle ? iat : N;
-  constexpr valT czero(0);
-  std::fill_n(u, jelmax, czero);
-  std::fill_n(du, jelmax, czero);
-  std::fill_n(d2u, jelmax, czero);
-
-  const int igt = P.GroupID[iat] * NumGroups;
-  for (int jg = 0; jg < NumGroups; ++jg)
-  {
-    const FuncType& f2(*F[igt + jg]);
-    int iStart = P.first(jg);
-    int iEnd   = std::min(jelmax, P.last(jg));
-    f2.evaluateVGL(iat, iStart, iEnd, dist.data(), u, du, d2u, DistCompressed.data(), DistIndice.data());
-  }
-  //u[iat]=czero;
-  //du[iat]=czero;
-  //d2u[iat]=czero;
-}
-
-template<typename FT>
-typename J2OrbitalSoA<FT>::PsiValueType J2OrbitalSoA<FT>::ratio(ParticleSet& P, int iat)
-{
-  //only ratio, ready to compute it again
-  UpdateMode = ORB_PBYP_RATIO;
-  cur_Uat    = computeU(P, iat, P.getDistTableAA(my_table_ID_).getTempDists());
-  return std::exp(static_cast<PsiValueType>(Uat[iat] - cur_Uat));
-}
-
-template<typename FT>
-void J2OrbitalSoA<FT>::evaluateRatiosAlltoOne(ParticleSet& P, std::vector<ValueType>& ratios)
-{
-  const auto& d_table = P.getDistTableAA(my_table_ID_);
-  const auto& dist    = d_table.getTempDists();
-
-  for (int ig = 0; ig < NumGroups; ++ig)
-  {
-    const int igt = ig * NumGroups;
-    valT sumU(0);
-    for (int jg = 0; jg < NumGroups; ++jg)
-    {
-      const FuncType& f2(*F[igt + jg]);
-      int iStart = P.first(jg);
-      int iEnd   = P.last(jg);
-      sumU += f2.evaluateV(-1, iStart, iEnd, dist.data(), DistCompressed.data());
-    }
-
-    for (int i = P.first(ig); i < P.last(ig); ++i)
-    {
-      // remove self-interaction
-      const valT Uself = F[igt + ig]->evaluate(dist[i]);
-      ratios[i]        = std::exp(Uat[i] + Uself - sumU);
-    }
-  }
-}
-
-template<typename FT>
-typename J2OrbitalSoA<FT>::GradType J2OrbitalSoA<FT>::evalGrad(ParticleSet& P, int iat)
-{
-  return GradType(dUat[iat]);
-}
-
-template<typename FT>
-typename J2OrbitalSoA<FT>::PsiValueType J2OrbitalSoA<FT>::ratioGrad(ParticleSet& P, int iat, GradType& grad_iat)
-{
-  UpdateMode = ORB_PBYP_PARTIAL;
-
-  computeU3(P, iat, P.getDistTableAA(my_table_ID_).getTempDists(), cur_u.data(), cur_du.data(), cur_d2u.data());
-  cur_Uat = simd::accumulate_n(cur_u.data(), N, valT());
-  DiffVal = Uat[iat] - cur_Uat;
-  grad_iat += accumulateG(cur_du.data(), P.getDistTableAA(my_table_ID_).getTempDispls());
-  return std::exp(static_cast<PsiValueType>(DiffVal));
-}
-
-template<typename FT>
-void J2OrbitalSoA<FT>::acceptMove(ParticleSet& P, int iat, bool safe_to_delay)
-{
-  // get the old u, du, d2u
-  const auto& d_table = P.getDistTableAA(my_table_ID_);
-  computeU3(P, iat, d_table.getOldDists(), old_u.data(), old_du.data(), old_d2u.data());
-  if (UpdateMode == ORB_PBYP_RATIO)
-  { //ratio-only during the move; need to compute derivatives
-    const auto& dist = d_table.getTempDists();
-    computeU3(P, iat, dist, cur_u.data(), cur_du.data(), cur_d2u.data());
-  }
-
-  valT cur_d2Uat(0);
-  const auto& new_dr    = d_table.getTempDispls();
-  const auto& old_dr    = d_table.getOldDispls();
-  constexpr valT lapfac = OHMMS_DIM - RealType(1);
-#pragma omp simd reduction(+ : cur_d2Uat)
-  for (int jat = 0; jat < N; jat++)
-  {
-    const valT du   = cur_u[jat] - old_u[jat];
-    const valT newl = cur_d2u[jat] + lapfac * cur_du[jat];
-    const valT dl   = old_d2u[jat] + lapfac * old_du[jat] - newl;
-    Uat[jat] += du;
-    d2Uat[jat] += dl;
-    cur_d2Uat -= newl;
-  }
-  posT cur_dUat;
-  for (int idim = 0; idim < OHMMS_DIM; ++idim)
-  {
-    const valT* restrict new_dX    = new_dr.data(idim);
-    const valT* restrict old_dX    = old_dr.data(idim);
-    const valT* restrict cur_du_pt = cur_du.data();
-    const valT* restrict old_du_pt = old_du.data();
-    valT* restrict save_g          = dUat.data(idim);
-    valT cur_g                     = cur_dUat[idim];
-#pragma omp simd reduction(+ : cur_g) aligned(old_dX, new_dX, save_g, cur_du_pt, old_du_pt : QMC_SIMD_ALIGNMENT)
-    for (int jat = 0; jat < N; jat++)
-    {
-      const valT newg = cur_du_pt[jat] * new_dX[jat];
-      const valT dg   = newg - old_du_pt[jat] * old_dX[jat];
-      save_g[jat] -= dg;
-      cur_g += newg;
-    }
-    cur_dUat[idim] = cur_g;
-  }
-  log_value_ += Uat[iat] - cur_Uat;
-  Uat[iat]   = cur_Uat;
-  dUat(iat)  = cur_dUat;
-  d2Uat[iat] = cur_d2Uat;
-}
-
-template<typename FT>
-void J2OrbitalSoA<FT>::recompute(const ParticleSet& P)
-{
-  const auto& d_table = P.getDistTableAA(my_table_ID_);
-  for (int ig = 0; ig < NumGroups; ++ig)
-  {
-    for (int iat = P.first(ig), last = P.last(ig); iat < last; ++iat)
-    {
-      computeU3(P, iat, d_table.getDistRow(iat), cur_u.data(), cur_du.data(), cur_d2u.data(), true);
-      Uat[iat] = simd::accumulate_n(cur_u.data(), iat, valT());
-      posT grad;
-      valT lap(0);
-      const valT* restrict u   = cur_u.data();
-      const valT* restrict du  = cur_du.data();
-      const valT* restrict d2u = cur_d2u.data();
-      const auto& displ        = d_table.getDisplRow(iat);
-      constexpr valT lapfac    = OHMMS_DIM - RealType(1);
-#pragma omp simd reduction(+ : lap) aligned(du, d2u : QMC_SIMD_ALIGNMENT)
-      for (int jat = 0; jat < iat; ++jat)
-        lap += d2u[jat] + lapfac * du[jat];
-      for (int idim = 0; idim < OHMMS_DIM; ++idim)
-      {
-        const valT* restrict dX = displ.data(idim);
-        valT s                  = valT();
-#pragma omp simd reduction(+ : s) aligned(du, dX : QMC_SIMD_ALIGNMENT)
-        for (int jat = 0; jat < iat; ++jat)
-          s += du[jat] * dX[jat];
-        grad[idim] = s;
-      }
-      dUat(iat)  = grad;
-      d2Uat[iat] = -lap;
-// add the contribution from the upper triangle
-#pragma omp simd aligned(u, du, d2u : QMC_SIMD_ALIGNMENT)
-      for (int jat = 0; jat < iat; jat++)
-      {
-        Uat[jat] += u[jat];
-        d2Uat[jat] -= d2u[jat] + lapfac * du[jat];
-      }
-      for (int idim = 0; idim < OHMMS_DIM; ++idim)
-      {
-        valT* restrict save_g   = dUat.data(idim);
-        const valT* restrict dX = displ.data(idim);
-#pragma omp simd aligned(save_g, du, dX : QMC_SIMD_ALIGNMENT)
-        for (int jat = 0; jat < iat; jat++)
-          save_g[jat] -= du[jat] * dX[jat];
-      }
-    }
-  }
-}
-
-template<typename FT>
-typename J2OrbitalSoA<FT>::LogValueType J2OrbitalSoA<FT>::evaluateLog(const ParticleSet& P,
-                                                                      ParticleSet::ParticleGradient& G,
-                                                                      ParticleSet::ParticleLaplacian& L)
-{
-  recompute(P);
-  return log_value_ = computeGL(G, L);
-}
-
-template<typename FT>
-typename J2OrbitalSoA<FT>::QTFull::RealType J2OrbitalSoA<FT>::computeGL(ParticleSet::ParticleGradient& G,
-                                                                        ParticleSet::ParticleLaplacian& L) const
-{
-  for (int iat = 0; iat < N; ++iat)
-  {
-    G[iat] += dUat[iat];
-    L[iat] += d2Uat[iat];
-  }
-  return -0.5 * simd::accumulate_n(Uat.data(), N, QTFull::RealType());
-}
-
-template<typename FT>
-WaveFunctionComponent::LogValueType J2OrbitalSoA<FT>::evaluateGL(const ParticleSet& P,
-                                                                 ParticleSet::ParticleGradient& G,
-                                                                 ParticleSet::ParticleLaplacian& L,
-                                                                 bool fromscratch)
-{
-  return log_value_ = computeGL(G, L);
-}
-
-template<typename FT>
-void J2OrbitalSoA<FT>::evaluateHessian(ParticleSet& P, HessVector& grad_grad_psi)
-{
-  log_value_ = 0.0;
-  const auto& d_ee(P.getDistTableAA(my_table_ID_));
-  valT dudr, d2udr2;
-
-  Tensor<valT, DIM> ident;
-  grad_grad_psi = 0.0;
-  ident.diagonal(1.0);
-
-  for (int i = 1; i < N; ++i)
-  {
-    const auto& dist  = d_ee.getDistRow(i);
-    const auto& displ = d_ee.getDisplRow(i);
-    auto ig           = P.GroupID[i];
-    const int igt     = ig * NumGroups;
-    for (int j = 0; j < i; ++j)
-    {
-      auto r    = dist[j];
-      auto rinv = 1.0 / r;
-      auto dr   = displ[j];
-      auto jg   = P.GroupID[j];
-      auto uij  = F[igt + jg]->evaluate(r, dudr, d2udr2);
-      log_value_ -= uij;
-      auto hess = rinv * rinv * outerProduct(dr, dr) * (d2udr2 - dudr * rinv) + ident * dudr * rinv;
-      grad_grad_psi[i] -= hess;
-      grad_grad_psi[j] -= hess;
-    }
-  }
-}
-
-template<typename FT>
-void J2OrbitalSoA<FT>::evaluateDerivatives(ParticleSet& P,
-                                           const opt_variables_type& active,
-                                           Vector<ValueType>& dlogpsi,
-                                           Vector<ValueType>& dhpsioverpsi)
-{
-  if (myVars.size() == 0)
-    return;
-
-  evaluateDerivativesWF(P, active, dlogpsi);
-  bool recalculate(false);
-  std::vector<bool> rcsingles(myVars.size(), false);
-  for (int k = 0; k < myVars.size(); ++k)
-  {
-    int kk = myVars.where(k);
-    if (kk < 0)
-      continue;
-    if (active.recompute(kk))
-      recalculate = true;
-    rcsingles[k] = true;
-  }
-  if (recalculate)
-  {
-    for (int k = 0; k < myVars.size(); ++k)
-    {
-      int kk = myVars.where(k);
-      if (kk < 0)
-        continue;
-      if (rcsingles[k])
-      {
-        dhpsioverpsi[kk] = -RealType(0.5) * ValueType(Sum(lapLogPsi[k])) - ValueType(Dot(P.G, gradLogPsi[k]));
-      }
-    }
-  }
-}
-
-template<typename FT>
-void J2OrbitalSoA<FT>::evaluateDerivativesWF(ParticleSet& P,
-                                             const opt_variables_type& active,
-                                             Vector<ValueType>& dlogpsi)
-{
-  if (myVars.size() == 0)
-    return;
-
-  resizeWFOptVectors();
-
-  bool recalculate(false);
-  std::vector<bool> rcsingles(myVars.size(), false);
-  for (int k = 0; k < myVars.size(); ++k)
-  {
-    int kk = myVars.where(k);
-    if (kk < 0)
-      continue;
-    if (active.recompute(kk))
-      recalculate = true;
-    rcsingles[k] = true;
-  }
-  if (recalculate)
-  {
-    ///precomputed recalculation switch
-    std::vector<bool> RecalcSwitch(F.size(), false);
-    for (int i = 0; i < F.size(); ++i)
-    {
-      if (OffSet[i].first < 0)
-      {
-        // nothing to optimize
-        RecalcSwitch[i] = false;
-      }
-      else
-      {
-        bool recalcFunc(false);
-        for (int rcs = OffSet[i].first; rcs < OffSet[i].second; rcs++)
-          if (rcsingles[rcs] == true)
-            recalcFunc = true;
-        RecalcSwitch[i] = recalcFunc;
-      }
-    }
-    dLogPsi              = 0.0;
-    const size_t NumVars = myVars.size();
-    for (int p = 0; p < NumVars; ++p)
-    {
-      gradLogPsi[p] = 0.0;
-      lapLogPsi[p]  = 0.0;
-    }
-    std::vector<TinyVector<RealType, 3>> derivs(NumVars);
-    const auto& d_table = P.getDistTableAA(my_table_ID_);
-    constexpr RealType cone(1);
-    constexpr RealType lapfac(OHMMS_DIM - cone);
-    const size_t n  = d_table.sources();
-    const size_t ng = P.groups();
-    for (size_t i = 1; i < n; ++i)
-    {
-      const size_t ig   = P.GroupID[i] * ng;
-      const auto& dist  = d_table.getDistRow(i);
-      const auto& displ = d_table.getDisplRow(i);
-      for (size_t j = 0; j < i; ++j)
-      {
-        const size_t ptype = ig + P.GroupID[j];
-        if (RecalcSwitch[ptype])
-        {
-          std::fill(derivs.begin(), derivs.end(), 0.0);
-          if (!F[ptype]->evaluateDerivatives(dist[j], derivs))
-            continue;
-          RealType rinv(cone / dist[j]);
-          PosType dr(displ[j]);
-          for (int p = OffSet[ptype].first, ip = 0; p < OffSet[ptype].second; ++p, ++ip)
-          {
-            RealType dudr(rinv * derivs[ip][1]);
-            RealType lap(derivs[ip][2] + lapfac * dudr);
-            //RealType lap(derivs[ip][2]+(OHMMS_DIM-1.0)*dudr);
-            PosType gr(dudr * dr);
-            dLogPsi[p] -= derivs[ip][0];
-            gradLogPsi[p][i] += gr;
-            gradLogPsi[p][j] -= gr;
-            lapLogPsi[p][i] -= lap;
-            lapLogPsi[p][j] -= lap;
-          }
-        }
-      }
-    }
-    for (int k = 0; k < myVars.size(); ++k)
-    {
-      int kk = myVars.where(k);
-      if (kk < 0)
-        continue;
-      if (rcsingles[k])
-      {
-        dlogpsi[kk] = dLogPsi[k];
-      }
-      //optVars.setDeriv(p,dLogPsi[ip],-0.5*Sum(lapLogPsi[ip])-Dot(P.G,gradLogPsi[ip]));
-    }
-  }
-}
-
-template<typename FT>
-void J2OrbitalSoA<FT>::evaluateDerivRatios(const VirtualParticleSet& VP,
-                                           const opt_variables_type& optvars,
-                                           std::vector<ValueType>& ratios,
-                                           Matrix<ValueType>& dratios)
-{
-  evaluateRatios(VP, ratios);
-  if (myVars.size() == 0)
-    return;
-
-  bool recalculate(false);
-  std::vector<bool> rcsingles(myVars.size(), false);
-  for (int k = 0; k < myVars.size(); ++k)
-  {
-    int kk = myVars.where(k);
-    if (kk < 0)
-      continue;
-    if (optvars.recompute(kk))
-      recalculate = true;
-    rcsingles[k] = true;
-  }
-
-  if (recalculate)
-  {
-    ///precomputed recalculation switch
-    std::vector<bool> RecalcSwitch(F.size(), false);
-    for (int i = 0; i < F.size(); ++i)
-    {
-      if (OffSet[i].first < 0)
-      {
-        // nothing to optimize
-        RecalcSwitch[i] = false;
-      }
-      else
-      {
-        bool recalcFunc(false);
-        for (int rcs = OffSet[i].first; rcs < OffSet[i].second; rcs++)
-          if (rcsingles[rcs] == true)
-            recalcFunc = true;
-        RecalcSwitch[i] = recalcFunc;
-      }
-    }
-    const size_t NumVars = myVars.size();
-    std::vector<RealType> derivs_ref(NumVars);
-    std::vector<RealType> derivs(NumVars);
-    const auto& d_table = VP.getDistTableAB(my_table_ID_);
-    const size_t n      = d_table.sources();
-    const size_t nt     = VP.getTotalNum();
-    for (size_t i = 0; i < n; ++i)
-    {
-      if (i == VP.refPtcl)
-        continue;
-      const size_t ptype = VP.refPS.GroupID[i] * VP.refPS.groups() + VP.refPS.GroupID[VP.refPtcl];
-      if (!RecalcSwitch[ptype])
-        continue;
-      const auto dist_ref = i < VP.refPtcl ? VP.refPS.getDistTableAA(my_table_ID_).getDistRow(VP.refPtcl)[i]
-                                           : VP.refPS.getDistTableAA(my_table_ID_).getDistRow(i)[VP.refPtcl];
-      //first calculate the old derivatives VP.refPtcl.
-      std::fill(derivs_ref.begin(), derivs_ref.end(), 0.0);
-      F[ptype]->evaluateDerivatives(dist_ref, derivs_ref);
-      for (size_t j = 0; j < nt; ++j)
-      {
-        std::fill(derivs.begin(), derivs.end(), 0.0);
-        F[ptype]->evaluateDerivatives(d_table.getDistRow(j)[i], derivs);
-        for (int ip = 0, p = F[ptype]->myVars.Index.front(); ip < F[ptype]->myVars.Index.size(); ++ip, ++p)
-          dratios[j][p] += derivs_ref[ip] - derivs[ip];
-      }
-    }
-  }
-}
-
-template class J2OrbitalSoA<BsplineFunctor<QMCTraits::RealType>>;
-template class J2OrbitalSoA<PadeFunctor<QMCTraits::RealType>>;
-template class J2OrbitalSoA<UserFunctor<QMCTraits::RealType>>;
-template class J2OrbitalSoA<FakeFunctor<QMCTraits::RealType>>;
-} // namespace qmcplusplus
--- a/src/QMCWaveFunctions/Jastrow/J2OrbitalSoA.h
+++ b/src/QMCWaveFunctions/Jastrow/J2OrbitalSoA.h
@ -1,220 +0,0 @@
-//////////////////////////////////////////////////////////////////////////////////////
-// This file is distributed under the University of Illinois/NCSA Open Source License.
-// See LICENSE file in top directory for details.
-//
-// Copyright (c) 2021 QMCPACK developers.
-//
-// File developed by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
-//                    Amrita Mathuriya, amrita.mathuriya@intel.com, Intel Corp.
-//                    Ye Luo, yeluo@anl.gov, Argonne National Laboratory
-//
-// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
-//////////////////////////////////////////////////////////////////////////////////////
-// -*- C++ -*-
-#ifndef QMCPLUSPLUS_TWOBODYJASTROW_OPTIMIZED_SOA_H
-#define QMCPLUSPLUS_TWOBODYJASTROW_OPTIMIZED_SOA_H
-
-#include <map>
-#include <numeric>
-#include "Configuration.h"
-#if !defined(QMC_BUILD_SANDBOX_ONLY)
-#include "QMCWaveFunctions/WaveFunctionComponent.h"
-#endif
-#include "Particle/DistanceTable.h"
-#include "LongRange/StructFact.h"
-#include "CPU/SIMD/aligned_allocator.hpp"
-#include "J2KECorrection.h"
-
-#include "BsplineFunctor.h"
-#include "PadeFunctors.h"
-#include "UserFunctor.h"
-#include "FakeFunctor.h"
-
-namespace qmcplusplus
-{
-/** @ingroup WaveFunctionComponent
- *  @brief Specialization for two-body Jastrow function using multiple functors
- *
- * Each pair-type can have distinct function \f$u(r_{ij})\f$.
- * For electrons, distinct pair correlation functions are used
- * for spins up-up/down-down and up-down/down-up.
- *
- * Based on J2OrbitalSoA.h with these considerations
- * - DistanceTable using SoA containers
- * - support mixed precision: FT::real_type != OHMMS_PRECISION
- * - loops over the groups: elminated PairID
- * - support simd function
- * - double the loop counts
- * - Memory use is O(N). 
- */
-template<class FT>
-class J2OrbitalSoA : public WaveFunctionComponent
-{
-public:
-  ///alias FuncType
-  using FuncType = FT;
-  ///type of each component U, dU, d2U;
-  using valT = typename FT::real_type;
-  ///element position type
-  using posT = TinyVector<valT, OHMMS_DIM>;
-  ///use the same container
-  using DistRow         = DistanceTable::DistRow;
-  using DisplRow        = DistanceTable::DisplRow;
-  using gContainer_type = VectorSoaContainer<valT, OHMMS_DIM>;
-
-  using GradDerivVec  = ParticleAttrib<QTFull::GradType>;
-  using ValueDerivVec = ParticleAttrib<QTFull::ValueType>;
-
-protected:
-  ///number of particles
-  size_t N;
-  ///number of particles + padded
-  size_t N_padded;
-  ///number of groups of the target particleset
-  size_t NumGroups;
-  ///diff value
-  RealType DiffVal;
-  ///Correction
-  RealType KEcorr;
-  ///\f$Uat[i] = sum_(j) u_{i,j}\f$
-  Vector<valT> Uat;
-  ///\f$dUat[i] = sum_(j) du_{i,j}\f$
-  gContainer_type dUat;
-  ///\f$d2Uat[i] = sum_(j) d2u_{i,j}\f$
-  Vector<valT> d2Uat;
-  valT cur_Uat;
-  aligned_vector<valT> cur_u, cur_du, cur_d2u;
-  aligned_vector<valT> old_u, old_du, old_d2u;
-  aligned_vector<valT> DistCompressed;
-  aligned_vector<int> DistIndice;
-  ///Uniquue J2 set for cleanup
-  std::map<std::string, std::unique_ptr<FT>> J2Unique;
-  ///Container for \f$F[ig*NumGroups+jg]\f$. treat every pointer as a reference.
-  std::vector<FT*> F;
-  /// e-e table ID
-  const int my_table_ID_;
-  // helper for compute J2 Chiesa KE correction
-  J2KECorrection<RealType, FT> j2_ke_corr_helper;
-
-  /// Map indices from subcomponent variables to component variables
-  std::vector<std::pair<int, int>> OffSet;
-  Vector<RealType> dLogPsi;
-
-  std::vector<GradDerivVec> gradLogPsi;
-  std::vector<ValueDerivVec> lapLogPsi;
-
-  void resizeWFOptVectors()
-  {
-    dLogPsi.resize(myVars.size());
-    gradLogPsi.resize(myVars.size(), GradDerivVec(N));
-    lapLogPsi.resize(myVars.size(), ValueDerivVec(N));
-  }
-
-  /// compute G and L from internally stored data
-  QTFull::RealType computeGL(ParticleSet::ParticleGradient& G, ParticleSet::ParticleLaplacian& L) const;
-
-  /*@{ internal compute engines*/
-  valT computeU(const ParticleSet& P, int iat, const DistRow& dist);
-
-  void computeU3(const ParticleSet& P,
-                 int iat,
-                 const DistRow& dist,
-                 RealType* restrict u,
-                 RealType* restrict du,
-                 RealType* restrict d2u,
-                 bool triangle = false);
-
-  /** compute gradient
-   */
-  posT accumulateG(const valT* restrict du, const DisplRow& displ) const;
-  /**@} */
-
-public:
-  J2OrbitalSoA(const std::string& obj_name, ParticleSet& p);
-  J2OrbitalSoA(const J2OrbitalSoA& rhs) = delete;
-  ~J2OrbitalSoA() override;
-
-  /* initialize storage */
-  void init(ParticleSet& p);
-
-  /** add functor for (ia,ib) pair */
-  void addFunc(int ia, int ib, std::unique_ptr<FT> j);
-
-  std::string getClassName() const override { return "J2OrbitalSoA"; }
-
-  bool isOptimizable() const override { return true; }
-
-  void extractOptimizableObjectRefs(UniqueOptObjRefs& opt_obj_refs) override;
-  /** check out optimizable variables
-   */
-  void checkOutVariables(const opt_variables_type& active) override;
-
-  inline void finalizeOptimization() override { KEcorr = j2_ke_corr_helper.computeKEcorr(); }
-
-  std::unique_ptr<WaveFunctionComponent> makeClone(ParticleSet& tqp) const override;
-
-  LogValueType evaluateLog(const ParticleSet& P,
-                           ParticleSet::ParticleGradient& G,
-                           ParticleSet::ParticleLaplacian& L) override;
-
-  void evaluateHessian(ParticleSet& P, HessVector& grad_grad_psi) override;
-
-  /** recompute internal data assuming distance table is fully ready */
-  void recompute(const ParticleSet& P) override;
-
-  PsiValueType ratio(ParticleSet& P, int iat) override;
-  void evaluateRatios(const VirtualParticleSet& VP, std::vector<ValueType>& ratios) override;
-  void evaluateRatiosAlltoOne(ParticleSet& P, std::vector<ValueType>& ratios) override;
-
-  GradType evalGrad(ParticleSet& P, int iat) override;
-
-  PsiValueType ratioGrad(ParticleSet& P, int iat, GradType& grad_iat) override;
-
-  void acceptMove(ParticleSet& P, int iat, bool safe_to_delay = false) override;
-  inline void restore(int iat) override {}
-
-  /** compute G and L after the sweep
-   */
-  LogValueType evaluateGL(const ParticleSet& P,
-                          ParticleSet::ParticleGradient& G,
-                          ParticleSet::ParticleLaplacian& L,
-                          bool fromscratch = false) override;
-
-  void registerData(ParticleSet& P, WFBufferType& buf) override;
-
-  void copyFromBuffer(ParticleSet& P, WFBufferType& buf) override;
-
-  LogValueType updateBuffer(ParticleSet& P, WFBufferType& buf, bool fromscratch = false) override;
-
-  inline RealType ChiesaKEcorrection() { return KEcorr = j2_ke_corr_helper.computeKEcorr(); }
-
-  inline RealType KECorrection() override { return KEcorr; }
-
-  const std::vector<FT*>& getPairFunctions() const { return F; }
-
-  // Accessors for unit testing
-  std::pair<int, int> getComponentOffset(int index) { return OffSet.at(index); }
-
-  opt_variables_type& getComponentVars() { return myVars; }
-
-  void evaluateDerivatives(ParticleSet& P,
-                           const opt_variables_type& active,
-                           Vector<ValueType>& dlogpsi,
-                           Vector<ValueType>& dhpsioverpsi) override;
-
-  void evaluateDerivativesWF(ParticleSet& P,
-                             const opt_variables_type& active,
-                             Vector<ValueType>& dlogpsi) override;
-
-  void evaluateDerivRatios(const VirtualParticleSet& VP,
-                           const opt_variables_type& optvars,
-                           std::vector<ValueType>& ratios,
-                           Matrix<ValueType>& dratios) override;
-};
-
-extern template class J2OrbitalSoA<BsplineFunctor<QMCTraits::RealType>>;
-extern template class J2OrbitalSoA<PadeFunctor<QMCTraits::RealType>>;
-extern template class J2OrbitalSoA<UserFunctor<QMCTraits::RealType>>;
-extern template class J2OrbitalSoA<FakeFunctor<QMCTraits::RealType>>;
-} // namespace qmcplusplus
-#endif
--- a/src/QMCWaveFunctions/Jastrow/OneBodyJastrowOrbitalBspline.cpp
+++ b/src/QMCWaveFunctions/Jastrow/OneBodyJastrowOrbitalBspline.cpp
@ -14,7 +14,7 @@
 //////////////////////////////////////////////////////////////////////////////////////


-#include "OneBodyJastrowOrbitalBspline.h"
+#include "OneBodyJastrowCUDA.h"
 #include "CudaSpline.h"
 #include "Lattice/ParticleBConds.h"
 #include "QMCWaveFunctions/detail/CUDA_legacy/BsplineJastrowCuda.h"
@ -24,15 +24,15 @@
 namespace qmcplusplus
 {
 template<class FT>
-void OneBodyJastrowOrbitalBspline<FT>::recompute(MCWalkerConfiguration& W, bool firstTime)
+void OneBodyJastrowCUDA<FT>::recompute(MCWalkerConfiguration& W, bool firstTime)
 {}

 template<class FT>
-void OneBodyJastrowOrbitalBspline<FT>::reserve(PointerPool<gpu::device_vector<CTS::RealType>>& pool)
+void OneBodyJastrowCUDA<FT>::reserve(PointerPool<gpu::device_vector<CTS::RealType>>& pool)
 {}

 template<class FT>
-void OneBodyJastrowOrbitalBspline<FT>::addFunc(int ig, std::unique_ptr<FT> j, int jg)
+void OneBodyJastrowCUDA<FT>::addFunc(int ig, std::unique_ptr<FT> j, int jg)
 {
  auto newSpline = std::make_unique<CudaSpline<CTS::RealType>>(*j);
  GPUSplines[ig] = newSpline.get();
@ -41,7 +41,7 @@ void OneBodyJastrowOrbitalBspline<FT>::addFunc(int ig, std::unique_ptr<FT> j, in
 }

 template<class FT>
-void OneBodyJastrowOrbitalBspline<FT>::addLog(MCWalkerConfiguration& W, std::vector<RealType>& logPsi)
+void OneBodyJastrowCUDA<FT>::addLog(MCWalkerConfiguration& W, std::vector<RealType>& logPsi)
 {
  auto& walkers = W.WalkerList;
  if (SumHost.size() < 4 * walkers.size())
@ -105,11 +105,11 @@ void OneBodyJastrowOrbitalBspline<FT>::addLog(MCWalkerConfiguration& W, std::vec
 }

 template<class FT>
-void OneBodyJastrowOrbitalBspline<FT>::update(MCWalkerConfiguration* W,
-                                              std::vector<Walker_t*>& walkers,
-                                              int iat,
-                                              std::vector<bool>* acc,
-                                              int k)
+void OneBodyJastrowCUDA<FT>::update(MCWalkerConfiguration* W,
+                                    std::vector<Walker_t*>& walkers,
+                                    int iat,
+                                    std::vector<bool>* acc,
+                                    int k)
 {
  // for (int iw=0; iw<walkers.size(); iw++)
  //   UpdateListHost[iw] = (CTS::RealType*)walkers[iw]->R_GPU.data();
@ -120,11 +120,11 @@ void OneBodyJastrowOrbitalBspline<FT>::update(MCWalkerConfiguration* W,
 // #define DEBUG_DELAYED

 template<class FT>
-void OneBodyJastrowOrbitalBspline<FT>::ratio(MCWalkerConfiguration& W,
-                                             int iat,
-                                             std::vector<ValueType>& psi_ratios,
-                                             std::vector<GradType>& grad,
-                                             std::vector<ValueType>& lapl)
+void OneBodyJastrowCUDA<FT>::ratio(MCWalkerConfiguration& W,
+                                   int iat,
+                                   std::vector<ValueType>& psi_ratios,
+                                   std::vector<GradType>& grad,
+                                   std::vector<ValueType>& lapl)
 {
  auto& walkers = W.WalkerList;
  int N         = W.Rnew_GPU.size();
@ -196,11 +196,11 @@ void OneBodyJastrowOrbitalBspline<FT>::ratio(MCWalkerConfiguration& W,
 }

 template<class FT>
-void OneBodyJastrowOrbitalBspline<FT>::calcRatio(MCWalkerConfiguration& W,
-                                                 int iat,
-                                                 std::vector<ValueType>& psi_ratios,
-                                                 std::vector<GradType>& grad,
-                                                 std::vector<ValueType>& lapl)
+void OneBodyJastrowCUDA<FT>::calcRatio(MCWalkerConfiguration& W,
+                                       int iat,
+                                       std::vector<ValueType>& psi_ratios,
+                                       std::vector<GradType>& grad,
+                                       std::vector<ValueType>& lapl)
 {
  int N         = W.Rnew_GPU.size();
  auto& walkers = W.WalkerList;
@ -244,12 +244,12 @@ void OneBodyJastrowOrbitalBspline<FT>::calcRatio(MCWalkerConfiguration& W,
 }

 template<class FT>
-void OneBodyJastrowOrbitalBspline<FT>::addRatio(MCWalkerConfiguration& W,
-                                                int iat,
-                                                int k,
-                                                std::vector<ValueType>& psi_ratios,
-                                                std::vector<GradType>& grad,
-                                                std::vector<ValueType>& lapl)
+void OneBodyJastrowCUDA<FT>::addRatio(MCWalkerConfiguration& W,
+                                      int iat,
+                                      int k,
+                                      std::vector<ValueType>& psi_ratios,
+                                      std::vector<GradType>& grad,
+                                      std::vector<ValueType>& lapl)
 {
  int N         = W.Rnew_GPU.size();
  auto& walkers = W.WalkerList;
@ -290,10 +290,10 @@ void OneBodyJastrowOrbitalBspline<FT>::addRatio(MCWalkerConfiguration& W,
 }

 template<class FT>
-void OneBodyJastrowOrbitalBspline<FT>::NLratios(MCWalkerConfiguration& W,
-                                                std::vector<NLjob>& jobList,
-                                                std::vector<PosType>& quadPoints,
-                                                std::vector<ValueType>& psi_ratios)
+void OneBodyJastrowCUDA<FT>::NLratios(MCWalkerConfiguration& W,
+                                      std::vector<NLjob>& jobList,
+                                      std::vector<PosType>& quadPoints,
+                                      std::vector<ValueType>& psi_ratios)
 {
  auto& walkers         = W.WalkerList;
  float sim_cell_radius = W.getLattice().SimulationCellRadius;
@ -357,10 +357,7 @@ void OneBodyJastrowOrbitalBspline<FT>::NLratios(MCWalkerConfiguration& W,
 }

 template<class FT>
-void OneBodyJastrowOrbitalBspline<FT>::calcGradient(MCWalkerConfiguration& W,
-                                                    int iat,
-                                                    int k,
-                                                    std::vector<GradType>& grad)
+void OneBodyJastrowCUDA<FT>::calcGradient(MCWalkerConfiguration& W, int iat, int k, std::vector<GradType>& grad)
 {
  CTS::RealType sim_cell_radius = W.getLattice().SimulationCellRadius;
  auto& walkers                 = W.WalkerList;
@ -393,7 +390,7 @@ void OneBodyJastrowOrbitalBspline<FT>::calcGradient(MCWalkerConfiguration& W,
 }

 template<class FT>
-void OneBodyJastrowOrbitalBspline<FT>::addGradient(MCWalkerConfiguration& W, int iat, std::vector<GradType>& grad)
+void OneBodyJastrowCUDA<FT>::addGradient(MCWalkerConfiguration& W, int iat, std::vector<GradType>& grad)
 {
  auto& walkers = W.WalkerList;
  cudaCheck(cudaEventSynchronize(gpu::gradientSyncOneBodyEvent));
@ -418,7 +415,7 @@ void OneBodyJastrowOrbitalBspline<FT>::addGradient(MCWalkerConfiguration& W, int
 }

 template<class FT>
-void OneBodyJastrowOrbitalBspline<FT>::gradLapl(MCWalkerConfiguration& W, GradMatrix& grad, ValueMatrix& lapl)
+void OneBodyJastrowCUDA<FT>::gradLapl(MCWalkerConfiguration& W, GradMatrix& grad, ValueMatrix& lapl)
 {
  auto& walkers = W.WalkerList;
  int numGL     = 4 * N * walkers.size();
@ -490,7 +487,7 @@ void OneBodyJastrowOrbitalBspline<FT>::gradLapl(MCWalkerConfiguration& W, GradMa
        grad(iw, ptcl)[i] += this->GradLaplHost[4 * this->N * iw + 4 * ptcl + i];
      if (std::isnan(this->GradLaplHost[4 * this->N * iw + +4 * ptcl + 3]))
      {
-        fprintf(stderr, "NAN in OneBodyJastrowOrbitalBspline<FT> laplacian.\n");
+        fprintf(stderr, "NAN in OneBodyJastrowCUDA<FT> laplacian.\n");
        abort();
      }
      lapl(iw, ptcl) += this->GradLaplHost[4 * this->N * iw + +4 * ptcl + 3];
@ -499,6 +496,6 @@ void OneBodyJastrowOrbitalBspline<FT>::gradLapl(MCWalkerConfiguration& W, GradMa
 }

 // explicit instantiations of templates
-template class OneBodyJastrowOrbitalBspline<BsplineFunctor<WaveFunctionComponent::RealType>>;
+template class OneBodyJastrowCUDA<BsplineFunctor<WaveFunctionComponent::RealType>>;

 } // namespace qmcplusplus
--- a/src/QMCWaveFunctions/Jastrow/OneBodyJastrowOrbitalBspline.h
+++ b/src/QMCWaveFunctions/Jastrow/OneBodyJastrowOrbitalBspline.h
@ -27,7 +27,7 @@
 namespace qmcplusplus
 {
 template<class FT>
-class OneBodyJastrowOrbitalBspline : public J1OrbitalSoA<FT>
+class OneBodyJastrowCUDA : public J1OrbitalSoA<FT>
 {
 private:
  bool UsePBC;
@ -140,7 +140,7 @@ public:
                std::vector<PosType>& quadPoints,
                std::vector<ValueType>& psi_ratios) override;

-  OneBodyJastrowOrbitalBspline(const std::string& obj_name, ParticleSet& centers, ParticleSet& elecs, bool use_offload)
+  OneBodyJastrowCUDA(const std::string& obj_name, ParticleSet& centers, ParticleSet& elecs, bool use_offload)
      : J1OrbitalSoA<FT>(obj_name, centers, elecs, use_offload),
        ElecRef(elecs),
        L(obj_name + "L"),
--- a/src/QMCWaveFunctions/Jastrow/RPAJastrow.cpp
+++ b/src/QMCWaveFunctions/Jastrow/RPAJastrow.cpp
@ -18,7 +18,7 @@

 #include "RPAJastrow.h"
 #include "QMCWaveFunctions/WaveFunctionComponentBuilder.h"
-#include "QMCWaveFunctions/Jastrow/J2OrbitalSoA.h"
+#include "QMCWaveFunctions/Jastrow/TwoBodyJastrow.h"
 #include "QMCWaveFunctions/Jastrow/LRBreakupUtilities.h"
 #include "QMCWaveFunctions/Jastrow/SplineFunctors.h"
 #include "QMCWaveFunctions/Jastrow/BsplineFunctor.h"
@ -174,7 +174,7 @@ void RPAJastrow::makeShortRange()
  nfunc           = nfunc_uptr.get();
  ShortRangePartAdapter<RealType> SRA(myHandler.get());
  SRA.setRmax(Rcut);
-  auto j2        = std::make_unique<J2OrbitalSoA<BsplineFunctor<RealType>>>("RPA", targetPtcl);
+  auto j2        = std::make_unique<TwoBodyJastrow<BsplineFunctor<RealType>>>("RPA", targetPtcl, false);
  size_t nparam  = 12;  // number of Bspline parameters
  size_t npts    = 100; // number of 1D grid points for basis functions
  RealType cusp  = SRA.df(0);
@ -200,7 +200,8 @@ void RPAJastrow::makeShortRange()
  Psi.push_back(std::move(j2));
 }

-void RPAJastrow::checkOutVariables(const opt_variables_type& active) {
+void RPAJastrow::checkOutVariables(const opt_variables_type& active)
+{
  LongRangeRPA->checkOutVariables(active);
  ShortRangeRPA->checkOutVariables(active);
 }
--- a/src/QMCWaveFunctions/Jastrow/RadialJastrowBuilder.cpp
+++ b/src/QMCWaveFunctions/Jastrow/RadialJastrowBuilder.cpp
@ -15,12 +15,11 @@
 #include <PlatformSelector.hpp>
 #include "QMCWaveFunctions/Jastrow/J1OrbitalSoA.h"
 #include "QMCWaveFunctions/Jastrow/J1Spin.h"
-#include "QMCWaveFunctions/Jastrow/J2OrbitalSoA.h"
-#include "QMCWaveFunctions/Jastrow/J2OMPTarget.h"
+#include "QMCWaveFunctions/Jastrow/TwoBodyJastrow.h"

 #if defined(QMC_CUDA)
-#include "QMCWaveFunctions/Jastrow/OneBodyJastrowOrbitalBspline.h"
-#include "QMCWaveFunctions/Jastrow/TwoBodyJastrowOrbitalBspline.h"
+#include "QMCWaveFunctions/Jastrow/OneBodyJastrowCUDA.h"
+#include "QMCWaveFunctions/Jastrow/TwoBodyJastrowCUDA.h"
 #endif

 #include "QMCWaveFunctions/Jastrow/RPAJastrow.h"
@ -50,7 +49,7 @@ class JastrowTypeHelper
 public:
  using J1Type     = J1OrbitalSoA<RadFuncType>;
  using J1SpinType = J1Spin<RadFuncType>;
-  using J2Type     = J2OrbitalSoA<RadFuncType>;
+  using J2Type     = TwoBodyJastrow<RadFuncType>;
 };

 #if defined(QMC_CUDA)
@ -59,9 +58,9 @@ class JastrowTypeHelper<BsplineFunctor<RadialJastrowBuilder::RealType>, RadialJa
 {
 public:
  using RadFuncType = BsplineFunctor<RadialJastrowBuilder::RealType>;
-  using J1Type      = OneBodyJastrowOrbitalBspline<RadFuncType>;
+  using J1Type      = OneBodyJastrowCUDA<RadFuncType>;
  using J1SpinType  = void;
-  using J2Type      = TwoBodyJastrowOrbitalBspline<RadFuncType>;
+  using J2Type      = TwoBodyJastrowCUDA<RadFuncType>;
 };
 #endif

@ -70,7 +69,7 @@ class JastrowTypeHelper<BsplineFunctor<RadialJastrowBuilder::RealType>, RadialJa
 {
 public:
  using RadFuncType = BsplineFunctor<RadialJastrowBuilder::RealType>;
-  using J2Type      = J2OMPTarget<RadFuncType>;
+  using J2Type      = TwoBodyJastrow<RadFuncType>;
 };

 RadialJastrowBuilder::RadialJastrowBuilder(Communicate* comm, ParticleSet& target, ParticleSet& source)
@ -158,7 +157,7 @@ std::unique_ptr<WaveFunctionComponent> RadialJastrowBuilder::createJ2(xmlNodePtr
  std::string input_name(getXMLAttributeValue(cur, "name"));
  std::string j2name = input_name.empty() ? "J2_" + Jastfunction : input_name;
  SpeciesSet& species(targetPtcl.getSpeciesSet());
-  auto J2 = std::make_unique<J2Type>(j2name, targetPtcl);
+  auto J2 = std::make_unique<J2Type>(j2name, targetPtcl, Implementation == RadialJastrowBuilder::detail::OMPTARGET);

  std::string init_mode("0");
  {
@ -608,7 +607,7 @@ std::unique_ptr<WaveFunctionComponent> RadialJastrowBuilder::buildComponent(xmlN
      if (CPUOMPTargetSelector::selectPlatform(useGPU) == PlatformKind::OMPTARGET)
      {
        static_assert(std::is_same<JastrowTypeHelper<BsplineFunctor<RealType>, OMPTARGET>::J2Type,
-                                   J2OMPTarget<BsplineFunctor<RealType>>>::value,
+                                   TwoBodyJastrow<BsplineFunctor<RealType>>>::value,
                      "check consistent type");
        if (targetPtcl.getCoordinates().getKind() != DynamicCoordinateKind::DC_POS_OFFLOAD)
        {
--- a/src/QMCWaveFunctions/Jastrow/TwoBodyJastrow.cpp
+++ b/src/QMCWaveFunctions/Jastrow/TwoBodyJastrow.cpp
@ -13,7 +13,7 @@
 // -*- C++ -*-


-#include "J2OMPTarget.h"
+#include "TwoBodyJastrow.h"
 #include "CPU/SIMD/algorithm.hpp"
 #include "SoaDistanceTableABOMPTarget.h"
 #include "ResourceCollection.h"
@ -23,7 +23,7 @@ namespace qmcplusplus
 {

 template<typename T>
-struct J2OMPTargetMultiWalkerMem : public Resource
+struct TwoBodyJastrowMultiWalkerMem : public Resource
 {
  // fused buffer for fast transfer in mw_accept
  Vector<char, OffloadPinnedAllocator<char>> mw_update_buffer;
@ -40,25 +40,25 @@ struct J2OMPTargetMultiWalkerMem : public Resource
  /// memory pool for cur_u, cur_du, cur_d2u [3][Nw][N_padded]. 3 is for value, first and second derivatives.
  Vector<T, OffloadPinnedAllocator<T>> mw_cur_allu;

-  J2OMPTargetMultiWalkerMem() : Resource("J2OMPTargetMultiWalkerMem") {}
+  TwoBodyJastrowMultiWalkerMem() : Resource("TwoBodyJastrowMultiWalkerMem") {}

-  J2OMPTargetMultiWalkerMem(const J2OMPTargetMultiWalkerMem&) : J2OMPTargetMultiWalkerMem() {}
+  TwoBodyJastrowMultiWalkerMem(const TwoBodyJastrowMultiWalkerMem&) : TwoBodyJastrowMultiWalkerMem() {}

-  Resource* makeClone() const override { return new J2OMPTargetMultiWalkerMem(*this); }
+  Resource* makeClone() const override { return new TwoBodyJastrowMultiWalkerMem(*this); }
 };

 template<typename FT>
-void J2OMPTarget<FT>::createResource(ResourceCollection& collection) const
+void TwoBodyJastrow<FT>::createResource(ResourceCollection& collection) const
 {
-  collection.addResource(std::make_unique<J2OMPTargetMultiWalkerMem<RealType>>());
+  collection.addResource(std::make_unique<TwoBodyJastrowMultiWalkerMem<RealType>>());
 }

 template<typename FT>
-void J2OMPTarget<FT>::acquireResource(ResourceCollection& collection,
-                                      const RefVectorWithLeader<WaveFunctionComponent>& wfc_list) const
+void TwoBodyJastrow<FT>::acquireResource(ResourceCollection& collection,
+                                         const RefVectorWithLeader<WaveFunctionComponent>& wfc_list) const
 {
-  auto& wfc_leader = wfc_list.getCastedLeader<J2OMPTarget<FT>>();
-  auto res_ptr     = dynamic_cast<J2OMPTargetMultiWalkerMem<RealType>*>(collection.lendResource().release());
+  auto& wfc_leader = wfc_list.getCastedLeader<TwoBodyJastrow<FT>>();
+  auto res_ptr     = dynamic_cast<TwoBodyJastrowMultiWalkerMem<RealType>*>(collection.lendResource().release());
  if (!res_ptr)
    throw std::runtime_error("VirtualParticleSet::acquireResource dynamic_cast failed");
  wfc_leader.mw_mem_.reset(res_ptr);
@ -68,7 +68,7 @@ void J2OMPTarget<FT>::acquireResource(ResourceCollection& collection,
  for (size_t iw = 0; iw < nw; iw++)
  {
    // copy per walker Uat, dUat, d2Uat to shared buffer and attach buffer
-    auto& wfc = wfc_list.getCastedElement<J2OMPTarget<FT>>(iw);
+    auto& wfc = wfc_list.getCastedElement<TwoBodyJastrow<FT>>(iw);

    Vector<valT, aligned_allocator<valT>> Uat_view(mw_allUat.data() + iw * N_padded, N);
    Uat_view = wfc.Uat;
@ -91,16 +91,16 @@ void J2OMPTarget<FT>::acquireResource(ResourceCollection& collection,
 }

 template<typename FT>
-void J2OMPTarget<FT>::releaseResource(ResourceCollection& collection,
-                                      const RefVectorWithLeader<WaveFunctionComponent>& wfc_list) const
+void TwoBodyJastrow<FT>::releaseResource(ResourceCollection& collection,
+                                         const RefVectorWithLeader<WaveFunctionComponent>& wfc_list) const
 {
-  auto& wfc_leader = wfc_list.getCastedLeader<J2OMPTarget<FT>>();
+  auto& wfc_leader = wfc_list.getCastedLeader<TwoBodyJastrow<FT>>();
  const size_t nw  = wfc_list.size();
  auto& mw_allUat  = wfc_leader.mw_mem_->mw_allUat;
  for (size_t iw = 0; iw < nw; iw++)
  {
    // detach buffer and copy per walker Uat, dUat, d2Uat from shared buffer
-    auto& wfc = wfc_list.getCastedElement<J2OMPTarget<FT>>(iw);
+    auto& wfc = wfc_list.getCastedElement<TwoBodyJastrow<FT>>(iw);

    Vector<valT, aligned_allocator<valT>> Uat_view(mw_allUat.data() + iw * N_padded, N);
    wfc.Uat.free();
@ -123,14 +123,14 @@ void J2OMPTarget<FT>::releaseResource(ResourceCollection& collection,
 }

 template<typename FT>
-void J2OMPTarget<FT>::extractOptimizableObjectRefs(UniqueOptObjRefs& opt_obj_refs)
+void TwoBodyJastrow<FT>::extractOptimizableObjectRefs(UniqueOptObjRefs& opt_obj_refs)
 {
  for (auto& [key, functor] : J2Unique)
    opt_obj_refs.push_back(*functor);
 }

 template<typename FT>
-void J2OMPTarget<FT>::checkOutVariables(const opt_variables_type& active)
+void TwoBodyJastrow<FT>::checkOutVariables(const opt_variables_type& active)
 {
  myVars.clear();
  for (auto& [key, functor] : J2Unique)
@ -169,7 +169,7 @@ void J2OMPTarget<FT>::checkOutVariables(const opt_variables_type& active)
 }

 template<typename FT>
-void J2OMPTarget<FT>::evaluateRatios(const VirtualParticleSet& VP, std::vector<ValueType>& ratios)
+void TwoBodyJastrow<FT>::evaluateRatios(const VirtualParticleSet& VP, std::vector<ValueType>& ratios)
 {
  for (int k = 0; k < ratios.size(); ++k)
    ratios[k] =
@ -177,14 +177,20 @@ void J2OMPTarget<FT>::evaluateRatios(const VirtualParticleSet& VP, std::vector<V
 }

 template<typename FT>
-void J2OMPTarget<FT>::mw_evaluateRatios(const RefVectorWithLeader<WaveFunctionComponent>& wfc_list,
-                                        const RefVectorWithLeader<const VirtualParticleSet>& vp_list,
-                                        std::vector<std::vector<ValueType>>& ratios) const
+void TwoBodyJastrow<FT>::mw_evaluateRatios(const RefVectorWithLeader<WaveFunctionComponent>& wfc_list,
+                                           const RefVectorWithLeader<const VirtualParticleSet>& vp_list,
+                                           std::vector<std::vector<ValueType>>& ratios) const
 {
+  if (!use_offload_)
+  {
+    WaveFunctionComponent::mw_evaluateRatios(wfc_list, vp_list, ratios);
+    return;
+  }
+
  // add early return to prevent from accessing vp_list[0]
  if (wfc_list.size() == 0)
    return;
-  auto& wfc_leader        = wfc_list.getCastedLeader<J2OMPTarget<FT>>();
+  auto& wfc_leader        = wfc_list.getCastedLeader<TwoBodyJastrow<FT>>();
  auto& vp_leader         = vp_list.getLeader();
  const auto& mw_refPctls = vp_leader.getMultiWalkerRefPctls();
  auto& mw_vals           = wfc_leader.mw_mem_->mw_vals;
@ -206,7 +212,7 @@ void J2OMPTarget<FT>::mw_evaluateRatios(const RefVectorWithLeader<WaveFunctionCo
  for (int iw = 0; iw < nw; ++iw)
  {
    const VirtualParticleSet& vp = vp_list[iw];
-    const auto& wfc              = wfc_list.getCastedElement<J2OMPTarget<FT>>(iw);
+    const auto& wfc              = wfc_list.getCastedElement<TwoBodyJastrow<FT>>(iw);
    for (int k = 0; k < vp.getTotalNum(); ++k, ivp++)
      ratios[iw][k] = std::exp(wfc.Uat[mw_refPctls[ivp]] - mw_vals[ivp]);
  }
@ -214,7 +220,7 @@ void J2OMPTarget<FT>::mw_evaluateRatios(const RefVectorWithLeader<WaveFunctionCo
 }

 template<typename FT>
-void J2OMPTarget<FT>::registerData(ParticleSet& P, WFBufferType& buf)
+void TwoBodyJastrow<FT>::registerData(ParticleSet& P, WFBufferType& buf)
 {
  if (Bytes_in_WFBuffer == 0)
  {
@ -235,7 +241,7 @@ void J2OMPTarget<FT>::registerData(ParticleSet& P, WFBufferType& buf)
 }

 template<typename FT>
-void J2OMPTarget<FT>::copyFromBuffer(ParticleSet& P, WFBufferType& buf)
+void TwoBodyJastrow<FT>::copyFromBuffer(ParticleSet& P, WFBufferType& buf)
 {
  Uat.attachReference(buf.lendReference<valT>(N), N);
  dUat.attachReference(N, N_padded, buf.lendReference<valT>(N_padded * DIM));
@ -243,9 +249,9 @@ void J2OMPTarget<FT>::copyFromBuffer(ParticleSet& P, WFBufferType& buf)
 }

 template<typename FT>
-typename J2OMPTarget<FT>::LogValueType J2OMPTarget<FT>::updateBuffer(ParticleSet& P,
-                                                                     WFBufferType& buf,
-                                                                     bool fromscratch)
+typename TwoBodyJastrow<FT>::LogValueType TwoBodyJastrow<FT>::updateBuffer(ParticleSet& P,
+                                                                           WFBufferType& buf,
+                                                                           bool fromscratch)
 {
  log_value_ = computeGL(P.G, P.L);
  buf.forward(Bytes_in_WFBuffer);
@ -253,7 +259,7 @@ typename J2OMPTarget<FT>::LogValueType J2OMPTarget<FT>::updateBuffer(ParticleSet
 }

 template<typename FT>
-typename J2OMPTarget<FT>::valT J2OMPTarget<FT>::computeU(const ParticleSet& P, int iat, const DistRow& dist)
+typename TwoBodyJastrow<FT>::valT TwoBodyJastrow<FT>::computeU(const ParticleSet& P, int iat, const DistRow& dist)
 {
  valT curUat(0);
  const int igt = P.GroupID[iat] * NumGroups;
@ -268,7 +274,7 @@ typename J2OMPTarget<FT>::valT J2OMPTarget<FT>::computeU(const ParticleSet& P, i
 }

 template<typename FT>
-typename J2OMPTarget<FT>::posT J2OMPTarget<FT>::accumulateG(const valT* restrict du, const DisplRow& displ) const
+typename TwoBodyJastrow<FT>::posT TwoBodyJastrow<FT>::accumulateG(const valT* restrict du, const DisplRow& displ) const
 {
  posT grad;
  for (int idim = 0; idim < DIM; ++idim)
@ -285,16 +291,17 @@ typename J2OMPTarget<FT>::posT J2OMPTarget<FT>::accumulateG(const valT* restrict
 }

 template<typename FT>
-J2OMPTarget<FT>::J2OMPTarget(const std::string& obj_name, ParticleSet& p)
+TwoBodyJastrow<FT>::TwoBodyJastrow(const std::string& obj_name, ParticleSet& p, bool use_offload)
    : WaveFunctionComponent(obj_name),
      N(p.getTotalNum()),
-      N_padded(getAlignedSize<valT>(N)),
      NumGroups(p.groups()),
+      use_offload_(use_offload),
+      N_padded(getAlignedSize<valT>(N)),
      my_table_ID_(p.addTable(p)),
      j2_ke_corr_helper(p, F)
 {
  if (my_name_.empty())
-    throw std::runtime_error("J2OMPTarget object name cannot be empty!");
+    throw std::runtime_error("TwoBodyJastrow object name cannot be empty!");

  F.resize(NumGroups * NumGroups, nullptr);

@ -313,10 +320,10 @@ J2OMPTarget<FT>::J2OMPTarget(const std::string& obj_name, ParticleSet& p)
 }

 template<typename FT>
-J2OMPTarget<FT>::~J2OMPTarget() = default;
+TwoBodyJastrow<FT>::~TwoBodyJastrow() = default;

 template<typename FT>
-void J2OMPTarget<FT>::resizeInternalStorage()
+void TwoBodyJastrow<FT>::resizeInternalStorage()
 {
  Uat.resize(N);
  dUat.resize(N);
@ -333,7 +340,7 @@ void J2OMPTarget<FT>::resizeInternalStorage()
 }

 template<typename FT>
-void J2OMPTarget<FT>::addFunc(int ia, int ib, std::unique_ptr<FT> j)
+void TwoBodyJastrow<FT>::addFunc(int ia, int ib, std::unique_ptr<FT> j)
 {
  assert(ia < NumGroups);
  assert(ib < NumGroups);
@ -367,9 +374,9 @@ void J2OMPTarget<FT>::addFunc(int ia, int ib, std::unique_ptr<FT> j)
 }

 template<typename FT>
-std::unique_ptr<WaveFunctionComponent> J2OMPTarget<FT>::makeClone(ParticleSet& tqp) const
+std::unique_ptr<WaveFunctionComponent> TwoBodyJastrow<FT>::makeClone(ParticleSet& tqp) const
 {
-  auto j2copy = std::make_unique<J2OMPTarget<FT>>(my_name_, tqp);
+  auto j2copy = std::make_unique<TwoBodyJastrow<FT>>(my_name_, tqp, use_offload_);
  std::map<const FT*, FT*> fcmap;
  for (int ig = 0; ig < NumGroups; ++ig)
    for (int jg = ig; jg < NumGroups; ++jg)
@ -403,13 +410,13 @@ std::unique_ptr<WaveFunctionComponent> J2OMPTarget<FT>::makeClone(ParticleSet& t
 * @param d2u starting second deriv
 */
 template<typename FT>
-void J2OMPTarget<FT>::computeU3(const ParticleSet& P,
-                                int iat,
-                                const DistRow& dist,
-                                RealType* restrict u,
-                                RealType* restrict du,
-                                RealType* restrict d2u,
-                                bool triangle)
+void TwoBodyJastrow<FT>::computeU3(const ParticleSet& P,
+                                   int iat,
+                                   const DistRow& dist,
+                                   RealType* restrict u,
+                                   RealType* restrict du,
+                                   RealType* restrict d2u,
+                                   bool triangle)
 {
  const int jelmax = triangle ? iat : N;
  constexpr valT czero(0);
@ -431,7 +438,7 @@ void J2OMPTarget<FT>::computeU3(const ParticleSet& P,
 }

 template<typename FT>
-typename J2OMPTarget<FT>::PsiValueType J2OMPTarget<FT>::ratio(ParticleSet& P, int iat)
+typename TwoBodyJastrow<FT>::PsiValueType TwoBodyJastrow<FT>::ratio(ParticleSet& P, int iat)
 {
  //only ratio, ready to compute it again
  UpdateMode = ORB_PBYP_RATIO;
@ -440,14 +447,20 @@ typename J2OMPTarget<FT>::PsiValueType J2OMPTarget<FT>::ratio(ParticleSet& P, in
 }

 template<typename FT>
-void J2OMPTarget<FT>::mw_calcRatio(const RefVectorWithLeader<WaveFunctionComponent>& wfc_list,
-                                   const RefVectorWithLeader<ParticleSet>& p_list,
-                                   int iat,
-                                   std::vector<PsiValueType>& ratios) const
+void TwoBodyJastrow<FT>::mw_calcRatio(const RefVectorWithLeader<WaveFunctionComponent>& wfc_list,
+                                      const RefVectorWithLeader<ParticleSet>& p_list,
+                                      int iat,
+                                      std::vector<PsiValueType>& ratios) const
 {
+  if (!use_offload_)
+  {
+    WaveFunctionComponent::mw_calcRatio(wfc_list, p_list, iat, ratios);
+    return;
+  }
+
  //right now. Directly use FT::mw_evaluateVGL implementation.
  assert(this == &wfc_list.getLeader());
-  auto& wfc_leader      = wfc_list.getCastedLeader<J2OMPTarget<FT>>();
+  auto& wfc_leader      = wfc_list.getCastedLeader<TwoBodyJastrow<FT>>();
  auto& p_leader        = p_list.getLeader();
  const auto& dt_leader = p_leader.getDistTableAA(my_table_ID_);
  const int nw          = wfc_list.size();
@ -464,7 +477,7 @@ void J2OMPTarget<FT>::mw_calcRatio(const RefVectorWithLeader<WaveFunctionCompone

  for (int iw = 0; iw < nw; iw++)
  {
-    auto& wfc   = wfc_list.getCastedElement<J2OMPTarget<FT>>(iw);
+    auto& wfc   = wfc_list.getCastedElement<TwoBodyJastrow<FT>>(iw);
    wfc.cur_Uat = mw_vgl[iw][0];
    ratios[iw]  = std::exp(static_cast<PsiValueType>(wfc.Uat[iat] - wfc.cur_Uat));
  }
@ -472,7 +485,7 @@ void J2OMPTarget<FT>::mw_calcRatio(const RefVectorWithLeader<WaveFunctionCompone


 template<typename FT>
-void J2OMPTarget<FT>::evaluateRatiosAlltoOne(ParticleSet& P, std::vector<ValueType>& ratios)
+void TwoBodyJastrow<FT>::evaluateRatiosAlltoOne(ParticleSet& P, std::vector<ValueType>& ratios)
 {
  const auto& d_table = P.getDistTableAA(my_table_ID_);
  const auto& dist    = d_table.getTempDists();
@ -499,13 +512,13 @@ void J2OMPTarget<FT>::evaluateRatiosAlltoOne(ParticleSet& P, std::vector<ValueTy
 }

 template<typename FT>
-typename J2OMPTarget<FT>::GradType J2OMPTarget<FT>::evalGrad(ParticleSet& P, int iat)
+typename TwoBodyJastrow<FT>::GradType TwoBodyJastrow<FT>::evalGrad(ParticleSet& P, int iat)
 {
  return GradType(dUat[iat]);
 }

 template<typename FT>
-typename J2OMPTarget<FT>::PsiValueType J2OMPTarget<FT>::ratioGrad(ParticleSet& P, int iat, GradType& grad_iat)
+typename TwoBodyJastrow<FT>::PsiValueType TwoBodyJastrow<FT>::ratioGrad(ParticleSet& P, int iat, GradType& grad_iat)
 {
  UpdateMode = ORB_PBYP_PARTIAL;

@ -517,14 +530,20 @@ typename J2OMPTarget<FT>::PsiValueType J2OMPTarget<FT>::ratioGrad(ParticleSet& P
 }

 template<typename FT>
-void J2OMPTarget<FT>::mw_ratioGrad(const RefVectorWithLeader<WaveFunctionComponent>& wfc_list,
-                                   const RefVectorWithLeader<ParticleSet>& p_list,
-                                   int iat,
-                                   std::vector<PsiValueType>& ratios,
-                                   std::vector<GradType>& grad_new) const
+void TwoBodyJastrow<FT>::mw_ratioGrad(const RefVectorWithLeader<WaveFunctionComponent>& wfc_list,
+                                      const RefVectorWithLeader<ParticleSet>& p_list,
+                                      int iat,
+                                      std::vector<PsiValueType>& ratios,
+                                      std::vector<GradType>& grad_new) const
 {
+  if (!use_offload_)
+  {
+    WaveFunctionComponent::mw_ratioGrad(wfc_list, p_list, iat, ratios, grad_new);
+    return;
+  }
+
  assert(this == &wfc_list.getLeader());
-  auto& wfc_leader      = wfc_list.getCastedLeader<J2OMPTarget<FT>>();
+  auto& wfc_leader      = wfc_list.getCastedLeader<TwoBodyJastrow<FT>>();
  auto& p_leader        = p_list.getLeader();
  const auto& dt_leader = p_leader.getDistTableAA(my_table_ID_);
  const int nw          = wfc_list.size();
@ -541,7 +560,7 @@ void J2OMPTarget<FT>::mw_ratioGrad(const RefVectorWithLeader<WaveFunctionCompone

  for (int iw = 0; iw < nw; iw++)
  {
-    auto& wfc   = wfc_list.getCastedElement<J2OMPTarget<FT>>(iw);
+    auto& wfc   = wfc_list.getCastedElement<TwoBodyJastrow<FT>>(iw);
    wfc.cur_Uat = mw_vgl[iw][0];
    ratios[iw]  = std::exp(static_cast<PsiValueType>(wfc.Uat[iat] - wfc.cur_Uat));
    for (int idim = 0; idim < DIM; idim++)
@ -550,7 +569,7 @@ void J2OMPTarget<FT>::mw_ratioGrad(const RefVectorWithLeader<WaveFunctionCompone
 }

 template<typename FT>
-void J2OMPTarget<FT>::acceptMove(ParticleSet& P, int iat, bool safe_to_delay)
+void TwoBodyJastrow<FT>::acceptMove(ParticleSet& P, int iat, bool safe_to_delay)
 {
  // get the old u, du, d2u
  const auto& d_table = P.getDistTableAA(my_table_ID_);
@ -601,14 +620,20 @@ void J2OMPTarget<FT>::acceptMove(ParticleSet& P, int iat, bool safe_to_delay)
 }

 template<typename FT>
-void J2OMPTarget<FT>::mw_accept_rejectMove(const RefVectorWithLeader<WaveFunctionComponent>& wfc_list,
-                                           const RefVectorWithLeader<ParticleSet>& p_list,
-                                           int iat,
-                                           const std::vector<bool>& isAccepted,
-                                           bool safe_to_delay) const
+void TwoBodyJastrow<FT>::mw_accept_rejectMove(const RefVectorWithLeader<WaveFunctionComponent>& wfc_list,
+                                              const RefVectorWithLeader<ParticleSet>& p_list,
+                                              int iat,
+                                              const std::vector<bool>& isAccepted,
+                                              bool safe_to_delay) const
 {
+  if (!use_offload_)
+  {
+    WaveFunctionComponent::mw_accept_rejectMove(wfc_list, p_list, iat, isAccepted, safe_to_delay);
+    return;
+  }
+
  assert(this == &wfc_list.getLeader());
-  auto& wfc_leader      = wfc_list.getCastedLeader<J2OMPTarget<FT>>();
+  auto& wfc_leader      = wfc_list.getCastedLeader<TwoBodyJastrow<FT>>();
  auto& p_leader        = p_list.getLeader();
  const auto& dt_leader = p_leader.getDistTableAA(my_table_ID_);
  const int nw          = wfc_list.size();
@ -620,7 +645,7 @@ void J2OMPTarget<FT>::mw_accept_rejectMove(const RefVectorWithLeader<WaveFunctio

  for (int iw = 0; iw < nw; iw++)
  {
-    auto& wfc = wfc_list.getCastedElement<J2OMPTarget<FT>>(iw);
+    auto& wfc = wfc_list.getCastedElement<TwoBodyJastrow<FT>>(iw);
    wfc.log_value_ += wfc.Uat[iat] - mw_vgl[iw][0];
  }

@ -631,7 +656,7 @@ void J2OMPTarget<FT>::mw_accept_rejectMove(const RefVectorWithLeader<WaveFunctio
 }

 template<typename FT>
-void J2OMPTarget<FT>::recompute(const ParticleSet& P)
+void TwoBodyJastrow<FT>::recompute(const ParticleSet& P)
 {
  const auto& d_table = P.getDistTableAA(my_table_ID_);
  for (int ig = 0; ig < NumGroups; ++ig)
@ -681,11 +706,17 @@ void J2OMPTarget<FT>::recompute(const ParticleSet& P)
 }

 template<typename FT>
-void J2OMPTarget<FT>::mw_recompute(const RefVectorWithLeader<WaveFunctionComponent>& wfc_list,
-                                   const RefVectorWithLeader<ParticleSet>& p_list,
-                                   const std::vector<bool>& recompute) const
+void TwoBodyJastrow<FT>::mw_recompute(const RefVectorWithLeader<WaveFunctionComponent>& wfc_list,
+                                      const RefVectorWithLeader<ParticleSet>& p_list,
+                                      const std::vector<bool>& recompute) const
 {
-  auto& wfc_leader = wfc_list.getCastedLeader<J2OMPTarget<FT>>();
+  if (!use_offload_)
+  {
+    WaveFunctionComponent::mw_recompute(wfc_list, p_list, recompute);
+    return;
+  }
+
+  auto& wfc_leader = wfc_list.getCastedLeader<TwoBodyJastrow<FT>>();
  assert(this == &wfc_leader);
  for (int iw = 0; iw < wfc_list.size(); iw++)
    if (recompute[iw])
@ -694,36 +725,42 @@ void J2OMPTarget<FT>::mw_recompute(const RefVectorWithLeader<WaveFunctionCompone
 }

 template<typename FT>
-typename J2OMPTarget<FT>::LogValueType J2OMPTarget<FT>::evaluateLog(const ParticleSet& P,
-                                                                    ParticleSet::ParticleGradient& G,
-                                                                    ParticleSet::ParticleLaplacian& L)
+typename TwoBodyJastrow<FT>::LogValueType TwoBodyJastrow<FT>::evaluateLog(const ParticleSet& P,
+                                                                          ParticleSet::ParticleGradient& G,
+                                                                          ParticleSet::ParticleLaplacian& L)
 {
  recompute(P);
  return log_value_ = computeGL(G, L);
 }

 template<typename FT>
-void J2OMPTarget<FT>::mw_evaluateLog(const RefVectorWithLeader<WaveFunctionComponent>& wfc_list,
-                                     const RefVectorWithLeader<ParticleSet>& p_list,
-                                     const RefVector<ParticleSet::ParticleGradient>& G_list,
-                                     const RefVector<ParticleSet::ParticleLaplacian>& L_list) const
+void TwoBodyJastrow<FT>::mw_evaluateLog(const RefVectorWithLeader<WaveFunctionComponent>& wfc_list,
+                                        const RefVectorWithLeader<ParticleSet>& p_list,
+                                        const RefVector<ParticleSet::ParticleGradient>& G_list,
+                                        const RefVector<ParticleSet::ParticleLaplacian>& L_list) const

 {
+  if (!use_offload_)
+  {
+    WaveFunctionComponent::mw_evaluateLog(wfc_list, p_list, G_list, L_list);
+    return;
+  }
+
  assert(this == &wfc_list.getLeader());
  const std::vector<bool> recompute_all(wfc_list.size(), true);
  mw_recompute(wfc_list, p_list, recompute_all);

  for (int iw = 0; iw < wfc_list.size(); iw++)
  {
-    auto& wfc      = wfc_list.getCastedElement<J2OMPTarget<FT>>(iw);
+    auto& wfc      = wfc_list.getCastedElement<TwoBodyJastrow<FT>>(iw);
    wfc.log_value_ = wfc.computeGL(G_list[iw], L_list[iw]);
  }
 }


 template<typename FT>
-typename J2OMPTarget<FT>::QTFull::RealType J2OMPTarget<FT>::computeGL(ParticleSet::ParticleGradient& G,
-                                                                      ParticleSet::ParticleLaplacian& L) const
+typename TwoBodyJastrow<FT>::QTFull::RealType TwoBodyJastrow<FT>::computeGL(ParticleSet::ParticleGradient& G,
+                                                                            ParticleSet::ParticleLaplacian& L) const
 {
  for (int iat = 0; iat < N; ++iat)
  {
@ -734,31 +771,37 @@ typename J2OMPTarget<FT>::QTFull::RealType J2OMPTarget<FT>::computeGL(ParticleSe
 }

 template<typename FT>
-WaveFunctionComponent::LogValueType J2OMPTarget<FT>::evaluateGL(const ParticleSet& P,
-                                                                ParticleSet::ParticleGradient& G,
-                                                                ParticleSet::ParticleLaplacian& L,
-                                                                bool fromscratch)
+WaveFunctionComponent::LogValueType TwoBodyJastrow<FT>::evaluateGL(const ParticleSet& P,
+                                                                   ParticleSet::ParticleGradient& G,
+                                                                   ParticleSet::ParticleLaplacian& L,
+                                                                   bool fromscratch)
 {
  return log_value_ = computeGL(G, L);
 }

 template<typename FT>
-void J2OMPTarget<FT>::mw_evaluateGL(const RefVectorWithLeader<WaveFunctionComponent>& wfc_list,
-                                    const RefVectorWithLeader<ParticleSet>& p_list,
-                                    const RefVector<ParticleSet::ParticleGradient>& G_list,
-                                    const RefVector<ParticleSet::ParticleLaplacian>& L_list,
-                                    bool fromscratch) const
+void TwoBodyJastrow<FT>::mw_evaluateGL(const RefVectorWithLeader<WaveFunctionComponent>& wfc_list,
+                                       const RefVectorWithLeader<ParticleSet>& p_list,
+                                       const RefVector<ParticleSet::ParticleGradient>& G_list,
+                                       const RefVector<ParticleSet::ParticleLaplacian>& L_list,
+                                       bool fromscratch) const
 {
+  if (!use_offload_)
+  {
+    WaveFunctionComponent::mw_evaluateGL(wfc_list, p_list, G_list, L_list, fromscratch);
+    return;
+  }
+
  assert(this == &wfc_list.getLeader());
  for (int iw = 0; iw < wfc_list.size(); iw++)
  {
-    auto& wfc      = wfc_list.getCastedElement<J2OMPTarget<FT>>(iw);
+    auto& wfc      = wfc_list.getCastedElement<TwoBodyJastrow<FT>>(iw);
    wfc.log_value_ = wfc.computeGL(G_list[iw], L_list[iw]);
  }
 }

 template<typename FT>
-void J2OMPTarget<FT>::evaluateHessian(ParticleSet& P, HessVector& grad_grad_psi)
+void TwoBodyJastrow<FT>::evaluateHessian(ParticleSet& P, HessVector& grad_grad_psi)
 {
  log_value_ = 0.0;
  const auto& d_ee(P.getDistTableAA(my_table_ID_));
@ -790,10 +833,10 @@ void J2OMPTarget<FT>::evaluateHessian(ParticleSet& P, HessVector& grad_grad_psi)
 }

 template<typename FT>
-void J2OMPTarget<FT>::evaluateDerivatives(ParticleSet& P,
-                                          const opt_variables_type& active,
-                                          Vector<ValueType>& dlogpsi,
-                                          Vector<ValueType>& dhpsioverpsi)
+void TwoBodyJastrow<FT>::evaluateDerivatives(ParticleSet& P,
+                                             const opt_variables_type& active,
+                                             Vector<ValueType>& dlogpsi,
+                                             Vector<ValueType>& dhpsioverpsi)
 {
  if (myVars.size() == 0)
    return;
@ -826,9 +869,9 @@ void J2OMPTarget<FT>::evaluateDerivatives(ParticleSet& P,
 }

 template<typename FT>
-void J2OMPTarget<FT>::evaluateDerivativesWF(ParticleSet& P,
-                                            const opt_variables_type& active,
-                                            Vector<ValueType>& dlogpsi)
+void TwoBodyJastrow<FT>::evaluateDerivativesWF(ParticleSet& P,
+                                               const opt_variables_type& active,
+                                               Vector<ValueType>& dlogpsi)
 {
  if (myVars.size() == 0)
    return;
@ -924,10 +967,10 @@ void J2OMPTarget<FT>::evaluateDerivativesWF(ParticleSet& P,
 }

 template<typename FT>
-void J2OMPTarget<FT>::evaluateDerivRatios(const VirtualParticleSet& VP,
-                                          const opt_variables_type& optvars,
-                                          std::vector<ValueType>& ratios,
-                                          Matrix<ValueType>& dratios)
+void TwoBodyJastrow<FT>::evaluateDerivRatios(const VirtualParticleSet& VP,
+                                             const opt_variables_type& optvars,
+                                             std::vector<ValueType>& ratios,
+                                             Matrix<ValueType>& dratios)
 {
  evaluateRatios(VP, ratios);
  if (myVars.size() == 0)
@ -994,7 +1037,8 @@ void J2OMPTarget<FT>::evaluateDerivRatios(const VirtualParticleSet& VP,
  }
 }

-template class J2OMPTarget<BsplineFunctor<QMCTraits::RealType>>;
-template class J2OMPTarget<PadeFunctor<QMCTraits::RealType>>;
-template class J2OMPTarget<UserFunctor<QMCTraits::RealType>>;
+template class TwoBodyJastrow<BsplineFunctor<QMCTraits::RealType>>;
+template class TwoBodyJastrow<PadeFunctor<QMCTraits::RealType>>;
+template class TwoBodyJastrow<UserFunctor<QMCTraits::RealType>>;
+template class TwoBodyJastrow<FakeFunctor<QMCTraits::RealType>>;
 } // namespace qmcplusplus
--- a/src/QMCWaveFunctions/Jastrow/TwoBodyJastrow.h
+++ b/src/QMCWaveFunctions/Jastrow/TwoBodyJastrow.h
@ -28,12 +28,13 @@
 #include "BsplineFunctor.h"
 #include "PadeFunctors.h"
 #include "UserFunctor.h"
+#include "FakeFunctor.h"

 namespace qmcplusplus
 {

 template<typename T>
-struct J2OMPTargetMultiWalkerMem;
+struct TwoBodyJastrowMultiWalkerMem;

 /** @ingroup WaveFunctionComponent
 *  @brief Specialization for two-body Jastrow function using multiple functors
@ -42,7 +43,7 @@ struct J2OMPTargetMultiWalkerMem;
 * For electrons, distinct pair correlation functions are used
 * for spins up-up/down-down and up-down/down-up.
 *
- * Based on J2OMPTarget.h with these considerations
+ * Based on TwoBodyJastrow.h with these considerations
 * - DistanceTable using SoA containers
 * - support mixed precision: FT::real_type != OHMMS_PRECISION
 * - loops over the groups: elminated PairID
@ -51,7 +52,7 @@ struct J2OMPTargetMultiWalkerMem;
 * - Memory use is O(N). 
 */
 template<class FT>
-class J2OMPTarget : public WaveFunctionComponent
+class TwoBodyJastrow : public WaveFunctionComponent
 {
 public:
  ///alias FuncType
@ -67,16 +68,21 @@ public:
  using GradDerivVec  = ParticleAttrib<QTFull::GradType>;
  using ValueDerivVec = ParticleAttrib<QTFull::ValueType>;

+protected:
+  ///number of particles
+  const size_t N;
+  ///number of groups of the target particleset
+  const size_t NumGroups;
+
 private:
+  /// if true use offload
+  const bool use_offload_;
+
  /** initialize storage Uat,dUat, d2Uat */
  void resizeInternalStorage();

-  ///number of particles
-  const size_t N;
  ///number of particles + padded
  const size_t N_padded;
-  ///number of groups of the target particleset
-  const size_t NumGroups;
  /// the group_id of each particle
  Vector<int, OffloadPinnedAllocator<int>> grp_ids;
  ///diff value
@ -110,7 +116,7 @@ private:
  std::vector<GradDerivVec> gradLogPsi;
  std::vector<ValueDerivVec> lapLogPsi;

-  std::unique_ptr<J2OMPTargetMultiWalkerMem<RealType>> mw_mem_;
+  std::unique_ptr<TwoBodyJastrowMultiWalkerMem<RealType>> mw_mem_;

  void resizeWFOptVectors()
  {
@ -139,9 +145,9 @@ private:
  /**@} */

 public:
-  J2OMPTarget(const std::string& obj_name, ParticleSet& p);
-  J2OMPTarget(const J2OMPTarget& rhs) = delete;
-  ~J2OMPTarget() override;
+  TwoBodyJastrow(const std::string& obj_name, ParticleSet& p, bool use_offload);
+  TwoBodyJastrow(const TwoBodyJastrow& rhs) = delete;
+  ~TwoBodyJastrow() override;

  /** add functor for (ia,ib) pair */
  void addFunc(int ia, int ib, std::unique_ptr<FT> j);
@ -154,7 +160,7 @@ public:
  void releaseResource(ResourceCollection& collection,
                       const RefVectorWithLeader<WaveFunctionComponent>& wfc_list) const override;

-  std::string getClassName() const override { return "J2OMPTarget"; }
+  std::string getClassName() const override { return "TwoBodyJastrow"; }
  bool isOptimizable() const override { return true; }
  void extractOptimizableObjectRefs(UniqueOptObjRefs& opt_obj_refs) override;
  /** check out optimizable variables
@ -236,14 +242,17 @@ public:

  const std::vector<FT*>& getPairFunctions() const { return F; }

+  // Accessors for unit testing
+  std::pair<int, int> getComponentOffset(int index) { return OffSet.at(index); }
+
+  opt_variables_type& getComponentVars() { return myVars; }
+
  void evaluateDerivatives(ParticleSet& P,
                           const opt_variables_type& active,
                           Vector<ValueType>& dlogpsi,
                           Vector<ValueType>& dhpsioverpsi) override;

-  void evaluateDerivativesWF(ParticleSet& P,
-                             const opt_variables_type& active,
-                             Vector<ValueType>& dlogpsi) override;
+  void evaluateDerivativesWF(ParticleSet& P, const opt_variables_type& active, Vector<ValueType>& dlogpsi) override;

  void evaluateDerivRatios(const VirtualParticleSet& VP,
                           const opt_variables_type& optvars,
@ -251,8 +260,9 @@ public:
                           Matrix<ValueType>& dratios) override;
 };

-extern template class J2OMPTarget<BsplineFunctor<QMCTraits::RealType>>;
-extern template class J2OMPTarget<PadeFunctor<QMCTraits::RealType>>;
-extern template class J2OMPTarget<UserFunctor<QMCTraits::RealType>>;
+extern template class TwoBodyJastrow<BsplineFunctor<QMCTraits::RealType>>;
+extern template class TwoBodyJastrow<PadeFunctor<QMCTraits::RealType>>;
+extern template class TwoBodyJastrow<UserFunctor<QMCTraits::RealType>>;
+extern template class TwoBodyJastrow<FakeFunctor<QMCTraits::RealType>>;
 } // namespace qmcplusplus
 #endif
--- a/src/QMCWaveFunctions/Jastrow/TwoBodyJastrowOrbitalBspline.cpp
+++ b/src/QMCWaveFunctions/Jastrow/TwoBodyJastrowOrbitalBspline.cpp
@ -14,7 +14,7 @@
 //////////////////////////////////////////////////////////////////////////////////////


-#include "TwoBodyJastrowOrbitalBspline.h"
+#include "TwoBodyJastrowCUDA.h"
 #include "CudaSpline.h"
 #include "Lattice/ParticleBConds.h"
 #include "QMCWaveFunctions/detail/CUDA_legacy/BsplineJastrowCuda.h"
@ -25,7 +25,7 @@
 namespace qmcplusplus
 {
 template<class FT>
-void TwoBodyJastrowOrbitalBspline<FT>::freeGPUmem()
+void TwoBodyJastrowCUDA<FT>::freeGPUmem()
 {
  UpdateListGPU.clear();
  SumGPU.clear();
@ -43,18 +43,18 @@ void TwoBodyJastrowOrbitalBspline<FT>::freeGPUmem()
 };

 template<class FT>
-void TwoBodyJastrowOrbitalBspline<FT>::recompute(MCWalkerConfiguration& W, bool firstTime)
+void TwoBodyJastrowCUDA<FT>::recompute(MCWalkerConfiguration& W, bool firstTime)
 {}

 template<class FT>
-void TwoBodyJastrowOrbitalBspline<FT>::reserve(PointerPool<gpu::device_vector<CTS::RealType>>& pool)
+void TwoBodyJastrowCUDA<FT>::reserve(PointerPool<gpu::device_vector<CTS::RealType>>& pool)
 {}

 template<class FT>
-void TwoBodyJastrowOrbitalBspline<FT>::addFunc(int ia, int ib, std::unique_ptr<FT> j)
+void TwoBodyJastrowCUDA<FT>::addFunc(int ia, int ib, std::unique_ptr<FT> j)
 {
  CudaSpline<CTS::RealType>* newSpline = new CudaSpline<CTS::RealType>(*j);
-  J2OrbitalSoA<BsplineFunctor<WaveFunctionComponent::RealType>>::addFunc(ia, ib, std::move(j));
+  JBase::addFunc(ia, ib, std::move(j));
  UniqueSplines.push_back(newSpline);
  if (ia == ib)
  {
@ -83,7 +83,7 @@ void TwoBodyJastrowOrbitalBspline<FT>::addFunc(int ia, int ib, std::unique_ptr<F
 }

 template<class FT>
-void TwoBodyJastrowOrbitalBspline<FT>::addLog(MCWalkerConfiguration& W, std::vector<RealType>& logPsi)
+void TwoBodyJastrowCUDA<FT>::addLog(MCWalkerConfiguration& W, std::vector<RealType>& logPsi)
 {
  auto& walkers = W.WalkerList;
  if (SumGPU.size() < 4 * walkers.size())
@ -144,11 +144,11 @@ void TwoBodyJastrowOrbitalBspline<FT>::addLog(MCWalkerConfiguration& W, std::vec
 }

 template<class FT>
-void TwoBodyJastrowOrbitalBspline<FT>::update(MCWalkerConfiguration* W,
-                                              std::vector<Walker_t*>& walkers,
-                                              int iat,
-                                              std::vector<bool>* acc,
-                                              int k)
+void TwoBodyJastrowCUDA<FT>::update(MCWalkerConfiguration* W,
+                                    std::vector<Walker_t*>& walkers,
+                                    int iat,
+                                    std::vector<bool>* acc,
+                                    int k)
 {
  // for (int iw=0; iw<walkers.size(); iw++)
  //   UpdateListHost[iw] = (CTS::RealType*)walkers[iw]->R_GPU.data();
@ -160,11 +160,11 @@ void TwoBodyJastrowOrbitalBspline<FT>::update(MCWalkerConfiguration* W,

 // This currently does not actually compute the gradient or laplacian
 template<class FT>
-void TwoBodyJastrowOrbitalBspline<FT>::ratio(MCWalkerConfiguration& W,
-                                             int iat,
-                                             std::vector<ValueType>& psi_ratios,
-                                             std::vector<GradType>& grad,
-                                             std::vector<ValueType>& lapl)
+void TwoBodyJastrowCUDA<FT>::ratio(MCWalkerConfiguration& W,
+                                   int iat,
+                                   std::vector<ValueType>& psi_ratios,
+                                   std::vector<GradType>& grad,
+                                   std::vector<ValueType>& lapl)
 {
  auto& walkers = W.WalkerList;
  int N         = W.Rnew_GPU.size();
@ -245,11 +245,11 @@ void TwoBodyJastrowOrbitalBspline<FT>::ratio(MCWalkerConfiguration& W,

 // This currently does not actually compute the gradient or laplacian
 template<class FT>
-void TwoBodyJastrowOrbitalBspline<FT>::calcRatio(MCWalkerConfiguration& W,
-                                                 int iat,
-                                                 std::vector<ValueType>& psi_ratios,
-                                                 std::vector<GradType>& grad,
-                                                 std::vector<ValueType>& lapl)
+void TwoBodyJastrowCUDA<FT>::calcRatio(MCWalkerConfiguration& W,
+                                       int iat,
+                                       std::vector<ValueType>& psi_ratios,
+                                       std::vector<GradType>& grad,
+                                       std::vector<ValueType>& lapl)
 {
  auto& walkers = W.WalkerList;
  int N         = W.Rnew_GPU.size();
@ -317,12 +317,12 @@ void TwoBodyJastrowOrbitalBspline<FT>::calcRatio(MCWalkerConfiguration& W,
 }

 template<class FT>
-void TwoBodyJastrowOrbitalBspline<FT>::addRatio(MCWalkerConfiguration& W,
-                                                int iat,
-                                                int k,
-                                                std::vector<ValueType>& psi_ratios,
-                                                std::vector<GradType>& grad,
-                                                std::vector<ValueType>& lapl)
+void TwoBodyJastrowCUDA<FT>::addRatio(MCWalkerConfiguration& W,
+                                      int iat,
+                                      int k,
+                                      std::vector<ValueType>& psi_ratios,
+                                      std::vector<GradType>& grad,
+                                      std::vector<ValueType>& lapl)
 {
 #ifndef CPU_RATIO
  auto& walkers = W.WalkerList;
@ -345,10 +345,10 @@ void TwoBodyJastrowOrbitalBspline<FT>::addRatio(MCWalkerConfiguration& W,
 }

 template<class FT>
-void TwoBodyJastrowOrbitalBspline<FT>::NLratios(MCWalkerConfiguration& W,
-                                                std::vector<NLjob>& jobList,
-                                                std::vector<PosType>& quadPoints,
-                                                std::vector<ValueType>& psi_ratios)
+void TwoBodyJastrowCUDA<FT>::NLratios(MCWalkerConfiguration& W,
+                                      std::vector<NLjob>& jobList,
+                                      std::vector<PosType>& quadPoints,
+                                      std::vector<ValueType>& psi_ratios)
 {
  CTS::RealType sim_cell_radius = W.getLattice().SimulationCellRadius;
  auto& walkers                 = W.WalkerList;
@ -419,10 +419,7 @@ void TwoBodyJastrowOrbitalBspline<FT>::NLratios(MCWalkerConfiguration& W,
 }

 template<class FT>
-void TwoBodyJastrowOrbitalBspline<FT>::calcGradient(MCWalkerConfiguration& W,
-                                                    int iat,
-                                                    int k,
-                                                    std::vector<GradType>& grad)
+void TwoBodyJastrowCUDA<FT>::calcGradient(MCWalkerConfiguration& W, int iat, int k, std::vector<GradType>& grad)
 {
  CTS::RealType sim_cell_radius = W.getLattice().SimulationCellRadius;
  auto& walkers                 = W.WalkerList;
@ -451,7 +448,7 @@ void TwoBodyJastrowOrbitalBspline<FT>::calcGradient(MCWalkerConfiguration& W,
 }

 template<class FT>
-void TwoBodyJastrowOrbitalBspline<FT>::addGradient(MCWalkerConfiguration& W, int iat, std::vector<GradType>& grad)
+void TwoBodyJastrowCUDA<FT>::addGradient(MCWalkerConfiguration& W, int iat, std::vector<GradType>& grad)
 {
  auto& walkers = W.WalkerList;
  cudaCheck(cudaEventSynchronize(gpu::gradientSyncTwoBodyEvent));
@ -479,7 +476,7 @@ void TwoBodyJastrowOrbitalBspline<FT>::addGradient(MCWalkerConfiguration& W, int
 }

 template<class FT>
-void TwoBodyJastrowOrbitalBspline<FT>::gradLapl(MCWalkerConfiguration& W, GradMatrix& grad, ValueMatrix& lapl)
+void TwoBodyJastrowCUDA<FT>::gradLapl(MCWalkerConfiguration& W, GradMatrix& grad, ValueMatrix& lapl)
 {
  CTS::RealType sim_cell_radius = W.getLattice().SimulationCellRadius;
  auto& walkers                 = W.WalkerList;
@ -573,7 +570,7 @@ void TwoBodyJastrowOrbitalBspline<FT>::gradLapl(MCWalkerConfiguration& W, GradMa
      {
        char buff[500];
        gethostname(buff, 500);
-        fprintf(stderr, "NAN in TwoBodyJastrowOrbitalBspline laplacian.  Host=%s\n", buff);
+        fprintf(stderr, "NAN in TwoBodyJastrowCUDA laplacian.  Host=%s\n", buff);
        abort();
      }
      lapl(iw, ptcl) += GradLaplHost[4 * this->N * iw + +4 * ptcl + 3];
@ -582,7 +579,7 @@ void TwoBodyJastrowOrbitalBspline<FT>::gradLapl(MCWalkerConfiguration& W, GradMa
 }

 // explicit instantiations of templates
-template class TwoBodyJastrowOrbitalBspline<BsplineFunctor<WaveFunctionComponent::RealType>>;
+template class TwoBodyJastrowCUDA<BsplineFunctor<WaveFunctionComponent::RealType>>;


 } // namespace qmcplusplus
--- a/src/QMCWaveFunctions/Jastrow/TwoBodyJastrowOrbitalBspline.h
+++ b/src/QMCWaveFunctions/Jastrow/TwoBodyJastrowOrbitalBspline.h
@ -17,7 +17,7 @@
 #define TWO_BODY_JASTROW_ORBITAL_BSPLINE_H

 #include "Particle/DistanceTable.h"
-#include "QMCWaveFunctions/Jastrow/J2OrbitalSoA.h"
+#include "QMCWaveFunctions/Jastrow/TwoBodyJastrow.h"
 #include "QMCWaveFunctions/Jastrow/BsplineFunctor.h"
 #include "Configuration.h"
 #include "QMCWaveFunctions/Jastrow/CudaSpline.h"
@ -26,7 +26,7 @@
 namespace qmcplusplus
 {
 template<class FT>
-class TwoBodyJastrowOrbitalBspline : public J2OrbitalSoA<FT>
+class TwoBodyJastrowCUDA : public TwoBodyJastrow<FT>
 {
 private:
  bool UsePBC;
@ -37,7 +37,7 @@ private:
  // The following is so we can refer to type aliases(defs) below the
  // templated base class in the object hierarchy
  // Mostly QMCTraits here
-  using JBase = J2OrbitalSoA<FT>;
+  using JBase = TwoBodyJastrow<FT>;
  // Duplication that should be removed
  using RealType     = typename JBase::RealType;
  using ValueType    = typename JBase::ValueType;
@ -143,8 +143,8 @@ public:
                std::vector<PosType>& quadPoints,
                std::vector<ValueType>& psi_ratios) override;

-  TwoBodyJastrowOrbitalBspline(const std::string& obj_name, ParticleSet& pset)
-      : J2OrbitalSoA<FT>(obj_name, pset),
+  TwoBodyJastrowCUDA(const std::string& obj_name, ParticleSet& pset, bool use_offload)
+      : TwoBodyJastrow<FT>(obj_name, pset, use_offload),
        PtclRef(pset),
        L(obj_name + "L"),
        Linv(obj_name + "Linv"),
@ -164,8 +164,7 @@ public:
  {
    UsePBC = pset.getLattice().SuperCellEnum;
    app_log() << "UsePBC = " << UsePBC << std::endl;
-    int nsp = this->NumGroups = pset.groups();
-    GPUSplines.resize(nsp * nsp, 0);
+    GPUSplines.resize(this->NumGroups * this->NumGroups, 0);
    if (UsePBC)
    {
      gpu::host_vector<CTS::RealType> LHost(OHMMS_DIM * OHMMS_DIM), LinvHost(OHMMS_DIM * OHMMS_DIM);
--- a/src/QMCWaveFunctions/tests/test_J2_bspline.cpp
+++ b/src/QMCWaveFunctions/tests/test_J2_bspline.cpp
@ -19,7 +19,7 @@
 #include "QMCWaveFunctions/Jastrow/BsplineFunctor.h"
 #include "QMCWaveFunctions/Jastrow/RadialJastrowBuilder.h"
 #include "ParticleBase/ParticleAttribOps.h"
-#include "QMCWaveFunctions/Jastrow/J2OrbitalSoA.h"
+#include "QMCWaveFunctions/Jastrow/TwoBodyJastrow.h"

 #include <cstdio>
 #include <string>
@ -84,7 +84,7 @@ TEST_CASE("BSpline builder Jastrow J2", "[wavefunction]")

  RadialJastrowBuilder jastrow(c, elec_);

-  using J2Type = J2OrbitalSoA<BsplineFunctor<RealType>>;
+  using J2Type = TwoBodyJastrow<BsplineFunctor<RealType>>;
  auto j2_uptr = jastrow.buildComponent(jas1);
  J2Type* j2   = dynamic_cast<J2Type*>(j2_uptr.get());
  REQUIRE(j2);
--- a/src/QMCWaveFunctions/tests/test_J2_derivatives.cpp
+++ b/src/QMCWaveFunctions/tests/test_J2_derivatives.cpp
@ -12,7 +12,7 @@
 #include "catch.hpp"

 #include <memory>
-#include "Jastrow/J2OrbitalSoA.h"
+#include "Jastrow/TwoBodyJastrow.h"
 #include "Jastrow/FakeFunctor.h"

 namespace qmcplusplus
@ -20,25 +20,25 @@ namespace qmcplusplus

 using FakeJasFunctor = FakeFunctor<OHMMS_PRECISION>;

-TEST_CASE("J2OrbitalSoA simple", "[wavefunction]")
+TEST_CASE("TwoBodyJastrow simple", "[wavefunction]")
 {
  const SimulationCell simulation_cell;
  ParticleSet elec(simulation_cell);
  elec.setName("e");
  elec.create({1, 1});
-  J2OrbitalSoA<FakeJasFunctor> jorb("J2_fake", elec);
+  TwoBodyJastrow<FakeJasFunctor> jorb("J2_fake", elec, false);

  opt_variables_type active;
  jorb.checkOutVariables(active);
 }

-TEST_CASE("J2OrbitalSoA one species and two variables", "[wavefunction]")
+TEST_CASE("TwoBodyJastrow one species and two variables", "[wavefunction]")
 {
  const SimulationCell simulation_cell;
  ParticleSet elec(simulation_cell);
  elec.setName("e");
  elec.create({1, 1});
-  J2OrbitalSoA<FakeJasFunctor> jorb("J2_fake", elec);
+  TwoBodyJastrow<FakeJasFunctor> jorb("J2_fake", elec, false);

  auto j2_uptr = std::make_unique<FakeJasFunctor>("test_fake");
  auto& j2     = *j2_uptr;
@ -79,12 +79,12 @@ ParticleSet get_two_species_particleset(const SimulationCell& simulation_cell)
 }

 // Two variables, both active
-TEST_CASE("J2OrbitalSoA two variables", "[wavefunction]")
+TEST_CASE("TwoBodyJastrow two variables", "[wavefunction]")
 {
  const SimulationCell simulation_cell;
  ParticleSet elec = get_two_species_particleset(simulation_cell);

-  J2OrbitalSoA<FakeJasFunctor> jorb("J2_fake", elec);
+  TwoBodyJastrow<FakeJasFunctor> jorb("J2_fake", elec, false);

  auto j2a_uptr = std::make_unique<FakeJasFunctor>("test_fake_a");
  auto& j2a     = *j2a_uptr;
@ -146,12 +146,12 @@ TEST_CASE("J2OrbitalSoA two variables", "[wavefunction]")
 // Reproduce 2814.  If the variational parameter for the first Jastrow factor
 // is not active, the offsets are not correct.
 // "First" means the first in the function list F, which has species indices 0,0
-TEST_CASE("J2OrbitalSoA variables fail", "[wavefunction]")
+TEST_CASE("TwoBodyJastrow variables fail", "[wavefunction]")
 {
  const SimulationCell simulation_cell;
  ParticleSet elec = get_two_species_particleset(simulation_cell);

-  J2OrbitalSoA<FakeJasFunctor> jorb("J2_fake", elec);
+  TwoBodyJastrow<FakeJasFunctor> jorb("J2_fake", elec, false);

  auto j2a_uptr = std::make_unique<FakeJasFunctor>("test_fake_a");
  auto& j2a     = *j2a_uptr;
@ -212,12 +212,12 @@ TEST_CASE("J2OrbitalSoA variables fail", "[wavefunction]")

 // Other variational parameters in the wavefunction (e.g. one-body Jastrow)

-TEST_CASE("J2OrbitalSoA other variables", "[wavefunction]")
+TEST_CASE("TwoBodyJastrow other variables", "[wavefunction]")
 {
  const SimulationCell simulation_cell;
  ParticleSet elec = get_two_species_particleset(simulation_cell);

-  J2OrbitalSoA<FakeJasFunctor> jorb("J2_fake", elec);
+  TwoBodyJastrow<FakeJasFunctor> jorb("J2_fake", elec, false);

  auto j2a_uptr = std::make_unique<FakeJasFunctor>("test_fake_a");
  auto j2a      = *j2a_uptr;
@ -285,7 +285,7 @@ TEST_CASE("J2OrbitalSoA other variables", "[wavefunction]")
 // Reproduce 3137.  If the number of particle types equals the number of particles
 // the two body jastrow is not constructed correctly (except in the case of two
 // particles).
-TEST_CASE("J2OrbitalSoA Jastrow three particles of three types", "[wavefunction]")
+TEST_CASE("TwoBodyJastrow Jastrow three particles of three types", "[wavefunction]")
 {
  const SimulationCell simulation_cell;
  ParticleSet ions(simulation_cell);
@ -309,8 +309,7 @@ TEST_CASE("J2OrbitalSoA Jastrow three particles of three types", "[wavefunction]
  elec.R[2][1] = 0.9679;
  elec.R[2][2] = 0.0128914;

-
-  J2OrbitalSoA<FakeJasFunctor> jorb("J2_fake", elec);
+  TwoBodyJastrow<FakeJasFunctor> jorb("J2_fake", elec, false);

  // 0 uu  (0,0)
  // 1 ud  (0,1)
--- a/src/QMCWaveFunctions/tests/test_TrialWaveFunction_He.cpp
+++ b/src/QMCWaveFunctions/tests/test_TrialWaveFunction_He.cpp
@ -154,49 +154,52 @@ TEST_CASE("TrialWaveFunction flex_evaluateParameterDerivatives", "[wavefunction]
  elec.createResource(pset_res);
  psi.createResource(twf_res);

-  RefVectorWithLeader<TrialWaveFunction> wf_list(psi, {psi});
-  RefVectorWithLeader<ParticleSet> p_list(elec, {elec});
+  {
+    // Test list with one wavefunction
+    RefVectorWithLeader<TrialWaveFunction> wf_list(psi, {psi});
+    RefVectorWithLeader<ParticleSet> p_list(elec, {elec});

-  ResourceCollectionTeamLock<ParticleSet> mw_pset_lock(pset_res, p_list);
-  ResourceCollectionTeamLock<TrialWaveFunction> mw_twf_lock(twf_res, wf_list);
+    ResourceCollectionTeamLock<ParticleSet> mw_pset_lock(pset_res, p_list);
+    ResourceCollectionTeamLock<TrialWaveFunction> mw_twf_lock(twf_res, wf_list);

-  // Test list with one wavefunction

-  int nentry = 1;
-  RecordArray<ValueType> dlogpsi_list(nentry, nparam);
-  RecordArray<ValueType> dhpsi_over_psi_list(nentry, nparam);
+    const int nentry = 1;
+    RecordArray<ValueType> dlogpsi_list(nentry, nparam);
+    RecordArray<ValueType> dhpsi_over_psi_list(nentry, nparam);

-  TrialWaveFunction::mw_evaluateParameterDerivatives(wf_list, p_list, var_param, dlogpsi_list, dhpsi_over_psi_list);
+    TrialWaveFunction::mw_evaluateParameterDerivatives(wf_list, p_list, var_param, dlogpsi_list, dhpsi_over_psi_list);

-  CHECK(dlogpsi[0] == ValueApprox(dlogpsi_list[0][0]));
-  CHECK(dhpsioverpsi[0] == ValueApprox(dhpsi_over_psi_list[0][0]));
+    CHECK(dlogpsi[0] == ValueApprox(dlogpsi_list[0][0]));
+    CHECK(dhpsioverpsi[0] == ValueApprox(dhpsi_over_psi_list[0][0]));
+  }

-  // Test list with two wavefunctions
+  { // Test list with two wavefunctions
+    const int nentry = 2;
+    RecordArray<ValueType> dlogpsi_list(nentry, nparam);
+    RecordArray<ValueType> dhpsi_over_psi_list(nentry, nparam);

-  nentry = 2;
-  dlogpsi_list.resize(nentry, nparam);
-  dhpsi_over_psi_list.resize(nentry, nparam);
+    ParticleSet elec2(elec);
+    elec2.R[0][0] = 0.9;
+    elec2.update();

-  ParticleSet elec2(elec);
-  elec2.R[0][0] = 0.9;
-  elec2.update();
+    RefVectorWithLeader<TrialWaveFunction> wf_list(psi, {psi, psi});
+    RefVectorWithLeader<ParticleSet> p_list(elec, {elec, elec2});
+    ResourceCollectionTeamLock<ParticleSet> mw_pset_lock(pset_res, p_list);
+    ResourceCollectionTeamLock<TrialWaveFunction> mw_twf_lock(twf_res, wf_list);

-  // Will re-using  the same TrialWaveFunction work, or should a new one be created.
-  //  If a new one is needed, what is the easiest way to copy?
-  wf_list.push_back(psi);
-  p_list.push_back(elec2);
-  TrialWaveFunction::mw_evaluateParameterDerivatives(wf_list, p_list, var_param, dlogpsi_list, dhpsi_over_psi_list);
+    TrialWaveFunction::mw_evaluateParameterDerivatives(wf_list, p_list, var_param, dlogpsi_list, dhpsi_over_psi_list);

-  Vector<ValueType> dlogpsi2(nparam);
-  Vector<ValueType> dhpsioverpsi2(nparam);
+    Vector<ValueType> dlogpsi2(nparam);
+    Vector<ValueType> dhpsioverpsi2(nparam);

-  psi.evaluateDerivatives(elec2, var_param, dlogpsi2, dhpsioverpsi2);
+    psi.evaluateDerivatives(elec2, var_param, dlogpsi2, dhpsioverpsi2);

-  CHECK(dlogpsi[0] == ValueApprox(dlogpsi_list[0][0]));
-  CHECK(dhpsioverpsi[0] == ValueApprox(dhpsi_over_psi_list[0][0]));
+    CHECK(dlogpsi[0] == ValueApprox(dlogpsi_list[0][0]));
+    CHECK(dhpsioverpsi[0] == ValueApprox(dhpsi_over_psi_list[0][0]));

-  CHECK(dlogpsi2[0] == ValueApprox(dlogpsi_list[1][0]));
-  CHECK(dhpsioverpsi2[0] == ValueApprox(dhpsi_over_psi_list[1][0]));
+    CHECK(dlogpsi2[0] == ValueApprox(dlogpsi_list[1][0]));
+    CHECK(dhpsioverpsi2[0] == ValueApprox(dhpsi_over_psi_list[1][0]));
+  }
 }


@ -258,24 +261,6 @@ TEST_CASE("TrialWaveFunction flex_evaluateDeltaLogSetup", "[wavefunction]")
  psi2.addComponent(std::move(orb2));


-  // Prepare to compare using list with one wavefunction and particleset
-
-  int nentry = 1;
-  int nelec  = 2;
-
-  RefVectorWithLeader<ParticleSet> p_list(elec1b, {elec1b});
-  RefVectorWithLeader<TrialWaveFunction> wf_list(psi, {psi});
-
-  // Evaluate new flex_evaluateDeltaLogSetup
-
-  std::vector<RealType> logpsi_fixed_list(nentry);
-  std::vector<RealType> logpsi_opt_list(nentry);
-
-  auto fixedG_list_ptr = create_particle_gradient(nelec, nentry);
-  auto fixedL_list_ptr = create_particle_laplacian(nelec, nentry);
-  auto fixedG_list     = convertUPtrToRefVector(fixedG_list_ptr);
-  auto fixedL_list     = convertUPtrToRefVector(fixedL_list_ptr);
-
  // testing batched interfaces
  ResourceCollection pset_res("test_pset_res");
  ResourceCollection twf_res("test_twf_res");
@ -283,16 +268,7 @@ TEST_CASE("TrialWaveFunction flex_evaluateDeltaLogSetup", "[wavefunction]")
  elec1b.createResource(pset_res);
  psi.createResource(twf_res);

-  ResourceCollectionTeamLock<ParticleSet> mw_pset_lock(pset_res, p_list);
-  ResourceCollectionTeamLock<TrialWaveFunction> mw_twf_lock(twf_res, wf_list);
-
-
-  TrialWaveFunction::mw_evaluateDeltaLogSetup(wf_list, p_list, logpsi_fixed_list, logpsi_opt_list, fixedG_list,
-                                              fixedL_list);
-
-
-  // Evaluate old (single item) evaluateDeltaLog
-
+  const int nelec = 2;
  RealType logpsi_fixed_r1;
  RealType logpsi_opt_r1;
  ParticleSet::ParticleGradient fixedG1;
@ -300,153 +276,176 @@ TEST_CASE("TrialWaveFunction flex_evaluateDeltaLogSetup", "[wavefunction]")
  fixedG1.resize(nelec);
  fixedL1.resize(nelec);

-  psi.evaluateDeltaLogSetup(elec1, logpsi_fixed_r1, logpsi_opt_r1, fixedG1, fixedL1);
+  { // Prepare to compare using list with one wavefunction and particleset
+    const int nentry = 1;

-  CHECK(logpsi_fixed_r1 == Approx(logpsi_fixed_list[0]));
-  CHECK(logpsi_opt_r1 == Approx(logpsi_opt_list[0]));
+    RefVectorWithLeader<ParticleSet> p_list(elec1b, {elec1b});
+    RefVectorWithLeader<TrialWaveFunction> wf_list(psi, {psi});

-  CHECK(fixedG1[0][0] == ValueApprox(fixedG_list[0].get()[0][0]));
-  CHECK(fixedG1[0][1] == ValueApprox(fixedG_list[0].get()[0][1]));
-  CHECK(fixedG1[0][2] == ValueApprox(fixedG_list[0].get()[0][2]));
-  CHECK(fixedG1[1][0] == ValueApprox(fixedG_list[0].get()[1][0]));
-  CHECK(fixedG1[1][1] == ValueApprox(fixedG_list[0].get()[1][1]));
-  CHECK(fixedG1[1][2] == ValueApprox(fixedG_list[0].get()[1][2]));
+    // Evaluate new flex_evaluateDeltaLogSetup
+    auto fixedG_list_ptr = create_particle_gradient(nelec, nentry);
+    auto fixedL_list_ptr = create_particle_laplacian(nelec, nentry);
+    auto fixedG_list     = convertUPtrToRefVector(fixedG_list_ptr);
+    auto fixedL_list     = convertUPtrToRefVector(fixedL_list_ptr);

-  CHECK(fixedL1[0] == ValueApprox(fixedL_list[0].get()[0]));
-  CHECK(fixedL1[1] == ValueApprox(fixedL_list[0].get()[1]));
+    std::vector<RealType> logpsi_fixed_list(nentry);
+    std::vector<RealType> logpsi_opt_list(nentry);

-  // Compare the ParticleSet gradient and laplacian storage
-  // This should be temporary until these get removed from ParticleSet
-  CHECK(elec1b.L[0] == ValueApprox(elec1.L[0]));
-  CHECK(elec1b.L[1] == ValueApprox(elec1.L[1]));
+    ResourceCollectionTeamLock<ParticleSet> mw_pset_lock(pset_res, p_list);
+    ResourceCollectionTeamLock<TrialWaveFunction> mw_twf_lock(twf_res, wf_list);

-  CHECK(elec1b.G[0][0] == ValueApprox(elec1.G[0][0]));
-  CHECK(elec1b.G[1][1] == ValueApprox(elec1.G[1][1]));
-
-  // Prepare to compare using list with two wavefunctions and particlesets
-
-  nentry = 2;
-
-  wf_list.push_back(psi2);
-  p_list.push_back(elec2b);
-
-  ParticleSet::ParticleGradient G2;
-  ParticleSet::ParticleLaplacian L2;
-  G2.resize(nelec);
-  L2.resize(nelec);
-  fixedG_list.push_back(G2);
-  fixedL_list.push_back(L2);
-
-  std::vector<RealType> logpsi_fixed_list2(nentry);
-  std::vector<RealType> logpsi_opt_list2(nentry);
-
-  RealType logpsi_fixed_r1b;
-  RealType logpsi_opt_r1b;
-  psi2.evaluateDeltaLogSetup(elec1, logpsi_fixed_r1b, logpsi_opt_r1b, fixedG1, fixedL1);
-
-  CHECK(logpsi_fixed_r1 == Approx(logpsi_fixed_r1b));
-  CHECK(logpsi_opt_r1 == Approx(logpsi_opt_r1b));
-
-  auto fixedG_list2_ptr = create_particle_gradient(nelec, nentry);
-  auto fixedL_list2_ptr = create_particle_laplacian(nelec, nentry);
-  auto fixedG_list2     = convertUPtrToRefVector(fixedG_list2_ptr);
-  auto fixedL_list2     = convertUPtrToRefVector(fixedL_list2_ptr);
-
-  TrialWaveFunction::mw_evaluateDeltaLogSetup(wf_list, p_list, logpsi_fixed_list2, logpsi_opt_list2, fixedG_list2,
-                                              fixedL_list2);
-
-  // Evaluate old (single item) evaluateDeltaLog corresponding to the second wavefunction/particleset
-
-  RealType logpsi_fixed_r2;
-  RealType logpsi_opt_r2;
-  ParticleSet::ParticleGradient fixedG2;
-  ParticleSet::ParticleLaplacian fixedL2;
-  fixedG2.resize(nelec);
-  fixedL2.resize(nelec);
-
-  psi2.setLogPsi(0.0);
-  psi2.evaluateDeltaLogSetup(elec2, logpsi_fixed_r2, logpsi_opt_r2, fixedG2, fixedL2);
+    TrialWaveFunction::mw_evaluateDeltaLogSetup(wf_list, p_list, logpsi_fixed_list, logpsi_opt_list, fixedG_list,
+                                                fixedL_list);


-  CHECK(logpsi_fixed_r1 == Approx(logpsi_fixed_r1b));
+    // Evaluate old (single item) evaluateDeltaLog
+    psi.evaluateDeltaLogSetup(elec1, logpsi_fixed_r1, logpsi_opt_r1, fixedG1, fixedL1);

-  CHECK(logpsi_fixed_list[0] == Approx(logpsi_fixed_list2[0]));
+    CHECK(logpsi_fixed_r1 == Approx(logpsi_fixed_list[0]));
+    CHECK(logpsi_opt_r1 == Approx(logpsi_opt_list[0]));

-  CHECK(logpsi_fixed_r1 == Approx(logpsi_fixed_list2[0]));
-  CHECK(logpsi_opt_r1 == Approx(logpsi_opt_list2[0]));
+    CHECK(fixedG1[0][0] == ValueApprox(fixedG_list[0].get()[0][0]));
+    CHECK(fixedG1[0][1] == ValueApprox(fixedG_list[0].get()[0][1]));
+    CHECK(fixedG1[0][2] == ValueApprox(fixedG_list[0].get()[0][2]));
+    CHECK(fixedG1[1][0] == ValueApprox(fixedG_list[0].get()[1][0]));
+    CHECK(fixedG1[1][1] == ValueApprox(fixedG_list[0].get()[1][1]));
+    CHECK(fixedG1[1][2] == ValueApprox(fixedG_list[0].get()[1][2]));

-  CHECK(logpsi_fixed_r2 == Approx(logpsi_fixed_list2[1]));
-  CHECK(logpsi_opt_r2 == Approx(logpsi_opt_list2[1]));
+    CHECK(fixedL1[0] == ValueApprox(fixedL_list[0].get()[0]));
+    CHECK(fixedL1[1] == ValueApprox(fixedL_list[0].get()[1]));

-  // Laplacian for first entry in the wavefunction/particleset list
-  CHECK(fixedL1[0] == ValueApprox(fixedL_list2[0].get()[0]));
-  CHECK(fixedL1[1] == ValueApprox(fixedL_list2[0].get()[1]));
-  // Laplacian for second entry in the wavefunction/particleset list
-  CHECK(fixedL2[0] == ValueApprox(fixedL_list2[1].get()[0]));
-  CHECK(fixedL2[1] == ValueApprox(fixedL_list2[1].get()[1]));
+    // Compare the ParticleSet gradient and laplacian storage
+    // This should be temporary until these get removed from ParticleSet
+    CHECK(elec1b.L[0] == ValueApprox(elec1.L[0]));
+    CHECK(elec1b.L[1] == ValueApprox(elec1.L[1]));
+
+    CHECK(elec1b.G[0][0] == ValueApprox(elec1.G[0][0]));
+    CHECK(elec1b.G[1][1] == ValueApprox(elec1.G[1][1]));
+  }
+
+  { // Prepare to compare using list with two wavefunctions and particlesets
+    const int nentry = 2;
+
+    RefVectorWithLeader<ParticleSet> p_list(elec1b, {elec1b, elec2b});
+    RefVectorWithLeader<TrialWaveFunction> wf_list(psi, {psi, psi2});
+
+    ResourceCollectionTeamLock<ParticleSet> mw_pset_lock(pset_res, p_list);
+    ResourceCollectionTeamLock<TrialWaveFunction> mw_twf_lock(twf_res, wf_list);
+
+    auto fixedG_list_ptr = create_particle_gradient(nelec, nentry);
+    auto fixedL_list_ptr = create_particle_laplacian(nelec, nentry);
+    auto fixedG_list     = convertUPtrToRefVector(fixedG_list_ptr);
+    auto fixedL_list     = convertUPtrToRefVector(fixedL_list_ptr);
+
+    std::vector<RealType> logpsi_fixed_list2(nentry);
+    std::vector<RealType> logpsi_opt_list2(nentry);
+
+    RealType logpsi_fixed_r1b;
+    RealType logpsi_opt_r1b;
+    psi2.evaluateDeltaLogSetup(elec1, logpsi_fixed_r1b, logpsi_opt_r1b, fixedG1, fixedL1);
+
+    CHECK(logpsi_fixed_r1 == Approx(logpsi_fixed_r1b));
+    CHECK(logpsi_opt_r1 == Approx(logpsi_opt_r1b));
+
+    auto fixedG_list2_ptr = create_particle_gradient(nelec, nentry);
+    auto fixedL_list2_ptr = create_particle_laplacian(nelec, nentry);
+    auto fixedG_list2     = convertUPtrToRefVector(fixedG_list2_ptr);
+    auto fixedL_list2     = convertUPtrToRefVector(fixedL_list2_ptr);
+
+    TrialWaveFunction::mw_evaluateDeltaLogSetup(wf_list, p_list, logpsi_fixed_list2, logpsi_opt_list2, fixedG_list2,
+                                                fixedL_list2);
+
+    // Evaluate old (single item) evaluateDeltaLog corresponding to the second wavefunction/particleset
+
+    RealType logpsi_fixed_r2;
+    RealType logpsi_opt_r2;
+    ParticleSet::ParticleGradient fixedG2;
+    ParticleSet::ParticleLaplacian fixedL2;
+    fixedG2.resize(nelec);
+    fixedL2.resize(nelec);
+
+    psi2.setLogPsi(0.0);
+    psi2.evaluateDeltaLogSetup(elec2, logpsi_fixed_r2, logpsi_opt_r2, fixedG2, fixedL2);
+
+    CHECK(logpsi_fixed_r1 == Approx(logpsi_fixed_r1b));
+    CHECK(logpsi_opt_r1 == Approx(logpsi_opt_r1b));
+
+    CHECK(logpsi_fixed_r1 == Approx(logpsi_fixed_list2[0]));
+    CHECK(logpsi_opt_r1 == Approx(logpsi_opt_list2[0]));
+
+    CHECK(logpsi_fixed_r2 == Approx(logpsi_fixed_list2[1]));
+    CHECK(logpsi_opt_r2 == Approx(logpsi_opt_list2[1]));
+
+    // Laplacian for first entry in the wavefunction/particleset list
+    CHECK(fixedL1[0] == ValueApprox(fixedL_list2[0].get()[0]));
+    CHECK(fixedL1[1] == ValueApprox(fixedL_list2[0].get()[1]));
+    // Laplacian for second entry in the wavefunction/particleset list
+    CHECK(fixedL2[0] == ValueApprox(fixedL_list2[1].get()[0]));
+    CHECK(fixedL2[1] == ValueApprox(fixedL_list2[1].get()[1]));


-  // First entry wavefunction/particleset list
-  // Gradient for first electron
-  CHECK(fixedG1[0][0] == ValueApprox(fixedG_list2[0].get()[0][0]));
-  CHECK(fixedG1[0][1] == ValueApprox(fixedG_list2[0].get()[0][1]));
-  CHECK(fixedG1[0][2] == ValueApprox(fixedG_list2[0].get()[0][2]));
-  // Gradient for second electron
-  CHECK(fixedG1[1][0] == ValueApprox(fixedG_list2[0].get()[1][0]));
-  CHECK(fixedG1[1][1] == ValueApprox(fixedG_list2[0].get()[1][1]));
-  CHECK(fixedG1[1][2] == ValueApprox(fixedG_list2[0].get()[1][2]));
+    // First entry wavefunction/particleset list
+    // Gradient for first electron
+    CHECK(fixedG1[0][0] == ValueApprox(fixedG_list2[0].get()[0][0]));
+    CHECK(fixedG1[0][1] == ValueApprox(fixedG_list2[0].get()[0][1]));
+    CHECK(fixedG1[0][2] == ValueApprox(fixedG_list2[0].get()[0][2]));
+    // Gradient for second electron
+    CHECK(fixedG1[1][0] == ValueApprox(fixedG_list2[0].get()[1][0]));
+    CHECK(fixedG1[1][1] == ValueApprox(fixedG_list2[0].get()[1][1]));
+    CHECK(fixedG1[1][2] == ValueApprox(fixedG_list2[0].get()[1][2]));


-  // Second entry wavefunction/particleset list
-  // Gradient for first electron
-  CHECK(fixedG2[0][0] == ValueApprox(fixedG_list2[1].get()[0][0]));
-  CHECK(fixedG2[0][1] == ValueApprox(fixedG_list2[1].get()[0][1]));
-  CHECK(fixedG2[0][2] == ValueApprox(fixedG_list2[1].get()[0][2]));
-  // Gradient for second electron
-  CHECK(fixedG2[1][0] == ValueApprox(fixedG_list2[1].get()[1][0]));
-  CHECK(fixedG2[1][1] == ValueApprox(fixedG_list2[1].get()[1][1]));
-  CHECK(fixedG2[1][2] == ValueApprox(fixedG_list2[1].get()[1][2]));
+    // Second entry wavefunction/particleset list
+    // Gradient for first electron
+    CHECK(fixedG2[0][0] == ValueApprox(fixedG_list2[1].get()[0][0]));
+    CHECK(fixedG2[0][1] == ValueApprox(fixedG_list2[1].get()[0][1]));
+    CHECK(fixedG2[0][2] == ValueApprox(fixedG_list2[1].get()[0][2]));
+    // Gradient for second electron
+    CHECK(fixedG2[1][0] == ValueApprox(fixedG_list2[1].get()[1][0]));
+    CHECK(fixedG2[1][1] == ValueApprox(fixedG_list2[1].get()[1][1]));
+    CHECK(fixedG2[1][2] == ValueApprox(fixedG_list2[1].get()[1][2]));

-  // Compare the ParticleSet gradient and laplacian storage
-  // This should be temporary until these get removed from ParticleSet
-  CHECK(elec1b.L[0] == ValueApprox(elec1.L[0]));
-  CHECK(elec1b.L[1] == ValueApprox(elec1.L[1]));
-  CHECK(elec2b.L[0] == ValueApprox(elec2.L[0]));
-  CHECK(elec2b.L[1] == ValueApprox(elec2.L[1]));
+    // Compare the ParticleSet gradient and laplacian storage
+    // This should be temporary until these get removed from ParticleSet
+    CHECK(elec1b.L[0] == ValueApprox(elec1.L[0]));
+    CHECK(elec1b.L[1] == ValueApprox(elec1.L[1]));
+    CHECK(elec2b.L[0] == ValueApprox(elec2.L[0]));
+    CHECK(elec2b.L[1] == ValueApprox(elec2.L[1]));

-  CHECK(elec2b.G[0][0] == ValueApprox(elec2.G[0][0]));
-  CHECK(elec2b.G[1][1] == ValueApprox(elec2.G[1][1]));
+    CHECK(elec2b.G[0][0] == ValueApprox(elec2.G[0][0]));
+    CHECK(elec2b.G[1][1] == ValueApprox(elec2.G[1][1]));


-  // these lists not used if 'recompute' is false
-  RefVector<ParticleSet::ParticleGradient> dummyG_list;
-  RefVector<ParticleSet::ParticleLaplacian> dummyL_list;
+    // these lists not used if 'recompute' is false
+    RefVector<ParticleSet::ParticleGradient> dummyG_list;
+    RefVector<ParticleSet::ParticleLaplacian> dummyL_list;

-  std::vector<RealType> logpsi_variable_list(nentry);
-  TrialWaveFunction::mw_evaluateDeltaLog(wf_list, p_list, logpsi_variable_list, dummyG_list, dummyL_list, false);
+    std::vector<RealType> logpsi_variable_list(nentry);
+    TrialWaveFunction::mw_evaluateDeltaLog(wf_list, p_list, logpsi_variable_list, dummyG_list, dummyL_list, false);

-  RealType logpsi1 = psi.evaluateDeltaLog(p_list[0], false);
-  CHECK(logpsi1 == Approx(logpsi_variable_list[0]));
+    RealType logpsi1 = psi.evaluateDeltaLog(p_list[0], false);
+    CHECK(logpsi1 == Approx(logpsi_variable_list[0]));

-  RealType logpsi2 = psi2.evaluateDeltaLog(p_list[1], false);
-  CHECK(logpsi2 == Approx(logpsi_variable_list[1]));
+    RealType logpsi2 = psi2.evaluateDeltaLog(p_list[1], false);
+    CHECK(logpsi2 == Approx(logpsi_variable_list[1]));


-  // Now check with 'recompute = true'
-  auto dummyG_list2_ptr = create_particle_gradient(nelec, nentry);
-  auto dummyL_list2_ptr = create_particle_laplacian(nelec, nentry);
-  auto dummyG_list2     = convertUPtrToRefVector(dummyG_list2_ptr);
-  auto dummyL_list2     = convertUPtrToRefVector(dummyL_list2_ptr);
+    // Now check with 'recompute = true'
+    auto dummyG_list2_ptr = create_particle_gradient(nelec, nentry);
+    auto dummyL_list2_ptr = create_particle_laplacian(nelec, nentry);
+    auto dummyG_list2     = convertUPtrToRefVector(dummyG_list2_ptr);
+    auto dummyL_list2     = convertUPtrToRefVector(dummyL_list2_ptr);

-  std::vector<RealType> logpsi_variable_list2(nentry);
+    std::vector<RealType> logpsi_variable_list2(nentry);

-  TrialWaveFunction::mw_evaluateDeltaLog(wf_list, p_list, logpsi_variable_list2, dummyG_list2, dummyL_list2, true);
+    TrialWaveFunction::mw_evaluateDeltaLog(wf_list, p_list, logpsi_variable_list2, dummyG_list2, dummyL_list2, true);

-  RealType logpsi1b = psi.evaluateDeltaLog(p_list[0], true);
-  CHECK(logpsi1b == Approx(logpsi_variable_list2[0]));
+    RealType logpsi1b = psi.evaluateDeltaLog(p_list[0], true);
+    CHECK(logpsi1b == Approx(logpsi_variable_list2[0]));

-  RealType logpsi2b = psi2.evaluateDeltaLog(p_list[1], true);
-  CHECK(logpsi2b == Approx(logpsi_variable_list2[1]));
+    RealType logpsi2b = psi2.evaluateDeltaLog(p_list[1], true);
+    CHECK(logpsi2b == Approx(logpsi_variable_list2[1]));
+  }
 }
 #endif