Merge branch 'develop' into dmc_population_cont

Peter Doak 2020-09-22 17:03:35 -04:00
commit daf2082a1d
155 changed files with 4400 additions and 1161 deletions

View File

@ -768,8 +768,8 @@ IF(ENABLE_HIP)
ADD_LIBRARY(HIP::HIP INTERFACE IMPORTED)
# temporarily put hipsparse hipblas here for convenience, should be moved to Platforms.
SET_TARGET_PROPERTIES(HIP::HIP PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${ROCM_ROOT}/include"
INTERFACE_COMPILE_DEFINITIONS "ENABLE_HIP;__HIP_PLATFORM_HCC__"
INTERFACE_LINK_LIBRARIES "-L${ROCM_ROOT}/lib;-lhipsparse;-lhipblas;-lhip_hcc")
INTERFACE_COMPILE_DEFINITIONS "ENABLE_HIP"
INTERFACE_LINK_LIBRARIES "-L${ROCM_ROOT}/lib;-lhipsparse;-lhipblas;-lamdhip64")
ENDIF(ENABLE_HIP)
######################################################

View File

@ -1552,6 +1552,12 @@ def qmcpack_simulation():
def observables():
nunit_all()
#end def observables
def nxs_redo():
nunit('test_redo')
@ -1913,6 +1919,7 @@ NexusTest( quantum_package_simulation )
NexusTest( pwscf_postprocessor_simulations )
NexusTest( qmcpack_converter_simulations )
NexusTest( qmcpack_simulation )
NexusTest( observables )
NexusTest( nxs_redo )
NexusTest( nxs_sim )
NexusTest( qmca )
@ -1938,6 +1945,7 @@ NexusTest( qdens )
#NexusTest( numerics )
#NexusTest( nxs_redo )
#NexusTest( nxs_sim )
#NexusTest( observables )
#NexusTest( optional_dependencies , optional=True)
#NexusTest( periodic_table )
#NexusTest( physical_system )
@ -2232,6 +2240,7 @@ if __name__=='__main__':
exit()
#end if
global_data['verbose'] = options.verbose
global_data['job_ref_table'] = options.job_ref_table
if options.regex!='none':

View File

@ -64,22 +64,22 @@ sorted_generic = sorted_py2
def log(*items,**kwargs):
indent=None
logfile=generic_settings.devlog
indent = None
logfile = generic_settings.devlog
if len(kwargs)>0:
n=0
if 'indent' in kwargs:
indent = kwargs['indent']
n+=1
indent = kwargs.pop('indent' ,None )
logfile = kwargs.pop('logfile',logfile)
n = kwargs.pop('n',0)
if n!=0:
if indent is None:
indent = n*' '
else:
indent = n*indent
#end if
#end if
if 'logfile' in kwargs:
logfile = kwargs['logfile']
n+=1
#end if
if n!=len(kwargs):
valid = 'indent logfile'.split()
invalid = set(kwargs.keys())-set(valid)
error('invalid keyword arguments provided\ninvalid keywords: {0}\nvalid options are: {1}'.format(sorted(invalid),valid))
if len(kwargs)>0:
valid = 'indent logfile n'.split()
error('Invalid keyword arguments provided.\nInvalid keywords: {0}\nValid options are: {1}'.format(sorted(kwargs.keys()),valid),'log')
#end if
#end if
if len(items)==1 and isinstance(items[0],str):
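A minimal usage sketch of the refactored keyword handling above (an editorial illustration; it assumes log is importable from nexus's generic module, and the messages are placeholders):

from generic import log   # assumed import path (nexus/lib)

log('starting read')               # default target: generic_settings.devlog
log('nested detail', n=2)          # indent by two spaces
log('custom indent', indent='> ')  # explicit indent string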

View File

@ -80,8 +80,11 @@ Module contents
---------------
"""
import os
from generic import obj
from developer import DevBase,ci,message,error,unavailable
from fileio import StandardFile,XsfFile
try:
import numpy as np
@ -98,6 +101,12 @@ try:
except:
measure = unavailable('skimage','measure')
#end try
try:
import scipy.ndimage as scipy_ndimage
except:
scipy_ndimage = unavailable('scipy','ndimage')
#end try
@ -715,6 +724,24 @@ class GBase(PlotHandler):
initialized = (bool,False),
)
vlogger = None
@staticmethod
def reset_vlog():
GBase.vlogger = None
#end def reset_vlog
@staticmethod
def set_vlog(vlog):
GBase.vlogger = vlog
#end def set_vlog
def vlog(self,*args,**kwargs):
if GBase.vlogger is not None:
GBase.vlogger(*args,**kwargs)
#end if
#end def vlog
def __init__(self,*args,**kwargs):
self.reset()
@ -763,6 +790,28 @@ class GBase(PlotHandler):
#end if
#end def initialize
def read(self,filepath,format=None,check=True):
if isinstance(filepath,StandardFile):
format = filepath.sftype.lower()
elif not isinstance(filepath,str):
self.error('Cannot read file.\nExpected a file path.\nInstead received type: {}\nWith value: {}\nPlease provide a file path and try again.'.format(filepath.__class__.__name__,filepath))
elif not os.path.exists(filepath):
self.error('Cannot read file. File path does not exist.\nFile path provided: {}'.format(filepath))
elif format is not None:
if not isinstance(format,str):
self.error('Cannot read file.\nExpected text (string) for file format.\nInstead received type: {}\nWith value: {}'.format(format.__class__.__name__,format))
#end if
else:
format = filepath.rsplit('.',1)[1].lower()
#end if
self.reset()
self.read_local(filepath,format)
if check:
self.check_valid()
#end if
#end def read
def valid(self):
"""
@ -865,6 +914,32 @@ class GBase(PlotHandler):
"""
self.not_implemented()
#end def local_validity_checks
def read_local(self,filepath,format):
self.not_implemented()
#end def read_local
# test needed
def ensure_array(self,dtype=None,**arrays_in):
arrays = obj()
for k,ai in arrays_in.items():
if isinstance(ai,(tuple,list)):
if dtype is None:
a = np.array(ai)
else:
a = np.array(ai,dtype=dtype)
#end if
elif isinstance(ai,np.ndarray):
a = ai
else:
self.error('Cannot ensure array value.\nReceived data with type: {}\nOnly tuple, list, and ndarray are supported.'.format(ai.__class__.__name__))
#end if
arrays[k] = a
#end for
return arrays
#end def ensure_array
#end class GBase
@ -1127,6 +1202,12 @@ class Grid(GBase):
plt.show()
#end if
#end def plot_points
def grid_function(self,*args,**kwargs):
gf = grid_function_from_grid(self)
return gf(*args,**kwargs)
#end def grid_function
#end class Grid
@ -1249,6 +1330,11 @@ class StructuredGrid(Grid):
return np.prod(self.cell_grid_shape)
#end def ncells
@property
def npoints(self):
return np.prod(self.shape)
#end def npoints
@property
def flat_points_shape(self):
space_dim = self.r.shape[-1]
@ -1262,6 +1348,11 @@ class StructuredGrid(Grid):
return self.shape+(space_dim,)
#end def flat_points_shape
@property
def periodic(self):
return (self.bconds==self.bcond_types.periodic).all()
#end def periodic
def initialize_local(self,**kwargs):
"""
@ -1401,6 +1492,28 @@ class StructuredGrid(Grid):
#end def reshape_flat
# test needed
def flat_indices(self,full_indices):
if not isinstance(full_indices,np.ndarray):
full_indices = np.array(full_indices,dtype=int)
#end if
if len(full_indices.shape)!=2:
self.error('full_indices must have shape (# points)x(# dimensions)\nShape received: {}'.format(full_indices.shape))
elif full_indices.shape[-1]!=self.grid_dim:
self.error('full_indices must have same dimension as the grid.\nfull_indices dimension: {}\nGrid dimension: {}'.format(full_indices.shape[-1],self.grid_dim))
#end if
grid_shape = self.grid_shape
D = len(grid_shape)
grid_shape_prod = np.empty((D,),dtype=int)
grid_shape_prod[-1] = 1
for d in range(D-1):
grid_shape_prod[D-d-2] = grid_shape[D-d-1]*grid_shape_prod[D-d-1]
#end for
flat_indices = np.dot(full_indices,grid_shape_prod)
return flat_indices
#end def flat_indices
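The dot product with grid_shape_prod above is the standard row-major flattening; a small self-contained check of the same arithmetic (illustrative values only):

import numpy as np
grid_shape = (4, 5, 6)
full = np.array([[0, 0, 0], [1, 2, 3], [3, 4, 5]])
strides = np.array([30, 6, 1])      # grid_shape_prod computed as in flat_indices
flat = full.dot(strides)            # [0, 45, 119]
assert (flat == np.ravel_multi_index(full.T, grid_shape)).all()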
def unit_points(self,points=None,project=False):
"""
(`External API`) Map a set of points into the unit cube.
@ -1942,6 +2055,19 @@ class StructuredGridWithAxes(StructuredGrid):
"""
return np.abs(np.prod(np.linalg.svd(self.axes,compute_uv=False)))
#end def axes_volume
def indices_for_map_coord(self,points):
if not self.centered:
ucorner = np.array([0,0,0],dtype=float)
else:
ucorner = 0.5/np.array(self.cell_grid_shape)
#end if
grid_shape = np.array(self.grid_shape)
ipoints = (self.unit_points(points)-ucorner)*grid_shape
return ipoints.T
#end def indices_for_map_coord
#end class StructuredGridWithAxes
@ -2049,6 +2175,8 @@ class ParallelotopeGrid(StructuredGridWithAxes):
The number of grid points in each dimension.
cell_grid_shape : `tuple, int, property`
The number of grid cells in each dimension.
dr : `ndarray, float, shape(dg,ds), property`
Vector displacements between neighboring grid points.
ncells : `int, property`
The total number of grid cells.
flat_points_shape : `tuple, int, property`
@ -2069,6 +2197,16 @@ class ParallelotopeGrid(StructuredGridWithAxes):
Datatype of the grid point values.
"""
@property
def dr(self):
dr = np.empty(self.axes.shape,self.dtype)
cells = self.cell_grid_shape
for d in range(self.grid_dim):
dr[d] = self.axes[d]/cells[d]
#end for
return dr
#end def dr
@property
def corner(self):
return self.origin
@ -2135,6 +2273,45 @@ class ParallelotopeGrid(StructuredGridWithAxes):
#end def initialize_local
def read_local(self,filepath,format):
if format=='xsf':
self.read_xsf(filepath)
else:
self.error('Cannot read file.\nUnrecognized file format encountered.\nUnrecognized file format: {}\nValid options are: xsf'.format(format))
#end if
#end def read_local
def read_xsf(self,filepath):
if isinstance(filepath,XsfFile):
xsf = filepath
else:
xsf = XsfFile(filepath)
#end if
d = xsf.get_density()
cells = d.grid-1
c = d.cell.sum(axis=0)/cells/2
centered = False
corner = None
if np.abs(c-d.corner).max()<1e-6:
centered = True
else:
corner = d.corner
#end if
self.initialize(
bconds = tuple('ppp'),
axes = d.cell.copy(),
cells = cells,
centered = centered,
corner = corner,
)
#end def read_xsf
def unit_points_bare(self,points):
"""
(`Internal API`) Maps points in a parallelotope into the unit cube.
@ -2580,6 +2757,14 @@ class SpheroidGrid(StructuredGridWithAxes):
#end if
return cell_vols
#end def cell_volumes
def radii(self):
self.reshape_full()
rrad = np.array(self.r[:,0,0,-1].ravel())
self.reshape_flat()
return rrad
#end def radii
#end class SpheroidGrid
@ -3023,8 +3208,9 @@ class GridFunction(GBase):
#: (`obj`) Collection of attributes for the class. Used to check assigned
#: members for type conformity and to assign default values.
persistent_data_types = obj(
grid = (Grid,None),
values = (np.ndarray,None),
grid = (Grid , None),
values = (np.ndarray, None),
value_shape = (tuple , None),
**GBase.persistent_data_types
)
@ -3038,9 +3224,14 @@ class GridFunction(GBase):
return self.grid.npoints
#end def npoints
@property
def value_dim(self):
return len(self.value_shape)
#end def value_dim
@property
def nvalues(self):
return self.values.shape[1]
return np.prod(self.value_shape)
#end def nvalues
@property
@ -3067,6 +3258,7 @@ class GridFunction(GBase):
copy_values = True,
dtype = None,
grid_dtype = None,
value_shape = None,
**kwargs):
"""
(`Internal API`) Sets `grid` and `values` attributes.
@ -3144,13 +3336,32 @@ class GridFunction(GBase):
else:
self.error('provided function values are of incorrect type\nvalues must be tuple, list, or ndarray\nyou provided: {}'.format(values.__class__.__name__))
#end if
if len(values.shape)==1:
values.shape = (values.shape[0],1)
# process value_shape input
if len(values.shape)==1 or values.shape==grid.shape:
value_shape = (1,)
elif value_shape is None:
if len(values)==grid.npoints:
value_shape = values.shape[1:]
elif len(values.shape)>len(grid.shape) and values.shape[:len(grid.shape)]==grid.shape:
value_shape = values.shape[len(grid.shape):]
#end if
elif isinstance(value_shape,(list,np.ndarray)):
value_shape = tuple(value_shape)
#end if
if value_shape is not None:
nvtot = values.size
nv = np.prod(value_shape)
if nvtot%nv!=0 or nvtot//nv!=grid.npoints:
self.error('value_shape and total number of values are inconsistent.\nTotal number of values: {}\nvalue_shape: {}\nExpected number of values per grid point: {}\nActual number of values per grid point: {}'.format(nvtot,value_shape,nv,nvtot/nv))
#end if
values.shape = (grid.npoints,nv)
#end if
# assign grid and values
self.grid = grid
self.values = values
self.grid = grid
self.values = values
self.value_shape = value_shape
#end def initialize_local
@ -3165,17 +3376,35 @@ class GridFunction(GBase):
"""
cls = self.__class__
if not isinstance(self.grid,cls.grid_class):
msgs.append('grid is not of the required type for current grid function\ngrid function type: {}\ngrid type required: {}'.format(cls.__name__,self.grid.__class__.__name__))
msgs.append('Grid is not of the required type for current grid function.\nGrid function type: {}\nGrid type required: {}\nGrid type received: {}'.format(cls.__name__,cls.grid_class.__name__,self.grid.__class__.__name__))
#end if
self.grid.local_validity_checks(msgs)
if len(self.values)!=self.npoints:
msgs.append('number of function values and number of grid points do not match\nnumber of grid points: {}\nnumber of function values: {}'.format(self.npoints,len(self.values)))
msgs.append('Number of function values and number of grid points do not match.\nNumber of grid points: {}\nNumber of function values: {}'.format(self.npoints,len(self.values)))
#end if
if len(self.values.shape)!=2:
msgs.append('function values has incorrect shape\nexpected shape is (# of points, # of function values at each point)\nshape received: {}'.format(self.values.shape))
msgs.append('Function values has incorrect shape.\nExpected shape is (# of points, # of function values at each point)\nShape received: {}'.format(self.values.shape))
#end if
if len(self.value_shape)<1:
self.error('"value_shape" must have at least one entry.')
#end if
if np.prod(self.value_shape)!=self.values.size//self.npoints:
self.error('"value_shape" and "values" are inconsistent.\nNumber of values per point based on "values": {}\nNumber of values per point based on "value_shape": {}'.format(self.values.size/self.npoints,np.prod(self.value_shape)))
#end if
if self.values.shape!=(self.npoints,self.nvalues):
self.error('Function values has incorrect shape.\nExpected shape: {}\nShape received: {}'.format((self.npoints,self.nvalues),self.values.shape))
#end if
#end def local_validity_checks
def reshape_values_full(self):
self.values.shape = (self.npoints,)+self.value_shape
#end def reshape_values_full
def reshape_values_flat(self):
self.values.shape = (self.npoints,self.nvalues)
#end def reshape_values_flat
#end class GridFunction
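A standalone sketch of the value_shape bookkeeping introduced above: per-point tensor values are held flat as (npoints, nvalues) and reshaped on demand (pure numpy, numbers are illustrative):

import numpy as np
npoints, value_shape = 10, (3, 3)
nvalues = int(np.prod(value_shape))        # 9 values per grid point
values = np.zeros((npoints, nvalues))      # flat layout, as in reshape_values_flat
values.shape = (npoints,) + value_shape    # full layout, as in reshape_values_full
values.shape = (npoints, nvalues)          # back to the flat layout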
@ -3239,8 +3468,93 @@ class StructuredGridFunction(GridFunction):
def full_points_shape(self):
return self.grid.full_points_shape
#end def full_points_shape
@property
def flat_values_shape(self):
None
#end def flat_values_shape
@property
def periodic(self):
return self.grid.periodic
#end def periodic
def reshape_points_full(self):
self.values.shape = self.grid_shape+(self.nvalues,)
self.grid.reshape_full()
#end def reshape_points_full
def reshape_points_flat(self):
self.values.shape = (self.npoints,self.nvalues)
self.grid.reshape_flat()
#end def reshape_points_flat
def reshape_full(self):
self.values.shape = self.grid_shape+self.value_shape
self.grid.reshape_full()
#end def reshape_full
def reshape_flat(self):
self.values.shape = (self.npoints,self.nvalues)
self.grid.reshape_flat()
#end def reshape_flat
def get_values_with_upper_ghost(self):
if 'values_with_upper_ghost' in self:
return self.values_with_upper_ghost
#end if
self.reshape_points_full()
g = np.array(self.grid_shape)
v = np.empty(tuple(g+1)+(self.nvalues,),dtype=self.values.dtype)
dim = self.grid_dim
if dim==1:
n1 = self.npoints
v[:-1] = self.values
v[-1] = self.values[0]
elif dim==2:
v[:-1,:-1] = self.values
v[ -1,:-1] = self.values[0,:]
v[:-1, -1] = self.values[:,0]
v[ -1, -1] = self.values[0,0]
elif dim==3:
v[:-1,:-1,:-1] = self.values
v[ -1,:-1,:-1] = self.values[0,:,:]
v[:-1, -1,:-1] = self.values[:,0,:]
v[:-1,:-1, -1] = self.values[:,:,0]
v[:-1, -1, -1] = self.values[:,0,0]
v[ -1,:-1, -1] = self.values[0,:,0]
v[ -1, -1,:-1] = self.values[0,0,:]
v[ -1, -1, -1] = self.values[0,0,0]
else:
self.error('values_with_upper_ghost is not implemented for dimensions greater than 3.\nDimensionality of the current dataset: {}'.format(dim))
#end if
self.reshape_points_flat()
self.values_with_upper_ghost = v
return v
#end def values_with_upper_ghost
def clear_ghost(self):
ghost_fields = [
'values_with_upper_ghost',
]
for f in ghost_fields:
if f in self:
del self[f]
#end if
#end for
#end def clear_ghost
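The upper-ghost padding performed by get_values_with_upper_ghost can be illustrated with numpy's periodic padding (a sketch of the idea only, not the method itself):

import numpy as np
v = np.arange(6, dtype=float).reshape(2, 3)
ghost = np.pad(v, ((0, 1), (0, 1)), mode='wrap')   # appends v[0,:] and v[:,0] on the upper edges
assert (ghost[-1, :-1] == v[0]).all() and (ghost[:-1, -1] == v[:, 0]).all()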
def plot_unit_contours(self,boundary=False,fig=True,show=True,**kwargs):
"""
(`External API`) Make 2D contour plots in the unit coordinate space.
@ -3362,7 +3676,62 @@ class StructuredGridFunctionWithAxes(StructuredGridFunction):
This class should not be instantiated directly.
"""
def plot_contours(self,boundary=False,fig=True,show=True,**kwargs):
def interpolate(self,r,type=None,copy=False,**kw):
# https://stackoverflow.com/questions/16217995/fast-interpolation-of-regularly-sampled-3d-data-with-different-intervals-in-x-y
kw = obj(kw)
grid = None
if isinstance(r,Grid):
grid = r
r = grid.r
#end if
if type is None:
type = 'map_coordinates'
#end if
if type=='map_coordinates':
if 'mode' not in kw:
if self.periodic:
kw.mode = 'wrap'
else:
kw.mode = 'nearest'
#end if
#end if
if 'order' not in kw:
kw.order = 3
#end if
indices = self.grid.indices_for_map_coord(r)
# needed because of off-by-one error in map_coordinates
# see: https://github.com/scipy/scipy/issues/2640
v = self.get_values_with_upper_ghost()
if self.nvalues>1:
self.error('Interpolation is not yet supported for nvalues>1.')
#end if
v_shape = v.shape
v.shape = v_shape[:-1]
values = scipy_ndimage.map_coordinates(v, indices, **kw)
v.shape = v_shape
else:
self.error('Interpolation of type "{}" is not supported.\nValid options are: map_coordinates'.format(type))
#end if
if grid is None:
return values
elif grid is not None:
if copy:
grid = grid.copy()
#end if
gf = grid.grid_function(
grid = grid,
values = values,
value_shape = tuple(self.value_shape),
copy = False,
)
return gf
#end if
#end def interpolate
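A self-contained sketch of the map_coordinates pattern used in interpolate above: periodic samples, an explicit upper ghost point, and fractional grid indices (scipy assumed available; data are illustrative):

import numpy as np
import scipy.ndimage as nd

v = np.sin(np.linspace(0, 2*np.pi, 8, endpoint=False))   # periodic 1D samples
vg = np.concatenate([v, v[:1]])                          # append the upper ghost point
coords = np.array([[1.5, 6.25]])                         # fractional indices, shape (ndim, npoints)
out = nd.map_coordinates(vg, coords, order=3, mode='wrap')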
def plot_contours(self,a1=(1,0),a2=(0,1),boundary=False,fig=True,show=True,**kwargs):
"""
(`External API`) Make 2D contour plots in the full coordinate space.
@ -3380,10 +3749,19 @@ class StructuredGridFunctionWithAxes(StructuredGridFunction):
if self.grid_dim!=2:
self.error('cannot plot contours\ngrid must have dimension 2 to make contour plots\ndimension of grid for this function: {}'.format(self.grid_dim))
#end if
if self.space_dim!=2:
self.error('cannot plot contours\ngrid points must reside in a 2D space to make contour plots\ndimension of the space for this function: {}'.format(self.space_dim))
#if self.space_dim!=2:
# self.error('cannot plot contours\ngrid points must reside in a 2D space to make contour plots\ndimension of the space for this function: {}'.format(self.space_dim))
##end if
if self.space_dim==2:
X,Y = self.r.T
else:
ax = np.dot(np.array([a1,a2]),self.grid.axes)
ax[0] = ax[0]/np.linalg.norm(ax[0])
ax[1] -= np.dot(ax[1],ax[0])*ax[0]
ax[1] = ax[1]/np.linalg.norm(ax[1])
X,Y = np.dot(ax,self.r.T)
ax_trans = ax
#end if
X,Y = self.r.T
X.shape = self.grid_shape
Y.shape = self.grid_shape
Zm = self.f.T
@ -3392,7 +3770,16 @@ class StructuredGridFunctionWithAxes(StructuredGridFunction):
fig,ax = self.setup_mpl_fig(fig=fig,dim=self.grid_dim)
ax.contour(X,Y,Z,**kwargs)
if boundary:
self.grid.plot_boundary(fig=False,show=False)
if self.space_dim==2:
self.grid.plot_boundary(fig=False,show=False)
else:
bpoints = self.grid.get_boundary_lines()
bpoints = np.inner(ax_trans,bpoints)
bpoints = np.transpose(bpoints,(1,2,0))
for bp in bpoints:
ax.plot(*bp.T,color='k')
#end for
#end if
#end if
#end for
if show:
@ -3552,6 +3939,208 @@ class ParallelotopeGridFunction(StructuredGridFunctionWithAxes):
Datatype of the function values.
"""
grid_class = ParallelotopeGrid
def read_local(self,filepath,format):
if format=='xsf':
self.read_xsf(filepath)
else:
self.error('Cannot read file.\nUnrecognized file format encountered.\nUnrecognized file format: {}\nValid options are: xsf'.format(format))
#end if
#end def read_local
def read_xsf(self,filepath):
if isinstance(filepath,XsfFile):
xsf = filepath
copy = True
else:
xsf = XsfFile(filepath)
copy = False
#end if
grid = self.grid_class()
grid.read_xsf(xsf)
xsf.remove_ghost()
d = xsf.get_density()
values = d.values_noghost.ravel()
if copy:
values = values.copy()
#end if
self.initialize(
grid = grid,
values = values,
)
#end def read_xsf
# test needed
def read_from_points(self,points,values,axes,tol=1e-6,average=False):
self.vlog('Reading grid function values from scattered data.')
# check data types and shapes
d = self.ensure_array(
points = points,
values = values,
axes = axes,
dtype = float,
)
points = d.points
values = d.values
axes = d.axes.copy()
del d
if len(points)!=len(values):
self.error('"points" and "values" must have the same length.\nNumber of points: {}\nNumber of values: {}'.format(len(points),len(values)))
elif len(points.shape)!=2:
self.error('Shape of "points" array must be (# points) x (# dimensions).\nShape provided: {}'.format(points.shape))
#end if
N,D = points.shape
if axes.shape!=(D,D):
self.error('"axes" must have shape {}\nShape provided: {} '.format((D,D),axes.shape))
#end if
# reshape values (for now GridFunction does not support more structured values)
values.shape = len(values),values.size//len(values)
# normalize the axes
for d in range(D):
axes[d] /= np.linalg.norm(axes[d])
#end for
# make the points rectilinear
self.vlog('Transforming points to unit coords',n=1,time=True)
rpoints = np.dot(points,np.linalg.inv(axes))
# search for layers in each dimension
def xlayers(xpoints,tol):
xmin = xpoints.min()
xmax = xpoints.max()
nbins = np.uint64(np.round(np.ceil((xmax-xmin+tol)/tol)))
dx = (xmax-xmin+tol)/nbins
layers = obj()
for x in xpoints:
n = np.uint64(x/dx)
if n not in layers:
layers[n] = obj(xsum=x,nsum=1)
else:
l = layers[n]
l.xsum += x
l.nsum += 1
#end if
#end for
for l in layers:
l.xmean = l.xsum/l.nsum
#end for
lprev = None
for n in sorted(layers.keys()):
l = layers[n]
if lprev is not None and np.abs(l.xmean-lprev.xmean)<tol:
lprev.xsum += l.xsum
lprev.nsum += l.nsum
lprev.xmean = lprev.xsum/lprev.nsum
del layers[n]
else:
lprev = l
#end if
#end for
xlayers = np.empty((len(layers),),dtype=float)
i = 0
for n in sorted(layers.keys()):
l = layers[n]
xlayers[i] = l.xmean
i += 1
#end for
order = xlayers.argsort()
xlayers = xlayers[order]
return xlayers,xmin,xmax
#end def xlayers
def index_by_layer(xpoints,tol):
xlayer,xmin,xmax = xlayers(xpoints,tol)
dxlayer = xlayer[1:]-xlayer[:-1]
dxmin = dxlayer.min()
dxmax = dxlayer.max()
if np.abs(dxmax-dxmin)>2*tol:
error('Could not determine layer separation.\nLayers are not evenly spaced.\nMin layer spacing: {}\nMax layer spacing: {}\nSpread : {}\nTolerance: {}'.format(dxmin,dxmax,dxmax-dxmin,2*tol),'read_from_points')
#end if
dx = dxlayer.mean()
ipoints = np.array(np.around((xpoints-xmin)/dx),dtype=int)
return ipoints,xmin,xmax
#end def index_by_layer
# create a grid consistent with the detected layer separations
self.vlog('Initializing point index array',n=1,time=True)
grid_shape = np.empty((D, ),dtype=int )
grid_axes = np.zeros((D,D),dtype=float)
grid_corner = np.empty((D, ),dtype=float)
ipoints = np.empty((N,D),dtype=int)
for d in range(D):
self.vlog('Indexing points along dim {}'.format(d),n=2,time=True)
ixpoints,xmin,xmax = index_by_layer(rpoints[:,d],tol)
grid_shape[d] = ixpoints.max()+1
grid_axes[d,d] = xmax-xmin
grid_corner[d] = xmin
ipoints[:,d] = ixpoints
#end for
grid_axes = np.dot(grid_axes,axes)
grid_corner = np.dot(grid_corner,axes)
grid_bconds = D*('o',) # assumed for now
self.vlog('Constructing regular bounding grid',n=1,time=True)
grid = self.grid_class(
shape = grid_shape,
axes = grid_axes,
corner = grid_corner,
bconds = grid_bconds,
centered = False,
)
self.vlog('Checking grid point mapping',n=1,time=True)
# check that the generated grid contains the inputted points
ipflat = grid.flat_indices(ipoints)
dmax = np.linalg.norm(points-grid.points[ipflat],axis=1).max()
if dmax>tol:
self.error('Generated grid points do not match those read in.\nMaximum deviation: {}\nTolerance : {}'.format(dmax,tol))
#end if
# count number of times each grid point is mapped to
point_counts = np.bincount(ipflat,minlength=grid.npoints)
# if not averaging, check for one-to-one mapping
max_count = point_counts.max()
if not average and max_count>1:
self.error('Mapping to grid points is not one-to-one.\nMax no. of read points mapped to a grid point: {}'.format(max_count))
#end if
# map the inputted values onto the generated grid
self.vlog('Mapping data values onto grid',n=1,time=True)
grid_values = np.zeros((grid.npoints,values.shape[1]),dtype=float)
if not average or max_count==1:
grid_values[ipflat] = values
else:
self.vlog('Averaging multi-valued points',n=2,time=True)
for i,v in zip(ipflat,values):
grid_values[i] += v
#end for
for i,c in enumerate(point_counts):
if c>1:
grid_values[i] /= c
#end if
#end for
#end if
# initialize the GridFunction object
self.vlog('Constructing GridFunction object',n=1,time=True)
self.reset()
self.initialize(
grid = grid,
values = grid_values,
copy = False,
)
self.vlog('Read complete',n=1,time=True)
#end def read_from_points
#end class ParallelotopeGridFunction
@ -3710,6 +4299,145 @@ class SpheroidSurfaceGridFunction(StructuredGridFunctionWithAxes):
# test needed
def parallelotope_grid_function(
loc = 'parallelotope_grid_function',
**kwargs
):
if 'points' not in kwargs:
gf = ParallelotopeGridFunction(**kwargs)
else:
required = set(('points','values','axes'))
optional = set(('tol','average'))
present = set(kwargs.keys())
if len(required-present)>0:
error('Grid function cannot be created.\nWhen "points" is provided, "axes" and "values" must also be given.\nInputs provided: {}'.format(sorted(present)),loc)
elif len(present-required-optional)>0:
error('Grid function cannot be created.\nUnrecognized inputs provided.\nUnrecognized inputs: {}\nValid options are: {}'.format(sorted(present-required-optional),sorted(required|optional)),loc)
#end if
gf = ParallelotopeGridFunction()
gf.read_from_points(**kwargs)
#end if
return gf
#end def parallelotope_grid_function
# test needed
def grid_function(
type = 'parallelotope',
loc = 'grid_function',
**kwargs
):
filepath = kwargs.pop('filepath',None)
if filepath is not None:
return read_grid_function(filepath,loc=loc)
#end if
gf = None
if type=='parallelotope':
gf = parallelotope_grid_function(loc=loc,**kwargs)
elif type=='spheroid':
gf = SpheroidGridFunction(**kwargs)
elif type=='spheroid_surface':
gf = SpheroidSurfaceGridFunction(**kwargs)
else:
error('Grid function type "{}" is not recognized.\nValid options are: parallelotope, spheroid, or spheroid_surface'.format(type),loc)
#end if
return gf
#end def grid_function
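A hedged usage sketch of the scattered-point path through the factory above (assumes nexus/lib is on the path so grid_functions imports; the 2D data are synthetic and purely illustrative):

import numpy as np
from grid_functions import grid_function   # assumed import path

# samples that lie on a regular 3x4 grid over the unit square, supplied in scrambled order
x, y = np.meshgrid(np.linspace(0, 1, 3), np.linspace(0, 1, 4), indexing='ij')
points = np.column_stack([x.ravel(), y.ravel()])
values = np.sin(points[:, 0])*np.cos(points[:, 1])
order = np.random.default_rng(0).permutation(len(points))

gf = grid_function(
    type   = 'parallelotope',
    axes   = np.eye(2),
    points = points[order],
    values = values[order],
    tol    = 1e-6,
    )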
def grid(
type = 'parallelotope',
loc = 'grid',
**kwargs
):
filepath = kwargs.pop('filepath',None)
if filepath is not None:
return read_grid(filepath,loc=loc)
#end if
g = None
if type=='parallelotope':
g = ParallelotopeGrid(**kwargs)
elif type=='spheroid':
g = SpheroidGrid(**kwargs)
elif type=='spheroid_surface':
g = SpheroidSurfaceGrid(**kwargs)
else:
error('Grid type "{}" is not recognized.\nValid options are: parallelotope, spheroid, or spheroid_surface'.format(type),loc)
#end if
return g
#end def grid
#test needed
gf_file_type_map = obj(
xsf = ParallelotopeGridFunction,
)
def read_grid_function(filepath,format=None,loc='read_grid_function'):
filepath,format = process_file_format(filepath,format,loc)
if format not in gf_file_type_map:
error('Cannot read file.\nUnrecognized file format for grid function.\nFile format provided: {}\nAllowed formats include: {}'.format(format,sorted(gf_file_type_map.keys())),loc)
#end if
gf = gf_file_type_map[format]()
gf.read(filepath)
return gf
#end def read_grid_function
# test needed
g_file_type_map = obj(
xsf = ParallelotopeGrid,
)
def read_grid(filepath,format=None,loc='read_grid'):
filepath,format = process_file_format(filepath,format,loc)
if format not in g_file_type_map:
error('Cannot read file.\nUnrecognized file format for grid.\nFile format provided: {}\nAllowed formats include: {}'.format(format,sorted(g_file_type_map.keys())),loc)
#end if
g = g_file_type_map[format]()
g.read(filepath)
return g
#end def read_grid
def process_file_format(filepath,format,loc):
if isinstance(filepath,StandardFile):
format = filepath.sftype
elif not isinstance(filepath,str):
error('Cannot read file.\nExpected a file path.\nInstead received type: {}\nWith value: {}\nPlease provide a file path and try again.'.format(filepath.__class__.__name__,filepath),loc)
elif not os.path.exists(filepath):
error('Cannot read file. File path does not exist.\nFile path provided: {}'.format(filepath),loc)
#end if
if format is None:
format = filepath.rsplit('.',1)[1].lower()
#end if
return filepath,format
#end def process_file_format
gfs = [
ParallelotopeGridFunction,
SpheroidGridFunction,
SpheroidSurfaceGridFunction,
]
grid_to_grid_function = obj()
for gf in gfs:
grid_to_grid_function[gf.grid_class.__name__] = gf
#end for
del gfs
def grid_function_from_grid(grid):
gname = grid.__class__.__name__
if gname not in grid_to_grid_function:
error('Cannot find matching grid function for grid "{}".'.format(gname))
#end if
return grid_to_grid_function[gname]
#end def grid_function_from_grid
if __name__=='__main__':

View File

@ -1323,3 +1323,151 @@ def convex_hull(points,dimension=None,tol=None):
verts = list(set(verts))
return verts
#end def convex_hull
def layers_1d(xpoints,tol,xmin=None,xmax=None,merge=True,periodic=False,full_return=False):
# Update inputs to be consistent with periodic merge, if requested
if merge and periodic:
if xmax is None:
error('"xmax" must be provided.','layers_1d')
elif xmin is None:
xmin = 0.0
#end if
#end if
# Setup a virtual fine grid along x with grid cell width of tol
if xmin is None:
xmin = xpoints.min()
#end if
if xmax is None:
xmax = xpoints.max()
#end if
nbins = np.uint64(np.round(np.ceil((xmax-xmin+tol)/tol)))
dx = (xmax-xmin+tol)/nbins
# Find the points belonging to each grid cell/layer
layers = obj()
for i,x in enumerate(xpoints):
n = np.uint64(x/dx)
if n not in layers:
layers[n] = obj(ilist=[i],xsum=x,nsum=1)
else:
l = layers[n]
l.ilist.append(i)
l.xsum += x
l.nsum += 1
#end if
#end for
# Find the mean of each set of points
for l in layers:
l.xmean = l.xsum/l.nsum
#end for
# Merge neighboring layers if the means are within the tolerance
if merge:
lprev = None
for n in sorted(layers.keys()):
l = layers[n]
if lprev is not None and np.abs(l.xmean-lprev.xmean)<tol:
lprev.ilist.extend(l.ilist)
lprev.xsum += l.xsum
lprev.nsum += l.nsum
lprev.xmean = lprev.xsum/lprev.nsum
del layers[n]
else:
lprev = l
#end if
#end for
# Merge around periodic boundary
if periodic:
nleft = 0
nright = nbins-1
if nleft in layers and nright in layers:
ll = layers[nleft]
lr = layers[nright]
L = xmax-xmin
if np.abs(ll.xmean + L - lr.xmean)<tol:
ll.ilist.extend(lr.ilist)
ll.xsum += lr.xsum
ll.nsum += lr.nsum
ll.xmean = ll.xsum/ll.nsum
del layers[nright]
#end if
#end if
#end if
#end if
if not full_return:
return layers
else:
return layers,xmin,xmax
#end if
#end def layers_1d
def layer_means_1d(xpoints,tol,full_return=False):
# Get layer data
layers,xmin,xmax = layers_1d(xpoints,tol,full_return=True)
# Extract and sort layer means
xlayers = np.empty((len(layers),),dtype=float)
i = 0
for n in sorted(layers.keys()):
l = layers[n]
xlayers[i] = l.xmean
i += 1
#end for
xlayers.sort()
if not full_return:
return xlayers
else:
return xlayers,xmin,xmax
#end if
#end def layer_means_1d
def index_by_layer_1d(xpoints,tol,uniform=True,check=True,full_return=False):
# Get layer means
xlayer,xmin,xmax = layer_means_1d(xpoints,tol,full_return=True)
# Get layer separations
dxlayer = xlayer[1:]-xlayer[:-1]
# Find appropriate layer separation for indexing
if uniform:
dxmin = dxlayer.min()
dxmax = dxlayer.max()
if np.abs(dxmax-dxmin)>2*tol:
error('Could not determine layer separation.\nLayers are not evenly spaced.\nMin layer spacing: {}\nMax layer spacing: {}\nSpread : {}\nTolerance: {}'.format(dxmin,dxmax,dxmax-dxmin,2*tol),'index_by_layer_1d')
#end if
dx = dxlayer.mean()
else:
dx = dxlayer.min()
#end if
# Find indices for each layer
ipoints = np.array(np.around((xpoints-xmin)/dx),dtype=int)
# Check the layer indices, if requested
if check:
if np.abs(ipoints*dx+xmin-xpoints).max()>3*tol: # Tolerance accounts for merge
error('Layer indexing failed.','index_by_layer_1d')
#end if
#end if
if not full_return:
return ipoints
else:
return ipoints,xmin,xmax
#end if
#end def index_by_layer
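A small usage sketch of the layer helpers above (assumes nexus/lib is importable so numerics can be loaded; the sample data are illustrative):

import numpy as np
from numerics import layer_means_1d, index_by_layer_1d   # assumed import path

x = np.array([0.001, 0.499, 1.000, 0.500, 0.002, 0.998])   # noisy layers near 0.0, 0.5, 1.0
means = layer_means_1d(x, tol=0.01)       # approximately [0.0, 0.5, 1.0]
idx   = index_by_layer_1d(x, tol=0.01)    # [0, 1, 2, 1, 0, 2]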

nexus/lib/observables.py (new file, 1544 lines)

File diff suppressed because it is too large

View File

@ -2992,6 +2992,52 @@ class Structure(Sobj):
#end def voronoi_neighbors
def voronoi_vectors(self,indices=None,restrict=None):
ni = self.voronoi_neighbors(indices,restrict)
vt = self.vector_table()
vv = obj()
for i,vi in ni.items():
vv[i] = vt[i,vi]
#end for
return vv
#end def voronoi_vectors
def voronoi_distances(self,indices=None,restrict=False):
vv = self.voronoi_vectors(indices,restrict)
vd = obj()
for i,vvi in vv.items():
vd[i] = np.linalg.norm(vvi,axis=1)
#end for
return vd
#end def voronoi_distances
def voronoi_radii(self,indices=None,restrict=None):
vd = self.voronoi_distances(indices,restrict)
vr = obj()
for i,vdi in vd.items():
vr[i] = vdi.min()/2
#end for
return vr
#end def voronoi_radii
def voronoi_species_radii(self):
vr = self.voronoi_radii()
vsr = obj()
for i,r in vr.items():
e = self.elem[i]
if e not in vsr:
vsr[e] = r
else:
vsr[e] = min(vsr[e],r)
#end if
#end for
return vsr
#end def voronoi_species_radii
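A hedged sketch of the new Voronoi helpers above (assumes scipy is available for the underlying tessellation; the crystal keywords mirror the generate_structure usage in the structure tests):

from structure import generate_structure   # assumed import path (nexus/lib)

s = generate_structure(structure='rocksalt', cell='prim')
radii = s.voronoi_species_radii()          # half the minimum Voronoi-neighbor distance, per species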
# test needed
# get nearest neighbors according to constraints (voronoi, max distance, coord. number)
def nearest_neighbors(self,indices=None,rmax=None,nmax=None,restrict=False,voronoi=False,distances=False,**spec_max):
@ -4626,7 +4672,9 @@ class Structure(Sobj):
def read_xsf(self,filepath):
if os.path.exists(filepath):
if isinstance(filepath,XsfFile):
f = filepath
elif os.path.exists(filepath):
f = XsfFile(filepath)
else:
f = XsfFile()
@ -4634,7 +4682,11 @@ class Structure(Sobj):
#end if
elem = []
for n in f.elem:
elem.append(pt.simple_elements[n].symbol)
if isinstance(n,str):
elem.append(n)
else:
elem.append(pt.simple_elements[n].symbol)
#end if
#end for
self.dim = 3
self.units = 'A'
@ -5060,6 +5112,155 @@ class Structure(Sobj):
cell = (lattice,positions,numbers)
return cell
#end def spglib_cell
def get_symmetry(self,symprec=1e-5):
cell = self.spglib_cell()
return spglib.get_symmetry(cell,symprec=symprec)
#end def get_symmetry
def get_symmetry_dataset(self,symprec=1e-5, angle_tolerance=-1.0, hall_number=0):
cell = self.spglib_cell()
ds = spglib.get_symmetry_dataset(
cell,
symprec = symprec,
angle_tolerance = angle_tolerance,
hall_number = hall_number,
)
return ds
#end def get_symmetry_dataset
# functions based on direct spglib interface
def symmetry_data(self,*args,**kwargs):
ds = self.get_symmetry_dataset(*args,**kwargs)
ds = obj(ds)
for k,v in ds.items():
if isinstance(v,dict):
ds[k] = obj(v)
#end if
#end for
return ds
#end def symmetry_data
# test needed
def space_group_operations(self,tol=1e-5,unit=False):
ds = self.get_symmetry(symprec=tol)
if ds is None:
self.error('Symmetry search failed.\nspglib error message:\n{}'.format(spglib.get_error_message()))
#end if
ds = obj(ds)
rotations = ds.rotations
translations = ds.translations
if not unit:
# Transform to Cartesian
axes = self.axes
axinv = np.linalg.inv(axes)
for n,(R,t) in enumerate(zip(rotations,translations)):
rotations[n] = np.dot(axinv,np.dot(R,axes))
translations[n] = np.dot(t,axes)
#end for
#end if
return rotations,translations
#end def space_group_operations
def point_group_operations(self,tol=1e-5,unit=False):
rotations,translations = self.space_group_operations(tol=tol,unit=unit)
no_trans = translations.max(axis=1) < tol
return rotations[no_trans]
#end def point_group_operations
def check_point_group_operations(self,rotations=None,tol=1e-5,unit=False,dtol=1e-5,ncheck=1,exit=False):
if rotations is None:
rotations = self.point_group_operations(tol=tol,unit=unit)
#end if
r = self.pos
if ncheck=='all':
ncheck = len(r)
#end if
all_same = True
for n in range(ncheck):
rc = r[n]
for R in rotations:
rp = np.dot(r-rc,R)+rc
dt = self.min_image_distances(r,rp)
same = True
for d in dt:
same &= dt.min()<dtol
#end for
all_same &= same
#end for
#end for
if not all_same and exit:
self.error('Point group operators are not all symmetries of the structure.')
#end if
return all_same
#end def check_point_group_operations
def equivalent_atoms(self):
ds = self.symmetry_data()
# collect sets of species labels
species_by_specnum = obj()
for e,sn in zip(self.elem,ds.equivalent_atoms):
is_elem,es = is_element(e,symbol=True)
if sn not in species_by_specnum:
species_by_specnum[sn] = set()
#end if
species_by_specnum[sn].add(es)
#end for
for sn,sset in species_by_specnum.items():
if len(sset)>1:
self.error('Cannot find equivalent atoms.\nMultiple atomic species were marked as being equivalent.\nSpecies marked in this way: {}'.format(list(sset)))
#end if
species_by_specnum[sn] = list(sset)[0]
#end for
# give each unique species a unique label
labels_by_specnum = obj()
species_list = list(species_by_specnum.values())
species_set = set(species_list)
species_counts = obj()
for s in species_set:
species_counts[s] = species_list.count(s)
#end for
spec_counts = obj()
for sn,s in species_by_specnum.items():
if species_counts[s]==1:
labels_by_specnum[sn] = s
else:
if s not in spec_counts:
spec_counts[s] = 1
else:
spec_counts[s] += 1
#end if
labels_by_specnum[sn] = s + str(spec_counts[s])
#end if
#end for
# find indices for each unique species
equiv_indices = obj()
for s in labels_by_specnum.values():
equiv_indices[s] = list()
#end for
for i,sn in enumerate(ds.equivalent_atoms):
equiv_indices[labels_by_specnum[sn]].append(i)
#end for
for s,indices in equiv_indices.items():
equiv_indices[s] = np.array(indices,dtype=int)
#end for
return equiv_indices
#end def equivalent_atoms
#end class Structure
Structure.set_operations()
@ -5728,8 +5929,8 @@ class Crystal(Structure):
)
lattice_centerings = obj(
triclinic = ['P'],
monoclinic = ['P','A','B','C'],
triclinic = ['P'],
monoclinic = ['P','A','B','C'],
orthorhombic = ['P','C','I','F'],
tetragonal = ['P','I'],
hexagonal = ['P','R'],

View File

@ -202,9 +202,10 @@ def text_diff(t1,t2,atol=def_atol,rtol=def_rtol,int_as_float=False,full=False,by
# print the difference between two objects
def print_diff(o1,o2,atol=def_atol,rtol=def_rtol,int_as_float=False,text=False,by_line=False): # used in debugging, not actual tests
from generic import obj
print(20*'=')
hline = '========== {} =========='
print(hline.format('left object'))
print(o1)
print(20*'=')
print(hline.format('right object'))
print(o2)
if not text:
diff,diff1,diff2 = object_diff(o1,o2,atol,rtol,int_as_float,full=True)
@ -213,13 +214,24 @@ def print_diff(o1,o2,atol=def_atol,rtol=def_rtol,int_as_float=False,text=False,b
#end if
d1 = obj(diff1)
d2 = obj(diff2)
print(20*'=')
print(hline.format('left diff'))
print(d1)
print(20*'=')
print(hline.format('right diff'))
print(d2)
#end def print_diff
# check for object equality and if different, print the difference
def check_object_eq(o1,o2):
same = object_eq(o1,o2)
if not same and global_data['verbose']:
print('\nObjects differ, please see below for details')
print_diff(o1,o2)
#end if
return same
#end def check_object_eq
# additional convenience functions to use value_diff and object_diff
value_neq = value_diff
@ -586,6 +598,7 @@ class FailedTest(Exception):
global_data = dict(
verbose = False,
job_ref_table = False,
)

View File

@ -0,0 +1,404 @@
import testing
from testing import value_eq,object_eq,text_eq,check_object_eq
from testing import FailedTest,failed
def test_import():
import observables
from observables import AttributeProperties,DefinedAttributeBase
from observables import Observable
from observables import MomentumDistribution
#end def test_import
def test_defined_attribute_base():
from generic import obj,NexusError
from observables import AttributeProperties,DefinedAttributeBase
# empty init
p = AttributeProperties()
o = DefinedAttributeBase()
pref = obj(
assigned = set(),
deepcopy = False,
default = None,
dest = None,
name = None,
no_default = False,
required = False,
type = None,
)
assert(check_object_eq(p,pref))
assert(len(o)==0)
# init
p = AttributeProperties(
default = 2,
dest = 'nest',
deepcopy = True,
)
pref = obj(
assigned = {'dest', 'default', 'deepcopy'},
deepcopy = True,
default = 2,
dest = 'nest',
name = None,
no_default = False,
required = False,
type = None,
)
assert(check_object_eq(p,pref))
# define attributes
class DA(DefinedAttributeBase):
None
#end class DA
da_attributes = obj(
a = obj(
default = 1,
),
b = obj(
default = 2,
type = int,
required = True,
),
c = obj(
dest = 'nest',
type = str,
),
d = obj(
type = dict,
deepcopy = True,
no_default = True,
),
nest = obj(
type = obj,
default = obj,
),
)
DA.define_attributes(**da_attributes)
def get_class_dict(cls):
o = obj()
o.transfer_from(cls.__dict__)
for k in list(o.keys()):
if k.startswith('_'):
del o[k]
#end if
#end for
return o
#end def get_class_dict
o = get_class_dict(DA)
oref = obj(
deepcopy_attributes = {'d'},
required_attributes = {'b'},
sublevel_attributes = {'c'},
toplevel_attributes = {'d', 'a', 'b', 'nest'},
typed_attributes = {'d', 'c', 'b', 'nest'},
attribute_definitions = obj(
a = obj(
assigned = {'default'},
deepcopy = False,
default = 1,
dest = None,
name = 'a',
no_default = False,
required = False,
type = None,
),
b = obj(
assigned = {'required', 'default', 'type'},
deepcopy = False,
default = 2,
dest = None,
name = 'b',
no_default = False,
required = True,
type = int,
),
c = obj(
assigned = {'dest', 'type'},
deepcopy = False,
default = None,
dest = 'nest',
name = 'c',
no_default = False,
required = False,
type = str,
),
d = obj(
assigned = {'deepcopy', 'no_default', 'type'},
deepcopy = True,
default = None,
dest = None,
name = 'd',
no_default = True,
required = False,
type = dict,
),
nest = obj(
assigned = {'default', 'type'},
deepcopy = False,
default = obj,
no_default = False,
dest = None,
name = 'nest',
required = False,
type = obj,
),
),
)
assert(check_object_eq(o,oref))
class DA2(DA):
None
#end class DA2
DA2.define_attributes(
DA,
e = obj(
required = True,
)
)
o = get_class_dict(DA)
assert(check_object_eq(o,oref))
o2 = get_class_dict(DA2)
oref.required_attributes.add('e')
oref.toplevel_attributes.add('e')
oref.attribute_definitions.e = obj(
assigned = {'required'},
deepcopy = False,
default = None,
dest = None,
name = 'e',
no_default = False,
required = True,
type = None,
)
assert(check_object_eq(o2,oref))
# empty init
da = DA()
assert(len(da)==0)
assert(not da.check_attributes())
# set_default_attributes
da.set_default_attributes()
da_ref = obj(
a = 1,
b = 2,
nest = obj(
c = None,
),
)
assert(check_object_eq(da,da_ref))
assert(not da.check_attributes())
# set_attributes/init
da = DA(
a = 2,
b = 3,
c = 'hi',
d = dict(a=2),
)
da_ref = obj(
a = 2,
b = 3,
d = {'a': 2},
nest = obj(
c = 'hi',
),
)
assert(check_object_eq(da,da_ref))
assert(da.check_attributes())
# set_attribute
da = DA()
assert('b' not in da)
da.set_attribute('b',5)
assert('b' in da)
assert(da.b==5)
try:
da.set_attribute('unknown',None)
raise FailedTest
except NexusError:
None
except FailedTest:
failed()
except Exception as e:
failed(str(e))
#end try
da.set_attribute('b',3)
assert(da.b==3)
try:
da.set_attribute('b',3.5)
raise FailedTest
except NexusError:
None
except FailedTest:
failed()
except Exception as e:
failed(str(e))
#end try
# get_attribute
da = DA()
da.set_attribute('b',5)
assert('b' in da)
assert(da.b==5)
assert(da.get_attribute('b')==5)
assert(da.get_attribute('a',None) is None)
assert(da.get_attribute('c',None) is None)
try:
da.get_attribute('unknown')
raise FailedTest
except NexusError:
None
except FailedTest:
failed()
except Exception as e:
failed(str(e))
#end try
try:
da.get_attribute('a')
raise FailedTest
except NexusError:
None
except FailedTest:
failed()
except Exception as e:
failed(str(e))
#end try
try:
da.get_attribute('c')
raise FailedTest
except NexusError:
None
except FailedTest:
failed()
except Exception as e:
failed(str(e))
#end try
# default values
class DA_def(DefinedAttributeBase):
None
#end class DA_def
DA_def.set_unassigned_default(None)
class DA_def2(DA_def):
None
#end class DA_def2
DA_def2.define_attributes(**da_attributes)
assert(not DefinedAttributeBase.class_has('unassigned_default'))
assert(DA_def.class_has('unassigned_default'))
assert(DA_def2.class_has('unassigned_default'))
assert(DA_def.unassigned_default is None)
assert(DA_def2.unassigned_default is None)
da = DA_def2()
assert('a' not in da)
assert('c' not in da)
assert('nest' not in da)
assert(da.get_attribute('a',None) is None)
assert(da.get_attribute('c',None) is None)
try:
da.get_attribute('a')
raise FailedTest
except NexusError:
None
except FailedTest:
failed()
except Exception as e:
failed(str(e))
#end try
try:
da.get_attribute('c')
raise FailedTest
except NexusError:
None
except FailedTest:
failed()
except Exception as e:
failed(str(e))
#end try
da.set_default_attributes()
da_ref = obj(
a = 1,
b = 2,
nest = obj(
c = None,
)
)
assert(check_object_eq(da,da_ref))
da.b = None
assert(da.get_attribute('a')==1)
assert(da.get_attribute('b',assigned=False) is None)
assert(da.get_attribute('c',assigned=False) is None)
assert(da.get_attribute('a',2)==1)
assert(da.get_attribute('a',assigned=False)==1)
try:
da.get_attribute('b')
raise FailedTest
except NexusError:
None
except FailedTest:
failed()
except Exception as e:
failed(str(e))
#end try
try:
da.get_attribute('c')
raise FailedTest
except NexusError:
None
except FailedTest:
failed()
except Exception as e:
failed(str(e))
#end try
#end def test_defined_attribute_base

View File

@ -1457,3 +1457,48 @@ def test_interpolate():
assert(value_eq(Cr_positions,Cr_positions_ref))
#end def test_interpolate
if versions.spglib_available:
def test_point_group_operations():
from structure import generate_structure,Crystal
nrotations = dict(
Ca2CuO3 = 8,
CaO = 48,
Cl2Ca2CuO2 = 16,
CuO = 2,
CuO2_plane = 16,
La2CuO4 = 2,
NaCl = 48,
ZnO = 6,
calcium = 48,
copper = 48,
diamond = 24,
graphene = 12,
oxygen = 4,
rocksalt = 48,
wurtzite = 6,
)
for struct,cell in sorted(Crystal.known_crystals.keys()):
if cell!='prim':
continue
#end if
s = generate_structure(
structure = struct,
cell = cell,
)
rotations = s.point_group_operations()
assert(struct in nrotations)
assert(len(rotations)==nrotations[struct])
valid = s.check_point_group_operations(rotations,exit=False)
assert(valid)
#end for
#end def test_point_group_operations
#end if

View File

@ -30,10 +30,6 @@
#include "AFQMC/Drivers/DriverFactory.h"
#include "AFQMC/Memory/buffer_allocators.h"
#include "AFQMC/Utilities/myTimer.h"
myTimer Timer;
namespace qmcplusplus
{
TimerList_t AFQMCTimers;
@ -86,7 +82,7 @@ bool AFQMCFactory::parse(xmlNodePtr cur)
cur = cur->next;
}
// eventually read an inital buffer size from input
// eventually read an initial buffer size from input
// they are initialized now with 10 MBs
// host_buffer_type->resize(buffer_size);
// device_buffer_type->resize(buffer_size);

View File

@ -100,7 +100,6 @@ ELSEIF(ENABLE_HIP)
)
SET(AFQMC_SRCS ${AFQMC_SRCS}
Memory/HIP/hip_utilities.cpp
Numerics/detail/HIP/hipblas_utils.cpp
Memory/HIP/hip_arch.cpp
Memory/HIP/hip_init.cpp)
ELSE(ENABLE_CUDA)
@ -175,3 +174,4 @@ IF (BUILD_UNIT_TESTS)
SUBDIRS(Estimators/tests)
ENDIF()
SUBDIRS(Numerics/performance)

View File

@ -172,7 +172,7 @@ bool AFQMCDriver::checkpoint(WalkerSet& wset, int block, int step)
if (!dumpToHDF5(wset, dump))
{
app_error() << " Problems writting checkpoint file in Driver/AFQMCDriver::checkpoint(). \n";
app_error() << " Problems writing checkpoint file in Driver/AFQMCDriver::checkpoint(). \n";
return false;
}

View File

@ -31,6 +31,7 @@
#include "AFQMC/Utilities/type_conversion.hpp"
#include "AFQMC/Utilities/taskgroup.h"
#include "AFQMC/Utilities/myTimer.h"
namespace qmcplusplus
{

View File

@ -29,6 +29,7 @@
#include "type_traits/scalar_traits.h"
#include "AFQMC/Wavefunctions/Excitations.hpp"
#include "AFQMC/Wavefunctions/phmsd_helpers.hpp"
#include "AFQMC/Utilities/myTimer.h"
namespace qmcplusplus
{

View File

@ -29,6 +29,7 @@
#include "AFQMC/Utilities/type_conversion.hpp"
#include "AFQMC/Utilities/taskgroup.h"
#include "AFQMC/Utilities/myTimer.h"
namespace qmcplusplus
{

View File

@ -32,6 +32,7 @@
#include "AFQMC/Utilities/Utils.hpp"
#include "AFQMC/Numerics/batched_operations.hpp"
#include "AFQMC/Numerics/tensor_operations.hpp"
#include "AFQMC/Utilities/myTimer.h"
namespace qmcplusplus
{

View File

@ -71,7 +71,7 @@ HamiltonianOperations RealDenseHamiltonian::getHamiltonianOperations(bool pureSD
ndown = PsiT[1].size(0);
int NEL = nup + ndown;
// distribute work over equivalent nodes in TGprop.TG() accross TG.Global()
// distribute work over equivalent nodes in TGprop.TG() across TG.Global()
auto Qcomm(TG.Global().split(TGprop.getLocalGroupNumber(), TG.Global().rank()));
#if defined(ENABLE_CUDA) || defined(ENABLE_HIP)
auto distNode(TG.Node().split(TGprop.getLocalGroupNumber(), TG.Node().rank()));
@ -234,7 +234,7 @@ HamiltonianOperations RealDenseHamiltonian::getHamiltonianOperations(bool pureSD
CMatrix lak({nup, NMO});
for (int nd = 0; nd < ndet; nd++)
{
// all nodes accross Qcomm share same segment {nc0,ncN}
// all nodes across Qcomm share same segment {nc0,ncN}
for (int nc = 0; nc < local_ncv; nc++)
{
if (nc % Qcomm.size() != Qcomm.rank())

View File

@ -69,7 +69,7 @@ HamiltonianOperations RealDenseHamiltonian_v2::getHamiltonianOperations(bool pur
ndown = PsiT[1].size(0);
int NEL = nup + ndown;
// distribute work over equivalent nodes in TGprop.TG() accross TG.Global()
// distribute work over equivalent nodes in TGprop.TG() across TG.Global()
auto Qcomm(TG.Global().split(TGprop.getLocalGroupNumber(), TG.Global().rank()));
#if defined(ENABLE_CUDA) || defined(ENABLE_HIP)
auto distNode(TG.Node().split(TGprop.getLocalGroupNumber(), TG.Node().rank()));
@ -232,7 +232,7 @@ HamiltonianOperations RealDenseHamiltonian_v2::getHamiltonianOperations(bool pur
CMatrix lak({nup, NMO});
for (int nd = 0; nd < ndet; nd++)
{
// all nodes accross Qcomm share same segment {nc0,ncN}
// all nodes across Qcomm share same segment {nc0,ncN}
for (int nc = 0; nc < local_ncv; nc++)
{
if (nc % Qcomm.size() != Qcomm.rank())

View File

@ -28,6 +28,7 @@
#include "AFQMC/Numerics/ma_operations.hpp"
#include "AFQMC/SlaterDeterminantOperations/rotate.hpp"
#include "AFQMC/Utilities/afqmc_TTI.hpp"
#include "AFQMC/Utilities/myTimer.h"
#include "AFQMC/Hamiltonians/rotateHamiltonian_Helper2.hpp"

View File

@ -89,28 +89,68 @@ void INIT(boost::mpi3::shared_communicator& node, unsigned long long int iseed)
}
}
void memcopy(void* dst, const void* src, size_t count, MEMCOPYKIND kind)
void memcopy(void* dst, const void* src, size_t count, MEMCOPYKIND kind, const std::string& message)
{
if (cudaSuccess != cudaMemcpy(dst, src, count, tocudaMemcpyKind(kind)))
cudaError_t status = cudaMemcpy(dst, src, count, tocudaMemcpyKind(kind));
if (status != cudaSuccess)
{
if (message != "")
{
std::cerr << "Error: " << message << std::endl;
}
std::cerr << " Error when calling cudaMemcpy: " << cudaGetErrorString(status) << std::endl;
throw std::runtime_error("Error: cudaMemcpy returned error code.");
}
}
void memcopy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, size_t height, MEMCOPYKIND kind)
void memcopy2D(void* dst,
size_t dpitch,
const void* src,
size_t spitch,
size_t width,
size_t height,
MEMCOPYKIND kind,
const std::string& message)
{
if (cudaSuccess != cudaMemcpy2D(dst, dpitch, src, spitch, width, height, tocudaMemcpyKind(kind)))
cudaError_t status = cudaMemcpy2D(dst, dpitch, src, spitch, width, height, tocudaMemcpyKind(kind));
if (status != cudaSuccess)
{
if (message != "")
{
std::cerr << "Error: " << message << std::endl;
}
std::cerr << " Error when calling cudaMemcpy2D: " << cudaGetErrorString(status) << std::endl;
throw std::runtime_error("Error: cudaMemcpy2D returned error code.");
}
}
void malloc(void** devPtr, size_t size)
void malloc(void** devPtr, size_t size, const std::string& message)
{
if (cudaSuccess != cudaMalloc(devPtr, size))
cudaError_t status = cudaMalloc(devPtr, size);
if (status != cudaSuccess)
{
std::cerr << " Error allocating " << size * 1024.0 / 1024.0 << " MBs on GPU." << std::endl;
if (message != "")
{
std::cerr << " Error from: " << message << std::endl;
}
std::cerr << " Error when calling cudaMalloc: " << cudaGetErrorString(status) << std::endl;
throw std::runtime_error("Error: cudaMalloc returned error code.");
}
}
void free(void* p) { cudaFree(p); }
void free(void* p, const std::string& message)
{
cudaError_t status = cudaFree(p);
if (status != cudaSuccess)
{
if (message != "")
{
std::cerr << " Error from: " << message << std::endl;
}
std::cerr << " Error from calling cudaFree: " << cudaGetErrorString(status) << std::endl;
}
}
} // namespace arch

View File

@ -64,7 +64,7 @@ cudaMemcpyKind tocudaMemcpyKind(MEMCOPYKIND v);
void INIT(boost::mpi3::shared_communicator& node, unsigned long long int iseed = 911ULL);
void memcopy(void* dst, const void* src, size_t count, MEMCOPYKIND kind = memcopyDefault);
void memcopy(void* dst, const void* src, size_t count, MEMCOPYKIND kind = memcopyDefault, const std::string& message = "");
void memcopy2D(void* dst,
size_t dpitch,
@ -72,11 +72,12 @@ void memcopy2D(void* dst,
size_t spitch,
size_t width,
size_t height,
MEMCOPYKIND kind = memcopyDefault);
MEMCOPYKIND kind = memcopyDefault,
const std::string& message = "");
void malloc(void** devPtr, size_t size);
void malloc(void** devPtr, size_t size, const std::string& message = "");
void free(void* p);
void free(void* p, const std::string& message = "");
} // namespace arch

View File

@ -58,6 +58,7 @@ void CUDA_INIT(boost::mpi3::shared_communicator& node, unsigned long long int is
cudaDeviceProp dev;
cuda_check(cudaGetDeviceProperties(&dev, 0), "cudaGetDeviceProperties");
qmcplusplus::app_log() << " CUDA compute capability: " << dev.major << "." << dev.minor << std::endl;
qmcplusplus::app_log() << " Device Name: " << dev.name << std::endl;
if (dev.major <= 6)
{
qmcplusplus::app_log() << " Warning CUDA major compute capability < 6.0" << std::endl;

View File

@ -86,28 +86,68 @@ void INIT(boost::mpi3::shared_communicator& node, unsigned long long int iseed)
}
}
void memcopy(void* dst, const void* src, size_t count, MEMCOPYKIND kind)
void memcopy(void* dst, const void* src, size_t count, MEMCOPYKIND kind, const std::string& message)
{
if (hipSuccess != hipMemcpy(dst, src, count, tohipMemcpyKind(kind)))
hipError_t status = hipMemcpy(dst, src, count, tohipMemcpyKind(kind));
if (status != hipSuccess)
{
if (message != "")
{
std::cerr << "Error: " << message << std::endl;
}
std::cerr << " Error when calling hipMemcpy: " << hipGetErrorString(status) << std::endl;
throw std::runtime_error("Error: hipMemcpy returned error code.");
}
}
void memcopy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, size_t height, MEMCOPYKIND kind)
void memcopy2D(void* dst,
size_t dpitch,
const void* src,
size_t spitch,
size_t width,
size_t height,
MEMCOPYKIND kind,
const std::string& message)
{
if (hipSuccess != hipMemcpy2D(dst, dpitch, src, spitch, width, height, tohipMemcpyKind(kind)))
hipError_t status = hipMemcpy2D(dst, dpitch, src, spitch, width, height, tohipMemcpyKind(kind));
if (status != hipSuccess)
{
if (message != "")
{
std::cerr << "Error: " << message << std::endl;
}
std::cerr << " Error when calling hipMemcpy2D: " << hipGetErrorString(status) << std::endl;
throw std::runtime_error("Error: hipMemcpy2D returned error code.");
}
}
void malloc(void** devPtr, size_t size)
void malloc(void** devPtr, size_t size, const std::string& message)
{
if (hipSuccess != hipMalloc(devPtr, size))
hipError_t status = hipMalloc(devPtr, size);
if (status != hipSuccess)
{
std::cerr << " Error allocating " << size * 1024.0 / 1024.0 << " MBs on GPU." << std::endl;
if (message != "")
{
std::cerr << " Error from : " << message << std::endl;
}
std::cerr << " Error when call hipMalloc " << < hipGetErrorString(status) << std::endl;
throw std::runtime_error("Error: hipMalloc returned error code.");
}
}
void free(void* p) { hipFree(p); }
void free(void* p, const std::string& message)
{
hipError_t status = hipFree(p);
if (status != hipSuccess)
{
if (message != "")
{
std::cerr << " Error from : " << message << std::endl;
}
std::cerr << " Error when calling hipFree: " << hipGetErrorString(status) << std::endl;
}
}
} // namespace arch

View File

@ -62,7 +62,7 @@ hipMemcpyKind tohipMemcpyKind(MEMCOPYKIND v);
void INIT(boost::mpi3::shared_communicator& node, unsigned long long int iseed = 911ULL);
void memcopy(void* dst, const void* src, size_t count, MEMCOPYKIND kind = memcopyDefault);
void memcopy(void* dst, const void* src, size_t count, MEMCOPYKIND kind = memcopyDefault, const std::string& message = "");
void memcopy2D(void* dst,
size_t dpitch,
@ -70,11 +70,12 @@ void memcopy2D(void* dst,
size_t spitch,
size_t width,
size_t height,
MEMCOPYKIND kind = memcopyDefault);
MEMCOPYKIND kind = memcopyDefault,
const std::string& message = "");
void malloc(void** devPtr, size_t size);
void malloc(void** devPtr, size_t size, const std::string& message = "");
void free(void* p);
void free(void* p, const std::string& message = "");
} // namespace arch

View File

@ -59,6 +59,7 @@ void HIP_INIT(boost::mpi3::shared_communicator& node, unsigned long long int ise
hipDeviceProp_t dev;
hip_check(hipGetDeviceProperties(&dev, 0), "hipGetDeviceProperties");
qmcplusplus::app_log() << " HIP compute capability: " << dev.major << "." << dev.minor << std::endl;
qmcplusplus::app_log() << " Device Name: " << dev.name << std::endl;
if (dev.major <= 6)
{
qmcplusplus::app_log() << " Warning HIP major compute capability < 6.0" << std::endl;

View File

@ -21,6 +21,7 @@
//#include "AFQMC/Memory/CUDA/cuda_gpu_pointer.hpp"
#include "AFQMC/Utilities/type_conversion.hpp"
#include "AFQMC/Memory/device_pointers.hpp"
#include "AFQMC/Memory/arch.hpp"
#include "AFQMC/Numerics/detail/CUDA/cublas_wrapper.hpp"
//#include "AFQMC/Numerics/detail/CUDA/cublasXt_wrapper.hpp"
// hand coded kernels for blas extensions
@ -55,9 +56,7 @@ template<typename T, typename Q>
inline static void copy(int n, T const* x, int incx, device_pointer<Q> y, int incy)
{
static_assert(std::is_same<typename std::decay<Q>::type, T>::value, "Wrong dispatch.\n");
if (cudaSuccess !=
cudaMemcpy2D(to_address(y), sizeof(Q) * incy, x, sizeof(T) * incx, sizeof(T), n, cudaMemcpyHostToDevice))
throw std::runtime_error("Error: cudaMemcpy2D returned error code.");
arch::memcopy2D(to_address(y), sizeof(Q) * incy, x, sizeof(T) * incx, sizeof(T), n, arch::memcopyH2D, "lapack_cuda_gpu_ptr::copy");
}
template<typename T, typename Q>
@ -65,9 +64,7 @@ inline static void copy(int n, device_pointer<Q> x, int incx, T* y, int incy)
{
static_assert(std::is_same<typename std::decay<Q>::type, T>::value, "Wrong dispatch.\n");
assert(sizeof(Q) == sizeof(T));
if (cudaSuccess !=
cudaMemcpy2D(y, sizeof(T) * incy, to_address(x), sizeof(Q) * incx, sizeof(T), n, cudaMemcpyDeviceToHost))
throw std::runtime_error("Error: cudaMemcpy2D returned error code.");
arch::memcopy2D(y, sizeof(T) * incy, to_address(x), sizeof(Q) * incx, sizeof(T), n, arch::memcopyD2H, "lapack_cuda_gpu_ptr::copy");
}
// scal Specializations
@ -329,7 +326,7 @@ inline static void gemmBatched(char Atrans,
{
static_assert(std::is_same<typename std::decay<Q1>::type, T>::value, "Wrong dispatch.\n");
static_assert(std::is_same<typename std::decay<Q2>::type, T>::value, "Wrong dispatch.\n");
// replace with single call to cudaMalloc and cudaMemcpy
// replace with single call to arch::malloc and arch::memcopy
T **A_d, **B_d, **C_d;
Q1** A_h;
Q2** B_h;
@ -343,17 +340,17 @@ inline static void gemmBatched(char Atrans,
B_h[i] = to_address(B[i]);
C_h[i] = to_address(C[i]);
}
cudaMalloc((void**)&A_d, batchSize * sizeof(*A_h));
cudaMalloc((void**)&B_d, batchSize * sizeof(*B_h));
cudaMalloc((void**)&C_d, batchSize * sizeof(*C_h));
cudaMemcpy(A_d, A_h, batchSize * sizeof(*A_h), cudaMemcpyHostToDevice);
cudaMemcpy(B_d, B_h, batchSize * sizeof(*B_h), cudaMemcpyHostToDevice);
cudaMemcpy(C_d, C_h, batchSize * sizeof(*C_h), cudaMemcpyHostToDevice);
arch::malloc((void**)&A_d, batchSize * sizeof(*A_h));
arch::malloc((void**)&B_d, batchSize * sizeof(*B_h));
arch::malloc((void**)&C_d, batchSize * sizeof(*C_h));
arch::memcopy(A_d, A_h, batchSize * sizeof(*A_h), arch::memcopyH2D);
arch::memcopy(B_d, B_h, batchSize * sizeof(*B_h), arch::memcopyH2D);
arch::memcopy(C_d, C_h, batchSize * sizeof(*C_h), arch::memcopyH2D);
cublas::cublas_gemmBatched(*(A[0]).handles.cublas_handle, Atrans, Btrans, M, N, K, alpha, A_d, lda, B_d, ldb, beta,
C_d, ldc, batchSize);
cudaFree(A_d);
cudaFree(B_d);
cudaFree(C_d);
arch::free(A_d);
arch::free(B_d);
arch::free(C_d);
delete[] A_h;
delete[] B_h;
delete[] C_h;
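
The TODO above ("replace with single call to arch::malloc and arch::memcopy") could, for instance, be addressed by packing the three pointer tables into one host buffer so that a single allocation and a single copy suffice. A rough sketch under that assumption follows; upload_gemm_tables is an illustrative name, not an existing function in this codebase.

// Hypothetical sketch: pack the A/B/C pointer tables into one host buffer so a
// single arch::malloc and a single arch::memcopy cover all three tables.
#include <vector>

template<typename T>
T** upload_gemm_tables(std::vector<T*> const& A_h, std::vector<T*> const& B_h, std::vector<T*> const& C_h)
{
  int const batchSize = static_cast<int>(A_h.size());
  std::vector<T*> packed(A_h);
  packed.insert(packed.end(), B_h.begin(), B_h.end());
  packed.insert(packed.end(), C_h.begin(), C_h.end());
  T** buf_d = nullptr;
  arch::malloc((void**)&buf_d, 3 * batchSize * sizeof(T*));
  arch::memcopy(buf_d, packed.data(), 3 * batchSize * sizeof(T*), arch::memcopyH2D);
  // A_d = buf_d, B_d = buf_d + batchSize, C_d = buf_d + 2 * batchSize;
  // the caller releases everything with one arch::free(buf_d).
  return buf_d;
}
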
@ -385,7 +382,7 @@ inline static void gemmBatched(char Atrans,
static_assert(std::is_same<typename std::decay<Q1>::type, T2>::value, "Wrong dispatch.\n");
static_assert(std::is_same<typename std::decay<Q2>::type, T>::value, "Wrong dispatch.\n");
assert(Atrans == 'N' || Atrans == 'n');
// replace with single call to cudaMalloc and cudaMemcpy
// replace with single call to arch::malloc and arch::memcopy
T2** A_d;
T** B_d;
T2** C_d;
@ -401,17 +398,17 @@ inline static void gemmBatched(char Atrans,
B_h[i] = to_address(B[i]);
C_h[i] = to_address(C[i]);
}
cudaMalloc((void**)&A_d, batchSize * sizeof(*A_h));
cudaMalloc((void**)&B_d, batchSize * sizeof(*B_h));
cudaMalloc((void**)&C_d, batchSize * sizeof(*C_h));
cudaMemcpy(A_d, A_h, batchSize * sizeof(*A_h), cudaMemcpyHostToDevice);
cudaMemcpy(B_d, B_h, batchSize * sizeof(*B_h), cudaMemcpyHostToDevice);
cudaMemcpy(C_d, C_h, batchSize * sizeof(*C_h), cudaMemcpyHostToDevice);
arch::malloc((void**)&A_d, batchSize * sizeof(*A_h));
arch::malloc((void**)&B_d, batchSize * sizeof(*B_h));
arch::malloc((void**)&C_d, batchSize * sizeof(*C_h));
arch::memcopy(A_d, A_h, batchSize * sizeof(*A_h), arch::memcopyH2D);
arch::memcopy(B_d, B_h, batchSize * sizeof(*B_h), arch::memcopyH2D);
arch::memcopy(C_d, C_h, batchSize * sizeof(*C_h), arch::memcopyH2D);
cublas::cublas_gemmBatched(*(A[0]).handles.cublas_handle, Atrans, Btrans, M, N, K, alpha, A_d, lda, B_d, ldb, beta,
C_d, ldc, batchSize);
cudaFree(A_d);
cudaFree(B_d);
cudaFree(C_d);
arch::free(A_d);
arch::free(B_d);
arch::free(C_d);
delete[] A_h;
delete[] B_h;
delete[] C_h;
@ -465,28 +462,21 @@ template<typename T, typename T2>
inline static void copy2D(int N, int M, device_pointer<T> src, int lda, device_pointer<T2> dst, int ldb)
{
static_assert(std::is_same<typename std::decay<T>::type, T2>::value, "Wrong dispatch.\n");
if (cudaSuccess !=
cudaMemcpy2D(to_address(dst), sizeof(T2) * ldb, to_address(src), sizeof(T) * lda, M * sizeof(T), N,
cudaMemcpyDeviceToDevice))
throw std::runtime_error("Error: cudaMemcpy2D returned error code in copy2D.");
arch::memcopy2D(to_address(dst), sizeof(T2) * ldb, to_address(src), sizeof(T) * lda, M * sizeof(T), N, arch::memcopyD2D, "blas_cuda_gpu_ptr::copy2D");
}
template<typename T, typename T2>
inline static void copy2D(int N, int M, T const* src, int lda, device_pointer<T2> dst, int ldb)
{
static_assert(std::is_same<typename std::decay<T>::type, T2>::value, "Wrong dispatch.\n");
if (cudaSuccess !=
cudaMemcpy2D(to_address(dst), sizeof(T2) * ldb, src, sizeof(T) * lda, M * sizeof(T), N, cudaMemcpyHostToDevice))
throw std::runtime_error("Error: cudaMemcpy2D returned error code in copy2D.");
arch::memcopy2D(to_address(dst), sizeof(T2) * ldb, src, sizeof(T) * lda, M * sizeof(T), N, arch::memcopyH2D, "blas_cuda_gpu_ptr::copy2D");
}
template<typename T, typename T2>
inline static void copy2D(int N, int M, device_pointer<T> src, int lda, T2* dst, int ldb)
{
static_assert(std::is_same<typename std::decay<T>::type, T2>::value, "Wrong dispatch.\n");
if (cudaSuccess !=
cudaMemcpy2D(dst, sizeof(T2) * ldb, to_address(src), sizeof(T) * lda, M * sizeof(T), N, cudaMemcpyDeviceToHost))
throw std::runtime_error("Error: cudaMemcpy2D returned error code in copy2D.");
arch::memcopy2D(dst, sizeof(T2) * ldb, to_address(src), sizeof(T) * lda, M * sizeof(T), N, arch::memcopyD2H, "blas_cuda_gpu_ptr::copy2D");
}
template<typename T, typename T2>

View File

@ -18,6 +18,7 @@
#include <cassert>
#include "AFQMC/Utilities/type_conversion.hpp"
#include "AFQMC/Memory/custom_pointers.hpp"
#include "AFQMC/Memory/arch.hpp"
#include "AFQMC/Numerics/detail/CUDA/cublas_wrapper.hpp"
#include "AFQMC/Numerics/detail/CUDA/cusolver_wrapper.hpp"
#include "AFQMC/Numerics/detail/CUDA/Kernels/setIdentity.cuh"
@ -72,7 +73,7 @@ inline static void getrf(const int n,
{
cusolverStatus_t status = cusolver::cusolver_getrf(*a.handles.cusolverDn_handle, n, m, to_address(a), lda,
to_address(work), to_address(piv), to_address(piv) + n);
cudaMemcpy(&st, to_address(piv) + n, sizeof(int), cudaMemcpyDeviceToHost);
arch::memcopy(&st, to_address(piv) + n, sizeof(int), arch::memcopyD2H);
if (CUSOLVER_STATUS_SUCCESS != status)
{
std::cerr << " cublas_getrf status, info: " << status << " " << st << std::endl;
@ -95,13 +96,13 @@ inline static void getrfBatched(const int n,
A_h = new T*[batchSize];
for (int i = 0; i < batchSize; i++)
A_h[i] = to_address(a[i]);
cudaMalloc((void**)&A_d, batchSize * sizeof(*A_h));
cudaMemcpy(A_d, A_h, batchSize * sizeof(*A_h), cudaMemcpyHostToDevice);
arch::malloc((void**)&A_d, batchSize * sizeof(*A_h));
arch::memcopy(A_d, A_h, batchSize * sizeof(*A_h), arch::memcopyH2D);
cublasStatus_t status = cublas::cublas_getrfBatched(*(a[0]).handles.cublas_handle, n, A_d, lda, to_address(piv),
to_address(info), batchSize);
if (CUBLAS_STATUS_SUCCESS != status)
throw std::runtime_error("Error: cublas_getrf returned error code.");
cudaFree(A_d);
arch::free(A_d);
delete[] A_h;
}
@ -129,20 +130,16 @@ inline static void getri(int n,
throw std::runtime_error("Error: getri<GPU_MEMORY_POINTER_TYPE> required lda = 1.");
int* info;
if (cudaSuccess != cudaMalloc((void**)&info, sizeof(int)))
{
std::cerr << " Error getri: Error allocating on GPU." << std::endl;
throw std::runtime_error("Error: cudaMalloc returned error code.");
}
arch::malloc((void**)&info, sizeof(int), "lapack_cuda_gpu_ptr::getri");
kernels::set_identity(n, n, to_address(work), n);
if (CUSOLVER_STATUS_SUCCESS !=
cusolver::cusolver_getrs(*a.handles.cusolverDn_handle, CUBLAS_OP_N, n, n, to_address(a), lda, to_address(piv),
to_address(work), n, info))
throw std::runtime_error("Error: cusolver_getrs returned error code.");
cudaMemcpy(to_address(a), to_address(work), n * n * sizeof(T), cudaMemcpyDeviceToDevice);
cudaMemcpy(&status, info, sizeof(int), cudaMemcpyDeviceToHost);
cudaFree(info);
arch::memcopy(to_address(a), to_address(work), n * n * sizeof(T), arch::memcopyD2D);
arch::memcopy(&status, info, sizeof(int), arch::memcopyD2H);
arch::free(info);
}
// getriBatched
@ -165,16 +162,16 @@ inline static void getriBatched(int n,
A_h[i] = to_address(a[i]);
C_h[i] = to_address(ainv[i]);
}
cudaMalloc((void**)&A_d, batchSize * sizeof(*A_h));
cudaMalloc((void**)&C_d, batchSize * sizeof(*C_h));
cudaMemcpy(A_d, A_h, batchSize * sizeof(*A_h), cudaMemcpyHostToDevice);
cudaMemcpy(C_d, C_h, batchSize * sizeof(*C_h), cudaMemcpyHostToDevice);
arch::malloc((void**)&A_d, batchSize * sizeof(*A_h));
arch::malloc((void**)&C_d, batchSize * sizeof(*C_h));
arch::memcopy(A_d, A_h, batchSize * sizeof(*A_h), arch::memcopyH2D);
arch::memcopy(C_d, C_h, batchSize * sizeof(*C_h), arch::memcopyH2D);
cublasStatus_t status = cublas::cublas_getriBatched(*(a[0]).handles.cublas_handle, n, A_d, lda, to_address(piv), C_d,
ldc, to_address(info), batchSize);
if (CUBLAS_STATUS_SUCCESS != status)
throw std::runtime_error("Error: cublas_getri returned error code.");
cudaFree(A_d);
cudaFree(C_d);
arch::free(A_d);
arch::free(C_d);
delete[] A_h;
delete[] C_h;
}
@ -198,16 +195,16 @@ inline static void matinvBatched(int n,
A_h[i] = to_address(a[i]);
C_h[i] = to_address(ainv[i]);
}
cudaMalloc((void**)&A_d, batchSize * sizeof(*A_h));
cudaMalloc((void**)&C_d, batchSize * sizeof(*C_h));
cudaMemcpy(A_d, A_h, batchSize * sizeof(*A_h), cudaMemcpyHostToDevice);
cudaMemcpy(C_d, C_h, batchSize * sizeof(*C_h), cudaMemcpyHostToDevice);
arch::malloc((void**)&A_d, batchSize * sizeof(*A_h));
arch::malloc((void**)&C_d, batchSize * sizeof(*C_h));
arch::memcopy(A_d, A_h, batchSize * sizeof(*A_h), arch::memcopyH2D);
arch::memcopy(C_d, C_h, batchSize * sizeof(*C_h), arch::memcopyH2D);
cublasStatus_t status = cublas::cublas_matinvBatched(*(a[0]).handles.cublas_handle, n, A_d, lda, C_d, lda_inv,
to_address(info), batchSize);
if (CUBLAS_STATUS_SUCCESS != status)
throw std::runtime_error("Error: cublas_matinv returned error code.");
cudaFree(A_d);
cudaFree(C_d);
arch::free(A_d);
arch::free(C_d);
delete[] A_h;
delete[] C_h;
}
@ -233,15 +230,11 @@ inline static void geqrf(int M,
{
// allocating here for now
int* piv;
if (cudaSuccess != cudaMalloc((void**)&piv, sizeof(int)))
{
std::cerr << " Error geqrf: Error allocating on GPU." << std::endl;
throw std::runtime_error("Error: cudaMalloc returned error code.");
}
arch::malloc((void**)&piv, sizeof(int), "lapack_cuda_gpu_ptr::geqrf");
cusolverStatus_t status = cusolver::cusolver_geqrf(*A.handles.cusolverDn_handle, M, N, to_address(A), LDA,
to_address(TAU), to_address(WORK), LWORK, piv);
cudaMemcpy(&INFO, piv, sizeof(int), cudaMemcpyDeviceToHost);
arch::memcopy(&INFO, piv, sizeof(int), arch::memcopyD2H);
if (CUSOLVER_STATUS_SUCCESS != status)
{
int st;
@ -249,7 +242,7 @@ inline static void geqrf(int M,
std::cerr.flush();
throw std::runtime_error("Error: cublas_geqrf returned error code.");
}
cudaFree(piv);
arch::free(piv);
}
// gelqf
@ -294,15 +287,11 @@ void static gqr(int M,
{
// allocating here for now
int* piv;
if (cudaSuccess != cudaMalloc((void**)&piv, sizeof(int)))
{
std::cerr << " Error gqr: Error allocating on GPU." << std::endl;
throw std::runtime_error("Error: cudaMalloc returned error code.");
}
arch::malloc((void**)&piv, sizeof(int), "lapack_cuda_gpu_ptr::gqr");
cusolverStatus_t status = cusolver::cusolver_gqr(*A.handles.cusolverDn_handle, M, N, K, to_address(A), LDA,
to_address(TAU), to_address(WORK), LWORK, piv);
cudaMemcpy(&INFO, piv, sizeof(int), cudaMemcpyDeviceToHost);
arch::memcopy(&INFO, piv, sizeof(int), arch::memcopyD2H);
if (CUSOLVER_STATUS_SUCCESS != status)
{
int st;
@ -310,7 +299,7 @@ void static gqr(int M,
std::cerr.flush();
throw std::runtime_error("Error: cublas_gqr returned error code.");
}
cudaFree(piv);
arch::free(piv);
}
template<typename T, typename I>
@ -378,15 +367,15 @@ inline static void geqrfBatched(int M,
T_h[i] = to_address(TAU[i]);
T** B_d;
std::vector<int> inf(batchSize);
cudaMalloc((void**)&B_d, 2 * batchSize * sizeof(*B_h));
cudaMemcpy(B_d, B_h, 2 * batchSize * sizeof(*B_h), cudaMemcpyHostToDevice);
arch::malloc((void**)&B_d, 2 * batchSize * sizeof(*B_h));
arch::memcopy(B_d, B_h, 2 * batchSize * sizeof(*B_h), arch::memcopyH2D);
T** A_d(B_d);
T** T_d(B_d + batchSize);
cublasStatus_t status = cublas::cublas_geqrfBatched(*(A[0]).handles.cublas_handle, M, N, A_d, LDA, T_d,
to_address(inf.data()), batchSize);
if (CUBLAS_STATUS_SUCCESS != status)
throw std::runtime_error("Error: cublas_geqrfBatched returned error code.");
cudaFree(B_d);
arch::free(B_d);
delete[] B_h;
}
@ -410,8 +399,8 @@ inline static void geqrfStrided(int M,
for(int i=0; i<batchSize; i++)
T_h[i] = to_address(TAU)+i*Tstride;
T **B_d;
cudaMalloc((void **)&B_d, 2*batchSize*sizeof(*B_h));
cudaMemcpy(B_d, B_h, 2*batchSize*sizeof(*B_h), cudaMemcpyHostToDevice);
arch::malloc((void **)&B_d, 2*batchSize*sizeof(*B_h));
arch::memcopy(B_d, B_h, 2*batchSize*sizeof(*B_h), arch::memcopyH2D);
T **A_d(B_d);
T **T_d(B_d+batchSize);
*/
@ -424,19 +413,19 @@ inline static void geqrfStrided(int M,
for (int i = 0; i < batchSize; i++)
T_h[i] = to_address(TAU) + i * Tstride;
T **A_d, **T_d;
cudaMalloc((void**)&A_d, batchSize * sizeof(*A_h));
cudaMemcpy(A_d, A_h, batchSize * sizeof(*A_h), cudaMemcpyHostToDevice);
cudaMalloc((void**)&T_d, batchSize * sizeof(*T_h));
cudaMemcpy(T_d, T_h, batchSize * sizeof(*T_h), cudaMemcpyHostToDevice);
arch::malloc((void**)&A_d, batchSize * sizeof(*A_h));
arch::memcopy(A_d, A_h, batchSize * sizeof(*A_h), arch::memcopyH2D);
arch::malloc((void**)&T_d, batchSize * sizeof(*T_h));
arch::memcopy(T_d, T_h, batchSize * sizeof(*T_h), arch::memcopyH2D);
cublasStatus_t status =
cublas::cublas_geqrfBatched(*A.handles.cublas_handle, M, N, A_d, LDA, T_d, to_address(inf.data()), batchSize);
for (int i = 0; i < batchSize; i++)
assert(inf[i] == 0);
if (CUBLAS_STATUS_SUCCESS != status)
throw std::runtime_error("Error: cublas_geqrfBatched returned error code.");
cudaFree(A_d);
arch::free(A_d);
delete[] A_h;
cudaFree(T_d);
arch::free(T_d);
delete[] T_h;
}
@ -464,18 +453,18 @@ inline static void gesvd(char jobU,
int& st)
{
int* devSt;
cudaMalloc((void**)&devSt, sizeof(int));
arch::malloc((void**)&devSt, sizeof(int));
cusolverStatus_t status =
cusolver::cusolver_gesvd(*A.handles.cusolverDn_handle, jobU, jobVT, m, n, to_address(A), lda, to_address(S),
to_address(U), ldu, to_address(VT), ldvt, to_address(W), lw, devSt);
cudaMemcpy(&st, devSt, sizeof(int), cudaMemcpyDeviceToHost);
arch::memcopy(&st, devSt, sizeof(int), arch::memcopyD2H);
if (CUSOLVER_STATUS_SUCCESS != status)
{
std::cerr << " cublas_gesvd status, info: " << status << " " << st << std::endl;
std::cerr.flush();
throw std::runtime_error("Error: cublas_gesvd returned error code.");
}
cudaFree(devSt);
arch::free(devSt);
}
template<typename T, typename R>
@ -496,18 +485,18 @@ inline static void gesvd(char jobU,
int& st)
{
int* devSt;
cudaMalloc((void**)&devSt, sizeof(int));
arch::malloc((void**)&devSt, sizeof(int));
cusolverStatus_t status =
cusolver::cusolver_gesvd(*A.handles.cusolverDn_handle, jobU, jobVT, m, n, to_address(A), lda, to_address(S),
to_address(U), ldu, to_address(VT), ldvt, to_address(W), lw, devSt);
cudaMemcpy(&st, devSt, sizeof(int), cudaMemcpyDeviceToHost);
arch::memcopy(&st, devSt, sizeof(int), arch::memcopyD2H);
if (CUSOLVER_STATUS_SUCCESS != status)
{
std::cerr << " cublas_gesvd status, info: " << status << " " << st << std::endl;
std::cerr.flush();
throw std::runtime_error("Error: cublas_gesvd returned error code.");
}
cudaFree(devSt);
arch::free(devSt);
}

View File

@ -566,17 +566,17 @@ inline static void gemmBatched(char Atrans,
B_h[i] = to_address(B[i]);
C_h[i] = to_address(C[i]);
}
cudaMalloc((void**)&A_d, batchSize * sizeof(*A_h));
cudaMalloc((void**)&B_d, batchSize * sizeof(*B_h));
cudaMalloc((void**)&C_d, batchSize * sizeof(*C_h));
cudaMemcpy(A_d, A_h, batchSize * sizeof(*A_h), cudaMemcpyHostToDevice);
cudaMemcpy(B_d, B_h, batchSize * sizeof(*B_h), cudaMemcpyHostToDevice);
cudaMemcpy(C_d, C_h, batchSize * sizeof(*C_h), cudaMemcpyHostToDevice);
arch::malloc((void**)&A_d, batchSize * sizeof(*A_h));
arch::malloc((void**)&B_d, batchSize * sizeof(*B_h));
arch::malloc((void**)&C_d, batchSize * sizeof(*C_h));
arch::memcopy(A_d, A_h, batchSize * sizeof(*A_h), arch::memcopyH2D);
arch::memcopy(B_d, B_h, batchSize * sizeof(*B_h), arch::memcopyH2D);
arch::memcopy(C_d, C_h, batchSize * sizeof(*C_h), arch::memcopyH2D);
cublas::cublas_gemmBatched(*(A[0]).handles.cublas_handle, Atrans, Btrans, M, N, K, alpha, A_d, lda, B_d, ldb, beta,
C_d, ldc, batchSize);
cudaFree(A_d);
cudaFree(B_d);
cudaFree(C_d);
arch::free(A_d);
arch::free(B_d);
arch::free(C_d);
delete[] A_h;
delete[] B_h;
delete[] C_h;

View File

@ -49,10 +49,8 @@ void csrmv(const char transa,
static_assert(std::is_same<typename std::decay<Q>::type, T>::value, "Wrong dispatch.\n");
// somehow need to check if the matrix is compact!
int pb, pe;
if (cudaSuccess != cudaMemcpy(std::addressof(pb), to_address(pntrb), sizeof(int), cudaMemcpyDeviceToHost))
throw std::runtime_error("Error: cudaMemcpy returned error code in csrmv.");
if (cudaSuccess != cudaMemcpy(std::addressof(pe), to_address(pntre + (M - 1)), sizeof(int), cudaMemcpyDeviceToHost))
throw std::runtime_error("Error: cudaMemcpy returned error code in csrmv.");
arch::memcopy(std::addressof(pb), to_address(pntrb), sizeof(int), arch::memcopyD2H, "sparse_cuda_gpu_ptr::csrmv");
arch::memcopy(std::addressof(pe), to_address(pntre + (M - 1)), sizeof(int), arch::memcopyD2H, "sparse_cuda_gpu_ptr::csrmv");
int nnz = pe - pb;
if (CUSPARSE_STATUS_SUCCESS !=
cusparse::cusparse_csrmv(*A.handles.cusparse_handle, transa, M, K, nnz, alpha,
@ -81,10 +79,8 @@ void csrmm(const char transa,
static_assert(std::is_same<typename std::decay<Q>::type, T>::value, "Wrong dispatch.\n");
// somehow need to check if the matrix is compact!
int pb, pe;
if (cudaSuccess != cudaMemcpy(std::addressof(pb), to_address(pntrb), sizeof(int), cudaMemcpyDeviceToHost))
throw std::runtime_error("Error: cudaMemcpy returned error code in csrmm.");
if (cudaSuccess != cudaMemcpy(std::addressof(pe), to_address(pntre + (M - 1)), sizeof(int), cudaMemcpyDeviceToHost))
throw std::runtime_error("Error: cudaMemcpy returned error code in csrmm.");
arch::memcopy(std::addressof(pb), to_address(pntrb), sizeof(int), arch::memcopyD2H, "lapack_sparse_gpu_ptr::csrmm");
arch::memcopy(std::addressof(pe), to_address(pntre + (M - 1)), sizeof(int), arch::memcopyD2H, "lapack_sparse_gpu_ptr::csrmm");
int nnz = pe - pb;
if (transa == 'N')
{

View File

@ -15,7 +15,7 @@
#include <hip/hip_runtime.h>
#include "uninitialized_array.hpp"
#include "AFQMC/Numerics/detail/HIP/Kernels/hip_settings.h"
#include "AFQMC/Memory/HIP/hip_utilities.h"
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
namespace kernels
{
@ -281,8 +281,8 @@ void Auwn_Bun_Cuw(int nu,
reinterpret_cast<thrust::complex<double> const*>(A),
reinterpret_cast<thrust::complex<double> const*>(B),
reinterpret_cast<thrust::complex<double>*>(C));
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
@ -304,8 +304,8 @@ void Auwn_Bun_Cuw(int nu,
static_cast<thrust::complex<float> const>(alpha),
reinterpret_cast<thrust::complex<float> const*>(A),
reinterpret_cast<thrust::complex<float> const*>(B), reinterpret_cast<thrust::complex<float>*>(C));
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
// C[u][w] = alpha * sum_i A[w][i][u] * B[i][u]
@ -329,8 +329,8 @@ void Awiu_Biu_Cuw(int nu,
static_cast<thrust::complex<double> const>(alpha),
reinterpret_cast<thrust::complex<double> const*>(A), B, ldb,
reinterpret_cast<thrust::complex<double>*>(C), ldc);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
@ -354,8 +354,8 @@ void Awiu_Biu_Cuw(int nu,
static_cast<thrust::complex<float> const>(alpha),
reinterpret_cast<thrust::complex<float> const*>(A), B, ldb,
reinterpret_cast<thrust::complex<float>*>(C), ldc);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void Awiu_Biu_Cuw(int nu,
@ -379,8 +379,8 @@ void Awiu_Biu_Cuw(int nu,
reinterpret_cast<thrust::complex<double> const*>(A),
reinterpret_cast<thrust::complex<double> const*>(B), ldb,
reinterpret_cast<thrust::complex<double>*>(C), ldc);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
@ -405,8 +405,8 @@ void Awiu_Biu_Cuw(int nu,
reinterpret_cast<thrust::complex<float> const*>(A),
reinterpret_cast<thrust::complex<float> const*>(B), ldb,
reinterpret_cast<thrust::complex<float>*>(C), ldc);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
@ -428,8 +428,8 @@ void Aijk_Bkj_Cik(int ni,
reinterpret_cast<thrust::complex<double> const*>(A), lda, stride,
reinterpret_cast<thrust::complex<double> const*>(B), ldb,
reinterpret_cast<thrust::complex<double>*>(C), ldc);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void Aijk_Bkj_Cik(int ni,
@ -448,8 +448,8 @@ void Aijk_Bkj_Cik(int ni,
hipLaunchKernelGGL(kernel_Aijk_Bkj_Cik, dim3(grid_dim), dim3(32), 0, 0, ni, nj, nk,
reinterpret_cast<thrust::complex<double> const*>(A), lda, stride, B, ldb,
reinterpret_cast<thrust::complex<double>*>(C), ldc);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void Aijk_Bkj_Cik(int ni,
@ -469,8 +469,8 @@ void Aijk_Bkj_Cik(int ni,
reinterpret_cast<thrust::complex<float> const*>(A), lda, stride,
reinterpret_cast<thrust::complex<float> const*>(B), ldb,
reinterpret_cast<thrust::complex<float>*>(C), ldc);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void Aijk_Bkj_Cik(int ni,
@ -489,8 +489,8 @@ void Aijk_Bkj_Cik(int ni,
hipLaunchKernelGGL(kernel_Aijk_Bkj_Cik, dim3(grid_dim), dim3(32), 0, 0, ni, nj, nk,
reinterpret_cast<thrust::complex<float> const*>(A), lda, stride, B, ldb,
reinterpret_cast<thrust::complex<float>*>(C), ldc);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
// v[w][i][j] = v[i][w][j]
@ -501,8 +501,8 @@ void viwj_vwij(int nw, int ni, int i0, int iN, std::complex<double> const* B, st
hipLaunchKernelGGL(kernel_viwj_vwij, dim3(grid_dim), dim3(MAX_THREADS_PER_DIM), 0, 0, nw, ni, i0, iN,
reinterpret_cast<thrust::complex<double> const*>(B),
reinterpret_cast<thrust::complex<double>*>(A));
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void viwj_vwij(int nw, int ni, int i0, int iN, std::complex<double> const* B, std::complex<float>* A)
@ -511,8 +511,8 @@ void viwj_vwij(int nw, int ni, int i0, int iN, std::complex<double> const* B, st
dim3 grid_dim(nw, (iN - i0), 1);
hipLaunchKernelGGL(kernel_viwj_vwij, dim3(grid_dim), dim3(MAX_THREADS_PER_DIM), 0, 0, nw, ni, i0, iN,
reinterpret_cast<thrust::complex<double> const*>(B), reinterpret_cast<thrust::complex<float>*>(A));
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void viwj_vwij(int nw, int ni, int i0, int iN, std::complex<float> const* B, std::complex<double>* A)
@ -521,8 +521,8 @@ void viwj_vwij(int nw, int ni, int i0, int iN, std::complex<float> const* B, std
dim3 grid_dim(nw, (iN - i0), 1);
hipLaunchKernelGGL(kernel_viwj_vwij, dim3(grid_dim), dim3(MAX_THREADS_PER_DIM), 0, 0, nw, ni, i0, iN,
reinterpret_cast<thrust::complex<float> const*>(B), reinterpret_cast<thrust::complex<double>*>(A));
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void viwj_vwij(int nw, int ni, int i0, int iN, std::complex<float> const* B, std::complex<float>* A)
@ -531,8 +531,8 @@ void viwj_vwij(int nw, int ni, int i0, int iN, std::complex<float> const* B, std
dim3 grid_dim(nw, (iN - i0), 1);
hipLaunchKernelGGL(kernel_viwj_vwij, dim3(grid_dim), dim3(MAX_THREADS_PER_DIM), 0, 0, nw, ni, i0, iN,
reinterpret_cast<thrust::complex<float> const*>(B), reinterpret_cast<thrust::complex<float>*>(A));
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
// element-wise C[k][i][j] = A[i][j] * B[j][k]
@ -554,8 +554,8 @@ void element_wise_Aij_Bjk_Ckij(char transA,
hipLaunchKernelGGL(kernel_element_wise_Aij_Bjk_Ckij, dim3(grid_dim), dim3(MAX_THREADS_PER_DIM), 0, 0, transA, ni, nj,
nk, A, lda, reinterpret_cast<thrust::complex<double> const*>(B), ldb,
reinterpret_cast<thrust::complex<double>*>(C), ldc1, ldc2);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void element_wise_Aij_Bjk_Ckij(char transA,
@ -576,8 +576,8 @@ void element_wise_Aij_Bjk_Ckij(char transA,
hipLaunchKernelGGL(kernel_element_wise_Aij_Bjk_Ckij, dim3(grid_dim), dim3(MAX_THREADS_PER_DIM), 0, 0, transA, ni, nj,
nk, A, lda, reinterpret_cast<thrust::complex<float> const*>(B), ldb,
reinterpret_cast<thrust::complex<float>*>(C), ldc1, ldc2);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void element_wise_Aij_Bjk_Ckij(char transA,
@ -599,8 +599,8 @@ void element_wise_Aij_Bjk_Ckij(char transA,
nk, reinterpret_cast<thrust::complex<double> const*>(A), lda,
reinterpret_cast<thrust::complex<double> const*>(B), ldb,
reinterpret_cast<thrust::complex<double>*>(C), ldc1, ldc2);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void element_wise_Aij_Bjk_Ckij(char transA,
@ -622,8 +622,8 @@ void element_wise_Aij_Bjk_Ckij(char transA,
nk, reinterpret_cast<thrust::complex<float> const*>(A), lda,
reinterpret_cast<thrust::complex<float> const*>(B), ldb,
reinterpret_cast<thrust::complex<float>*>(C), ldc1, ldc2);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
@ -649,8 +649,8 @@ void element_wise_Aij_Bjk_Ckji(int ni,
hipLaunchKernelGGL(kernel_element_wise_Aij_Bjk_Ckji, dim3(grid_dim), dim3(block_dim), 0, 0, ni, nj, nk, A, lda,
reinterpret_cast<thrust::complex<double> const*>(B), ldb,
reinterpret_cast<thrust::complex<double>*>(C), ldc, stride);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void element_wise_Aij_Bjk_Ckji(int ni,
@ -676,8 +676,8 @@ void element_wise_Aij_Bjk_Ckji(int ni,
reinterpret_cast<thrust::complex<double> const*>(A), lda,
reinterpret_cast<thrust::complex<double> const*>(B), ldb,
reinterpret_cast<thrust::complex<double>*>(C), ldc, stride);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void element_wise_Aij_Bjk_Ckji(int ni,
@ -701,8 +701,8 @@ void element_wise_Aij_Bjk_Ckji(int ni,
hipLaunchKernelGGL(kernel_element_wise_Aij_Bjk_Ckji, dim3(grid_dim), dim3(block_dim), 0, 0, ni, nj, nk, A, lda,
reinterpret_cast<thrust::complex<float> const*>(B), ldb,
reinterpret_cast<thrust::complex<float>*>(C), ldc, stride);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void element_wise_Aij_Bjk_Ckji(int ni,
@ -727,8 +727,8 @@ void element_wise_Aij_Bjk_Ckji(int ni,
reinterpret_cast<thrust::complex<float> const*>(A), lda,
reinterpret_cast<thrust::complex<float> const*>(B), ldb,
reinterpret_cast<thrust::complex<float>*>(C), ldc, stride);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
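
The kernels in this file all follow the same launch-then-check pattern. The sketch below is only a guess at what a hip_kernel_check-style helper is meant to do (check_kernel_status is a hypothetical name, not the actual implementation): turn a failed launch or synchronization into an exception that carries the kernel's tag.

// Hypothetical sketch of a hip_kernel_check-style helper.
#include <hip/hip_runtime.h>
#include <stdexcept>
#include <string>

namespace qmc_hip
{
inline void check_kernel_status(hipError_t err, std::string const& tag = "")
{
  if (err != hipSuccess)
  {
    // Build a message that includes the optional kernel tag and the HIP error string.
    std::string msg = "HIP kernel error";
    if (!tag.empty())
      msg += " in " + tag;
    msg += ": ";
    msg += hipGetErrorString(err);
    throw std::runtime_error(msg);
  }
}
} // namespace qmc_hip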

View File

@ -15,7 +15,7 @@
#include <thrust/complex.h>
#include <hip/hip_runtime.h>
#include "AFQMC/Numerics/detail/HIP/Kernels/hip_settings.h"
#include "AFQMC/Memory/HIP/hip_utilities.h"
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
namespace kernels
{
@ -113,8 +113,8 @@ void KaKjw_to_KKwaj(int nwalk,
dim3 grid_dim(nkpts, nkpts, 1);
hipLaunchKernelGGL(kernel_KaKjw_to_KKwaj, dim3(grid_dim), dim3(block_dim), 0, 0, nwalk, nkpts, nmo_max, nmo_tot,
nocc_max, nmo, nmo0, nocc, nocc0, A, B);
qmc_hip::hip_check(hipGetLastError(), "KaKjw_to_KKwaj");
qmc_hip::hip_check(hipDeviceSynchronize(), "KaKjw_to_KKwaj");
qmc_hip::hip_kernel_check(hipGetLastError(), "KaKjw_to_KKwaj");
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "KaKjw_to_KKwaj");
}
void KaKjw_to_KKwaj(int nwalk,
@ -135,8 +135,8 @@ void KaKjw_to_KKwaj(int nwalk,
dim3 grid_dim(nkpts, nkpts, 1);
hipLaunchKernelGGL(kernel_KaKjw_to_KKwaj, dim3(grid_dim), dim3(block_dim), 0, 0, nwalk, nkpts, nmo_max, nmo_tot,
nocc_max, nmo, nmo0, nocc, nocc0, A, B);
qmc_hip::hip_check(hipGetLastError(), "KaKjw_to_KKwaj");
qmc_hip::hip_check(hipDeviceSynchronize(), "KaKjw_to_KKwaj");
qmc_hip::hip_kernel_check(hipGetLastError(), "KaKjw_to_KKwaj");
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "KaKjw_to_KKwaj");
}
void KaKjw_to_KKwaj(int nwalk,
@ -157,8 +157,8 @@ void KaKjw_to_KKwaj(int nwalk,
dim3 grid_dim(nkpts, nkpts, 1);
hipLaunchKernelGGL(kernel_KaKjw_to_KKwaj, dim3(grid_dim), dim3(block_dim), 0, 0, nwalk, nkpts, nmo_max, nmo_tot,
nocc_max, nmo, nmo0, nocc, nocc0, A, B);
qmc_hip::hip_check(hipGetLastError(), "KaKjw_to_KKwaj");
qmc_hip::hip_check(hipDeviceSynchronize(), "KaKjw_to_KKwaj");
qmc_hip::hip_kernel_check(hipGetLastError(), "KaKjw_to_KKwaj");
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "KaKjw_to_KKwaj");
}
void KaKjw_to_KKwaj(int nwalk,
@ -180,8 +180,8 @@ void KaKjw_to_KKwaj(int nwalk,
hipLaunchKernelGGL(kernel_KaKjw_to_KKwaj, dim3(grid_dim), dim3(block_dim), 0, 0, nwalk, nkpts, nmo_max, nmo_tot,
nocc_max, nmo, nmo0, nocc, nocc0, reinterpret_cast<thrust::complex<double> const*>(A),
reinterpret_cast<thrust::complex<double>*>(B));
qmc_hip::hip_check(hipGetLastError(), "KaKjw_to_KKwaj");
qmc_hip::hip_check(hipDeviceSynchronize(), "KaKjw_to_KKwaj");
qmc_hip::hip_kernel_check(hipGetLastError(), "KaKjw_to_KKwaj");
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "KaKjw_to_KKwaj");
}
void KaKjw_to_KKwaj(int nwalk,
@ -203,8 +203,8 @@ void KaKjw_to_KKwaj(int nwalk,
hipLaunchKernelGGL(kernel_KaKjw_to_KKwaj, dim3(grid_dim), dim3(block_dim), 0, 0, nwalk, nkpts, nmo_max, nmo_tot,
nocc_max, nmo, nmo0, nocc, nocc0, reinterpret_cast<thrust::complex<float> const*>(A),
reinterpret_cast<thrust::complex<float>*>(B));
qmc_hip::hip_check(hipGetLastError(), "KaKjw_to_KKwaj");
qmc_hip::hip_check(hipDeviceSynchronize(), "KaKjw_to_KKwaj");
qmc_hip::hip_kernel_check(hipGetLastError(), "KaKjw_to_KKwaj");
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "KaKjw_to_KKwaj");
}
void KaKjw_to_KKwaj(int nwalk,
@ -226,8 +226,8 @@ void KaKjw_to_KKwaj(int nwalk,
hipLaunchKernelGGL(kernel_KaKjw_to_KKwaj, dim3(grid_dim), dim3(block_dim), 0, 0, nwalk, nkpts, nmo_max, nmo_tot,
nocc_max, nmo, nmo0, nocc, nocc0, reinterpret_cast<thrust::complex<double> const*>(A),
reinterpret_cast<thrust::complex<float>*>(B));
qmc_hip::hip_check(hipGetLastError(), "KaKjw_to_KKwaj");
qmc_hip::hip_check(hipDeviceSynchronize(), "KaKjw_to_KKwaj");
qmc_hip::hip_kernel_check(hipGetLastError(), "KaKjw_to_KKwaj");
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "KaKjw_to_KKwaj");
}

View File

@ -16,7 +16,7 @@
#include <thrust/complex.h>
#include <hip/hip_runtime.h>
#include "AFQMC/Numerics/detail/HIP/Kernels/hip_settings.h"
#include "AFQMC/Memory/HIP/hip_utilities.h"
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
namespace kernels
{
@ -120,8 +120,8 @@ void KaKjw_to_QKajw(int nwalk,
dim3 grid_dim(nkpts, nkpts, 1);
hipLaunchKernelGGL(kernel_KaKjw_to_QKajw, dim3(grid_dim), dim3(block_dim), 0, 0, nwalk, nkpts, nmo_max, nmo_tot,
nocc_max, nmo, nmo0, nocc, nocc0, QKtok2, A, B);
qmc_hip::hip_check(hipGetLastError(), "KaKjw_to_QKajw");
qmc_hip::hip_check(hipDeviceSynchronize(), "KaKjw_to_QKajw");
qmc_hip::hip_kernel_check(hipGetLastError(), "KaKjw_to_QKajw");
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "KaKjw_to_QKajw");
}
void KaKjw_to_QKajw(int nwalk,
@ -143,8 +143,8 @@ void KaKjw_to_QKajw(int nwalk,
dim3 grid_dim(nkpts, nkpts, 1);
hipLaunchKernelGGL(kernel_KaKjw_to_QKajw, dim3(grid_dim), dim3(block_dim), 0, 0, nwalk, nkpts, nmo_max, nmo_tot,
nocc_max, nmo, nmo0, nocc, nocc0, QKtok2, A, B);
qmc_hip::hip_check(hipGetLastError(), "KaKjw_to_QKajw");
qmc_hip::hip_check(hipDeviceSynchronize(), "KaKjw_to_QKajw");
qmc_hip::hip_kernel_check(hipGetLastError(), "KaKjw_to_QKajw");
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "KaKjw_to_QKajw");
}
void KaKjw_to_QKajw(int nwalk,
@ -166,8 +166,8 @@ void KaKjw_to_QKajw(int nwalk,
dim3 grid_dim(nkpts, nkpts, 1);
hipLaunchKernelGGL(kernel_KaKjw_to_QKajw, dim3(grid_dim), dim3(block_dim), 0, 0, nwalk, nkpts, nmo_max, nmo_tot,
nocc_max, nmo, nmo0, nocc, nocc0, QKtok2, A, B);
qmc_hip::hip_check(hipGetLastError(), "KaKjw_to_QKajw");
qmc_hip::hip_check(hipDeviceSynchronize(), "KaKjw_to_QKajw");
qmc_hip::hip_kernel_check(hipGetLastError(), "KaKjw_to_QKajw");
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "KaKjw_to_QKajw");
}
void KaKjw_to_QKajw(int nwalk,
@ -190,8 +190,8 @@ void KaKjw_to_QKajw(int nwalk,
hipLaunchKernelGGL(kernel_KaKjw_to_QKajw, dim3(grid_dim), dim3(block_dim), 0, 0, nwalk, nkpts, nmo_max, nmo_tot,
nocc_max, nmo, nmo0, nocc, nocc0, QKtok2, reinterpret_cast<thrust::complex<float> const*>(A),
reinterpret_cast<thrust::complex<float>*>(B));
qmc_hip::hip_check(hipGetLastError(), "KaKjw_to_QKajw");
qmc_hip::hip_check(hipDeviceSynchronize(), "KaKjw_to_QKajw");
qmc_hip::hip_kernel_check(hipGetLastError(), "KaKjw_to_QKajw");
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "KaKjw_to_QKajw");
}
void KaKjw_to_QKajw(int nwalk,
@ -214,8 +214,8 @@ void KaKjw_to_QKajw(int nwalk,
hipLaunchKernelGGL(kernel_KaKjw_to_QKajw, dim3(grid_dim), dim3(block_dim), 0, 0, nwalk, nkpts, nmo_max, nmo_tot,
nocc_max, nmo, nmo0, nocc, nocc0, QKtok2, reinterpret_cast<thrust::complex<double> const*>(A),
reinterpret_cast<thrust::complex<double>*>(B));
qmc_hip::hip_check(hipGetLastError(), "KaKjw_to_QKajw");
qmc_hip::hip_check(hipDeviceSynchronize(), "KaKjw_to_QKajw");
qmc_hip::hip_kernel_check(hipGetLastError(), "KaKjw_to_QKajw");
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "KaKjw_to_QKajw");
}
void KaKjw_to_QKajw(int nwalk,
@ -238,8 +238,8 @@ void KaKjw_to_QKajw(int nwalk,
hipLaunchKernelGGL(kernel_KaKjw_to_QKajw, dim3(grid_dim), dim3(block_dim), 0, 0, nwalk, nkpts, nmo_max, nmo_tot,
nocc_max, nmo, nmo0, nocc, nocc0, QKtok2, reinterpret_cast<thrust::complex<double> const*>(A),
reinterpret_cast<thrust::complex<float>*>(B));
qmc_hip::hip_check(hipGetLastError(), "KaKjw_to_QKajw");
qmc_hip::hip_check(hipDeviceSynchronize(), "KaKjw_to_QKajw");
qmc_hip::hip_kernel_check(hipGetLastError(), "KaKjw_to_QKajw");
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "KaKjw_to_QKajw");
}
} // namespace kernels

View File

@ -16,7 +16,7 @@
#include <thrust/complex.h>
#include <hip/hip_runtime.h>
#include "AFQMC/Numerics/detail/HIP/Kernels/hip_settings.h"
#include "AFQMC/Memory/HIP/hip_utilities.h"
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
namespace kernels
{
@ -74,8 +74,8 @@ void Tab_to_Kl(int nwalk, int nocc, int nchol, std::complex<double> const* Tab,
hipLaunchKernelGGL(kernel_Tab_to_Kl, dim3(grid_dim), dim3(nthr), 0, 0, nwalk, nocc, nchol,
reinterpret_cast<thrust::complex<double> const*>(Tab),
reinterpret_cast<thrust::complex<double>*>(Kl));
qmc_hip::hip_check(hipGetLastError(), "Tab_to_Kl");
qmc_hip::hip_check(hipDeviceSynchronize(), "Tab_to_Kl");
qmc_hip::hip_kernel_check(hipGetLastError(), "Tab_to_Kl");
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "Tab_to_Kl");
}
void Tab_to_Kl(int nwalk, int nocc, int nchol, std::complex<float> const* Tab, std::complex<float>* Kl)
@ -85,8 +85,8 @@ void Tab_to_Kl(int nwalk, int nocc, int nchol, std::complex<float> const* Tab, s
hipLaunchKernelGGL(kernel_Tab_to_Kl, dim3(grid_dim), dim3(nthr), 0, 0, nwalk, nocc, nchol,
reinterpret_cast<thrust::complex<float> const*>(Tab),
reinterpret_cast<thrust::complex<float>*>(Kl));
qmc_hip::hip_check(hipGetLastError(), "Tab_to_Kl");
qmc_hip::hip_check(hipDeviceSynchronize(), "Tab_to_Kl");
qmc_hip::hip_kernel_check(hipGetLastError(), "Tab_to_Kl");
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "Tab_to_Kl");
}
@ -102,8 +102,8 @@ void Tanb_to_Kl(int nwalk,
hipLaunchKernelGGL(kernel_Tanb_to_Kl, dim3(grid_dim), dim3(nthr), 0, 0, nwalk, nocc, nchol, nchol_tot,
reinterpret_cast<thrust::complex<double> const*>(Tab),
reinterpret_cast<thrust::complex<double>*>(Kl));
qmc_hip::hip_check(hipGetLastError(), "Tab_to_Kl");
qmc_hip::hip_check(hipDeviceSynchronize(), "Tab_to_Kl");
qmc_hip::hip_kernel_check(hipGetLastError(), "Tab_to_Kl");
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "Tab_to_Kl");
}
void Tanb_to_Kl(int nwalk, int nocc, int nchol, int nchol_tot, std::complex<float> const* Tab, std::complex<float>* Kl)
@ -113,8 +113,8 @@ void Tanb_to_Kl(int nwalk, int nocc, int nchol, int nchol_tot, std::complex<floa
hipLaunchKernelGGL(kernel_Tanb_to_Kl, dim3(grid_dim), dim3(nthr), 0, 0, nwalk, nocc, nchol, nchol_tot,
reinterpret_cast<thrust::complex<float> const*>(Tab),
reinterpret_cast<thrust::complex<float>*>(Kl));
qmc_hip::hip_check(hipGetLastError(), "Tab_to_Kl");
qmc_hip::hip_check(hipDeviceSynchronize(), "Tab_to_Kl");
qmc_hip::hip_kernel_check(hipGetLastError(), "Tab_to_Kl");
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "Tab_to_Kl");
}
} // namespace kernels

View File

@ -15,7 +15,7 @@
#include <hip/hip_runtime.h>
#include <thrust/complex.h>
#include <hip/hip_runtime.h>
#include "AFQMC/Memory/HIP/hip_utilities.h"
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
namespace kernels
{
@ -73,8 +73,8 @@ void acAxpbB(int m,
dim3 block_dim(xblock_dim, yblock_dim);
dim3 grid_dim(xgrid_dim, ygrid_dim);
hipLaunchKernelGGL(kernel_acAxpbB, dim3(grid_dim), dim3(block_dim), 0, 0, m, n, alpha, A, lda, x, incx, beta, B, ldb);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void acAxpbB(int m,
@ -95,8 +95,8 @@ void acAxpbB(int m,
dim3 block_dim(xblock_dim, yblock_dim);
dim3 grid_dim(xgrid_dim, ygrid_dim);
hipLaunchKernelGGL(kernel_acAxpbB, dim3(grid_dim), dim3(block_dim), 0, 0, m, n, alpha, A, lda, x, incx, beta, B, ldb);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void acAxpbB(int m,
@ -122,8 +122,8 @@ void acAxpbB(int m,
reinterpret_cast<thrust::complex<double> const*>(x), incx,
static_cast<thrust::complex<double> const>(beta), reinterpret_cast<thrust::complex<double>*>(B),
ldb);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void acAxpbB(int m,
@ -149,8 +149,8 @@ void acAxpbB(int m,
reinterpret_cast<thrust::complex<float> const*>(x), incx,
static_cast<thrust::complex<float> const>(beta), reinterpret_cast<thrust::complex<float>*>(B),
ldb);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}

View File

@ -15,7 +15,7 @@
#include <hip/hip_runtime.h>
#include <thrust/complex.h>
#include <hip/hip_runtime.h>
#include "AFQMC/Memory/HIP/hip_utilities.h"
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
namespace kernels
{
@ -49,8 +49,8 @@ void adiagApy(int N, double const alpha, double const* A, int lda, double* y, in
int block_dim = 256;
int grid_dim = (N + block_dim - 1) / block_dim;
hipLaunchKernelGGL(kernel_adiagApy, dim3(grid_dim), dim3(block_dim), 0, 0, N, alpha, A, lda, y, incy);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void adiagApy(int N,
@ -66,8 +66,8 @@ void adiagApy(int N,
static_cast<thrust::complex<double> const>(alpha),
reinterpret_cast<thrust::complex<double> const*>(A), lda,
reinterpret_cast<thrust::complex<double>*>(y), incy);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void adiagApy(int N, float const alpha, float const* A, int lda, float* y, int incy)
@ -75,8 +75,8 @@ void adiagApy(int N, float const alpha, float const* A, int lda, float* y, int i
int block_dim = 256;
int grid_dim = (N + block_dim - 1) / block_dim;
hipLaunchKernelGGL(kernel_adiagApy, dim3(grid_dim), dim3(block_dim), 0, 0, N, alpha, A, lda, y, incy);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void adiagApy(int N,
@ -92,8 +92,8 @@ void adiagApy(int N,
static_cast<thrust::complex<float> const>(alpha),
reinterpret_cast<thrust::complex<float> const*>(A), lda,
reinterpret_cast<thrust::complex<float>*>(y), incy);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
} // namespace kernels

View File

@ -15,7 +15,7 @@
#include <thrust/complex.h>
#include <hip/hip_runtime.h>
#include "uninitialized_array.hpp"
#include "AFQMC/Memory/HIP/hip_utilities.h"
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
namespace kernels
{
@ -142,8 +142,8 @@ void adotpby(int N,
double* res)
{
hipLaunchKernelGGL(kernel_adotpby, dim3(1), dim3(1024), 0, 0, N, alpha, x, incx, y, incy, beta, res);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void adotpby(int N,
@ -159,8 +159,8 @@ void adotpby(int N,
reinterpret_cast<thrust::complex<double> const*>(x), incx,
reinterpret_cast<thrust::complex<double> const*>(y), incy,
static_cast<thrust::complex<double> const>(beta), reinterpret_cast<thrust::complex<double>*>(res));
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void adotpby(int N,
@ -173,8 +173,8 @@ void adotpby(int N,
float* res)
{
hipLaunchKernelGGL(kernel_adotpby, dim3(1), dim3(1024), 0, 0, N, alpha, x, incx, y, incy, beta, res);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void adotpby(int N,
@ -190,8 +190,8 @@ void adotpby(int N,
reinterpret_cast<thrust::complex<float> const*>(x), incx,
reinterpret_cast<thrust::complex<float> const*>(y), incy,
static_cast<thrust::complex<float> const>(beta), reinterpret_cast<thrust::complex<float>*>(res));
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void adotpby(int N,
@ -204,8 +204,8 @@ void adotpby(int N,
double* res)
{
hipLaunchKernelGGL(kernel_adotpby, dim3(1), dim3(1024), 0, 0, N, alpha, x, incx, y, incy, beta, res);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void adotpby(int N,
@ -221,8 +221,8 @@ void adotpby(int N,
reinterpret_cast<thrust::complex<float> const*>(x), incx,
reinterpret_cast<thrust::complex<float> const*>(y), incy,
static_cast<thrust::complex<double> const>(beta), reinterpret_cast<thrust::complex<double>*>(res));
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void strided_adotpby(int NB,
@ -242,8 +242,8 @@ void strided_adotpby(int NB,
reinterpret_cast<thrust::complex<double> const*>(B), ldb,
static_cast<thrust::complex<double> const>(beta), reinterpret_cast<thrust::complex<double>*>(C),
ldc);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void strided_adotpby(int NB,
@ -263,8 +263,8 @@ void strided_adotpby(int NB,
reinterpret_cast<thrust::complex<float> const*>(B), ldb,
static_cast<thrust::complex<double> const>(beta), reinterpret_cast<thrust::complex<double>*>(C),
ldc);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
} // namespace kernels

View File

@ -16,7 +16,7 @@
#include <thrust/complex.h>
#include <hip/hip_runtime.h>
#include "AFQMC/Numerics/detail/HIP/Kernels/hip_settings.h"
#include "AFQMC/Memory/HIP/hip_utilities.h"
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
namespace kernels
{
@ -82,15 +82,15 @@ __global__ void kernel_transpose_wabn_to_wban(int nwalk,
void ajw_to_waj(int na, int nj, int nw, int inca, double const* A, double* B)
{
hipLaunchKernelGGL(kernel_ajw_to_waj, dim3(na), dim3(128), 0, 0, na, nj, nw, inca, A, B);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void ajw_to_waj(int na, int nj, int nw, int inca, float const* A, float* B)
{
hipLaunchKernelGGL(kernel_ajw_to_waj, dim3(na), dim3(128), 0, 0, na, nj, nw, inca, A, B);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void ajw_to_waj(int na, int nj, int nw, int inca, std::complex<double> const* A, std::complex<double>* B)
@ -98,16 +98,16 @@ void ajw_to_waj(int na, int nj, int nw, int inca, std::complex<double> const* A,
hipLaunchKernelGGL(kernel_ajw_to_waj, dim3(na), dim3(128), 0, 0, na, nj, nw, inca,
reinterpret_cast<thrust::complex<double> const*>(A),
reinterpret_cast<thrust::complex<double>*>(B));
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void ajw_to_waj(int na, int nj, int nw, int inca, std::complex<float> const* A, std::complex<float>* B)
{
hipLaunchKernelGGL(kernel_ajw_to_waj, dim3(na), dim3(128), 0, 0, na, nj, nw, inca,
reinterpret_cast<thrust::complex<float> const*>(A), reinterpret_cast<thrust::complex<float>*>(B));
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void transpose_wabn_to_wban(int nwalk,
@ -121,8 +121,8 @@ void transpose_wabn_to_wban(int nwalk,
hipLaunchKernelGGL(kernel_transpose_wabn_to_wban, dim3(grid_dim), dim3(32), 0, 0, nwalk, na, nb, nchol,
reinterpret_cast<thrust::complex<double> const*>(Tab),
reinterpret_cast<thrust::complex<double>*>(Tba));
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void transpose_wabn_to_wban(int nwalk,
@ -136,8 +136,8 @@ void transpose_wabn_to_wban(int nwalk,
hipLaunchKernelGGL(kernel_transpose_wabn_to_wban, dim3(grid_dim), dim3(32), 0, 0, nwalk, na, nb, nchol,
reinterpret_cast<thrust::complex<float> const*>(Tab),
reinterpret_cast<thrust::complex<float>*>(Tba));
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void transpose_wabn_to_wban(int nwalk,
@ -151,8 +151,8 @@ void transpose_wabn_to_wban(int nwalk,
hipLaunchKernelGGL(kernel_transpose_wabn_to_wban, dim3(grid_dim), dim3(32), 0, 0, nwalk, na, nb, nchol,
reinterpret_cast<thrust::complex<double> const*>(Tab),
reinterpret_cast<thrust::complex<float>*>(Tba));
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void transpose_wabn_to_wban(int nwalk,
@ -166,8 +166,8 @@ void transpose_wabn_to_wban(int nwalk,
hipLaunchKernelGGL(kernel_transpose_wabn_to_wban, dim3(grid_dim), dim3(32), 0, 0, nwalk, na, nb, nchol,
reinterpret_cast<thrust::complex<float> const*>(Tab),
reinterpret_cast<thrust::complex<double>*>(Tba));
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
} // namespace kernels

View File

@ -15,7 +15,7 @@
#include <hip/hip_runtime.h>
#include <thrust/complex.h>
#include <hip/hip_runtime.h>
#include "AFQMC/Memory/HIP/hip_utilities.h"
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
namespace kernels
{
@ -94,8 +94,8 @@ void axpy_batched_gpu(int n,
hipMemcpy(b_, b, batchSize * sizeof(*b), hipMemcpyHostToDevice);
hipMemcpy(x_, x, batchSize * sizeof(*x), hipMemcpyHostToDevice);
hipLaunchKernelGGL(kernel_axpy_batched, dim3(batchSize), dim3(128), 0, 0, n, x_, a_, inca, b_, incb, batchSize);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
hipFree(a_);
hipFree(b_);
hipFree(x_);
@ -120,8 +120,8 @@ void sumGw_batched_gpu(int n,
hipMemcpy(x_, x, batchSize * sizeof(*x), hipMemcpyHostToDevice);
int nb_(nw > batchSize ? batchSize : nw);
hipLaunchKernelGGL(kernel_sumGw_batched, dim3(nb_), dim3(256), 0, 0, n, x_, a_, inca, b_, incb, b0, nw, batchSize);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
hipFree(a_);
hipFree(b_);
hipFree(x_);

View File

@ -15,7 +15,7 @@
#include <thrust/device_ptr.h>
#include <thrust/transform.h>
#include <thrust/functional.h>
#include "AFQMC/Memory/HIP/hip_utilities.h"
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
namespace kernels
{
@ -47,26 +47,26 @@ inline static void kernel_axty(int n, std::complex<T> const alpha, std::complex<
void axty(int n, float alpha, float const* x, float* y)
{
kernel_axty(n, alpha, x, y);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void axty(int n, double alpha, double const* x, double* y)
{
kernel_axty(n, alpha, x, y);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void axty(int n, std::complex<float> alpha, std::complex<float> const* x, std::complex<float>* y)
{
kernel_axty(n, alpha, x, y);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void axty(int n, std::complex<double> alpha, std::complex<double> const* x, std::complex<double>* y)
{
kernel_axty(n, alpha, x, y);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}

View File

@ -16,7 +16,7 @@
#include <thrust/complex.h>
#include "uninitialized_array.hpp"
#include "AFQMC/Numerics/detail/HIP/Kernels/hip_settings.h"
#include "AFQMC/Memory/HIP/hip_utilities.h"
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
namespace kernels
{
@ -93,8 +93,8 @@ void batchedDot(int m,
int incy)
{
hipLaunchKernelGGL(kernel_dot, dim3(m), dim3(DOT_BLOCK_SIZE), 0, 0, n, alpha, A, lda, B, ldb, beta, y, incy);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void batchedDot(int m,
@ -109,8 +109,8 @@ void batchedDot(int m,
int incy)
{
hipLaunchKernelGGL(kernel_dot, dim3(m), dim3(DOT_BLOCK_SIZE), 0, 0, n, alpha, A, lda, B, ldb, beta, y, incy);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void batchedDot(int m,
@ -130,8 +130,8 @@ void batchedDot(int m,
reinterpret_cast<thrust::complex<double> const*>(B), ldb,
static_cast<thrust::complex<double> const>(beta), reinterpret_cast<thrust::complex<double>*>(y),
incy);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void batchedDot(int m,
@ -151,8 +151,8 @@ void batchedDot(int m,
reinterpret_cast<thrust::complex<float> const*>(B), ldb,
static_cast<thrust::complex<float> const>(beta), reinterpret_cast<thrust::complex<float>*>(y),
incy);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
} // namespace kernels

View File

@ -16,7 +16,7 @@
#include <thrust/complex.h>
#include <hip/hip_runtime.h>
#include "AFQMC/Numerics/detail/HIP/Kernels/hip_settings.h"
#include "AFQMC/Memory/HIP/hip_utilities.h"
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
namespace kernels
{
@ -159,8 +159,8 @@ void batched_Tab_to_Klr(int nterms,
hipLaunchKernelGGL(kernel_batched_Tab_to_Klr, dim3(grid_dim), dim3(nthr), 0, 0, nterms, nwalk, nocc, nchol_max,
nchol_tot, ncholQ, ncholQ0, kdiag, reinterpret_cast<thrust::complex<double> const*>(Tab),
reinterpret_cast<thrust::complex<double>*>(Kl), reinterpret_cast<thrust::complex<double>*>(Kr));
qmc_hip::hip_check(hipGetLastError(), "batched_Tab_to_Klr");
qmc_hip::hip_check(hipDeviceSynchronize(), "batched_Tab_to_Klr");
qmc_hip::hip_kernel_check(hipGetLastError(), "batched_Tab_to_Klr");
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "batched_Tab_to_Klr");
}
void batched_Tab_to_Klr(int nterms,
@ -180,8 +180,8 @@ void batched_Tab_to_Klr(int nterms,
hipLaunchKernelGGL(kernel_batched_Tab_to_Klr, dim3(grid_dim), dim3(nthr), 0, 0, nterms, nwalk, nocc, nchol_max,
nchol_tot, ncholQ, ncholQ0, kdiag, reinterpret_cast<thrust::complex<float> const*>(Tab),
reinterpret_cast<thrust::complex<float>*>(Kl), reinterpret_cast<thrust::complex<float>*>(Kr));
qmc_hip::hip_check(hipGetLastError(), "batched_Tab_to_Klr");
qmc_hip::hip_check(hipDeviceSynchronize(), "batched_Tab_to_Klr");
qmc_hip::hip_kernel_check(hipGetLastError(), "batched_Tab_to_Klr");
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "batched_Tab_to_Klr");
}
void batched_Tanb_to_Klr(int nterms,
@ -201,8 +201,8 @@ void batched_Tanb_to_Klr(int nterms,
hipLaunchKernelGGL(kernel_batched_Tanb_to_Klr, dim3(grid_dim), dim3(nthr), 0, 0, nterms, nwalk, nocc, nchol_max,
nchol_tot, ncholQ, ncholQ0, kdiag, reinterpret_cast<thrust::complex<double> const*>(Tab),
reinterpret_cast<thrust::complex<double>*>(Kl), reinterpret_cast<thrust::complex<double>*>(Kr));
qmc_hip::hip_check(hipGetLastError(), "batched_Tanb_to_Klr");
qmc_hip::hip_check(hipDeviceSynchronize(), "batched_Tanb_to_Klr");
qmc_hip::hip_kernel_check(hipGetLastError(), "batched_Tanb_to_Klr");
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "batched_Tanb_to_Klr");
}
void batched_Tanb_to_Klr(int nterms,
@ -222,8 +222,8 @@ void batched_Tanb_to_Klr(int nterms,
hipLaunchKernelGGL(kernel_batched_Tanb_to_Klr, dim3(grid_dim), dim3(nthr), 0, 0, nterms, nwalk, nocc, nchol_max,
nchol_tot, ncholQ, ncholQ0, kdiag, reinterpret_cast<thrust::complex<float> const*>(Tab),
reinterpret_cast<thrust::complex<float>*>(Kl), reinterpret_cast<thrust::complex<float>*>(Kr));
qmc_hip::hip_check(hipGetLastError(), "batched_Tanb_to_Klr");
qmc_hip::hip_check(hipDeviceSynchronize(), "batched_Tanb_to_Klr");
qmc_hip::hip_kernel_check(hipGetLastError(), "batched_Tanb_to_Klr");
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "batched_Tanb_to_Klr");
}
} // namespace kernels

View File

@ -16,7 +16,7 @@
#include <thrust/complex.h>
#include "uninitialized_array.hpp"
#include "AFQMC/Numerics/detail/HIP/Kernels/hip_settings.h"
#include "AFQMC/Memory/HIP/hip_utilities.h"
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
namespace kernels
{
@ -138,8 +138,8 @@ void batched_dot_wabn_wban(int nbatch,
nchol, reinterpret_cast<thrust::complex<double> const*>(alpha),
reinterpret_cast<thrust::complex<double> const*>(Tab),
reinterpret_cast<thrust::complex<double>*>(y), incy);
qmc_hip::hip_check(hipGetLastError(), "batched_dot_wabn_wban");
qmc_hip::hip_check(hipDeviceSynchronize(), "batched_dot_wabn_wban");
qmc_hip::hip_kernel_check(hipGetLastError(), "batched_dot_wabn_wban");
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "batched_dot_wabn_wban");
}
void batched_dot_wabn_wban(int nbatch,
@ -157,8 +157,8 @@ void batched_dot_wabn_wban(int nbatch,
nchol, reinterpret_cast<thrust::complex<float> const*>(alpha),
reinterpret_cast<thrust::complex<float> const*>(Tab), reinterpret_cast<thrust::complex<float>*>(y),
incy);
qmc_hip::hip_check(hipGetLastError(), "batched_dot_wabn_wban");
qmc_hip::hip_check(hipDeviceSynchronize(), "batched_dot_wabn_wban");
qmc_hip::hip_kernel_check(hipGetLastError(), "batched_dot_wabn_wban");
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "batched_dot_wabn_wban");
}
void batched_dot_wabn_wban(int nbatch,
@ -176,8 +176,8 @@ void batched_dot_wabn_wban(int nbatch,
nchol, reinterpret_cast<thrust::complex<float> const*>(alpha),
reinterpret_cast<thrust::complex<float> const*>(Tab),
reinterpret_cast<thrust::complex<double>*>(y), incy);
qmc_hip::hip_check(hipGetLastError(), "batched_dot_wabn_wban");
qmc_hip::hip_check(hipDeviceSynchronize(), "batched_dot_wabn_wban");
qmc_hip::hip_kernel_check(hipGetLastError(), "batched_dot_wabn_wban");
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "batched_dot_wabn_wban");
}
// anb/bna
@ -196,8 +196,8 @@ void batched_dot_wanb_wbna(int nbatch,
nchol, reinterpret_cast<thrust::complex<double> const*>(alpha),
reinterpret_cast<thrust::complex<double> const*>(Tab),
reinterpret_cast<thrust::complex<double>*>(y), incy);
qmc_hip::hip_check(hipGetLastError(), "batched_dot_wanb_wbna");
qmc_hip::hip_check(hipDeviceSynchronize(), "batched_dot_wanb_wbna");
qmc_hip::hip_kernel_check(hipGetLastError(), "batched_dot_wanb_wbna");
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "batched_dot_wanb_wbna");
}
void batched_dot_wanb_wbna(int nbatch,
@ -215,8 +215,8 @@ void batched_dot_wanb_wbna(int nbatch,
nchol, reinterpret_cast<thrust::complex<float> const*>(alpha),
reinterpret_cast<thrust::complex<float> const*>(Tab), reinterpret_cast<thrust::complex<float>*>(y),
incy);
qmc_hip::hip_check(hipGetLastError(), "batched_dot_wanb_wbna");
qmc_hip::hip_check(hipDeviceSynchronize(), "batched_dot_wanb_wbna");
qmc_hip::hip_kernel_check(hipGetLastError(), "batched_dot_wanb_wbna");
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "batched_dot_wanb_wbna");
}
void batched_dot_wanb_wbna(int nbatch,
@ -234,8 +234,8 @@ void batched_dot_wanb_wbna(int nbatch,
nchol, reinterpret_cast<thrust::complex<float> const*>(alpha),
reinterpret_cast<thrust::complex<float> const*>(Tab),
reinterpret_cast<thrust::complex<double>*>(y), incy);
qmc_hip::hip_check(hipGetLastError(), "batched_dot_wanb_wbna");
qmc_hip::hip_check(hipDeviceSynchronize(), "batched_dot_wanb_wbna");
qmc_hip::hip_kernel_check(hipGetLastError(), "batched_dot_wanb_wbna");
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "batched_dot_wanb_wbna");
}

View File

@ -16,7 +16,7 @@
#include <hip/hip_runtime.h>
#include "uninitialized_array.hpp"
#include "AFQMC/Numerics/detail/HIP/Kernels/hip_settings.h"
#include "AFQMC/Memory/HIP/hip_utilities.h"
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
namespace kernels
{
@ -156,8 +156,8 @@ void construct_X(int nCV,
reinterpret_cast<thrust::complex<double> const*>(vbias),
reinterpret_cast<thrust::complex<double>*>(HW), reinterpret_cast<thrust::complex<double>*>(MF),
reinterpret_cast<thrust::complex<double>*>(X));
qmc_hip::hip_check(hipGetLastError(), "construct_X");
qmc_hip::hip_check(hipDeviceSynchronize(), "construct_X");
qmc_hip::hip_kernel_check(hipGetLastError(), "construct_X");
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "construct_X");
}
void construct_X(int nCV,
int nsteps,
@ -183,8 +183,8 @@ void construct_X(int nCV,
reinterpret_cast<thrust::complex<float> const*>(vbias),
reinterpret_cast<thrust::complex<double>*>(HW), reinterpret_cast<thrust::complex<double>*>(MF),
reinterpret_cast<thrust::complex<float>*>(X));
qmc_hip::hip_check(hipGetLastError(), "construct_X");
qmc_hip::hip_check(hipDeviceSynchronize(), "construct_X");
qmc_hip::hip_kernel_check(hipGetLastError(), "construct_X");
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "construct_X");
}
void construct_X(int nCV,
int nsteps,
@ -210,8 +210,8 @@ void construct_X(int nCV,
reinterpret_cast<thrust::complex<double> const*>(vbias),
reinterpret_cast<thrust::complex<double>*>(HW), reinterpret_cast<thrust::complex<double>*>(MF),
reinterpret_cast<thrust::complex<float>*>(X));
qmc_hip::hip_check(hipGetLastError(), "construct_X");
qmc_hip::hip_check(hipDeviceSynchronize(), "construct_X");
qmc_hip::hip_kernel_check(hipGetLastError(), "construct_X");
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "construct_X");
}
void construct_X(int nCV,
int nsteps,
@ -237,8 +237,8 @@ void construct_X(int nCV,
reinterpret_cast<thrust::complex<float> const*>(vbias),
reinterpret_cast<thrust::complex<double>*>(HW), reinterpret_cast<thrust::complex<double>*>(MF),
reinterpret_cast<thrust::complex<double>*>(X));
qmc_hip::hip_check(hipGetLastError(), "construct_X");
qmc_hip::hip_check(hipDeviceSynchronize(), "construct_X");
qmc_hip::hip_kernel_check(hipGetLastError(), "construct_X");
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "construct_X");
}
} // namespace kernels

View File

@ -15,7 +15,7 @@
#include <hip/hip_runtime.h>
#include <thrust/complex.h>
#include <hip/hip_runtime.h>
#include "AFQMC/Memory/HIP/hip_utilities.h"
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
namespace kernels
{
@ -40,16 +40,16 @@ void copy_n_cast(double const* A, int n, float* B)
int block_dim = 256;
int grid_dim = (n + block_dim - 1) / block_dim;
hipLaunchKernelGGL(kernel_copy_n_cast, dim3(grid_dim), dim3(block_dim), 0, 0, A, n, B);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void copy_n_cast(float const* A, int n, double* B)
{
int block_dim = 256;
int grid_dim = (n + block_dim - 1) / block_dim;
hipLaunchKernelGGL(kernel_copy_n_cast, dim3(grid_dim), dim3(block_dim), 0, 0, A, n, B);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void copy_n_cast(std::complex<double> const* A, int n, std::complex<float>* B)
{
@ -58,8 +58,8 @@ void copy_n_cast(std::complex<double> const* A, int n, std::complex<float>* B)
hipLaunchKernelGGL(kernel_copy_n_cast, dim3(grid_dim), dim3(block_dim), 0, 0,
reinterpret_cast<thrust::complex<double> const*>(A), n,
reinterpret_cast<thrust::complex<float>*>(B));
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void copy_n_cast(std::complex<float> const* A, int n, std::complex<double>* B)
{
@ -68,8 +68,8 @@ void copy_n_cast(std::complex<float> const* A, int n, std::complex<double>* B)
hipLaunchKernelGGL(kernel_copy_n_cast, dim3(grid_dim), dim3(block_dim), 0, 0,
reinterpret_cast<thrust::complex<float> const*>(A), n,
reinterpret_cast<thrust::complex<double>*>(B));
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
} // namespace kernels
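
The copy_n_cast launchers above all size the grid with the usual ceiling division, grid_dim = (n + block_dim - 1) / block_dim, so that n elements are covered by block_dim-sized blocks. A minimal, self-contained sketch of that pattern (the kernel name cast_copy_d2f is illustrative, not the one defined in this file):

// Illustrative only; not the QMCPACK kernel itself.
#include <hip/hip_runtime.h>

__global__ void cast_copy_d2f(double const* A, int n, float* B)
{
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n)
    B[i] = static_cast<float>(A[i]); // narrowing cast, one element per thread
}

void launch_cast_copy_d2f(double const* A, int n, float* B)
{
  int block_dim = 256;
  int grid_dim  = (n + block_dim - 1) / block_dim; // ceiling division covers all n elements
  hipLaunchKernelGGL(cast_copy_d2f, dim3(grid_dim), dim3(block_dim), 0, 0, A, n, B);
}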

View File

@ -19,7 +19,7 @@
#include <thrust/device_malloc.h>
#include <thrust/device_free.h>
#include "uninitialized_array.hpp"
#include "AFQMC/Memory/HIP/hip_utilities.h"
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
namespace kernels
{
@ -390,8 +390,8 @@ __global__ void kernel_scale_columns(int n, int m, T* A, int lda, T* scl)
void determinant_from_getrf_gpu(int N, double* m, int lda, int* piv, double LogOverlapFactor, double* res)
{
hipLaunchKernelGGL(kernel_determinant_from_getrf, dim3(1), dim3(256), 0, 0, N, m, lda, piv, LogOverlapFactor, res);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void determinant_from_getrf_gpu(int N,
@ -405,8 +405,8 @@ void determinant_from_getrf_gpu(int N,
reinterpret_cast<thrust::complex<double>*>(m), lda, piv,
static_cast<thrust::complex<double>>(LogOverlapFactor),
reinterpret_cast<thrust::complex<double>*>(res));
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void strided_determinant_from_getrf_gpu(int N,
@ -421,8 +421,8 @@ void strided_determinant_from_getrf_gpu(int N,
{
hipLaunchKernelGGL(kernel_strided_determinant_from_getrf, dim3(nbatch), dim3(64), 0, 0, N, m, lda, mstride, piv,
pstride, LogOverlapFactor, res, nbatch);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void strided_determinant_from_getrf_gpu(int N,
@ -439,8 +439,8 @@ void strided_determinant_from_getrf_gpu(int N,
reinterpret_cast<thrust::complex<double>*>(m), lda, mstride, piv, pstride,
static_cast<thrust::complex<double>>(LogOverlapFactor),
reinterpret_cast<thrust::complex<double>*>(res), nbatch);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void batched_determinant_from_getrf_gpu(int N,
@ -454,8 +454,8 @@ void batched_determinant_from_getrf_gpu(int N,
{
hipLaunchKernelGGL(kernel_batched_determinant_from_getrf, dim3(nbatch), dim3(64), 0, 0, N, m, lda, piv, pstride,
LogOverlapFactor, res, nbatch);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void batched_determinant_from_getrf_gpu(int N,
@ -471,8 +471,8 @@ void batched_determinant_from_getrf_gpu(int N,
reinterpret_cast<thrust::complex<double>**>(m), lda, piv, pstride,
static_cast<thrust::complex<double>>(LogOverlapFactor),
reinterpret_cast<thrust::complex<double>*>(res), nbatch);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
std::complex<double> determinant_from_geqrf_gpu(int N, double* m, int lda, double* buff, double LogOverlapFactor)
@ -480,10 +480,10 @@ std::complex<double> determinant_from_geqrf_gpu(int N, double* m, int lda, doubl
thrust::device_ptr<thrust::complex<double>> d_ptr = thrust::device_malloc<thrust::complex<double>>(1);
hipLaunchKernelGGL(kernel_determinant_from_geqrf, dim3(1), dim3(256), 0, 0, N, m, lda, buff, LogOverlapFactor,
thrust::raw_pointer_cast(d_ptr));
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
std::complex<double> res;
qmc_hip::hip_check(hipMemcpy(std::addressof(res), thrust::raw_pointer_cast(d_ptr), sizeof(std::complex<double>),
qmc_hip::hip_kernel_check(hipMemcpy(std::addressof(res), thrust::raw_pointer_cast(d_ptr), sizeof(std::complex<double>),
hipMemcpyDeviceToHost));
thrust::device_free(d_ptr);
return res;
@ -500,10 +500,10 @@ std::complex<double> determinant_from_geqrf_gpu(int N,
reinterpret_cast<thrust::complex<double>*>(m), lda,
reinterpret_cast<thrust::complex<double>*>(buff),
static_cast<thrust::complex<double>>(LogOverlapFactor), thrust::raw_pointer_cast(d_ptr));
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
std::complex<double> res;
qmc_hip::hip_check(hipMemcpy(std::addressof(res), thrust::raw_pointer_cast(d_ptr), sizeof(std::complex<double>),
qmc_hip::hip_kernel_check(hipMemcpy(std::addressof(res), thrust::raw_pointer_cast(d_ptr), sizeof(std::complex<double>),
hipMemcpyDeviceToHost));
thrust::device_free(d_ptr);
return res;
@ -512,8 +512,8 @@ std::complex<double> determinant_from_geqrf_gpu(int N,
void determinant_from_geqrf_gpu(int N, double* m, int lda, double* buff)
{
hipLaunchKernelGGL(kernel_determinant_from_geqrf, dim3(1), dim3(256), 0, 0, N, m, lda, buff);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void determinant_from_geqrf_gpu(int N, std::complex<double>* m, int lda, std::complex<double>* buff)
@ -521,8 +521,8 @@ void determinant_from_geqrf_gpu(int N, std::complex<double>* m, int lda, std::co
hipLaunchKernelGGL(kernel_determinant_from_geqrf, dim3(1), dim3(256), 0, 0, N,
reinterpret_cast<thrust::complex<double>*>(m), lda,
reinterpret_cast<thrust::complex<double>*>(buff));
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
@ -531,8 +531,8 @@ double determinant_from_getrf_gpu(int N, double* m, int lda, int* piv, double Lo
thrust::device_ptr<double> d_ptr = thrust::device_malloc<double>(1);
hipLaunchKernelGGL(kernel_determinant_from_getrf, dim3(1), dim3(256), 0, 0, N, m, lda, piv, LogOverlapFactor,
thrust::raw_pointer_cast(d_ptr));
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
double res = *d_ptr;
thrust::device_free(d_ptr);
return res;
@ -548,10 +548,10 @@ std::complex<double> determinant_from_getrf_gpu(int N,
hipLaunchKernelGGL(kernel_determinant_from_getrf, dim3(1), dim3(256), 0, 0, N,
reinterpret_cast<thrust::complex<double>*>(m), lda, piv,
static_cast<thrust::complex<double>>(LogOverlapFactor), thrust::raw_pointer_cast(d_ptr));
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
std::complex<double> res;
qmc_hip::hip_check(hipMemcpy(std::addressof(res), thrust::raw_pointer_cast(d_ptr), sizeof(std::complex<double>),
qmc_hip::hip_kernel_check(hipMemcpy(std::addressof(res), thrust::raw_pointer_cast(d_ptr), sizeof(std::complex<double>),
hipMemcpyDeviceToHost));
thrust::device_free(d_ptr);
return res;
@ -562,8 +562,8 @@ void scale_columns(int n, int m, double* A, int lda, double* scl)
int xblock_dim = 32;
dim3 block_dim(xblock_dim, xblock_dim, 1);
hipLaunchKernelGGL(kernel_scale_columns, dim3(1), dim3(block_dim), 0, 0, n, m, A, lda, scl);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void scale_columns(int n, int m, std::complex<double>* A, int lda, std::complex<double>* scl)
{
@ -572,8 +572,8 @@ void scale_columns(int n, int m, std::complex<double>* A, int lda, std::complex<
hipLaunchKernelGGL(kernel_scale_columns, dim3(1), dim3(block_dim), 0, 0, n, m,
reinterpret_cast<thrust::complex<double>*>(A), lda,
reinterpret_cast<thrust::complex<double>*>(scl));
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
} // namespace kernels
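
The determinant_from_geqrf_gpu and determinant_from_getrf_gpu overloads above compute a scalar result on the device, copy it back with hipMemcpy, and free the temporary. A stripped-down sketch of that round trip, with an illustrative kernel standing in for the determinant computation (error checking omitted for brevity):

// write_answer and device_scalar_roundtrip are illustrative names, not AFQMC ones.
#include <hip/hip_runtime.h>
#include <thrust/device_free.h>
#include <thrust/device_malloc.h>
#include <thrust/device_ptr.h>

__global__ void write_answer(double* out) { *out = 42.0; }

double device_scalar_roundtrip()
{
  thrust::device_ptr<double> d_ptr = thrust::device_malloc<double>(1); // scratch result on the device
  hipLaunchKernelGGL(write_answer, dim3(1), dim3(1), 0, 0, thrust::raw_pointer_cast(d_ptr));
  double res = 0.0;
  // Blocking copy back to the host; also serializes with the kernel above.
  hipMemcpy(&res, thrust::raw_pointer_cast(d_ptr), sizeof(double), hipMemcpyDeviceToHost);
  thrust::device_free(d_ptr);
  return res;
}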

View File

@ -14,7 +14,7 @@
#include <hip/hip_runtime.h>
#include "uninitialized_array.hpp"
#include "AFQMC/Numerics/detail/HIP/Kernels/hip_settings.h"
#include "AFQMC/Memory/HIP/hip_utilities.h"
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
namespace kernels
{
@ -223,8 +223,8 @@ void dot_wabn(int nwalk,
static_cast<thrust::complex<double> const>(alpha),
reinterpret_cast<thrust::complex<double> const*>(Tab),
reinterpret_cast<thrust::complex<double>*>(y), incy);
qmc_hip::hip_check(hipGetLastError(), "dot_wabn");
qmc_hip::hip_check(hipDeviceSynchronize(), "dot_wabn");
qmc_hip::hip_kernel_check(hipGetLastError(), "dot_wabn");
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "dot_wabn");
}
void dot_wabn(int nwalk,
@ -241,8 +241,8 @@ void dot_wabn(int nwalk,
static_cast<thrust::complex<float> const>(alpha),
reinterpret_cast<thrust::complex<float> const*>(Tab), reinterpret_cast<thrust::complex<float>*>(y),
incy);
qmc_hip::hip_check(hipGetLastError(), "dot_wabn");
qmc_hip::hip_check(hipDeviceSynchronize(), "dot_wabn");
qmc_hip::hip_kernel_check(hipGetLastError(), "dot_wabn");
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "dot_wabn");
}
void dot_wabn(int nwalk,
@ -259,8 +259,8 @@ void dot_wabn(int nwalk,
static_cast<thrust::complex<float> const>(alpha),
reinterpret_cast<thrust::complex<float> const*>(Tab),
reinterpret_cast<thrust::complex<double>*>(y), incy);
qmc_hip::hip_check(hipGetLastError(), "dot_wabn");
qmc_hip::hip_check(hipDeviceSynchronize(), "dot_wabn");
qmc_hip::hip_kernel_check(hipGetLastError(), "dot_wabn");
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "dot_wabn");
}
// v2
@ -283,8 +283,8 @@ void dot_wanb(int nwalk,
static_cast<thrust::complex<double> const>(alpha),
reinterpret_cast<thrust::complex<double> const*>(Tab),
reinterpret_cast<thrust::complex<double>*>(y), incy);
qmc_hip::hip_check(hipGetLastError(), "dot_wanb");
qmc_hip::hip_check(hipDeviceSynchronize(), "dot_wanb");
qmc_hip::hip_kernel_check(hipGetLastError(), "dot_wanb");
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "dot_wanb");
}
void dot_wanb(int nwalk,
@ -306,8 +306,8 @@ void dot_wanb(int nwalk,
static_cast<thrust::complex<float> const>(alpha),
reinterpret_cast<thrust::complex<float> const*>(Tab), reinterpret_cast<thrust::complex<float>*>(y),
incy);
qmc_hip::hip_check(hipGetLastError(), "dot_wanb");
qmc_hip::hip_check(hipDeviceSynchronize(), "dot_wanb");
qmc_hip::hip_kernel_check(hipGetLastError(), "dot_wanb");
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "dot_wanb");
}
void dot_wanb(int nwalk,
@ -329,8 +329,8 @@ void dot_wanb(int nwalk,
static_cast<thrust::complex<float> const>(alpha),
reinterpret_cast<thrust::complex<float> const*>(Tab),
reinterpret_cast<thrust::complex<double>*>(y), incy);
qmc_hip::hip_check(hipGetLastError(), "dot_wanb");
qmc_hip::hip_check(hipDeviceSynchronize(), "dot_wanb");
qmc_hip::hip_kernel_check(hipGetLastError(), "dot_wanb");
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "dot_wanb");
}
/*
@ -345,8 +345,8 @@ void dot_wanb( int nwalk, int nocc, int nchol,
static_cast<thrust::complex<double> const>(alpha),
reinterpret_cast<thrust::complex<double> const*>(Tab),
reinterpret_cast<thrust::complex<double> *>(y),incy);
qmc_hip::hip_check(hipGetLastError(),"dot_wanb");
qmc_hip::hip_check(hipDeviceSynchronize(),"dot_wanb");
qmc_hip::hip_kernel_check(hipGetLastError(),"dot_wanb");
qmc_hip::hip_kernel_check(hipDeviceSynchronize(),"dot_wanb");
}
void dot_wanb( int nwalk, int nocc, int nchol,
@ -359,8 +359,8 @@ void dot_wanb( int nwalk, int nocc, int nchol,
static_cast<thrust::complex<float> const>(alpha),
reinterpret_cast<thrust::complex<float> const*>(Tab),
reinterpret_cast<thrust::complex<float> *>(y),incy);
qmc_hip::hip_check(hipGetLastError(),"dot_wanb");
qmc_hip::hip_check(hipDeviceSynchronize(),"dot_wanb");
qmc_hip::hip_kernel_check(hipGetLastError(),"dot_wanb");
qmc_hip::hip_kernel_check(hipDeviceSynchronize(),"dot_wanb");
}
void dot_wanb( int nwalk, int nocc, int nchol,
@ -373,8 +373,8 @@ void dot_wanb( int nwalk, int nocc, int nchol,
static_cast<thrust::complex<float> const>(alpha),
reinterpret_cast<thrust::complex<float> const*>(Tab),
reinterpret_cast<thrust::complex<double> *>(y),incy);
qmc_hip::hip_check(hipGetLastError(),"dot_wanb");
qmc_hip::hip_check(hipDeviceSynchronize(),"dot_wanb");
qmc_hip::hip_kernel_check(hipGetLastError(),"dot_wanb");
qmc_hip::hip_kernel_check(hipDeviceSynchronize(),"dot_wanb");
}
*/
@ -390,8 +390,8 @@ void dot_wpan_waqn_Fwpq(int nwalk,
static_cast<thrust::complex<double> const>(alpha),
reinterpret_cast<thrust::complex<double> const*>(Tab),
reinterpret_cast<thrust::complex<double>*>(F));
qmc_hip::hip_check(hipGetLastError(), "dot_wpan_waqn_Fwpq");
qmc_hip::hip_check(hipDeviceSynchronize(), "dot_wpan_waqn_Fwpq");
qmc_hip::hip_kernel_check(hipGetLastError(), "dot_wpan_waqn_Fwpq");
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "dot_wpan_waqn_Fwpq");
}
void dot_wpan_waqn_Fwpq(int nwalk,
@ -406,8 +406,8 @@ void dot_wpan_waqn_Fwpq(int nwalk,
static_cast<thrust::complex<float> const>(alpha),
reinterpret_cast<thrust::complex<float> const*>(Tab),
reinterpret_cast<thrust::complex<float>*>(F));
qmc_hip::hip_check(hipGetLastError(), "dot_wpan_waqn_Fwpq");
qmc_hip::hip_check(hipDeviceSynchronize(), "dot_wpan_waqn_Fwpq");
qmc_hip::hip_kernel_check(hipGetLastError(), "dot_wpan_waqn_Fwpq");
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "dot_wpan_waqn_Fwpq");
}
@ -423,8 +423,8 @@ void dot_wpan_waqn_Fwpq(int nwalk,
static_cast<thrust::complex<float> const>(alpha),
reinterpret_cast<thrust::complex<float> const*>(Tab),
reinterpret_cast<thrust::complex<double>*>(F));
qmc_hip::hip_check(hipGetLastError(), "dot_wpan_waqn_Fwpq");
qmc_hip::hip_check(hipDeviceSynchronize(), "dot_wpan_waqn_Fwpq");
qmc_hip::hip_kernel_check(hipGetLastError(), "dot_wpan_waqn_Fwpq");
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "dot_wpan_waqn_Fwpq");
}

View File

@ -13,7 +13,7 @@
#include <cassert>
#include <complex>
#include <type_traits>
#include "AFQMC/Memory/HIP/hip_utilities.h"
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
namespace kernels
{
@ -41,118 +41,118 @@ __global__ void kernel_fill2D_n(Size N, Size M, T* y, Size lda, T const a)
void fill_n(char* first, int N, int incx, char const value)
{
hipLaunchKernelGGL(kernel_fill_n, dim3(1), dim3(256), 0, 0, N, first, incx, value);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void fill_n(int* first, int N, int incx, int const value)
{
hipLaunchKernelGGL(kernel_fill_n, dim3(1), dim3(256), 0, 0, N, first, incx, value);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void fill_n(float* first, int N, int incx, float const value)
{
hipLaunchKernelGGL(kernel_fill_n, dim3(1), dim3(256), 0, 0, N, first, incx, value);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void fill_n(double* first, int N, int incx, double const value)
{
hipLaunchKernelGGL(kernel_fill_n, dim3(1), dim3(256), 0, 0, N, first, incx, value);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void fill_n(std::complex<float>* first, int N, int incx, std::complex<float> const value)
{
hipLaunchKernelGGL(kernel_fill_n, dim3(1), dim3(256), 0, 0, N, first, incx, value);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void fill_n(std::complex<double>* first, int N, int incx, std::complex<double> const value)
{
hipLaunchKernelGGL(kernel_fill_n, dim3(1), dim3(256), 0, 0, N, first, incx, value);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void fill_n(char* first, int N, char const value)
{
hipLaunchKernelGGL(kernel_fill_n, dim3(1), dim3(256), 0, 0, N, first, 1, value);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void fill_n(long int* first, long unsigned int N, const long int value)
{
hipLaunchKernelGGL(kernel_fill_n, dim3(1), dim3(256), 0, 0, N, first, 1, value);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void fill_n(long unsigned int* first, long unsigned int N, const long unsigned int value)
{
hipLaunchKernelGGL(kernel_fill_n, dim3(1), dim3(256), 0, 0, N, first, 1, value);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void fill_n(int* first, int N, int const value)
{
hipLaunchKernelGGL(kernel_fill_n, dim3(1), dim3(256), 0, 0, N, first, 1, value);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void fill_n(float* first, int N, float const value)
{
hipLaunchKernelGGL(kernel_fill_n, dim3(1), dim3(256), 0, 0, N, first, 1, value);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void fill_n(double* first, int N, double const value)
{
hipLaunchKernelGGL(kernel_fill_n, dim3(1), dim3(256), 0, 0, N, first, 1, value);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void fill_n(std::complex<float>* first, int N, std::complex<float> const value)
{
hipLaunchKernelGGL(kernel_fill_n, dim3(1), dim3(256), 0, 0, N, first, 1, value);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void fill_n(std::complex<double>* first, int N, std::complex<double> const value)
{
hipLaunchKernelGGL(kernel_fill_n, dim3(1), dim3(256), 0, 0, N, first, 1, value);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void fill2D_n(int N, int M, int* A, int lda, int const value)
{
hipLaunchKernelGGL(kernel_fill2D_n, dim3(32), dim3(32), 0, 0, N, M, A, lda, value);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void fill2D_n(int N, int M, float* A, int lda, float const value)
{
hipLaunchKernelGGL(kernel_fill2D_n, dim3(32), dim3(32), 0, 0, N, M, A, lda, value);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void fill2D_n(int N, int M, double* A, int lda, double const value)
{
hipLaunchKernelGGL(kernel_fill2D_n, dim3(32), dim3(32), 0, 0, N, M, A, lda, value);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void fill2D_n(int N, int M, std::complex<double>* A, int lda, std::complex<double> const value)
{
hipLaunchKernelGGL(kernel_fill2D_n, dim3(32), dim3(32), 0, 0, N, M, A, lda, value);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void fill2D_n(int N, int M, std::complex<float>* A, int lda, std::complex<float> const value)
{
hipLaunchKernelGGL(kernel_fill2D_n, dim3(32), dim3(32), 0, 0, N, M, A, lda, value);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
} // namespace kernels

View File

@ -16,7 +16,7 @@
//#include "hip_settings.h"
//#include "hip_utilities.h"
#include "AFQMC/Numerics/detail/HIP/Kernels/hip_settings.h"
#include "AFQMC/Memory/HIP/hip_utilities.h"
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
namespace kernels
{
@ -52,8 +52,8 @@ void get_diagonal_strided(int nk,
hipLaunchKernelGGL(kernel_get_diagonal_strided, dim3(grid_dim), dim3(nthr), 0, 0, nk, ni,
reinterpret_cast<thrust::complex<double> const*>(B), ldb, stride,
reinterpret_cast<thrust::complex<double>*>(A), lda);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void get_diagonal_strided(int nk,
@ -70,8 +70,8 @@ void get_diagonal_strided(int nk,
hipLaunchKernelGGL(kernel_get_diagonal_strided, dim3(grid_dim), dim3(nthr), 0, 0, nk, ni,
reinterpret_cast<thrust::complex<float> const*>(B), ldb, stride,
reinterpret_cast<thrust::complex<float>*>(A), lda);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
} // namespace kernels

View File

@ -15,7 +15,7 @@
#include <hip/hip_runtime.h>
#include <thrust/complex.h>
#include <hip/hip_runtime.h>
#include "AFQMC/Memory/HIP/hip_utilities.h"
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
#include "AFQMC/Numerics/detail/HIP/Kernels/hip_settings.h"
namespace kernels
@ -61,32 +61,32 @@ void inplace_cast(unsigned long n, std::complex<float>* A, std::complex<double>*
{
hipLaunchKernelGGL(kernel_inplace_cast, dim3(1), dim3(MAX_THREADS_PER_DIM), 0, 0, n,
reinterpret_cast<thrust::complex<float>*>(A), reinterpret_cast<thrust::complex<double>*>(B));
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void inplace_cast(unsigned long n, std::complex<double>* A, std::complex<float>* B)
{
hipLaunchKernelGGL(kernel_inplace_cast, dim3(1), dim3(MAX_THREADS_PER_DIM), 0, 0, n,
reinterpret_cast<thrust::complex<double>*>(A), reinterpret_cast<thrust::complex<float>*>(B));
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void inplace_cast(long n, std::complex<float>* A, std::complex<double>* B)
{
hipLaunchKernelGGL(kernel_inplace_cast, dim3(1), dim3(MAX_THREADS_PER_DIM), 0, 0, n,
reinterpret_cast<thrust::complex<float>*>(A), reinterpret_cast<thrust::complex<double>*>(B));
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void inplace_cast(long n, std::complex<double>* A, std::complex<float>* B)
{
hipLaunchKernelGGL(kernel_inplace_cast, dim3(1), dim3(MAX_THREADS_PER_DIM), 0, 0, n,
reinterpret_cast<thrust::complex<double>*>(A), reinterpret_cast<thrust::complex<float>*>(B));
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
} // namespace kernels

View File

@ -13,7 +13,7 @@
#include <complex>
#include <thrust/complex.h>
#include <hip/hip_runtime.h>
#include "AFQMC/Memory/HIP/hip_utilities.h"
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
#include "AFQMC/Numerics/detail/HIP/Kernels/hip_settings.h"
namespace kernels
@ -38,8 +38,8 @@ void inplace_product(int nbatch, int n, int m, double const* B, int ldb, std::co
dim3 block_dim(1, MAX_THREADS_PER_DIM, 1);
hipLaunchKernelGGL(kernel_inplace_product, dim3(grid_dim), dim3(block_dim), 0, 0, nbatch, n, m, B, ldb,
reinterpret_cast<thrust::complex<double>*>(A), lda);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void inplace_product(int nbatch, int n, int m, std::complex<double> const* B, int ldb, std::complex<double>* A, int lda)
@ -50,8 +50,8 @@ void inplace_product(int nbatch, int n, int m, std::complex<double> const* B, in
hipLaunchKernelGGL(kernel_inplace_product, dim3(grid_dim), dim3(block_dim), 0, 0, nbatch, n, m,
reinterpret_cast<thrust::complex<double> const*>(B), ldb,
reinterpret_cast<thrust::complex<double>*>(A), lda);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void inplace_product(int nbatch, int n, int m, float const* B, int ldb, std::complex<float>* A, int lda)
@ -61,8 +61,8 @@ void inplace_product(int nbatch, int n, int m, float const* B, int ldb, std::com
dim3 block_dim(1, MAX_THREADS_PER_DIM, 1);
hipLaunchKernelGGL(kernel_inplace_product, dim3(grid_dim), dim3(block_dim), 0, 0, nbatch, n, m, B, ldb,
reinterpret_cast<thrust::complex<float>*>(A), lda);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void inplace_product(int nbatch, int n, int m, std::complex<float> const* B, int ldb, std::complex<float>* A, int lda)
@ -73,8 +73,8 @@ void inplace_product(int nbatch, int n, int m, std::complex<float> const* B, int
hipLaunchKernelGGL(kernel_inplace_product, dim3(grid_dim), dim3(block_dim), 0, 0, nbatch, n, m,
reinterpret_cast<thrust::complex<float> const*>(B), ldb,
reinterpret_cast<thrust::complex<float>*>(A), lda);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
} // namespace kernels

View File

@ -12,7 +12,7 @@
#include <complex>
#include <thrust/complex.h>
#include "AFQMC/Memory/HIP/hip_utilities.h"
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
namespace kernels
{
@ -79,112 +79,112 @@ __global__ void op_div__(thrust::complex<T>* x, thrust::complex<T> inc)
void op_plus(double* x, double inc)
{
hipLaunchKernelGGL(op_plus__, dim3(1), dim3(1), 0, 0, x, inc);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void op_plus(float* x, float inc)
{
hipLaunchKernelGGL(op_plus__, dim3(1), dim3(1), 0, 0, x, inc);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void op_plus(std::complex<double>* x, std::complex<double> inc)
{
hipLaunchKernelGGL(op_plus__, dim3(1), dim3(1), 0, 0, reinterpret_cast<thrust::complex<double>*>(x),
static_cast<thrust::complex<double>>(inc));
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void op_plus(std::complex<float>* x, std::complex<float> inc)
{
hipLaunchKernelGGL(op_plus__, dim3(1), dim3(1), 0, 0, reinterpret_cast<thrust::complex<float>*>(x),
static_cast<thrust::complex<float>>(inc));
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
// -=
void op_minus(double* x, double inc)
{
hipLaunchKernelGGL(op_minus__, dim3(1), dim3(1), 0, 0, x, inc);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void op_minus(float* x, float inc)
{
hipLaunchKernelGGL(op_minus__, dim3(1), dim3(1), 0, 0, x, inc);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void op_minus(std::complex<double>* x, std::complex<double> inc)
{
hipLaunchKernelGGL(op_minus__, dim3(1), dim3(1), 0, 0, reinterpret_cast<thrust::complex<double>*>(x),
static_cast<thrust::complex<double>>(inc));
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void op_minus(std::complex<float>* x, std::complex<float> inc)
{
hipLaunchKernelGGL(op_minus__, dim3(1), dim3(1), 0, 0, reinterpret_cast<thrust::complex<float>*>(x),
static_cast<thrust::complex<float>>(inc));
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
// *=
void op_times(double* x, double inc)
{
hipLaunchKernelGGL(op_times__, dim3(1), dim3(1), 0, 0, x, inc);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void op_times(float* x, float inc)
{
hipLaunchKernelGGL(op_times__, dim3(1), dim3(1), 0, 0, x, inc);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void op_times(std::complex<double>* x, std::complex<double> inc)
{
hipLaunchKernelGGL(op_times__, dim3(1), dim3(1), 0, 0, reinterpret_cast<thrust::complex<double>*>(x),
static_cast<thrust::complex<double>>(inc));
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void op_times(std::complex<float>* x, std::complex<float> inc)
{
hipLaunchKernelGGL(op_times__, dim3(1), dim3(1), 0, 0, reinterpret_cast<thrust::complex<float>*>(x),
static_cast<thrust::complex<float>>(inc));
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
// /=
void op_div(double* x, double inc)
{
hipLaunchKernelGGL(op_div__, dim3(1), dim3(1), 0, 0, x, inc);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void op_div(float* x, float inc)
{
hipLaunchKernelGGL(op_div__, dim3(1), dim3(1), 0, 0, x, inc);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void op_div(std::complex<double>* x, std::complex<double> inc)
{
hipLaunchKernelGGL(op_div__, dim3(1), dim3(1), 0, 0, reinterpret_cast<thrust::complex<double>*>(x),
static_cast<thrust::complex<double>>(inc));
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void op_div(std::complex<float>* x, std::complex<float> inc)
{
hipLaunchKernelGGL(op_div__, dim3(1), dim3(1), 0, 0, reinterpret_cast<thrust::complex<float>*>(x),
static_cast<thrust::complex<float>>(inc));
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}

View File

@ -22,40 +22,40 @@ namespace kernels
void sampleGaussianRNG(double* V, int n, rocrand_generator& gen)
{
qmc_hip::rocrand_check(rocrand_generate_normal_double(gen, V, n, 0.0, 1.0), "rocrand_generate_normal_double");
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
// Convert to double if really necessary
void sampleGaussianRNG(float* V, int n, rocrand_generator& gen)
{
qmc_hip::rocrand_check(rocrand_generate_normal(gen, V, n, float(0.0), float(1.0)), "rocrand_generate_normal");
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void sampleGaussianRNG(std::complex<double>* V, int n, rocrand_generator& gen)
{
qmc_hip::rocrand_check(rocrand_generate_normal_double(gen, reinterpret_cast<double*>(V), 2 * n, 0.0, 1.0),
"rocrand_generate_normal_double");
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
// hack hack hack!!!
kernels::zero_complex_part(n, V);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void sampleGaussianRNG(std::complex<float>* V, int n, rocrand_generator& gen)
{
qmc_hip::rocrand_check(rocrand_generate_normal(gen, reinterpret_cast<float*>(V), 2 * n, float(0.0), float(1.0)),
"rocrand_generate_normal");
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
// hack hack hack!!!
kernels::zero_complex_part(n, V);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
} // namespace kernels
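
The complex overloads of sampleGaussianRNG generate 2*n real normals into the complex buffer and then call kernels::zero_complex_part (the "hack" noted in the source), so the samples are real Gaussians stored in complex storage. A hypothetical stand-in for such a zeroing kernel, assuming interleaved thrust::complex storage (zero_imag is not the project's kernel name):

// Hypothetical sketch; not the project's kernels::zero_complex_part.
#include <hip/hip_runtime.h>
#include <thrust/complex.h>

__global__ void zero_imag(int n, thrust::complex<double>* V)
{
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n)
    V[i] = thrust::complex<double>(V[i].real(), 0.0); // keep the real sample, clear the imaginary part
}

void launch_zero_imag(int n, thrust::complex<double>* V)
{
  int block_dim = 256;
  hipLaunchKernelGGL(zero_imag, dim3((n + block_dim - 1) / block_dim), dim3(block_dim), 0, 0, n, V);
}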

View File

@ -14,7 +14,7 @@
#include <hip/hip_runtime.h>
#include <thrust/complex.h>
#include <hip/hip_runtime.h>
#include "AFQMC/Memory/HIP/hip_utilities.h"
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
namespace kernels
{
@ -98,8 +98,8 @@ void set_identity(int m, int n, double* A, int lda)
dim3 block_dim(xblock_dim, xblock_dim);
dim3 grid_dim(xgrid_dim, ygrid_dim);
hipLaunchKernelGGL(kernel_setIdentity, dim3(grid_dim), dim3(block_dim), 0, 0, m, n, A, lda);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void set_identity(int m, int n, float* A, int lda)
@ -110,8 +110,8 @@ void set_identity(int m, int n, float* A, int lda)
dim3 block_dim(xblock_dim, xblock_dim);
dim3 grid_dim(xgrid_dim, ygrid_dim);
hipLaunchKernelGGL(kernel_setIdentity, dim3(grid_dim), dim3(block_dim), 0, 0, m, n, A, lda);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void set_identity(int m, int n, std::complex<double>* A, int lda)
@ -123,8 +123,8 @@ void set_identity(int m, int n, std::complex<double>* A, int lda)
dim3 grid_dim(xgrid_dim, ygrid_dim);
hipLaunchKernelGGL(kernel_setIdentity, dim3(grid_dim), dim3(block_dim), 0, 0, m, n,
reinterpret_cast<thrust::complex<double>*>(A), lda);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void set_identity(int m, int n, std::complex<float>* A, int lda)
@ -136,8 +136,8 @@ void set_identity(int m, int n, std::complex<float>* A, int lda)
dim3 grid_dim(xgrid_dim, ygrid_dim);
hipLaunchKernelGGL(kernel_setIdentity, dim3(grid_dim), dim3(block_dim), 0, 0, m, n,
reinterpret_cast<thrust::complex<float>*>(A), lda);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void set_identity_strided(int nbatch, int stride, int m, int n, double* A, int lda)
@ -148,8 +148,8 @@ void set_identity_strided(int nbatch, int stride, int m, int n, double* A, int l
dim3 block_dim(xblock_dim, xblock_dim);
dim3 grid_dim(xgrid_dim, ygrid_dim, nbatch);
hipLaunchKernelGGL(kernel_setIdentity_strided, dim3(grid_dim), dim3(block_dim), 0, 0, nbatch, stride, m, n, A, lda);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void set_identity_strided(int nbatch, int stride, int m, int n, float* A, int lda)
@ -160,8 +160,8 @@ void set_identity_strided(int nbatch, int stride, int m, int n, float* A, int ld
dim3 block_dim(xblock_dim, xblock_dim);
dim3 grid_dim(xgrid_dim, ygrid_dim, nbatch);
hipLaunchKernelGGL(kernel_setIdentity_strided, dim3(grid_dim), dim3(block_dim), 0, 0, nbatch, stride, m, n, A, lda);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void set_identity_strided(int nbatch, int stride, int m, int n, std::complex<double>* A, int lda)
@ -173,8 +173,8 @@ void set_identity_strided(int nbatch, int stride, int m, int n, std::complex<dou
dim3 grid_dim(xgrid_dim, ygrid_dim, nbatch);
hipLaunchKernelGGL(kernel_setIdentity_strided, dim3(grid_dim), dim3(block_dim), 0, 0, nbatch, stride, m, n,
reinterpret_cast<thrust::complex<double>*>(A), lda);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void set_identity_strided(int nbatch, int stride, int m, int n, std::complex<float>* A, int lda)
@ -186,8 +186,8 @@ void set_identity_strided(int nbatch, int stride, int m, int n, std::complex<flo
dim3 grid_dim(xgrid_dim, ygrid_dim, nbatch);
hipLaunchKernelGGL(kernel_setIdentity_strided, dim3(grid_dim), dim3(block_dim), 0, 0, nbatch, stride, m, n,
reinterpret_cast<thrust::complex<float>*>(A), lda);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
} // namespace kernels
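
The set_identity launchers above use a 2D thread block (xblock_dim by xblock_dim) and a 2D grid over rows and columns (3D for the strided batched variants). A self-contained sketch of that indexing, assuming column-major storage with leading dimension lda (an assumption; the real kernel_setIdentity may index differently):

// Illustrative sketch only.
#include <hip/hip_runtime.h>

__global__ void set_identity_sketch(int m, int n, double* A, int lda)
{
  int i = blockIdx.x * blockDim.x + threadIdx.x; // row
  int j = blockIdx.y * blockDim.y + threadIdx.y; // column
  if (i < m && j < n)
    A[i + j * lda] = (i == j) ? 1.0 : 0.0;
}

void launch_set_identity_sketch(int m, int n, double* A, int lda)
{
  int xblock_dim = 32;
  dim3 block_dim(xblock_dim, xblock_dim);
  dim3 grid_dim((m + xblock_dim - 1) / xblock_dim, (n + xblock_dim - 1) / xblock_dim);
  hipLaunchKernelGGL(set_identity_sketch, grid_dim, block_dim, 0, 0, m, n, A, lda);
}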

View File

@ -15,7 +15,7 @@
#include <thrust/device_ptr.h>
#include "AFQMC/Numerics/detail/HIP/Kernels/strided_range.hpp"
#include "AFQMC/Numerics/detail/HIP/Kernels/strided_2Drange.hpp"
#include "AFQMC/Memory/HIP/hip_utilities.h"
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
namespace kernels
{
@ -32,8 +32,8 @@ double sum(int n, double const* x, int incx)
thrust::device_ptr<double const> x_(x);
strided_range<thrust::device_ptr<double const>> strided(x_, x_ + n * incx, incx);
double res = thrust::reduce(strided.begin(), strided.end());
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
return res;
}
@ -44,8 +44,8 @@ std::complex<double> sum(int n, std::complex<double> const* x, int incx)
double R = thrust::reduce(Rstrided.begin(), Rstrided.end());
strided_range<thrust::device_ptr<double const>> Istrided(x_ + 1, x_ + 1 + 2 * n * incx, 2 * incx);
double I = thrust::reduce(Istrided.begin(), Istrided.end());
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
return std::complex<double>(R, I);
}
@ -54,8 +54,8 @@ float sum(int n, float const* x, int incx)
thrust::device_ptr<float const> x_(x);
strided_range<thrust::device_ptr<float const>> strided(x_, x_ + n * incx, incx);
float res = thrust::reduce(strided.begin(), strided.end());
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
return res;
}
@ -66,8 +66,8 @@ std::complex<float> sum(int n, std::complex<float> const* x, int incx)
float R = thrust::reduce(Rstrided.begin(), Rstrided.end());
strided_range<thrust::device_ptr<float const>> Istrided(x_ + 1, x_ + 1 + 2 * n * incx, 2 * incx);
float I = thrust::reduce(Istrided.begin(), Istrided.end());
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
return std::complex<float>(R, I);
}
@ -76,8 +76,8 @@ double sum(int m, int n, double const* x, int lda)
thrust::device_ptr<double const> x_(x);
strided_2Drange<thrust::device_ptr<double const>> strided(x_, x_ + n * lda, lda, m);
double res = thrust::reduce(strided.begin(), strided.end());
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
return res;
}
@ -86,8 +86,8 @@ std::complex<double> sum(int m, int n, std::complex<double> const* x, int lda)
std::complex<double> res;
for (int i = 0; i < m; i++)
res += sum(n, x + i, lda);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
return res;
}
@ -103,8 +103,8 @@ std::complex<float> sum(int m, int n, std::complex<float> const* x, int lda)
std::complex<float> res;
for (int i = 0; i < m; i++)
res += sum(n, x + i, lda);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
return res;
}
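
The sum overloads above reduce a strided device array with Thrust, handling complex data by reducing the real and imaginary strides separately. A comparable strided reduction can be written with a counting iterator and transform_reduce, as in this sketch (strided_read and strided_sum are illustrative names, not the repository's strided_range helper):

// Illustrative sketch; x must point to device memory. Compile with hipcc.
#include <thrust/execution_policy.h>
#include <thrust/functional.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/transform_reduce.h>

struct strided_read
{
  double const* x;
  int incx;
  __host__ __device__ double operator()(int i) const { return x[i * incx]; }
};

double strided_sum(double const* x, int n, int incx)
{
  // Sums x[0], x[incx], ..., x[(n-1)*incx] on the device.
  return thrust::transform_reduce(thrust::device, thrust::counting_iterator<int>(0),
                                  thrust::counting_iterator<int>(n), strided_read{x, incx}, 0.0,
                                  thrust::plus<double>());
}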

View File

@ -16,7 +16,7 @@
#include <thrust/complex.h>
#include <hip/hip_runtime.h>
#include "AFQMC/Numerics/detail/HIP/Kernels/hip_settings.h"
#include "AFQMC/Memory/HIP/hip_utilities.h"
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
namespace kernels
{
@ -131,8 +131,8 @@ void term_by_term_mat_vec_plus(int dim,
hipLaunchKernelGGL(kernel_tbt_mv_plus, dim3(1), dim3(block_dim), 0, 0, dim, nrow, ncol,
reinterpret_cast<thrust::complex<double>*>(A), lda,
reinterpret_cast<thrust::complex<double> const*>(x), incx);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void term_by_term_mat_vec_minus(int dim,
@ -149,8 +149,8 @@ void term_by_term_mat_vec_minus(int dim,
hipLaunchKernelGGL(kernel_tbt_mv_minus, dim3(1), dim3(block_dim), 0, 0, dim, nrow, ncol,
reinterpret_cast<thrust::complex<double>*>(A), lda,
reinterpret_cast<thrust::complex<double> const*>(x), incx);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void term_by_term_mat_vec_mult(int dim,
@ -167,8 +167,8 @@ void term_by_term_mat_vec_mult(int dim,
hipLaunchKernelGGL(kernel_tbt_mv_mult, dim3(1), dim3(block_dim), 0, 0, dim, nrow, ncol,
reinterpret_cast<thrust::complex<double>*>(A), lda,
reinterpret_cast<thrust::complex<double> const*>(x), incx);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void term_by_term_mat_vec_div(int dim,
@ -185,8 +185,8 @@ void term_by_term_mat_vec_div(int dim,
hipLaunchKernelGGL(kernel_tbt_mv_div, dim3(1), dim3(block_dim), 0, 0, dim, nrow, ncol,
reinterpret_cast<thrust::complex<double>*>(A), lda,
reinterpret_cast<thrust::complex<double> const*>(x), incx);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void term_by_term_mat_vec_plus(int dim, int nrow, int ncol, std::complex<double>* A, int lda, double* x, int incx)
@ -196,8 +196,8 @@ void term_by_term_mat_vec_plus(int dim, int nrow, int ncol, std::complex<double>
dim3 block_dim(xblock_dim, yblock_dim, 1);
hipLaunchKernelGGL(kernel_tbt_mv_plus, dim3(1), dim3(block_dim), 0, 0, dim, nrow, ncol,
reinterpret_cast<thrust::complex<double>*>(A), lda, x, incx);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void term_by_term_mat_vec_minus(int dim, int nrow, int ncol, std::complex<double>* A, int lda, double* x, int incx)
@ -207,8 +207,8 @@ void term_by_term_mat_vec_minus(int dim, int nrow, int ncol, std::complex<double
dim3 block_dim(xblock_dim, yblock_dim, 1);
hipLaunchKernelGGL(kernel_tbt_mv_minus, dim3(1), dim3(block_dim), 0, 0, dim, nrow, ncol,
reinterpret_cast<thrust::complex<double>*>(A), lda, x, incx);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void term_by_term_mat_vec_mult(int dim, int nrow, int ncol, std::complex<double>* A, int lda, double* x, int incx)
@ -218,8 +218,8 @@ void term_by_term_mat_vec_mult(int dim, int nrow, int ncol, std::complex<double>
dim3 block_dim(xblock_dim, yblock_dim, 1);
hipLaunchKernelGGL(kernel_tbt_mv_mult, dim3(1), dim3(block_dim), 0, 0, dim, nrow, ncol,
reinterpret_cast<thrust::complex<double>*>(A), lda, x, incx);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void term_by_term_mat_vec_div(int dim, int nrow, int ncol, std::complex<double>* A, int lda, double* x, int incx)
@ -229,8 +229,8 @@ void term_by_term_mat_vec_div(int dim, int nrow, int ncol, std::complex<double>*
dim3 block_dim(xblock_dim, yblock_dim, 1);
hipLaunchKernelGGL(kernel_tbt_mv_div, dim3(1), dim3(block_dim), 0, 0, dim, nrow, ncol,
reinterpret_cast<thrust::complex<double>*>(A), lda, x, incx);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}

View File

@ -14,7 +14,7 @@
#include <complex>
#include <type_traits>
#include <thrust/complex.h>
#include "AFQMC/Memory/HIP/hip_utilities.h"
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
//#include "AFQMC/Numerics/detail/HIP/Kernels/strided_range.hpp"
namespace kernels
@ -32,44 +32,44 @@ __global__ void kernel_uninitialized_copy_n(Size N, T const* x, Size incx, T* ar
void uninitialized_copy_n(int N, double const* first, int incx, double* array, int incy)
{
hipLaunchKernelGGL(kernel_uninitialized_copy_n, dim3(1), dim3(256), 0, 0, N, first, incx, array, incy);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void uninitialized_copy_n(int N, std::complex<double> const* first, int incx, std::complex<double>* array, int incy)
{
hipLaunchKernelGGL(kernel_uninitialized_copy_n, dim3(1), dim3(256), 0, 0, N, first, incx, array, incy);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void uninitialized_copy_n(int N, int const* first, int incx, int* array, int incy)
{
hipLaunchKernelGGL(kernel_uninitialized_copy_n, dim3(1), dim3(256), 0, 0, N, first, incx, array, incy);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
// long
void uninitialized_copy_n(long N, double const* first, long incx, double* array, long incy)
{
hipLaunchKernelGGL(kernel_uninitialized_copy_n, dim3(1), dim3(256), 0, 0, N, first, incx, array, incy);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void uninitialized_copy_n(long N, std::complex<double> const* first, long incx, std::complex<double>* array, long incy)
{
hipLaunchKernelGGL(kernel_uninitialized_copy_n, dim3(1), dim3(256), 0, 0, N, first, incx, array, incy);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void uninitialized_copy_n(long N, int const* first, long incx, int* array, long incy)
{
hipLaunchKernelGGL(kernel_uninitialized_copy_n, dim3(1), dim3(256), 0, 0, N, first, incx, array, incy);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
} // namespace kernels

View File

@ -20,7 +20,7 @@
#include <thrust/host_vector.h>
#include <thrust/uninitialized_fill.h>
*/
#include "AFQMC/Memory/HIP/hip_utilities.h"
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
namespace kernels
{
@ -39,99 +39,99 @@ __global__ void kernel_uninitialized_fill_n(Size N, T* x, T const a)
void uninitialized_fill_n(bool* first, int N, bool const value)
{
hipLaunchKernelGGL(kernel_uninitialized_fill_n, dim3(1), dim3(256), 0, 0, N, first, value);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void uninitialized_fill_n(int* first, int N, int const value)
{
hipLaunchKernelGGL(kernel_uninitialized_fill_n, dim3(1), dim3(256), 0, 0, N, first, value);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void uninitialized_fill_n(float* first, int N, float const value)
{
hipLaunchKernelGGL(kernel_uninitialized_fill_n, dim3(1), dim3(256), 0, 0, N, first, value);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void uninitialized_fill_n(double* first, int N, double const value)
{
hipLaunchKernelGGL(kernel_uninitialized_fill_n, dim3(1), dim3(256), 0, 0, N, first, value);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void uninitialized_fill_n(std::complex<float>* first, int N, std::complex<float> const value)
{
hipLaunchKernelGGL(kernel_uninitialized_fill_n, dim3(1), dim3(256), 0, 0, N, first, value);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void uninitialized_fill_n(std::complex<double>* first, int N, std::complex<double> const value)
{
hipLaunchKernelGGL(kernel_uninitialized_fill_n, dim3(1), dim3(256), 0, 0, N, first, value);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void uninitialized_fill_n(double2* first, int N, double2 const value)
{
hipLaunchKernelGGL(kernel_uninitialized_fill_n, dim3(1), dim3(256), 0, 0, N, first, value);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void uninitialized_fill_n(bool* first, long N, bool const value)
{
hipLaunchKernelGGL(kernel_uninitialized_fill_n, dim3(1), dim3(256), 0, 0, N, first, value);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void uninitialized_fill_n(int* first, long N, int const value)
{
hipLaunchKernelGGL(kernel_uninitialized_fill_n, dim3(1), dim3(256), 0, 0, N, first, value);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void uninitialized_fill_n(float* first, long N, float const value)
{
hipLaunchKernelGGL(kernel_uninitialized_fill_n, dim3(1), dim3(256), 0, 0, N, first, value);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void uninitialized_fill_n(double* first, long N, double const value)
{
hipLaunchKernelGGL(kernel_uninitialized_fill_n, dim3(1), dim3(256), 0, 0, N, first, value);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void uninitialized_fill_n(std::complex<float>* first, long N, std::complex<float> const value)
{
hipLaunchKernelGGL(kernel_uninitialized_fill_n, dim3(1), dim3(256), 0, 0, N, first, value);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void uninitialized_fill_n(std::complex<double>* first, long N, std::complex<double> const value)
{
hipLaunchKernelGGL(kernel_uninitialized_fill_n, dim3(1), dim3(256), 0, 0, N, first, value);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void uninitialized_fill_n(double2* first, long N, double2 const value)
{
hipLaunchKernelGGL(kernel_uninitialized_fill_n, dim3(1), dim3(256), 0, 0, N, first, value);
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}

View File

@ -16,7 +16,7 @@
#include <thrust/complex.h>
#include <hip/hip_runtime.h>
#include "AFQMC/Numerics/detail/HIP/Kernels/hip_settings.h"
#include "AFQMC/Memory/HIP/hip_utilities.h"
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
namespace kernels
{
@ -188,8 +188,8 @@ void vKKwij_to_vwKiKj(int nwalk,
dim3 grid_dim(nkpts, nkpts, nwalk);
hipLaunchKernelGGL(kernel_vKKwij_to_vwKiKj, dim3(grid_dim), dim3(block_dim), 0, 0, nwalk, nkpts, nmo_max, nmo_tot, kk,
nmo, nmo0, A, B);
qmc_hip::hip_check(hipGetLastError(), "vKKwij_to_vwKiKj");
qmc_hip::hip_check(hipDeviceSynchronize(), "vKKwij_to_vwKiKj");
qmc_hip::hip_kernel_check(hipGetLastError(), "vKKwij_to_vwKiKj");
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "vKKwij_to_vwKiKj");
}
void vKKwij_to_vwKiKj(int nwalk,
@ -208,8 +208,8 @@ void vKKwij_to_vwKiKj(int nwalk,
dim3 grid_dim(nkpts, nkpts, nwalk);
hipLaunchKernelGGL(kernel_vKKwij_to_vwKiKj, dim3(grid_dim), dim3(block_dim), 0, 0, nwalk, nkpts, nmo_max, nmo_tot, kk,
nmo, nmo0, A, B);
qmc_hip::hip_check(hipGetLastError(), "vKKwij_to_vwKiKj");
qmc_hip::hip_check(hipDeviceSynchronize(), "vKKwij_to_vwKiKj");
qmc_hip::hip_kernel_check(hipGetLastError(), "vKKwij_to_vwKiKj");
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "vKKwij_to_vwKiKj");
}
void vKKwij_to_vwKiKj(int nwalk,
@ -228,8 +228,8 @@ void vKKwij_to_vwKiKj(int nwalk,
dim3 grid_dim(nkpts, nkpts, nwalk);
hipLaunchKernelGGL(kernel_vKKwij_to_vwKiKj, dim3(grid_dim), dim3(block_dim), 0, 0, nwalk, nkpts, nmo_max, nmo_tot, kk,
nmo, nmo0, A, B);
qmc_hip::hip_check(hipGetLastError(), "vKKwij_to_vwKiKj");
qmc_hip::hip_check(hipDeviceSynchronize(), "vKKwij_to_vwKiKj");
qmc_hip::hip_kernel_check(hipGetLastError(), "vKKwij_to_vwKiKj");
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "vKKwij_to_vwKiKj");
}
void vKKwij_to_vwKiKj(int nwalk,
@ -249,8 +249,8 @@ void vKKwij_to_vwKiKj(int nwalk,
hipLaunchKernelGGL(kernel_vKKwij_to_vwKiKj, dim3(grid_dim), dim3(block_dim), 0, 0, nwalk, nkpts, nmo_max, nmo_tot, kk,
nmo, nmo0, reinterpret_cast<thrust::complex<double> const*>(A),
reinterpret_cast<thrust::complex<double>*>(B));
qmc_hip::hip_check(hipGetLastError(), "vKKwij_to_vwKiKj");
qmc_hip::hip_check(hipDeviceSynchronize(), "vKKwij_to_vwKiKj");
qmc_hip::hip_kernel_check(hipGetLastError(), "vKKwij_to_vwKiKj");
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "vKKwij_to_vwKiKj");
}
void vKKwij_to_vwKiKj(int nwalk,
@ -270,8 +270,8 @@ void vKKwij_to_vwKiKj(int nwalk,
hipLaunchKernelGGL(kernel_vKKwij_to_vwKiKj, dim3(grid_dim), dim3(block_dim), 0, 0, nwalk, nkpts, nmo_max, nmo_tot, kk,
nmo, nmo0, reinterpret_cast<thrust::complex<float> const*>(A),
reinterpret_cast<thrust::complex<float>*>(B));
qmc_hip::hip_check(hipGetLastError(), "vKKwij_to_vwKiKj");
qmc_hip::hip_check(hipDeviceSynchronize(), "vKKwij_to_vwKiKj");
qmc_hip::hip_kernel_check(hipGetLastError(), "vKKwij_to_vwKiKj");
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "vKKwij_to_vwKiKj");
}
void vKKwij_to_vwKiKj(int nwalk,
@ -291,8 +291,8 @@ void vKKwij_to_vwKiKj(int nwalk,
hipLaunchKernelGGL(kernel_vKKwij_to_vwKiKj, dim3(grid_dim), dim3(block_dim), 0, 0, nwalk, nkpts, nmo_max, nmo_tot, kk,
nmo, nmo0, reinterpret_cast<thrust::complex<float> const*>(A),
reinterpret_cast<thrust::complex<double>*>(B));
qmc_hip::hip_check(hipGetLastError(), "vKKwij_to_vwKiKj");
qmc_hip::hip_check(hipDeviceSynchronize(), "vKKwij_to_vwKiKj");
qmc_hip::hip_kernel_check(hipGetLastError(), "vKKwij_to_vwKiKj");
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "vKKwij_to_vwKiKj");
}

View File

@ -16,7 +16,7 @@
#include <thrust/complex.h>
#include <hip/hip_runtime.h>
#include "AFQMC/Numerics/detail/HIP/Kernels/hip_settings.h"
#include "AFQMC/Memory/HIP/hip_utilities.h"
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
namespace kernels
{
@ -113,8 +113,8 @@ void vbias_from_v1(int nwalk,
ncholpQ, ncholpQ0, static_cast<thrust::complex<double> const>(alpha),
reinterpret_cast<thrust::complex<double> const*>(v1),
reinterpret_cast<thrust::complex<double>*>(vb));
qmc_hip::hip_check(hipGetLastError(), "vbias_from_v1");
qmc_hip::hip_check(hipDeviceSynchronize(), "vbias_from_v1");
qmc_hip::hip_kernel_check(hipGetLastError(), "vbias_from_v1");
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "vbias_from_v1");
}
void vbias_from_v1(int nwalk,
@ -136,8 +136,8 @@ void vbias_from_v1(int nwalk,
ncholpQ, ncholpQ0, static_cast<thrust::complex<float> const>(alpha),
reinterpret_cast<thrust::complex<float> const*>(v1),
reinterpret_cast<thrust::complex<float>*>(vb));
qmc_hip::hip_check(hipGetLastError(), "vbias_from_v1");
qmc_hip::hip_check(hipDeviceSynchronize(), "vbias_from_v1");
qmc_hip::hip_kernel_check(hipGetLastError(), "vbias_from_v1");
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "vbias_from_v1");
}
void vbias_from_v1(int nwalk,
@ -159,8 +159,8 @@ void vbias_from_v1(int nwalk,
ncholpQ, ncholpQ0, static_cast<thrust::complex<double> const>(alpha),
reinterpret_cast<thrust::complex<float> const*>(v1),
reinterpret_cast<thrust::complex<double>*>(vb));
qmc_hip::hip_check(hipGetLastError(), "vbias_from_v1");
qmc_hip::hip_check(hipDeviceSynchronize(), "vbias_from_v1");
qmc_hip::hip_kernel_check(hipGetLastError(), "vbias_from_v1");
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "vbias_from_v1");
}
} // namespace kernels

View File

@ -15,7 +15,7 @@
#include <hip/hip_runtime.h>
#include <thrust/complex.h>
#include <hip/hip_runtime.h>
#include "AFQMC/Memory/HIP/hip_utilities.h"
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
namespace kernels
{
@ -33,8 +33,8 @@ void zero_complex_part(int n, std::complex<double>* x)
int grid_dim = (n + block_dim - 1) / block_dim;
hipLaunchKernelGGL(kernel_zero_complex_part, dim3(grid_dim), dim3(block_dim), 0, 0, n,
reinterpret_cast<thrust::complex<double>*>(x));
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void zero_complex_part(int n, std::complex<float>* x)
@ -43,8 +43,8 @@ void zero_complex_part(int n, std::complex<float>* x)
int grid_dim = (n + block_dim - 1) / block_dim;
hipLaunchKernelGGL(kernel_zero_complex_part, dim3(grid_dim), dim3(block_dim), 0, 0, n,
reinterpret_cast<thrust::complex<float>*>(x));
qmc_hip::hip_check(hipGetLastError());
qmc_hip::hip_check(hipDeviceSynchronize());
qmc_hip::hip_kernel_check(hipGetLastError());
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
}
void zero_complex_part(int n, double* x) { return; }

View File

@ -50,9 +50,7 @@ template<typename T, typename Q>
inline static void copy(int n, T const* x, int incx, device_pointer<Q> y, int incy)
{
static_assert(std::is_same<typename std::decay<Q>::type, T>::value, "Wrong dispatch.\n");
if (hipSuccess !=
hipMemcpy2D(to_address(y), sizeof(Q) * incy, x, sizeof(T) * incx, sizeof(T), n, hipMemcpyHostToDevice))
throw std::runtime_error("Error: hipMemcpy2D returned error code.");
arch::memcopy2D(to_address(y), sizeof(Q) * incy, x, sizeof(T) * incx, sizeof(T), n, arch::memcopyH2D, "blas_hip_gpu_ptr::copy");
}
template<typename T, typename Q>
@ -60,9 +58,7 @@ inline static void copy(int n, device_pointer<Q> x, int incx, T* y, int incy)
{
static_assert(std::is_same<typename std::decay<Q>::type, T>::value, "Wrong dispatch.\n");
assert(sizeof(Q) == sizeof(T));
if (hipSuccess !=
hipMemcpy2D(y, sizeof(T) * incy, to_address(x), sizeof(Q) * incx, sizeof(T), n, hipMemcpyDeviceToHost))
throw std::runtime_error("Error: hipMemcpy2D returned error code.");
arch::memcopy2D(y, sizeof(T) * incy, to_address(x), sizeof(Q) * incx, sizeof(T), n, arch::memcopyD2H, "blas_hip_gpu_ptr::copy");
}
// scal Specializations
@ -325,7 +321,7 @@ inline static void gemmBatched(char Atrans,
{
static_assert(std::is_same<typename std::decay<Q1>::type, T>::value, "Wrong dispatch.\n");
static_assert(std::is_same<typename std::decay<Q2>::type, T>::value, "Wrong dispatch.\n");
// replace with single call to hipMalloc and hipMemcpy
// replace with single call to arch::malloc and arch::memcopy
T **A_d, **B_d, **C_d;
Q1** A_h;
Q2** B_h;
@ -339,17 +335,17 @@ inline static void gemmBatched(char Atrans,
B_h[i] = to_address(B[i]);
C_h[i] = to_address(C[i]);
}
hipMalloc((void**)&A_d, batchSize * sizeof(*A_h));
hipMalloc((void**)&B_d, batchSize * sizeof(*B_h));
hipMalloc((void**)&C_d, batchSize * sizeof(*C_h));
hipMemcpy(A_d, A_h, batchSize * sizeof(*A_h), hipMemcpyHostToDevice);
hipMemcpy(B_d, B_h, batchSize * sizeof(*B_h), hipMemcpyHostToDevice);
hipMemcpy(C_d, C_h, batchSize * sizeof(*C_h), hipMemcpyHostToDevice);
arch::malloc((void**)&A_d, batchSize * sizeof(*A_h));
arch::malloc((void**)&B_d, batchSize * sizeof(*B_h));
arch::malloc((void**)&C_d, batchSize * sizeof(*C_h));
arch::memcopy(A_d, A_h, batchSize * sizeof(*A_h), arch::memcopyH2D);
arch::memcopy(B_d, B_h, batchSize * sizeof(*B_h), arch::memcopyH2D);
arch::memcopy(C_d, C_h, batchSize * sizeof(*C_h), arch::memcopyH2D);
hipblas::hipblas_gemmBatched(*(A[0]).handles.hipblas_handle, Atrans, Btrans, M, N, K, alpha, A_d, lda, B_d, ldb, beta,
C_d, ldc, batchSize);
hipFree(A_d);
hipFree(B_d);
hipFree(C_d);
arch::free(A_d);
arch::free(B_d);
arch::free(C_d);
delete[] A_h;
delete[] B_h;
delete[] C_h;
@ -381,7 +377,7 @@ inline static void gemmBatched(char Atrans,
static_assert(std::is_same<typename std::decay<Q1>::type, T2>::value, "Wrong dispatch.\n");
static_assert(std::is_same<typename std::decay<Q2>::type, T>::value, "Wrong dispatch.\n");
assert(Atrans == 'N' || Atrans == 'n');
// replace with single call to hipMalloc and hipMemcpy
// replace with single call to arch::malloc and arch::memcopy
T2** A_d;
T** B_d;
T2** C_d;
@ -397,17 +393,17 @@ inline static void gemmBatched(char Atrans,
B_h[i] = to_address(B[i]);
C_h[i] = to_address(C[i]);
}
hipMalloc((void**)&A_d, batchSize * sizeof(*A_h));
hipMalloc((void**)&B_d, batchSize * sizeof(*B_h));
hipMalloc((void**)&C_d, batchSize * sizeof(*C_h));
hipMemcpy(A_d, A_h, batchSize * sizeof(*A_h), hipMemcpyHostToDevice);
hipMemcpy(B_d, B_h, batchSize * sizeof(*B_h), hipMemcpyHostToDevice);
hipMemcpy(C_d, C_h, batchSize * sizeof(*C_h), hipMemcpyHostToDevice);
arch::malloc((void**)&A_d, batchSize * sizeof(*A_h));
arch::malloc((void**)&B_d, batchSize * sizeof(*B_h));
arch::malloc((void**)&C_d, batchSize * sizeof(*C_h));
arch::memcopy(A_d, A_h, batchSize * sizeof(*A_h), arch::memcopyH2D);
arch::memcopy(B_d, B_h, batchSize * sizeof(*B_h), arch::memcopyH2D);
arch::memcopy(C_d, C_h, batchSize * sizeof(*C_h), arch::memcopyH2D);
hipblas::hipblas_gemmBatched(*(A[0]).handles.hipblas_handle, Atrans, Btrans, M, N, K, alpha, A_d, lda, B_d, ldb, beta,
C_d, ldc, batchSize);
hipFree(A_d);
hipFree(B_d);
hipFree(C_d);
arch::free(A_d);
arch::free(B_d);
arch::free(C_d);
delete[] A_h;
delete[] B_h;
delete[] C_h;
@ -461,28 +457,24 @@ template<typename T, typename T2>
inline static void copy2D(int N, int M, device_pointer<T> src, int lda, device_pointer<T2> dst, int ldb)
{
static_assert(std::is_same<typename std::decay<T>::type, T2>::value, "Wrong dispatch.\n");
if (hipSuccess !=
hipMemcpy2D(to_address(dst), sizeof(T2) * ldb, to_address(src), sizeof(T) * lda, M * sizeof(T), N,
hipMemcpyDeviceToDevice))
throw std::runtime_error("Error: hipMemcpy2D returned error code in copy2D.");
arch::memcopy2D(to_address(dst), sizeof(T2) * ldb, to_address(src), sizeof(T) * lda, M * sizeof(T), N,
arch::memcopyD2D, "blas_hip_gpu_ptr::copy2D");
}
template<typename T, typename T2>
inline static void copy2D(int N, int M, T const* src, int lda, device_pointer<T2> dst, int ldb)
{
static_assert(std::is_same<typename std::decay<T>::type, T2>::value, "Wrong dispatch.\n");
if (hipSuccess !=
hipMemcpy2D(to_address(dst), sizeof(T2) * ldb, src, sizeof(T) * lda, M * sizeof(T), N, hipMemcpyHostToDevice))
throw std::runtime_error("Error: hipMemcpy2D returned error code in copy2D.");
arch::memcopy2D(to_address(dst), sizeof(T2) * ldb, src, sizeof(T) * lda, M * sizeof(T), N,
arch::memcopyH2D, "blas_hip_gpu_ptr::copy2D");
}
template<typename T, typename T2>
inline static void copy2D(int N, int M, device_pointer<T> src, int lda, T2* dst, int ldb)
{
static_assert(std::is_same<typename std::decay<T>::type, T2>::value, "Wrong dispatch.\n");
if (hipSuccess !=
hipMemcpy2D(dst, sizeof(T2) * ldb, to_address(src), sizeof(T) * lda, M * sizeof(T), N, hipMemcpyDeviceToHost))
throw std::runtime_error("Error: hipMemcpy2D returned error code in copy2D.");
arch::memcopy2D(dst, sizeof(T2) * ldb, to_address(src), sizeof(T) * lda, M * sizeof(T), N,
arch::memcopyD2H, "blas_hip_gpu_ptr::copy2D");
}
template<typename T, typename T2>

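Editorial note: the gemmBatched hunks above keep a TODO about using a single arch::malloc and arch::memcopy for the A/B/C pointer arrays. A hedged sketch of one way to do that with the arch API adopted by this commit; the helper name and packing scheme are illustrative only.
#include <algorithm>
#include <vector>
#include "AFQMC/Memory/arch.hpp"
// Upload the A/B/C pointer arrays of a batched BLAS call with one allocation and one copy.
// The caller releases the returned block with arch::free().
template<typename T>
T** upload_batched_pointers(T* const* A_h, T* const* B_h, T* const* C_h, int batchSize,
                            T**& A_d, T**& B_d, T**& C_d)
{
  // pack the three host arrays back to back
  std::vector<T*> packed(3 * batchSize);
  std::copy_n(A_h, batchSize, packed.data());
  std::copy_n(B_h, batchSize, packed.data() + batchSize);
  std::copy_n(C_h, batchSize, packed.data() + 2 * batchSize);
  // one device block, one host-to-device transfer
  T** block_d;
  arch::malloc((void**)&block_d, 3 * batchSize * sizeof(T*));
  arch::memcopy(block_d, packed.data(), 3 * batchSize * sizeof(T*), arch::memcopyH2D);
  A_d = block_d;
  B_d = block_d + batchSize;
  C_d = block_d + 2 * batchSize;
  return block_d;
}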
View File

@ -18,7 +18,7 @@
namespace qmc_hip
{
void hip_check(hipError_t sucess, std::string message)
void hip_kernel_check(hipError_t sucess, std::string message)
{
if (hipSuccess != sucess)
{

View File

@ -20,7 +20,7 @@
namespace qmc_hip
{
void hip_check(hipError_t sucess, std::string message = "");
void hip_kernel_check(hipError_t sucess, std::string message = "");
void rocrand_check(rocrand_status sucess, std::string message = "");
} // namespace qmc_hip
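Editorial note: only the opening of the renamed hip_kernel_check definition appears in the earlier hunk. A plausible body, assuming it simply reports the HIP error string and throws; the message format is an assumption, not taken from the source.
#include <hip/hip_runtime.h>
#include <stdexcept>
#include <string>
namespace qmc_hip
{
void hip_kernel_check(hipError_t success, std::string message)
{
  if (hipSuccess != success)
  {
    // hipGetErrorString converts the status code into readable text
    throw std::runtime_error("HIP kernel error: " + std::string(hipGetErrorString(success)) +
                             (message.empty() ? "" : " (" + message + ")"));
  }
}
} // namespace qmc_hip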

View File

@ -1,29 +0,0 @@
#include "hipblas.h"
#include "AFQMC/Numerics/detail/HIP/hipblas_utils.h"
namespace hipblas
{
hipblasStatus_t rocBLASStatusToHIPStatusAFQMC(rocblas_status_ error)
{
switch (error)
{
case rocblas_status_success:
return HIPBLAS_STATUS_SUCCESS;
case rocblas_status_invalid_handle:
return HIPBLAS_STATUS_NOT_INITIALIZED;
case rocblas_status_not_implemented:
return HIPBLAS_STATUS_NOT_SUPPORTED;
case rocblas_status_invalid_pointer:
return HIPBLAS_STATUS_INVALID_VALUE;
case rocblas_status_invalid_size:
return HIPBLAS_STATUS_INVALID_VALUE;
case rocblas_status_memory_error:
return HIPBLAS_STATUS_ALLOC_FAILED;
case rocblas_status_internal_error:
return HIPBLAS_STATUS_INTERNAL_ERROR;
default:
throw "Unimplemented status";
}
}
} // namespace hipblas

View File

@ -1,28 +0,0 @@
//////////////////////////////////////////////////////////////////////
// This file is distributed under the University of Illinois/NCSA Open Source
// License. See LICENSE file in top directory for details.
//
// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
//
// File developed by:
// Lawrence Livermore National Laboratory
//
// File created by:
// Miguel A. Morales, moralessilva2@llnl.gov
// Lawrence Livermore National Laboratory
////////////////////////////////////////////////////////////////////////////////
#ifndef AFQMC_HIPBLAS_UTILS_H
#define AFQMC_HIPBLAS_UTILS_H
#include "hipblas.h"
#include "rocblas.h"
namespace hipblas
{
// TODO: Temporary hack waiting for upstream version of hipblas
hipblasStatus_t rocBLASStatusToHIPStatusAFQMC(rocblas_status_ error);
} // namespace hipblas
#endif

View File

@ -17,12 +17,10 @@
#include "hipblas.h"
#include "rocsolver.h"
#include "AFQMC/Memory/HIP/hip_utilities.h"
#include "AFQMC/Numerics/detail/HIP/hipblas_utils.h"
namespace hipblas
{
using qmc_hip::hipblasOperation;
using qmc_hip::rocblasOperation;
// Level-1
inline hipblasStatus_t hipblas_copy(hipblasHandle_t handle, int n, float* x, int incx, float* y, int incy)
@ -821,13 +819,11 @@ inline hipblasStatus_t hipblas_geam(hipblasHandle_t handle,
std::complex<float>* C,
int ldc)
{
hipblasStatus_t success = rocBLASStatusToHIPStatusAFQMC(
rocblas_cgeam((rocblas_handle)handle, rocblasOperation(Atrans), rocblasOperation(Btrans), M, N,
reinterpret_cast<rocblas_float_complex const*>(&alpha),
reinterpret_cast<rocblas_float_complex const*>(A), lda,
reinterpret_cast<rocblas_float_complex const*>(&beta),
reinterpret_cast<rocblas_float_complex const*>(B), ldb, reinterpret_cast<rocblas_float_complex*>(C),
ldc));
hipblasStatus_t success =
hipblasCgeam(handle, hipblasOperation(Atrans), hipblasOperation(Btrans), M, N,
reinterpret_cast<hipblasComplex const*>(&alpha), reinterpret_cast<hipblasComplex const*>(A), lda,
reinterpret_cast<hipblasComplex const*>(&beta), reinterpret_cast<hipblasComplex const*>(B), ldb,
reinterpret_cast<hipblasComplex*>(C), ldc);
hipDeviceSynchronize();
return success;
}
@ -846,13 +842,12 @@ inline hipblasStatus_t hipblas_geam(hipblasHandle_t handle,
std::complex<double>* C,
int ldc)
{
hipblasStatus_t success = rocBLASStatusToHIPStatusAFQMC(
rocblas_zgeam((rocblas_handle)handle, rocblasOperation(Atrans), rocblasOperation(Btrans), M, N,
reinterpret_cast<rocblas_double_complex const*>(&alpha),
reinterpret_cast<rocblas_double_complex const*>(A), lda,
reinterpret_cast<rocblas_double_complex const*>(&beta),
reinterpret_cast<rocblas_double_complex const*>(B), ldb,
reinterpret_cast<rocblas_double_complex*>(C), ldc));
hipblasStatus_t success = hipblasZgeam(handle, hipblasOperation(Atrans), hipblasOperation(Btrans), M, N,
reinterpret_cast<hipblasDoubleComplex const*>(&alpha),
reinterpret_cast<hipblasDoubleComplex const*>(A), lda,
reinterpret_cast<hipblasDoubleComplex const*>(&beta),
reinterpret_cast<hipblasDoubleComplex const*>(B), ldb,
reinterpret_cast<hipblasDoubleComplex*>(C), ldc);
hipDeviceSynchronize();
return success;
}
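Editorial note: the two geam hunks above drop the rocBLAS status translation and call the native hipblasCgeam / hipblasZgeam entry points directly. GEAM computes C = alpha*op(A) + beta*op(B); a minimal hedged usage sketch for a plain matrix addition follows (column-major layout, matrix sizes and handle setup assumed).
#include <complex>
#include "hipblas.h"
// C = alpha*A + beta*B for m x n column-major matrices, no transposes.
inline hipblasStatus_t zgeam_add(hipblasHandle_t handle, int m, int n,
                                 std::complex<double> alpha, const std::complex<double>* A, int lda,
                                 std::complex<double> beta, const std::complex<double>* B, int ldb,
                                 std::complex<double>* C, int ldc)
{
  return hipblasZgeam(handle, HIPBLAS_OP_N, HIPBLAS_OP_N, m, n,
                      reinterpret_cast<const hipblasDoubleComplex*>(&alpha),
                      reinterpret_cast<const hipblasDoubleComplex*>(A), lda,
                      reinterpret_cast<const hipblasDoubleComplex*>(&beta),
                      reinterpret_cast<const hipblasDoubleComplex*>(B), ldb,
                      reinterpret_cast<hipblasDoubleComplex*>(C), ldc);
}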

View File

@ -69,21 +69,15 @@ inline static void getrf(const int n,
int& st,
device_pointer<R> work)
{
//rocsolver_handle handle;
//rocsolver_create_handle(&handle);
//std::cout << (*a.handles.rocsolver_handle_)->device_arch_id() << std::endl;
rocsolverStatus_t status = rocsolver::rocsolver_getrf(*a.handles.rocsolver_handle_, n, m, to_address(a), lda,
to_address(work), to_address(piv), to_address(piv) + n);
//rocsolverStatus_t status = rocsolver::rocsolver_getrf(handle, n, m,
//to_address(a), lda, to_address(work), to_address(piv), to_address(piv)+n);
hipMemcpy(&st, to_address(piv) + n, sizeof(int), hipMemcpyDeviceToHost);
arch::memcopy(&st, to_address(piv) + n, sizeof(int), arch::memcopyD2H);
if (rocblas_status_success != status)
{
std::cerr << " hipblas_getrf status, info: " << status << " " << st << std::endl;
std::cerr.flush();
throw std::runtime_error("Error: hipblas_getrf returned error code.");
}
//rocsolver_destroy_handle(handle);
}
// getrfBatched
@ -100,13 +94,13 @@ inline static void getrfBatched(const int n,
A_h = new T*[batchSize];
for (int i = 0; i < batchSize; i++)
A_h[i] = to_address(a[i]);
hipMalloc((void**)&A_d, batchSize * sizeof(*A_h));
hipMemcpy(A_d, A_h, batchSize * sizeof(*A_h), hipMemcpyHostToDevice);
arch::malloc((void**)&A_d, batchSize * sizeof(*A_h));
arch::memcopy(A_d, A_h, batchSize * sizeof(*A_h), arch::memcopyH2D);
hipblasStatus_t status = hipblas::hipblas_getrfBatched(*(a[0]).handles.hipblas_handle, n, A_d, lda, to_address(piv),
to_address(info), batchSize);
if (HIPBLAS_STATUS_SUCCESS != status)
throw std::runtime_error("Error: hipblas_getrf returned error code.");
hipFree(A_d);
arch::free(A_d);
delete[] A_h;
}
@ -135,11 +129,7 @@ inline static void getri(int n,
// info isn't returned from ?getrs.
int* info;
if (hipSuccess != hipMalloc((void**)&info, sizeof(int)))
{
std::cerr << " Error getri: Error allocating on GPU." << std::endl;
throw std::runtime_error("Error: hipMalloc returned error code.");
}
arch::malloc((void**)&info, sizeof(int), "lapack_hip_gpu_ptr::getri");
kernels::set_identity(n, n, to_address(work), n);
//std::cout << (*work).num_elements() << " " << (*a).num_elements() << std::endl;
@ -147,10 +137,10 @@ inline static void getri(int n,
rocsolver::rocsolver_getrs(*a.handles.rocsolver_handle_, rocblas_operation_none, n, n, to_address(a), lda,
to_address(piv), to_address(work), n, info))
throw std::runtime_error("Error: rocsolver_getrs returned error code.");
hipMemcpy(to_address(a), to_address(work), n * n * sizeof(T), hipMemcpyDeviceToDevice);
//hipMemcpy(&status,info,sizeof(int),hipMemcpyDeviceToHost);
arch::memcopy(to_address(a), to_address(work), n * n * sizeof(T), arch::memcopyD2D);
//arch::memcopy(&status,info,sizeof(int),arch::memcopyD2H);
status = 0;
hipFree(info);
arch::free(info);
}
// getriBatched
@ -174,16 +164,16 @@ inline static void getriBatched(int n,
A_h[i] = to_address(a[i]);
C_h[i] = to_address(ainv[i]);
}
hipMalloc((void**)&A_d, batchSize * sizeof(*A_h));
hipMalloc((void**)&C_d, batchSize * sizeof(*C_h));
hipMemcpy(A_d, A_h, batchSize * sizeof(*A_h), hipMemcpyHostToDevice);
hipMemcpy(C_d, C_h, batchSize * sizeof(*C_h), hipMemcpyHostToDevice);
arch::malloc((void**)&A_d, batchSize * sizeof(*A_h));
arch::malloc((void**)&C_d, batchSize * sizeof(*C_h));
arch::memcopy(A_d, A_h, batchSize * sizeof(*A_h), arch::memcopyH2D);
arch::memcopy(C_d, C_h, batchSize * sizeof(*C_h), arch::memcopyH2D);
hipblasStatus_t status = hipblas::hipblas_getriBatched(*(a[0]).handles.hipblas_handle, HIPBLAS_OP_N, n, n, A_d, lda,
to_address(piv), C_d, ldc, to_address(info), batchSize);
if (HIPBLAS_STATUS_SUCCESS != status)
throw std::runtime_error("Error: hipblas_getri returned error code.");
hipFree(A_d);
hipFree(C_d);
arch::free(A_d);
arch::free(C_d);
delete[] A_h;
delete[] C_h;
}
@ -207,16 +197,16 @@ inline static void matinvBatched(int n,
A_h[i] = to_address(a[i]);
C_h[i] = to_address(ainv[i]);
}
hipMalloc((void**)&A_d, batchSize * sizeof(*A_h));
hipMalloc((void**)&C_d, batchSize * sizeof(*C_h));
hipMemcpy(A_d, A_h, batchSize * sizeof(*A_h), hipMemcpyHostToDevice);
hipMemcpy(C_d, C_h, batchSize * sizeof(*C_h), hipMemcpyHostToDevice);
arch::malloc((void**)&A_d, batchSize * sizeof(*A_h));
arch::malloc((void**)&C_d, batchSize * sizeof(*C_h));
arch::memcopy(A_d, A_h, batchSize * sizeof(*A_h), arch::memcopyH2D);
arch::memcopy(C_d, C_h, batchSize * sizeof(*C_h), arch::memcopyH2D);
hipblasStatus_t status = hipblas::hipblas_matinvBatched(*(a[0]).handles.hipblas_handle, n, A_d, lda, C_d, lda_inv,
to_address(info), batchSize);
if (HIPBLAS_STATUS_SUCCESS != status)
throw std::runtime_error("Error: hipblas_matinv returned error code.");
hipFree(A_d);
hipFree(C_d);
arch::free(A_d);
arch::free(C_d);
delete[] A_h;
delete[] C_h;
}
@ -244,20 +234,20 @@ inline static void geqrf(int M,
//size_t dim = std::min(M,N);
//std::vector<T> piv(dim,0.0);
//T* dpiv;
//if(hipSuccess != hipMalloc(&dpiv,sizeof(T)*dim)) {
//if(hipSuccess != arch::malloc(&dpiv,sizeof(T)*dim)) {
//std::cerr << " Error gqr: Error allocating piv on GPU." << std::endl;
//throw std::runtime_error("Error: hipMalloc returned error code.");
//throw std::runtime_error("Error: arch::malloc returned error code.");
//}
rocsolverStatus_t status =
rocsolver::rocsolver_geqrf(*A.handles.rocsolver_handle_, M, N, to_address(A), LDA, to_address(TAU));
//hipMemcpy(piv.data(),dpiv,sizeof(T)*dim,hipMemcpyDeviceToHost);
//arch::memcopy(piv.data(),dpiv,sizeof(T)*dim,arch::memcopyD2H);
if (rocblas_status_success != status)
{
std::cerr << " hipblas_geqrf status, info: " << status << std::endl;
std::cerr.flush();
throw std::runtime_error("Error: hipblas_geqrf returned error code.");
}
//hipFree(dpiv);
//arch::free(dpiv);
INFO = 0;
}
@ -313,7 +303,7 @@ void static gqr(int M,
}
// Not returned from rocm
INFO = 0;
//hipFree(piv);
//arch::free(piv);
}
template<typename T, typename I>
@ -375,8 +365,8 @@ void static glq(int M,
//T_h[i] = to_address(TAU[i]);
//T **B_d;
//std::vector<int> inf(batchSize);
//hipMalloc((void **)&B_d, 2*batchSize*sizeof(*B_h));
//hipMemcpy(B_d, B_h, 2*batchSize*sizeof(*B_h), hipMemcpyHostToDevice);
//arch::malloc((void **)&B_d, 2*batchSize*sizeof(*B_h));
//arch::memcopy(B_d, B_h, 2*batchSize*sizeof(*B_h), arch::memcopyH2D);
//T **A_d(B_d);
//T **T_d(B_d+batchSize);
//int pstride = std::min(M,N);
@ -385,7 +375,7 @@ void static glq(int M,
//pstride, batchSize);
//if(rocblas_status_success != status)
//throw std::runtime_error("Error: hipblas_geqrfBatched returned error code.");
//hipFree(B_d);
//arch::free(B_d);
//delete [] B_h;
//}
@ -404,13 +394,13 @@ inline static void geqrfStrided(int M,
for (int i = 0; i < batchSize; i++)
A_h[i] = to_address(A) + i * Astride;
T** A_d;
hipMalloc((void**)&A_d, batchSize * sizeof(*A_h));
hipMemcpy(A_d, A_h, batchSize * sizeof(*A_h), hipMemcpyHostToDevice);
arch::malloc((void**)&A_d, batchSize * sizeof(*A_h));
arch::memcopy(A_d, A_h, batchSize * sizeof(*A_h), arch::memcopyH2D);
rocsolverStatus_t status = rocsolver::rocsolver_geqrf_batched(*A.handles.rocsolver_handle_, M, N, A_d, LDA,
to_address(TAU), Tstride, batchSize);
if (rocblas_status_success != status)
throw std::runtime_error("Error: hipblas_geqrfStrided returned error code.");
hipFree(A_d);
arch::free(A_d);
delete[] A_h;
}
@ -438,18 +428,18 @@ inline static void gesvd(char jobU,
int& st)
{
int* devSt;
hipMalloc((void**)&devSt, sizeof(int));
arch::malloc((void**)&devSt, sizeof(int));
rocsolverStatus_t status =
rocsolver::rocsolver_gesvd(*A.handles.rocsolver_handle_, jobU, jobVT, m, n, to_address(A), lda, to_address(S),
to_address(U), ldu, to_address(VT), ldvt, to_address(W), lw, devSt);
hipMemcpy(&st, devSt, sizeof(int), hipMemcpyDeviceToHost);
arch::memcopy(&st, devSt, sizeof(int), arch::memcopyD2H);
if (rocblas_status_success != status)
{
std::cerr << " hipblas_gesvd status, info: " << status << " " << st << std::endl;
std::cerr.flush();
throw std::runtime_error("Error: hipblas_gesvd returned error code.");
}
hipFree(devSt);
arch::free(devSt);
}
template<typename T, typename R>
@ -470,18 +460,18 @@ inline static void gesvd(char jobU,
int& st)
{
int* devSt;
hipMalloc((void**)&devSt, sizeof(int));
arch::malloc((void**)&devSt, sizeof(int));
rocsolverStatus_t status =
rocsolver::rocsolver_gesvd(*A.handles.rocsolver_handle_, jobU, jobVT, m, n, to_address(A), lda, to_address(S),
to_address(U), ldu, to_address(VT), ldvt, to_address(W), lw, devSt);
hipMemcpy(&st, devSt, sizeof(int), hipMemcpyDeviceToHost);
arch::memcopy(&st, devSt, sizeof(int), arch::memcopyD2H);
if (rocblas_status_success != status)
{
std::cerr << " hipblas_gesvd status, info: " << status << " " << st << std::endl;
std::cerr.flush();
throw std::runtime_error("Error: hipblas_gesvd returned error code.");
}
hipFree(devSt);
arch::free(devSt);
}

View File

@ -467,7 +467,7 @@ inline rocsolverStatus_t rocsolver_gqr_strided(rocsolverHandle_t handle,
qmc_hip::hip_check(hipDeviceSynchronize(), "rocsolver_gqr_strided");
qmc_hip::hipsolver_check(success, "rocsolver_gqr_strided");
}
//qmc_hip::hip_check(hipDeviceSynchronize(),"rocsolver_gqr_strided");
qmc_hip::hip_check(hipDeviceSynchronize(),"rocsolver_gqr_strided");
qmc_hip::hip_check(hipGetLastError(), "rocsolver_gqr_strided");
//qmc_hip::hipsolver_check(rocsolver_set_stream(handle,s0), "rocsolver_setStream");

View File

@ -46,10 +46,8 @@ void csrmv(const char transa,
static_assert(std::is_same<typename std::decay<Q>::type, T>::value, "Wrong dispatch.\n");
// somehow need to check if the matrix is compact!
int pb, pe;
if (hipSuccess != hipMemcpy(std::addressof(pb), to_address(pntrb), sizeof(int), hipMemcpyDeviceToHost))
throw std::runtime_error("Error: hipMemcpy returned error code in csrmv.");
if (hipSuccess != hipMemcpy(std::addressof(pe), to_address(pntre + (M - 1)), sizeof(int), hipMemcpyDeviceToHost))
throw std::runtime_error("Error: hipMemcpy returned error code in csrmv.");
arch::memcopy(std::addressof(pb), to_address(pntrb), sizeof(int), arch::memcopyD2H, "sparse_hip_gpu_ptr::csrmv");
arch::memcopy(std::addressof(pe), to_address(pntre + (M - 1)), sizeof(int), arch::memcopyD2H, "sparse_hip_gpu_ptr::csrmv");
int nnz = pe - pb;
if (HIPSPARSE_STATUS_SUCCESS !=
hipsparse::hipsparse_csrmv(*A.handles.hipsparse_handle, transa, M, K, nnz, alpha,
@ -78,10 +76,8 @@ void csrmm(const char transa,
static_assert(std::is_same<typename std::decay<Q>::type, T>::value, "Wrong dispatch.\n");
// somehow need to check if the matrix is compact!
int pb, pe;
if (hipSuccess != hipMemcpy(std::addressof(pb), to_address(pntrb), sizeof(int), hipMemcpyDeviceToHost))
throw std::runtime_error("Error: hipMemcpy returned error code in csrmm.");
if (hipSuccess != hipMemcpy(std::addressof(pe), to_address(pntre + (M - 1)), sizeof(int), hipMemcpyDeviceToHost))
throw std::runtime_error("Error: hipMemcpy returned error code in csrmm.");
arch::memcopy(std::addressof(pb), to_address(pntrb), sizeof(int), arch::memcopyD2H, "sparse_hip_gpu_ptr::csrmm");
arch::memcopy(std::addressof(pe), to_address(pntre + (M - 1)), sizeof(int), arch::memcopyD2H, "sparse_hip_gpu_ptr::csrmm");
int nnz = pe - pb;
if (transa == 'N')
{

View File

@ -0,0 +1,17 @@
#////////////////////////////////////////////////////////////////////////////////////////////
#// This file is distributed under the University of Illinois/NCSA Open Source License.
#// See LICENSE file in top directory for details.
#//
#// Copyright (c) 2020 QMCPACK developers.
#//
#// File developed by: Fionn Malone malone14@llnl.gov, Lawrence Livermore National Laboratory
#//
#// File created by: Fionn Malone malone14@llnl.gov, Lawrence Livermore National Laboratory
#////////////////////////////////////////////////////////////////////////////////////////////
MESSAGE("Building AFQMC performance executable ")
SET(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${qmcpack_BINARY_DIR}/bin)
ADD_EXECUTABLE(qmc-afqmc-performance performance.cpp)
TARGET_LINK_LIBRARIES(qmc-afqmc-performance afqmc Math::BLAS_LAPACK)

View File

@ -0,0 +1,424 @@
///////////////////////////////////////////////////////////////////////////////
// This file is distributed under the University of Illinois/NCSA Open Source
// License. See LICENSE file in top directory for details.
//
// Copyright (c) 2020 QMCPACK developers.
//
// File developed by: Fionn Malone, malone14@llnl.gov, LLNL
//
// File created by: Fionn Malone, malone14@llnl.gov, LLNL
////////////////////////////////////////////////////////////////////////////////
#include "Configuration.h"
#include <vector>
#include <random>
#include "Utilities/Timer.h"
#include "AFQMC/config.h"
#include "AFQMC/config.0.h"
#include "AFQMC/Numerics/ma_blas.hpp"
#include "AFQMC/Numerics/batched_operations.hpp"
#include "AFQMC/Numerics/ma_operations.hpp"
#include "AFQMC/Matrix/tests/matrix_helpers.h"
#include "AFQMC/Memory/buffer_allocators.h"
#include "AFQMC/Memory/arch.hpp"
#include "multi/array.hpp"
#include "multi/array_ref.hpp"
using namespace qmcplusplus;
using namespace afqmc;
using std::copy_n;
#if defined(ENABLE_CUDA) || defined(ENABLE_HIP)
template<typename T>
using Alloc = device::device_allocator<T>;
#else
template<typename T>
using Alloc = std::allocator<T>;
#endif
template<typename T>
using pointer = typename Alloc<T>::pointer;
//template<typename T>
//using buffer_alloc_type = device_buffer_type<T>;
//using buffer_ialloc_type = device_buffer_type<int>;
template<typename T>
using Tensor1D = boost::multi::array<T, 1, Alloc<T>>;
template<typename T>
using Tensor2D = boost::multi::array<T, 2, Alloc<T>>;
template<typename T>
using Tensor3D = boost::multi::array<T, 3, Alloc<T>>;
template<typename T>
using Tensor3D_ref = boost::multi::array_ref<T, 3, pointer<T>>;
template<typename T>
using Tensor2D_ref = boost::multi::array_ref<T, 2, pointer<T>>;
template<typename T>
using Tensor1D_ref = boost::multi::array_ref<T, 1, pointer<T>>;
template<typename T>
void fillRandomMatrix(std::vector<T>& vec)
{
std::mt19937 generator(0);
std::normal_distribution<T> distribution(0.0, 1.0);
// avoid uninitialized warning
T tmp = distribution(generator);
for (int i = 0; i < vec.size(); i++)
{
T val = distribution(generator);
vec[i] = val;
}
}
template<typename T>
void fillRandomMatrix(std::vector<std::complex<T>>& vec)
{
std::mt19937 generator(0);
std::normal_distribution<T> distribution(0.0, 1.0);
T tmp = distribution(generator);
for (int i = 0; i < vec.size(); i++)
{
T re = distribution(generator);
T im = distribution(generator);
vec[i] = std::complex<T>(re, im);
}
}
template<class Allocator, class Buff>
void timeBatchedQR(std::ostream& out, Allocator& alloc, Buff& buffer, int nbatch, int m, int n)
{
using T = typename Allocator::value_type;
int offset = 0;
Tensor3D_ref<T> A(buffer.origin(), {nbatch, m, n});
offset += A.num_elements();
Tensor3D_ref<T> AT(buffer.origin() + offset, {nbatch, n, m});
offset += AT.num_elements();
Tensor2D_ref<T> T_(buffer.origin() + offset, {nbatch, m});
offset += T_.num_elements();
Tensor2D_ref<T> scl(buffer.origin() + offset, {nbatch, m});
offset += T_.num_elements();
int sz = ma::gqr_optimal_workspace_size(AT[0]);
//std::cout << buffer.num_elements() << " " << 2*nbatch*m*n + 2*nbatch*m + nbatch*sz << " " << offset << std::endl;
Tensor1D_ref<T> WORK(buffer.origin() + offset, boost::multi::iextensions<1u>{nbatch * sz});
Alloc<int> ialloc{};
std::vector<pointer<T>> Aarray;
Tensor1D<int> IWORK(boost::multi::iextensions<1u>{nbatch * (m + 1)}, ialloc);
using std::copy_n;
for (int i = 0; i < nbatch; i++)
{
Aarray.emplace_back(A[i].origin());
}
// Actual profile.
Timer timer;
for (int i = 0; i < nbatch; i++)
ma::transpose(A[i], AT[i]);
double ttrans = timer.elapsed();
timer.restart();
geqrfStrided(m, n, AT.origin(), m, m * n, T_.origin(), m, IWORK.origin(), nbatch);
double tgeqrf = timer.elapsed();
timer.restart();
for (int i = 0; i < nbatch; i++)
determinant_from_geqrf(n, AT[i].origin(), m, scl[i].origin(), T(0.0));
double tdet = timer.elapsed();
timer.restart();
gqrStrided(m, n, n, AT.origin(), m, m * n, T_.origin(), m, WORK.origin(), sz, IWORK.origin(), nbatch);
double tgqr = timer.elapsed();
out << " " << std::setw(5) << nbatch << " " << std::setw(5) << m << " " << std::setw(5) << n << " "
<< std::scientific << ttrans << " " << tgeqrf << " " << tdet << " " << tgqr << "\n";
}
template<class Allocator, class Buff>
void timeQR(std::ostream& out, Allocator& alloc, Buff& buffer, int m)
{
using T = typename Allocator::value_type;
int offset = 0;
Tensor2D_ref<T> A(buffer.origin(), {m, m});
offset += A.num_elements();
Tensor1D_ref<T> TAU(buffer.origin() + offset, {m});
offset += TAU.num_elements();
int sz = ma::gqr_optimal_workspace_size(A);
Tensor1D_ref<T> WORK(buffer.origin() + offset, boost::multi::iextensions<1u>{sz});
Timer timer;
using ma::geqrf;
geqrf(A, TAU, WORK);
double tgeqrf = timer.elapsed();
using ma::gqr;
timer.restart();
gqr(A, TAU, WORK);
double tgqr = timer.elapsed();
out << " " << std::setw(5) << m << " " << std::setw(5) << m << " " << std::scientific << tgeqrf << " "
<< " " << tgqr << "\n";
}
template<class Allocator, class Buff>
void timeExchangeKernel(std::ostream& out, Allocator& alloc, Buff& buffer, int nbatch, int nwalk, int nocc, int nchol)
{
using T = typename Allocator::value_type;
int offset = 0;
Tensor3D_ref<T> Twabn(buffer.origin(), {2 * nbatch, nwalk * nocc, nocc * nchol});
offset += Twabn.num_elements();
Tensor1D_ref<T> scal(buffer.origin() + offset, boost::multi::iextensions<1u>{nbatch});
offset += scal.num_elements();
Tensor1D_ref<T> result(buffer.origin() + offset, boost::multi::iextensions<1u>{nwalk});
using ma::batched_dot_wabn_wban;
Timer timer;
batched_dot_wabn_wban(nbatch, nwalk, nocc, nchol, scal.origin(), Twabn.origin(), to_address(result.data()), 1);
double time = timer.elapsed();
out << " " << std::setw(5) << nbatch << " " << std::setw(5) << nwalk << " " << std::setw(5) << nocc << " "
<< std::setw(5) << nchol << " " << std::scientific << time << "\n";
}
template<class Allocator, class Buff>
void timeBatchedGemm(std::ostream& out, Allocator& alloc, Buff& buffer, int nbatch, int m)
{
using T = typename Allocator::value_type;
int offset = 0;
Tensor2D_ref<T> a(buffer.origin(), {m, m});
offset += a.num_elements();
Tensor2D_ref<T> b(buffer.origin() + offset, {m, m});
offset += b.num_elements();
Tensor3D_ref<T> c(buffer.origin() + offset, {nbatch, m, m});
//float scale = float(100.0);
std::vector<pointer<T>> A_array;
std::vector<pointer<T>> B_array;
std::vector<pointer<T>> C_array;
float alpha = 1.0;
float beta = 0.0;
for (int i = 0; i < nbatch; i++)
{
A_array.emplace_back(a.origin());
B_array.emplace_back(b.origin());
C_array.emplace_back(c[i].origin());
}
using ma::gemmBatched;
Timer timer;
gemmBatched('N', 'N', m, m, m, alpha, A_array.data(), m, B_array.data(), m, beta, C_array.data(), m, nbatch);
double tgemm = timer.elapsed();
out << " " << std::setw(6) << nbatch << " " << std::setw(5) << m << " " << std::scientific << tgemm
<< "\n";
}
template<class Allocator, class Buff>
void timeGemm(std::ostream& out, Allocator& alloc, Buff& buffer, int m, int n)
{
using T = typename Allocator::value_type;
int offset = 0;
Tensor2D_ref<T> a(buffer.origin(), {m, m});
offset += a.num_elements();
Tensor2D_ref<T> b(buffer.origin() + offset, {m, n});
offset += b.num_elements();
Tensor2D_ref<T> c(buffer.origin() + offset, {m, n});
using ma::product;
Timer timer;
product(a, b, c);
double tproduct = timer.elapsed();
out << " " << std::setw(6) << m << " " << std::setw(5) << n << " " << std::scientific << tproduct
<< "\n";
}
template<class Allocator, class Buff>
void timeBatchedMatrixInverse(std::ostream& out, Allocator& alloc, Buff& buffer, int nbatch, int m)
{
using T = typename Allocator::value_type;
int offset = 0;
Tensor3D_ref<T> a(buffer.origin(), {nbatch, m, m});
Tensor3D_ref<T> b(buffer.origin() + a.num_elements(), {nbatch, m, m});
Alloc<int> ialloc{};
Tensor1D<int> IWORK(boost::multi::iextensions<1u>{nbatch * (m + 1)}, ialloc);
std::vector<pointer<T>> A_array, B_array;
A_array.reserve(nbatch);
B_array.reserve(nbatch);
for (int i = 0; i < nbatch; i++)
{
A_array.emplace_back(a[i].origin());
B_array.emplace_back(b[i].origin());
}
using ma::getrfBatched;
Timer timer;
getrfBatched(m, A_array.data(), m, ma::pointer_dispatch(IWORK.origin()),
ma::pointer_dispatch(IWORK.origin()) + nbatch * m, nbatch);
double tgetrf = timer.elapsed();
using ma::getriBatched;
timer.restart();
getriBatched(m, A_array.data(), m, ma::pointer_dispatch(IWORK.origin()), B_array.data(), m,
ma::pointer_dispatch(IWORK.origin()) + nbatch * m, nbatch);
double tgetri = timer.elapsed();
out << " " << std::setw(6) << nbatch << " " << std::setw(5) << m << " " << std::scientific
<< tgetrf << " " << tgetri << "\n";
}
template<class Allocator, class Buff>
void timeMatrixInverse(std::ostream& out, Allocator& alloc, Buff& buffer, int m)
{
using T = typename Allocator::value_type;
int offset = 0;
Tensor2D_ref<T> a(buffer.origin(), {m, m});
Tensor1D_ref<T> WORK(buffer.origin() + a.num_elements(), boost::multi::iextensions<1u>{m * m});
Alloc<int> ialloc{};
Tensor1D<int> IWORK(boost::multi::iextensions<1u>{m + 1}, ialloc);
using ma::getrf;
Timer timer;
getrf(a, IWORK, WORK);
double tgetrf = timer.elapsed();
using ma::getri;
timer.restart();
getri(a, IWORK, WORK);
double tgetri = timer.elapsed();
out << " " << std::setw(6) << m << " " << std::scientific << tgetrf << " " << tgetri
<< "\n";
}
int main(int argc, char* argv[])
{
boost::mpi3::environment env(argc, argv);
auto world = boost::mpi3::environment::get_world_instance();
auto node = world.split_shared(world.rank());
#if defined(ENABLE_CUDA) || defined(ENABLE_HIP)
arch::INIT(node);
#endif
#if defined(ENABLE_CUDA) || defined(ENABLE_HIP)
{
std::ofstream out;
out.open("time_batched_zqr.dat");
std::cout << " - Batched zQR (nbatch, MxN)" << std::endl;
out << " nbatch M N ttrans tgeqrf tgetdet tgqr\n";
std::vector<int> batches = {1, 5, 10, 20};
std::vector<int> num_rows = {200, 400, 800};
int max_batch = batches[batches.size() - 1];
int max_rows = num_rows[num_rows.size() - 1];
int size = (2 * max_batch * max_rows * (max_rows / 2.0) + 3 * max_batch * max_rows);
Alloc<std::complex<double>> alloc{};
Tensor1D<std::complex<double>> buffer(iextensions<1u>{size}, 1.0, alloc);
for (auto nb : batches)
{
for (auto m : num_rows)
{
timeBatchedQR(out, alloc, buffer, nb, m, m / 2);
}
}
}
#endif
{
std::ofstream out;
out.open("time_zqr.dat");
std::cout << " - zQR (MxM)" << std::endl;
out << " M M tzgeqrf tzungqr\n";
int size = 3 * 1000 * 1000;
Alloc<std::complex<double>> alloc{};
Tensor1D<std::complex<double>> buffer(iextensions<1u>{size}, 1.0, alloc);
std::vector<int> dims = {100, 200, 500, 800, 1000};
for (auto d : dims)
{
timeQR(out, alloc, buffer, d);
}
}
{
std::ofstream out;
out.open("time_sgemm.dat");
std::cout << " - sgemm (MxM)" << std::endl;
out << " M M tsgemm\n";
int size = 3 * 8000 * 8000;
Alloc<float> alloc{};
Tensor1D<float> buffer(iextensions<1u>{size}, 1.0, alloc);
std::vector<int> dims = {200, 500, 800, 1000, 2000, 3000, 4000, 8000};
for (auto d : dims)
{
timeGemm(out, alloc, buffer, d, d);
}
}
{
std::ofstream out;
out.open("time_batched_sgemm.dat");
std::cout << " - batched sgemm (nbatch, MxM)" << std::endl;
out << " nbatch M tsgemm\n";
Alloc<float> alloc{};
std::vector<int> num_rows = {100, 200, 300, 400, 500, 600};
std::vector<int> batches = {128, 256, 512, 1024};
int max_batch = batches[batches.size() - 1];
int max_rows = num_rows[num_rows.size() - 1];
int size = 3 * max_batch * max_rows * max_rows;
Tensor1D<float> buffer(iextensions<1u>{size}, 1.0, alloc);
for (auto nb : batches)
{
for (auto m : num_rows)
{
timeBatchedGemm(out, alloc, buffer, nb, m);
}
}
}
{
std::ofstream out;
out.open("time_exchange_kernel.dat");
std::cout << " - exchange kernel (E[w] = sum_{abn} Twabn Twanb)" << std::endl;
out << " nbatch nwalk nocc nchol tExchangeKernel\n";
Alloc<std::complex<double>> alloc{};
int nwalk = 5;
int nocc = 20;
int nchol = 270;
std::vector<int> batches = {100, 200, 400, 800};
int nbatch_max = batches[batches.size() - 1];
int size = 2 * nbatch_max * nwalk * nocc * nocc * nchol + nbatch_max + nwalk;
Tensor1D<std::complex<double>> buffer(iextensions<1u>{size}, 1.0, alloc);
for (auto b : batches)
{
timeExchangeKernel(out, alloc, buffer, b, nwalk, nocc, nchol);
}
}
#if defined(ENABLE_CUDA) || defined(ENABLE_HIP)
{
std::ofstream out;
out.open("time_batched_matrix_inverse.dat");
std::cout << " - batched matrix inverse (nbatch, MxM)" << std::endl;
out << " nbatch M tgetrf tgetri\n";
Alloc<std::complex<double>> alloc{};
std::vector<int> batches = {1, 5, 10, 20};
std::vector<int> num_rows = {100, 110, 120, 200, 210, 300, 400, 500, 600, 700};
int max_batch = batches[batches.size() - 1];
int max_rows = num_rows[num_rows.size() - 1];
int size = 2 * max_batch * max_rows * max_rows;
Tensor1D<std::complex<double>> buffer(iextensions<1u>{size}, alloc);
{
std::vector<std::complex<double>> tmp(size);
fillRandomMatrix(tmp);
using std::copy_n;
copy_n(tmp.data(), tmp.size(), buffer.origin());
}
for (auto b : batches)
{
for (auto m : num_rows)
{
timeBatchedMatrixInverse(out, alloc, buffer, b, m);
}
}
}
#endif
{
std::ofstream out;
out.open("time_matrix_inverse.dat");
std::cout << " - matrix inverse (nbatch, MxM)" << std::endl;
out << " M tgetrf tgetri\n";
Alloc<std::complex<double>> alloc{};
std::vector<int> num_rows = {100, 110, 120, 200, 210, 300, 400, 500, 600, 700, 800, 1000, 2000, 4000};
int max_rows = num_rows[num_rows.size() - 1];
int size = 2 * max_rows * max_rows;
Tensor1D<std::complex<double>> buffer(iextensions<1u>{size}, alloc);
{
std::vector<std::complex<double>> tmp(size);
fillRandomMatrix(tmp);
using std::copy_n;
copy_n(tmp.data(), tmp.size(), buffer.origin());
}
for (auto m : num_rows)
{
timeMatrixInverse(out, alloc, buffer, m);
}
}
}
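Editorial note: the exchange-kernel benchmark above times the contraction E[w] = sum_{abn} Twabn * Twban through ma::batched_dot_wabn_wban. A hedged CPU reference of that contraction follows; the flat index layout, the per-batch scale factor, and the accumulation over batches are assumptions for illustration, not the library's definition.
#include <complex>
#include <cstddef>
// Reference: E[w] += scal[ib] * sum_{a,b,n} T[ib](w,a,b,n) * T[ib](w,b,a,n),
// with Twabn stored as [nbatch][nwalk*nocc][nocc*nchol], matching the shapes used above.
void exchange_reference(int nbatch, int nwalk, int nocc, int nchol,
                        const std::complex<double>* scal,   // [nbatch]
                        const std::complex<double>* Twabn,  // [nbatch][nwalk*nocc][nocc*nchol]
                        std::complex<double>* E)            // [nwalk]
{
  for (int w = 0; w < nwalk; ++w)
    E[w] = 0.0;
  const std::size_t slab = std::size_t(nwalk) * nocc * nocc * nchol;
  for (int ib = 0; ib < nbatch; ++ib)
    for (int w = 0; w < nwalk; ++w)
      for (int a = 0; a < nocc; ++a)
        for (int b = 0; b < nocc; ++b)
          for (int n = 0; n < nchol; ++n)
          {
            std::size_t iab = ib * slab + (std::size_t(w) * nocc + a) * nocc * nchol + std::size_t(b) * nchol + n;
            std::size_t iba = ib * slab + (std::size_t(w) * nocc + b) * nocc * nchol + std::size_t(a) * nchol + n;
            E[w] += scal[ib] * Twabn[iab] * Twabn[iba];
          }
}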

View File

@ -74,8 +74,7 @@ void test_dense_matrix_mult()
{
vector<double> m = {
9., 24., 30., 4., 10.,
12., 14., 16., 36. //,
// 9., 6., 1.
12., 14., 16., 36.
};
array_ref<double, 2> M(m.data(), {3, 3});
REQUIRE(M.num_elements() == m.size());
@ -602,6 +601,7 @@ void test_dense_gerf_gqr_device(Allocator& alloc)
verify_approx(Id, Id2);
}
}
template<class Allocator>
void test_dense_gerf_gqr_strided_device(Allocator& alloc)
{
@ -633,6 +633,38 @@ void test_dense_gerf_gqr_strided_device(Allocator& alloc)
}
}
template<class Allocator>
void test_dense_batched_gemm(Allocator& alloc)
{
using T = typename Allocator::value_type;
using pointer = typename Allocator::pointer;
{
int nbatch = 3;
array<T, 2, Allocator> a = {{0.0, 1.0, 2.0}, {3.0, 4.0, 5.0}, {6.0, 7.0, 8.0}};
array<T, 2, Allocator> b = {{0.0, 1.0, 2.0}, {3.0, 4.0, 5.0}, {6.0, 7.0, 8.0}};
array<T, 2, Allocator> res = {{15.0, 18.0, 21.0}, {42.0, 54.0, 66.0}, {69.0, 90.0, 111.0}};
array<T, 3, Allocator> c({3, 3, 3}, 0.0, alloc);
T alpha = 1.0;
T beta = 0.0;
std::vector<pointer> A_array;
std::vector<pointer> B_array;
std::vector<pointer> C_array;
for (int i = 0; i < nbatch; i++)
{
A_array.emplace_back(a.origin());
B_array.emplace_back(b.origin());
C_array.emplace_back(c[i].origin());
}
using ma::gemmBatched;
gemmBatched('N', 'N', 3, 3, 3, alpha, A_array.data(), 3, B_array.data(), 3, beta, C_array.data(), 3, nbatch);
for (int i = 0; i < nbatch; i++)
{
verify_approx(c[i], res);
}
}
}
template<class Allocator>
void test_dense_geqrf_getri_batched_device(Allocator& alloc)
{
@ -719,6 +751,7 @@ TEST_CASE("dense_ma_operations_device_double", "[matrix_operations]")
test_dense_mat_vec_device<Alloc>(alloc);
test_dense_mat_mul_device<Alloc>(alloc);
test_dense_gerf_gqr_device<Alloc>(alloc);
test_dense_batched_gemm<Alloc>(alloc);
}
}
TEST_CASE("dense_ma_operations_device_complex", "[matrix_operations]")
@ -734,6 +767,8 @@ TEST_CASE("dense_ma_operations_device_complex", "[matrix_operations]")
test_dense_gerf_gqr_device<Alloc>(alloc);
test_dense_gerf_gqr_strided_device<Alloc>(alloc);
test_dense_geqrf_getri_batched_device<Alloc>(alloc);
test_dense_geqrf_getri_batched_device<Alloc>(alloc);
test_dense_batched_gemm<Alloc>(alloc);
}
}
#endif

View File

@ -183,7 +183,7 @@ protected:
Wavefunction& wfn;
// P1 = exp(-0.5*dt*H1), so H1 includes terms from MF substraction
// P1 = exp(-0.5*dt*H1), so H1 includes terms from MF subtraction
// and the exchange term from the cholesky decomposition (e.g. vn0)
mpi3CMatrix H1;
mpi3CTensor H1ext;

View File

@ -64,7 +64,7 @@ Propagator PropagatorFactory::buildAFQMCPropagator(TaskGroup_& TG,
substractMF = false;
if (substractMF)
app_log() << " Using mean-field substraction in propagator: " << name << "\n";
app_log() << " Using mean-field subtraction in propagator: " << name << "\n";
// buld mean field expectation value of the Cholesky matrix
CVector vMF(iextensions<1u>{wfn.local_number_of_cholesky_vectors()}, allocator{});
@ -89,9 +89,9 @@ Propagator PropagatorFactory::buildAFQMCPropagator(TaskGroup_& TG,
for (int i = 0; i < vMF_.size(); i++)
v_ = std::max(v_, std::abs(vMF_[i]));
TG.Global().all_reduce_n(&v_, 1, &vmax, boost::mpi3::max<>());
app_log() << " Largest component of Mean-field substraction potential: " << vmax << std::endl;
app_log() << " Largest component of Mean-field subtraction potential: " << vmax << std::endl;
if (vmax > vbias_bound)
app_log() << " WARNING: Mean-field substraction potential has components outside vbias_bound.\n"
app_log() << " WARNING: Mean-field subtraction potential has components outside vbias_bound.\n"
<< " Consider increasing vbias_bound. max(vMF[n]), vbias_bound: " << vmax << " " << vbias_bound
<< std::endl;

View File

@ -28,8 +28,6 @@
#include "multi/memory/fallback.hpp"
#include "Utilities/TimerManager.h"
#include "AFQMC/Utilities/myTimer.h"
extern myTimer Timer;
namespace qmcplusplus
{

View File

@ -66,7 +66,7 @@ inline std::ostream& app_log() { return infoLog.getStream(); }
inline std::ostream& app_error() { return infoError.getStream() << "ERROR "; }
inline std::ostream& app_warning() { return infoError.getStream() << "WARNING "; }
inline std::ostream& app_warning() { return infoLog.getStream() << "WARNING "; }
inline std::ostream& app_debug_stream() { return infoDebug.getStream(); }

View File

@ -99,7 +99,7 @@ public:
private:
/** @name Walker Vectors
*
* A single index into these ordered lists constitue a complete
* A single index into these ordered lists constitutes a complete
* walker context.
* @{
*/

View File

@ -70,7 +70,7 @@ private:
// std::shared_ptr<QMCHamiltonian> hamiltonian_;
// This is necessary MCPopulation is constructed in a simple call scope in QMCDriverFactory from the legacy MCWalkerConfiguration
// MCPopulation should have QMCMain scope eventually and the driver will just have a refrence to it.
// MCPopulation should have QMCMain scope eventually and the driver will just have a reference to it.
TrialWaveFunction* trial_wf_;
ParticleSet* elec_particle_set_;
QMCHamiltonian* hamiltonian_;

View File

@ -83,7 +83,7 @@ protected:
IndexType walkers_per_rank_ = 0;
IndexType requested_samples_ = 0;
IndexType sub_steps_ = 1;
// max unecessary in this context
// max unnecessary in this context
IndexType max_blocks_ = 1;
IndexType max_steps_ = 1;
IndexType warmup_steps_ = 0;

View File

@ -356,7 +356,7 @@ protected:
/** Observables manager
* Has very problematic ownership and life cycle.
* Can be transfered via branch manager one driver to the next indefinitely
* Can be transferred via branch manager one driver to the next indefinitely
* TODO: Modify Branch manager and others to clear this up.
*/
EstimatorManagerNew* estimator_manager_;

View File

@ -181,7 +181,7 @@ public:
/** unified: do the actual adjustment
*
* unfortunately right now this requires knowledge of the global context, seems unecessary
* unfortunately right now this requires knowledge of the global context, seems unnecessary
* but this is why MCPopulation is handed in.
* This does was applyNmaxNmin used to.
*/

View File

@ -71,7 +71,7 @@ app_log() << "Descent engine test of parameter update" << std::endl;
app_log() << "First parameter: " << results[0] << std::endl;
app_log() << "Second parameter: " << results[1] << std::endl;
//The engine should update the parameters using the generic defualt step size of .001 and obtain these values.
//The engine should update the parameters using the generic default step size of .001 and obtain these values.
REQUIRE(std::real(results[0]) == Approx(.995));
REQUIRE(std::real(results[1]) == Approx(-2.001));

View File

@ -173,7 +173,7 @@ private:
/** mark all the electrons affected by Tmoves and update ElecNeighborIons and IonNeighborElecs
* @param myTable electron ion distance table
* @param iel reference electron
* Note this funtion should be called before acceptMove for a Tmove
* Note this function should be called before acceptMove for a Tmove
*/
void markAffectedElecs(const DistanceTableData& myTable, int iel);
};

View File

@ -192,7 +192,7 @@ void GamesXmlParser::getGeometry(std::vector<xmlNodePtr>& aPtrList)
while (tcur != NULL)
{
std::string tname((const char*)tcur->name);
double x, y, z;
double x(0), y(0), z(0);
if (tname == "XCOORD")
putContent(x, tcur);
else if (tname == "YCOORD")
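A standalone illustration of why this hunk initializes x, y, and z: with the old declaration "double x, y, z;", a tag that matches none of the branches would leave the values indeterminate. The tag value below is made up.

#include <iostream>
#include <string>

int main()
{
  std::string tname = "ZCOORD";   // hypothetical tag that matches neither branch below
  double x(0), y(0), z(0);        // initialized, as in the patched code
  if (tname == "XCOORD")
    x = 1.0;
  else if (tname == "YCOORD")
    y = 1.0;
  // With uninitialized locals, this read would be undefined behavior when neither branch ran.
  std::cout << x << " " << y << " " << z << std::endl;
  return 0;
}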

View File

@ -934,7 +934,7 @@ void EshdfFile::handleKpt(int kpt_num,
}
}
// now all the states are writen, so write out eigenvalues and number of states
// now all the states are written, so write out eigenvalues and number of states
vector<double> eigval = eigenvalues;
if (spinpol == 0)
{

View File

@ -61,7 +61,7 @@ int main(int argc, char** argv)
bool show_usage = false;
bool show_warn = false;
/* For a successful execution of the code, atleast 3 arguments will need to be
* provided along with the exectuable. Therefore, print usage information if
* provided along with the executable. Therefore, print usage information if
* argc is less than 4.
*/
if (argc < 4)
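A generic, self-contained version of the argument-count guard shown here; the real tool's arguments and usage text are not reproduced, so the usage string below is a placeholder.

#include <iostream>

int main(int argc, char** argv)
{
  // argv[0] is the executable itself, so "at least 3 user arguments" means argc >= 4.
  if (argc < 4)
  {
    std::cerr << "Usage: " << argv[0] << " <arg1> <arg2> <arg3>" << std::endl;
    return 1;
  }
  return 0;
}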

View File

@ -87,9 +87,6 @@ void AGPDeterminant::reportStatus(std::ostream& os)
//do nothing
}
void AGPDeterminant::resetTargetParticleSet(ParticleSet& P) { GeminalBasis->resetTargetParticleSet(P); }
/** Calculate the log value of the Dirac determinant for particles
*@param P input configuration containing N particles
*@param G a vector containing N gradients
@ -418,7 +415,6 @@ WaveFunctionComponentPtr AGPDeterminant::makeClone(ParticleSet& tqp) const
{
AGPDeterminant* myclone = new AGPDeterminant(0);
myclone->GeminalBasis = GeminalBasis->makeClone();
myclone->GeminalBasis->resetTargetParticleSet(tqp);
myclone->resize(Nup, Ndown);
myclone->Lambda = Lambda;
if (Nup != Ndown)

View File

@ -51,8 +51,6 @@ public:
void resetParameters(const opt_variables_type& active);
void reportStatus(std::ostream& os);
void resetTargetParticleSet(ParticleSet& P);
///reset the size: with the number of particles and number of orbtials
void resize(int nup, int ndown);

View File

@ -117,8 +117,6 @@ struct BasisSetBase : public OrbitalSetTraits<T>
/**@}*/
///resize the basis set
virtual void setBasisSetSize(int nbs) = 0;
///reset the target particle set
virtual void resetTargetParticleSet(ParticleSet& P) = 0;
virtual void evaluateWithHessian(const ParticleSet& P, int iat) = 0;
virtual void evaluateWithThirdDeriv(const ParticleSet& P, int iat) = 0;

View File

@ -113,8 +113,6 @@ public:
void resetParameters(const opt_variables_type& active) override {}
void resetTargetParticleSet(ParticleSet& e) override {}
void setOrbitalSetSize(int norbs) override { OrbitalSetSize = norbs; }
virtual void evaluate_notranspose(const ParticleSet& P,

View File

@ -94,13 +94,6 @@ void CompositeSPOSet::report()
}
void CompositeSPOSet::resetTargetParticleSet(ParticleSet& P)
{
for (int c = 0; c < components.size(); ++c)
components[c]->resetTargetParticleSet(P);
}
SPOSet* CompositeSPOSet::makeClone() const
{
// base class and shallow copy

View File

@ -48,8 +48,6 @@ public:
///size is determined by component sposets and nothing else
inline void setOrbitalSetSize(int norbs) {}
void resetTargetParticleSet(ParticleSet& P);
SPOSet* makeClone() const;
/** add sposet clones from another Composite SPOSet

View File

@ -24,7 +24,6 @@ public:
virtual void checkOutVariables(const opt_variables_type& active) override {}
virtual void resetParameters(const opt_variables_type& active) override {}
virtual void reportStatus(std::ostream& os) override {}
virtual void resetTargetParticleSet(ParticleSet& P) override {}
PsiValueType FakeGradRatio;

View File

@ -39,17 +39,6 @@ void DiffWaveFunctionComponent::evaluateDerivRatios(ParticleSet& VP,
APP_ABORT("Implement DiffWaveFunctionComponent::evaluateDerivRatios for this orbital");
}
void NumericalDiffOrbital::resetTargetParticleSet(ParticleSet& P)
{
int nptcls = P.getTotalNum();
dg_p.resize(nptcls);
dl_p.resize(nptcls);
dg_m.resize(nptcls);
dl_m.resize(nptcls);
gradLogPsi.resize(nptcls);
lapLogPsi.resize(nptcls);
}
void NumericalDiffOrbital::checkOutVariables(const opt_variables_type& optvars)
{
//do nothing
@ -117,20 +106,6 @@ void AnalyticDiffOrbital::resetParameters(const opt_variables_type& optvars)
refOrbital[i]->resetParameters(optvars);
}
void AnalyticDiffOrbital::resetTargetParticleSet(ParticleSet& P)
{
if (MyIndex < 0)
return;
for (int i = 0; i < refOrbital.size(); ++i)
refOrbital[i]->resetTargetParticleSet(P);
int nptcls = P.getTotalNum();
if (gradLogPsi.size() != nptcls)
{
gradLogPsi.resize(nptcls);
lapLogPsi.resize(nptcls);
}
}
void AnalyticDiffOrbital::checkOutVariables(const opt_variables_type& optvars)
{
MyIndex = -1;

View File

@ -69,9 +69,6 @@ struct DiffWaveFunctionComponent
*/
virtual void initialize() {}
///prepare internal data for a new particle sets
virtual void resetTargetParticleSet(ParticleSet& P) = 0;
/** evaluate derivatives at \f$\{R\}\f$
* @param P current configuration
* @param optvars optimizable variables
@ -130,7 +127,6 @@ struct NumericalDiffOrbital : public DiffWaveFunctionComponent
{
NumericalDiffOrbital(WaveFunctionComponent* orb = 0) : DiffWaveFunctionComponent(orb) {}
void resetTargetParticleSet(ParticleSet& P);
void evaluateDerivatives(ParticleSet& P,
const opt_variables_type& optvars,
std::vector<RealType>& dlogpsi,
@ -150,7 +146,6 @@ struct AnalyticDiffOrbital : public DiffWaveFunctionComponent
{
AnalyticDiffOrbital(WaveFunctionComponent* orb = 0) : DiffWaveFunctionComponent(orb) {}
void resetTargetParticleSet(ParticleSet& P);
void evaluateDerivatives(ParticleSet& P,
const opt_variables_type& optvars,
std::vector<RealType>& dlogpsi,

View File

@ -47,8 +47,6 @@ inline void EinsplineSetExtended<StorageType>::computePhaseFactors(const TinyVec
EinsplineSet::UnitCellType EinsplineSet::GetLattice() { return SuperLattice; }
void EinsplineSet::resetTargetParticleSet(ParticleSet& e) {}
void EinsplineSet::resetSourceParticleSet(ParticleSet& ions) {}
void EinsplineSet::setOrbitalSetSize(int norbs) { OrbitalSetSize = norbs; }
@ -139,10 +137,6 @@ template<typename StorageType>
void EinsplineSetExtended<StorageType>::resetParameters(const opt_variables_type& active)
{}
template<typename StorageType>
void EinsplineSetExtended<StorageType>::resetTargetParticleSet(ParticleSet& e)
{}
template<typename StorageType>
void EinsplineSetExtended<StorageType>::setOrbitalSetSize(int norbs)
{

View File

@ -78,7 +78,6 @@ public:
public:
UnitCellType GetLattice();
virtual void resetParameters(const opt_variables_type& active) {}
void resetTargetParticleSet(ParticleSet& e);
void resetSourceParticleSet(ParticleSet& ions);
void setOrbitalSetSize(int norbs);
inline std::string Type() { return "EinsplineSet"; }
@ -486,7 +485,6 @@ public:
#endif
void resetParameters(const opt_variables_type& active);
void resetTargetParticleSet(ParticleSet& e);
void setOrbitalSetSize(int norbs);
std::string Type();

View File

@ -37,7 +37,6 @@ struct EGOSet : public SPOSet
SPOSet* makeClone() const override { return new EGOSet(*this); }
void resetParameters(const opt_variables_type& optVariables) override {}
inline void resetTargetParticleSet(ParticleSet& P) override {}
void setOrbitalSetSize(int norbs) override {}
inline void evaluateValue(const ParticleSet& P, int iat, ValueVector_t& psi) override

View File

@ -146,7 +146,6 @@ WaveFunctionComponent* ElectronGasOrbitalBuilder::buildComponent(xmlNodePtr cur)
sdet->setBF(BFTrans);
if (BFTrans->isOptimizable())
sdet->Optimizable = true;
sdet->resetTargetParticleSet(targetPtcl);
}
else
{

Some files were not shown because too many files have changed in this diff Show More