mirror of https://github.com/QMCPACK/qmcpack.git
Merge branch 'develop' into dmc_population_cont
This commit is contained in:
commit
daf2082a1d
|
@ -768,8 +768,8 @@ IF(ENABLE_HIP)
|
|||
ADD_LIBRARY(HIP::HIP INTERFACE IMPORTED)
|
||||
# temporarily put hipsparse hipblas here for convenience, should be moved to Platforms.
|
||||
SET_TARGET_PROPERTIES(HIP::HIP PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${ROCM_ROOT}/include"
|
||||
INTERFACE_COMPILE_DEFINITIONS "ENABLE_HIP;__HIP_PLATFORM_HCC__"
|
||||
INTERFACE_LINK_LIBRARIES "-L${ROCM_ROOT}/lib;-lhipsparse;-lhipblas;-lhip_hcc")
|
||||
INTERFACE_COMPILE_DEFINITIONS "ENABLE_HIP"
|
||||
INTERFACE_LINK_LIBRARIES "-L${ROCM_ROOT}/lib;-lhipsparse;-lhipblas;-lamdhip64")
|
||||
ENDIF(ENABLE_HIP)
|
||||
|
||||
######################################################
|
||||
|
|
|
@ -1552,6 +1552,12 @@ def qmcpack_simulation():
|
|||
|
||||
|
||||
|
||||
def observables():
|
||||
nunit_all()
|
||||
#end def observables
|
||||
|
||||
|
||||
|
||||
def nxs_redo():
|
||||
nunit('test_redo')
|
||||
|
||||
|
@ -1913,6 +1919,7 @@ NexusTest( quantum_package_simulation )
|
|||
NexusTest( pwscf_postprocessor_simulations )
|
||||
NexusTest( qmcpack_converter_simulations )
|
||||
NexusTest( qmcpack_simulation )
|
||||
NexusTest( observables )
|
||||
NexusTest( nxs_redo )
|
||||
NexusTest( nxs_sim )
|
||||
NexusTest( qmca )
|
||||
|
@ -1938,6 +1945,7 @@ NexusTest( qdens )
|
|||
#NexusTest( numerics )
|
||||
#NexusTest( nxs_redo )
|
||||
#NexusTest( nxs_sim )
|
||||
#NexusTest( observables )
|
||||
#NexusTest( optional_dependencies , optional=True)
|
||||
#NexusTest( periodic_table )
|
||||
#NexusTest( physical_system )
|
||||
|
@ -2232,6 +2240,7 @@ if __name__=='__main__':
|
|||
exit()
|
||||
#end if
|
||||
|
||||
global_data['verbose'] = options.verbose
|
||||
global_data['job_ref_table'] = options.job_ref_table
|
||||
|
||||
if options.regex!='none':
|
||||
|
|
|
@ -64,22 +64,22 @@ sorted_generic = sorted_py2
|
|||
|
||||
|
||||
def log(*items,**kwargs):
|
||||
indent=None
|
||||
logfile=generic_settings.devlog
|
||||
indent = None
|
||||
logfile = generic_settings.devlog
|
||||
if len(kwargs)>0:
|
||||
n=0
|
||||
if 'indent' in kwargs:
|
||||
indent = kwargs['indent']
|
||||
n+=1
|
||||
indent = kwargs.pop('indent' ,None )
|
||||
logfile = kwargs.pop('logfile',logfile)
|
||||
n = kwargs.pop('n',0)
|
||||
if n!=0:
|
||||
if indent is None:
|
||||
indent = n*' '
|
||||
else:
|
||||
indent = n*indent
|
||||
#end if
|
||||
#end if
|
||||
if 'logfile' in kwargs:
|
||||
logfile = kwargs['logfile']
|
||||
n+=1
|
||||
#end if
|
||||
if n!=len(kwargs):
|
||||
valid = 'indent logfile'.split()
|
||||
invalid = set(kwargs.keys())-set(valid)
|
||||
error('invalid keyword arguments provided\ninvalid keywords: {0}\nvalid options are: {1}'.format(sorted(invalid),valid))
|
||||
if len(kwargs)>0:
|
||||
valid = 'indent logfile n'.split()
|
||||
error('Invalid keyword arguments provided.\nInvalid keywords: {0}\nValid options are: {1}'.format(sorted(kwargs.keys()),valid),'log')
|
||||
#end if
|
||||
#end if
|
||||
if len(items)==1 and isinstance(items[0],str):
|
||||
|
|
|
@ -80,8 +80,11 @@ Module contents
|
|||
---------------
|
||||
"""
|
||||
|
||||
import os
|
||||
|
||||
from generic import obj
|
||||
from developer import DevBase,ci,message,error,unavailable
|
||||
from fileio import StandardFile,XsfFile
|
||||
|
||||
try:
|
||||
import numpy as np
|
||||
|
@ -98,6 +101,12 @@ try:
|
|||
except:
|
||||
measure = unavailable('skimage','measure')
|
||||
#end try
|
||||
try:
|
||||
import scipy.ndimage as scipy_ndimage
|
||||
except:
|
||||
scipy_ndimage = unavailable('scipy','ndimage')
|
||||
#end try
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -715,6 +724,24 @@ class GBase(PlotHandler):
|
|||
initialized = (bool,False),
|
||||
)
|
||||
|
||||
vlogger = None
|
||||
|
||||
@staticmethod
|
||||
def reset_vlog():
|
||||
GBase.vlogger = None
|
||||
#end def reset_vlog
|
||||
|
||||
@staticmethod
|
||||
def set_vlog(vlog):
|
||||
GBase.vlogger = vlog
|
||||
#end def set_vlog
|
||||
|
||||
def vlog(self,*args,**kwargs):
|
||||
if GBase.vlogger is not None:
|
||||
GBase.vlogger(*args,**kwargs)
|
||||
#end if
|
||||
#end def vlog
|
||||
|
||||
|
||||
def __init__(self,*args,**kwargs):
|
||||
self.reset()
|
||||
|
@ -763,6 +790,28 @@ class GBase(PlotHandler):
|
|||
#end if
|
||||
#end def initialize
|
||||
|
||||
|
||||
def read(self,filepath,format=None,check=True):
|
||||
if isinstance(filepath,StandardFile):
|
||||
format = filepath.sftype.lower()
|
||||
elif not isinstance(filepath,str):
|
||||
self.error('Cannot read file.\nExpected a file path.\nInstead received type: {}\nWith value: {}\nPlease provide a file path and try again.'.format(filepath.__class__.__name__,filepath))
|
||||
elif not os.path.exists(filepath):
|
||||
self.error('Cannot read file. File path does not exist.\nFile path provided: {}'.format(filepath))
|
||||
elif format is not None:
|
||||
if not isinstance(format,str):
|
||||
self.error('Cannot read file.\nExpected text (string) for file format.\nInstead received type: {}\nWith value: {}'.format(format.__class__.__name__,format))
|
||||
#end if
|
||||
else:
|
||||
format = filepath.rsplit('.',1)[1].lower()
|
||||
#end if
|
||||
self.reset()
|
||||
self.read_local(filepath,format)
|
||||
if check:
|
||||
self.check_valid()
|
||||
#end if
|
||||
#end def read
|
||||
|
||||
|
||||
def valid(self):
|
||||
"""
|
||||
|
@ -865,6 +914,32 @@ class GBase(PlotHandler):
|
|||
"""
|
||||
self.not_implemented()
|
||||
#end def local_validity_checks
|
||||
|
||||
|
||||
def read_local(self,filepath,format):
|
||||
self.not_implemented()
|
||||
#end def read_local
|
||||
|
||||
|
||||
# test needed
|
||||
def ensure_array(self,dtype=None,**arrays_in):
|
||||
arrays = obj()
|
||||
for k,ai in arrays_in.items():
|
||||
if isinstance(ai,(tuple,list)):
|
||||
if dtype is None:
|
||||
a = np.array(ai)
|
||||
else:
|
||||
a = np.array(ai,dtype=dtype)
|
||||
#end if
|
||||
elif isinstance(ai,np.ndarray):
|
||||
a = ai
|
||||
else:
|
||||
self.error('Cannot ensure array value.\nReceived data with type: {}\nOnly tuple, list, and ndarray are supported.'.format(ai.__class__.__name__))
|
||||
#end if
|
||||
arrays[k] = a
|
||||
#end for
|
||||
return arrays
|
||||
#end def ensure_array
|
||||
#end class GBase
|
||||
|
||||
|
||||
|
@ -1127,6 +1202,12 @@ class Grid(GBase):
|
|||
plt.show()
|
||||
#end if
|
||||
#end def plot_points
|
||||
|
||||
|
||||
def grid_function(self,*args,**kwargs):
|
||||
gf = grid_function_from_grid(self)
|
||||
return gf(*args,**kwargs)
|
||||
#end def grid_function
|
||||
#end class Grid
|
||||
|
||||
|
||||
|
@ -1249,6 +1330,11 @@ class StructuredGrid(Grid):
|
|||
return np.prod(self.cell_grid_shape)
|
||||
#end def ncells
|
||||
|
||||
@property
|
||||
def npoints(self):
|
||||
return np.prod(self.shape)
|
||||
#end def npoints
|
||||
|
||||
@property
|
||||
def flat_points_shape(self):
|
||||
space_dim = self.r.shape[-1]
|
||||
|
@ -1262,6 +1348,11 @@ class StructuredGrid(Grid):
|
|||
return self.shape+(space_dim,)
|
||||
#end def flat_points_shape
|
||||
|
||||
@property
|
||||
def periodic(self):
|
||||
return (self.bconds==self.bcond_types.periodic).all()
|
||||
#end def periodic
|
||||
|
||||
|
||||
def initialize_local(self,**kwargs):
|
||||
"""
|
||||
|
@ -1401,6 +1492,28 @@ class StructuredGrid(Grid):
|
|||
#end def reshape_flat
|
||||
|
||||
|
||||
# test needed
|
||||
def flat_indices(self,full_indices):
|
||||
if not isinstance(full_indices,np.ndarray):
|
||||
full_indices = np.array(full_indices,dtype=int)
|
||||
#end if
|
||||
if len(full_indices.shape)!=2:
|
||||
self.error('full_indices must have shape (# points)x(# dimensions)\nShape received: {}'.format(full_indices.shape))
|
||||
elif full_indices.shape[-1]!=self.grid_dim:
|
||||
self.error('full_indices must have same dimension as the grid.\nfull_indices dimension: {}\nGrid dimension: {}'.format(full_indices.shape[-1],self.grid_dim))
|
||||
#end if
|
||||
grid_shape = self.grid_shape
|
||||
D = len(grid_shape)
|
||||
grid_shape_prod = np.empty((D,),dtype=int)
|
||||
grid_shape_prod[-1] = 1
|
||||
for d in range(D-1):
|
||||
grid_shape_prod[D-d-2] = grid_shape[D-d-1]*grid_shape_prod[D-d-1]
|
||||
#end for
|
||||
flat_indices = np.dot(full_indices,grid_shape_prod)
|
||||
return flat_indices
|
||||
#end def flat_indices
|
||||
|
||||
|
||||
def unit_points(self,points=None,project=False):
|
||||
"""
|
||||
(`External API`) Map a set of points into the unit cube.
|
||||
|
@ -1942,6 +2055,19 @@ class StructuredGridWithAxes(StructuredGrid):
|
|||
"""
|
||||
return np.abs(np.prod(np.linalg.svd(self.axes,compute_uv=False)))
|
||||
#end def axes_volume
|
||||
|
||||
|
||||
def indices_for_map_coord(self,points):
|
||||
if not self.centered:
|
||||
ucorner = np.array([0,0,0],dtype=float)
|
||||
else:
|
||||
ucorner = 0.5/np.array(self.cell_grid_shape)
|
||||
#end if
|
||||
grid_shape = np.array(self.grid_shape)
|
||||
ipoints = (self.unit_points(points)-ucorner)*grid_shape
|
||||
return ipoints.T
|
||||
#end def indices_for_map_coord
|
||||
|
||||
#end class StructuredGridWithAxes
|
||||
|
||||
|
||||
|
@ -2049,6 +2175,8 @@ class ParallelotopeGrid(StructuredGridWithAxes):
|
|||
The number of grid points in each dimension.
|
||||
cell_grid_shape : `tuple, int, property`
|
||||
The number of grid cells in each dimension.
|
||||
dr : `ndarray, float, shape(dg,ds), property`
|
||||
Vector displacements between neighboring grid points.
|
||||
ncells : `int, property`
|
||||
The total number of grid cells.
|
||||
flat_points_shape : `tuple, int, property`
|
||||
|
@ -2069,6 +2197,16 @@ class ParallelotopeGrid(StructuredGridWithAxes):
|
|||
Datatype of the grid point values.
|
||||
"""
|
||||
|
||||
@property
|
||||
def dr(self):
|
||||
dr = np.empty(self.axes.shape,self.dtype)
|
||||
cells = self.cell_grid_shape
|
||||
for d in range(self.grid_dim):
|
||||
dr[d] = self.axes[d]/cells[d]
|
||||
#end for
|
||||
return dr
|
||||
#end def dr
|
||||
|
||||
@property
|
||||
def corner(self):
|
||||
return self.origin
|
||||
|
@ -2135,6 +2273,45 @@ class ParallelotopeGrid(StructuredGridWithAxes):
|
|||
#end def initialize_local
|
||||
|
||||
|
||||
def read_local(self,filepath,format):
|
||||
if format=='xsf':
|
||||
self.read_xsf(filepath)
|
||||
else:
|
||||
self.error('Cannot read file.\nUnrecognized file format encountered.\nUnrecognized file format: {}\nValid options are: xsf'.format(format))
|
||||
#end if
|
||||
#end def read_local
|
||||
|
||||
|
||||
def read_xsf(self,filepath):
|
||||
if isinstance(filepath,XsfFile):
|
||||
xsf = filepath
|
||||
else:
|
||||
xsf = XsfFile(filepath)
|
||||
#end if
|
||||
|
||||
d = xsf.get_density()
|
||||
|
||||
cells = d.grid-1
|
||||
c = d.cell.sum(axis=0)/cells/2
|
||||
|
||||
centered = False
|
||||
corner = None
|
||||
if np.abs(c-d.corner).max()<1e-6:
|
||||
centered = True
|
||||
else:
|
||||
corner = d.corner
|
||||
#end if
|
||||
|
||||
self.initialize(
|
||||
bconds = tuple('ppp'),
|
||||
axes = d.cell.copy(),
|
||||
cells = cells,
|
||||
centered = centered,
|
||||
corner = corner,
|
||||
)
|
||||
#end def read_xsf
|
||||
|
||||
|
||||
def unit_points_bare(self,points):
|
||||
"""
|
||||
(`Internal API`) Maps points in a parallelotope into the unit cube.
|
||||
|
@ -2580,6 +2757,14 @@ class SpheroidGrid(StructuredGridWithAxes):
|
|||
#end if
|
||||
return cell_vols
|
||||
#end def cell_volumes
|
||||
|
||||
|
||||
def radii(self):
|
||||
self.reshape_full()
|
||||
rrad = np.array(self.r[:,0,0,-1].ravel())
|
||||
self.reshape_flat()
|
||||
return rrad
|
||||
#end def radii
|
||||
#end class SpheroidGrid
|
||||
|
||||
|
||||
|
@ -3023,8 +3208,9 @@ class GridFunction(GBase):
|
|||
#: (`obj`) Collection of attributes for the class. Used to check assigned
|
||||
#: members for type conformity and to assign default values.
|
||||
persistent_data_types = obj(
|
||||
grid = (Grid,None),
|
||||
values = (np.ndarray,None),
|
||||
grid = (Grid , None),
|
||||
values = (np.ndarray, None),
|
||||
value_shape = (tuple , None),
|
||||
**GBase.persistent_data_types
|
||||
)
|
||||
|
||||
|
@ -3038,9 +3224,14 @@ class GridFunction(GBase):
|
|||
return self.grid.npoints
|
||||
#end def npoints
|
||||
|
||||
@property
|
||||
def value_dim(self):
|
||||
return len(self.value_shape)
|
||||
#end def value_dim
|
||||
|
||||
@property
|
||||
def nvalues(self):
|
||||
return self.values.shape[1]
|
||||
return np.prod(self.value_shape)
|
||||
#end def nvalues
|
||||
|
||||
@property
|
||||
|
@ -3067,6 +3258,7 @@ class GridFunction(GBase):
|
|||
copy_values = True,
|
||||
dtype = None,
|
||||
grid_dtype = None,
|
||||
value_shape = None,
|
||||
**kwargs):
|
||||
"""
|
||||
(`Internal API`) Sets `grid` and `values` attributes.
|
||||
|
@ -3144,13 +3336,32 @@ class GridFunction(GBase):
|
|||
else:
|
||||
self.error('provided function values are of incorrect type\nvalues must be tuple, list, or ndarray\nyou provided: {}'.format(values.__class__.__name__))
|
||||
#end if
|
||||
if len(values.shape)==1:
|
||||
values.shape = (values.shape[0],1)
|
||||
|
||||
# process value_shape input
|
||||
if len(values.shape)==1 or values.shape==grid.shape:
|
||||
value_shape = (1,)
|
||||
elif value_shape is None:
|
||||
if len(values)==grid.npoints:
|
||||
value_shape = values.shape[1:]
|
||||
elif len(value.shape)>len(grid.shape) and value.shape[:len(grid.shape)]==grid.shape:
|
||||
value_shape = values.shape[len(grid.shape):]
|
||||
#end if
|
||||
elif isinstance(value_shape,(list,np.ndarray)):
|
||||
value_shape = tuple(value_shape)
|
||||
#end if
|
||||
|
||||
if value_shape is not None:
|
||||
nvtot = values.size
|
||||
nv = np.prod(value_shape)
|
||||
if nvtot%nv!=0 or nvtot//nv!=grid.npoints:
|
||||
self.error('value_shape and total number of values are inconsistent.\nTotal number of values: {}\nvalue_shape: {}\nExpected number of values per grid point: {}\nActual number of values per grid point: {}'.format(nvtot,value_shape,nv,nvtot/nv))
|
||||
#end if
|
||||
values.shape = (grid.npoints,nv)
|
||||
#end if
|
||||
|
||||
# assign grid and values
|
||||
self.grid = grid
|
||||
self.values = values
|
||||
self.grid = grid
|
||||
self.values = values
|
||||
self.value_shape = value_shape
|
||||
#end def initialize_local
|
||||
|
||||
|
||||
|
@ -3165,17 +3376,35 @@ class GridFunction(GBase):
|
|||
"""
|
||||
cls = self.__class__
|
||||
if not isinstance(self.grid,cls.grid_class):
|
||||
msgs.append('grid is not of the required type for current grid function\ngrid function type: {}\ngrid type required: {}'.format(cls.__name__,self.grid.__class__.__name__))
|
||||
msgs.append('Grid is not of the required type for current grid function.\nGrid function type: {}\nGrid type required: {}'.format(cls.__name__,self.grid.__class__.__name__))
|
||||
#end if
|
||||
self.grid.local_validity_checks(msgs)
|
||||
if len(self.values)!=self.npoints:
|
||||
msgs.append('number of function values and number of grid points do not match\nnumber of grid points: {}\nnumber of function values: {}'.format(self.npoints,len(self.values)))
|
||||
msgs.append('Number of function values and number of grid points do not match.\nNumber of grid points: {}\nNumber of function values: {}'.format(self.npoints,len(self.values)))
|
||||
#end if
|
||||
if len(self.values.shape)!=2:
|
||||
msgs.append('function values has incorrect shape\nexpected shape is (# of points, # of function values at each point)\nshape received: {}'.format(self.values.shape))
|
||||
msgs.append('Function values has incorrect shape.\nExpected shape is (# of points, # of function values at each point)\nShape received: {}'.format(self.values.shape))
|
||||
#end if
|
||||
if len(self.value_shape)<1:
|
||||
self.error('"value_shape" must have at least one entry.')
|
||||
#end if
|
||||
if np.prod(self.value_shape)!=self.values.size//self.npoints:
|
||||
self.error('"value_shape" and "values" are inconsistent.\nNumber of values per point based on "values": {}\nNumber of values per point based on "value_shape": {}'.format(self.values.size/self.npoints,np.prod(self.value_shape)))
|
||||
#end if
|
||||
if self.values.shape!=(self.npoints,self.nvalues):
|
||||
self.error('Function values has incorrect shape.\nExected shape: {}\nShape received: {}'.format((self.npoints,self.nvalues),self.values.shape))
|
||||
#end if
|
||||
#end def local_validity_checks
|
||||
|
||||
|
||||
def reshape_values_full(self):
|
||||
self.values.shape = (self.npoints,)+self.value_shape
|
||||
#end def reshape_values_full
|
||||
|
||||
|
||||
def reshape_values_flat(self):
|
||||
self.values.shape = (self.npoints,self.nvalues)
|
||||
#end def reshape_values_flat
|
||||
#end class GridFunction
|
||||
|
||||
|
||||
|
@ -3239,8 +3468,93 @@ class StructuredGridFunction(GridFunction):
|
|||
def full_points_shape(self):
|
||||
return self.grid.full_points_shape
|
||||
#end def full_points_shape
|
||||
|
||||
@property
|
||||
def flat_values_shape(self):
|
||||
None
|
||||
#end def flat_values_shape
|
||||
|
||||
@property
|
||||
def periodic(self):
|
||||
return self.grid.periodic
|
||||
#end def periodic
|
||||
|
||||
|
||||
def reshape_points_full(self):
|
||||
self.values.shape = self.grid_shape+(self.nvalues,)
|
||||
self.grid.reshape_full()
|
||||
#end def reshape_points_full
|
||||
|
||||
|
||||
def reshape_points_flat(self):
|
||||
self.values.shape = (self.npoints,self.nvalues)
|
||||
self.grid.reshape_flat()
|
||||
#end def reshape_points_flat
|
||||
|
||||
|
||||
def reshape_full(self):
|
||||
self.values.shape = self.grid_shape+self.value_shape
|
||||
self.grid.reshape_full()
|
||||
#end def reshape_full
|
||||
|
||||
|
||||
def reshape_flat(self):
|
||||
self.values.shape = (self.npoints,self.nvalues)
|
||||
self.grid.reshape_flat()
|
||||
#end def reshape_flat
|
||||
|
||||
|
||||
def get_values_with_upper_ghost(self):
|
||||
if 'values_with_upper_ghost' in self:
|
||||
return self.values_with_upper_ghost
|
||||
#end if
|
||||
self.reshape_points_full()
|
||||
|
||||
g = np.array(self.grid_shape)
|
||||
v = np.empty(tuple(g+1)+(self.nvalues,),dtype=self.values.dtype)
|
||||
dim = self.grid_dim
|
||||
if dim==1:
|
||||
n1 = self.npoints
|
||||
v[:-1] = self.values
|
||||
v[-1] = self.values[0]
|
||||
elif dim==2:
|
||||
v[:-1,:-1] = self.values
|
||||
v[ -1,:-1] = self.values[0,:]
|
||||
v[:-1, -1] = self.values[:,0]
|
||||
v[ -1, -1] = self.values[0,0]
|
||||
elif dim==3:
|
||||
v[:-1,:-1,:-1] = self.values
|
||||
v[ -1,:-1,:-1] = self.values[0,:,:]
|
||||
v[:-1, -1,:-1] = self.values[:,0,:]
|
||||
v[:-1,:-1, -1] = self.values[:,:,0]
|
||||
v[:-1, -1, -1] = self.values[:,0,0]
|
||||
v[ -1,:-1, -1] = self.values[0,:,0]
|
||||
v[ -1, -1,:-1] = self.values[0,0,:]
|
||||
v[ -1, -1, -1] = self.values[0,0,0]
|
||||
else:
|
||||
self.error('values_with_upper_ghost is not implemented for dimensions greater than 3.\nDimensionality of the current dataset: {}'.format(dim))
|
||||
#end if
|
||||
|
||||
self.reshape_points_flat()
|
||||
|
||||
self.values_with_upper_ghost = v
|
||||
|
||||
return v
|
||||
#end def values_with_upper_ghost
|
||||
|
||||
|
||||
def clear_ghost(self):
|
||||
ghost_fields = [
|
||||
'values_with_upper_ghost',
|
||||
]
|
||||
for f in ghost_fields:
|
||||
if f in self:
|
||||
del self[f]
|
||||
#end if
|
||||
#end for
|
||||
#end def clear_ghost
|
||||
|
||||
|
||||
def plot_unit_contours(self,boundary=False,fig=True,show=True,**kwargs):
|
||||
"""
|
||||
(`External API`) Make 2D contour plots in the unit coordinate space.
|
||||
|
@ -3362,7 +3676,62 @@ class StructuredGridFunctionWithAxes(StructuredGridFunction):
|
|||
This class should not be instantiated directly.
|
||||
"""
|
||||
|
||||
def plot_contours(self,boundary=False,fig=True,show=True,**kwargs):
|
||||
|
||||
def interpolate(self,r,type=None,copy=False,**kw):
|
||||
# https://stackoverflow.com/questions/16217995/fast-interpolation-of-regularly-sampled-3d-data-with-different-intervals-in-x-y
|
||||
kw = obj(kw)
|
||||
grid = None
|
||||
if isinstance(r,Grid):
|
||||
grid = r
|
||||
r = grid.r
|
||||
#end if
|
||||
if type is None:
|
||||
type = 'map_coordinates'
|
||||
#end if
|
||||
if type=='map_coordinates':
|
||||
if 'mode' not in kw:
|
||||
if self.periodic:
|
||||
kw.mode = 'wrap'
|
||||
else:
|
||||
kw.mode = 'nearest'
|
||||
#end if
|
||||
#end if
|
||||
if 'order' not in kw:
|
||||
kw.order = 3
|
||||
#end if
|
||||
indices = self.grid.indices_for_map_coord(r)
|
||||
# needed because of off-by-one error in map_coordinates
|
||||
# see: https://github.com/scipy/scipy/issues/2640
|
||||
v = self.get_values_with_upper_ghost()
|
||||
if self.nvalues>1:
|
||||
self.error('Interpolation is not yet supported for nvalues>1.')
|
||||
#end if
|
||||
v_shape = v.shape
|
||||
v.shape = v_shape[:-1]
|
||||
values = scipy_ndimage.map_coordinates(v, indices, **kw)
|
||||
v.shape = v_shape
|
||||
else:
|
||||
self.error('Interpolation of type "{}" is not supported.\nValid options are: map_coordinates'.format(type))
|
||||
#end if
|
||||
|
||||
if grid is None:
|
||||
return values
|
||||
elif grid is not None:
|
||||
if copy:
|
||||
grid = grid.copy()
|
||||
#end if
|
||||
gf = grid.grid_function(
|
||||
grid = grid,
|
||||
values = values,
|
||||
value_shape = tuple(self.value_shape),
|
||||
copy = False,
|
||||
)
|
||||
return gf
|
||||
#end if
|
||||
#end def interpolate
|
||||
|
||||
|
||||
def plot_contours(self,a1=(1,0),a2=(0,1),boundary=False,fig=True,show=True,**kwargs):
|
||||
"""
|
||||
(`External API`) Make 2D contour plots in the full coordinate space.
|
||||
|
||||
|
@ -3380,10 +3749,19 @@ class StructuredGridFunctionWithAxes(StructuredGridFunction):
|
|||
if self.grid_dim!=2:
|
||||
self.error('cannot plot contours\ngrid must have dimension 2 to make contour plots\ndimension of grid for this function: {}'.format(self.grid_dim))
|
||||
#end if
|
||||
if self.space_dim!=2:
|
||||
self.error('cannot plot contours\ngrid points must reside in a 2D space to make contour plots\ndimension of the space for this function: {}'.format(self.space_dim))
|
||||
#if self.space_dim!=2:
|
||||
# self.error('cannot plot contours\ngrid points must reside in a 2D space to make contour plots\ndimension of the space for this function: {}'.format(self.space_dim))
|
||||
##end if
|
||||
if self.space_dim==2:
|
||||
X,Y = self.r.T
|
||||
else:
|
||||
ax = np.dot(np.array([a1,a2]),self.grid.axes)
|
||||
ax[0] = ax[0]/np.linalg.norm(ax[0])
|
||||
ax[1] -= np.dot(ax[1],ax[0])*ax[0]
|
||||
ax[1] = ax[1]/np.linalg.norm(ax[1])
|
||||
X,Y = np.dot(ax,self.r.T)
|
||||
ax_trans = ax
|
||||
#end if
|
||||
X,Y = self.r.T
|
||||
X.shape = self.grid_shape
|
||||
Y.shape = self.grid_shape
|
||||
Zm = self.f.T
|
||||
|
@ -3392,7 +3770,16 @@ class StructuredGridFunctionWithAxes(StructuredGridFunction):
|
|||
fig,ax = self.setup_mpl_fig(fig=fig,dim=self.grid_dim)
|
||||
ax.contour(X,Y,Z,**kwargs)
|
||||
if boundary:
|
||||
self.grid.plot_boundary(fig=False,show=False)
|
||||
if self.space_dim==2:
|
||||
self.grid.plot_boundary(fig=False,show=False)
|
||||
else:
|
||||
bpoints = self.grid.get_boundary_lines()
|
||||
bpoints = np.inner(ax_trans,bpoints)
|
||||
bpoints = np.transpose(bpoints,(1,2,0))
|
||||
for bp in bpoints:
|
||||
ax.plot(*bp.T,color='k')
|
||||
#end for
|
||||
#end if
|
||||
#end if
|
||||
#end for
|
||||
if show:
|
||||
|
@ -3552,6 +3939,208 @@ class ParallelotopeGridFunction(StructuredGridFunctionWithAxes):
|
|||
Datatype of the function values.
|
||||
"""
|
||||
grid_class = ParallelotopeGrid
|
||||
|
||||
|
||||
def read_local(self,filepath,format):
|
||||
if format=='xsf':
|
||||
self.read_xsf(filepath)
|
||||
else:
|
||||
self.error('Cannot read file.\nUnrecognized file format encountered.\nUnrecognized file format: {}\nValid options are: xsf'.format(format))
|
||||
#end if
|
||||
#end def read_local
|
||||
|
||||
|
||||
def read_xsf(self,filepath):
|
||||
if isinstance(filepath,XsfFile):
|
||||
xsf = filepath
|
||||
copy = True
|
||||
else:
|
||||
xsf = XsfFile(filepath)
|
||||
copy = False,
|
||||
#end if
|
||||
|
||||
grid = self.grid_class()
|
||||
grid.read_xsf(xsf)
|
||||
|
||||
xsf.remove_ghost()
|
||||
d = xsf.get_density()
|
||||
|
||||
values = d.values_noghost.ravel()
|
||||
if copy:
|
||||
values = values.copy()
|
||||
#end if
|
||||
|
||||
self.initialize(
|
||||
grid = grid,
|
||||
values = values,
|
||||
)
|
||||
#end def read_xsf
|
||||
|
||||
|
||||
# test needed
|
||||
def read_from_points(self,points,values,axes,tol=1e-6,average=False):
|
||||
self.vlog('Reading grid function values from scattered data.')
|
||||
|
||||
# check data types and shapes
|
||||
d = self.ensure_array(
|
||||
points = points,
|
||||
values = values,
|
||||
axes = axes,
|
||||
dtype = float,
|
||||
)
|
||||
points = d.points
|
||||
values = d.values
|
||||
axes = d.axes.copy()
|
||||
del d
|
||||
if len(points)!=len(values):
|
||||
self.error('"points" and "values" must have the same length.\nNumber of points: {}\nNumber of values: {}'.format(len(points),len(values)))
|
||||
elif len(points.shape)!=2:
|
||||
self.error('Shape of "points" array must be (# points) x (# dimensions).\nShape provided: {}'.format(points.shape))
|
||||
#end if
|
||||
N,D = points.shape
|
||||
if axes.shape!=(D,D):
|
||||
self.error('"axes" must have shape {}\nShape provided: {} '.format((D,D),axes.shape))
|
||||
#end if
|
||||
|
||||
# reshape values (for now GridFunction does not support more structured values)
|
||||
values.shape = len(values),values.size//len(values)
|
||||
|
||||
# normalize the axes
|
||||
for d in range(D):
|
||||
axes[d] /= np.linalg.norm(axes[d])
|
||||
#end for
|
||||
|
||||
# make the points rectilinear
|
||||
self.vlog('Transforming points to unit coords',n=1,time=True)
|
||||
rpoints = np.dot(points,np.linalg.inv(axes))
|
||||
|
||||
# search for layers in each dimension
|
||||
def xlayers(xpoints,tol):
|
||||
xmin = xpoints.min()
|
||||
xmax = xpoints.max()
|
||||
nbins = np.uint64(np.round(np.ceil((xmax-xmin+tol)/tol)))
|
||||
dx = (xmax-xmin+tol)/nbins
|
||||
layers = obj()
|
||||
for x in xpoints:
|
||||
n = np.uint64(x/dx)
|
||||
if n not in layers:
|
||||
layers[n] = obj(xsum=x,nsum=1)
|
||||
else:
|
||||
l = layers[n]
|
||||
l.xsum += x
|
||||
l.nsum += 1
|
||||
#end if
|
||||
#end for
|
||||
for l in layers:
|
||||
l.xmean = l.xsum/l.nsum
|
||||
#end for
|
||||
lprev = None
|
||||
for n in sorted(layers.keys()):
|
||||
l = layers[n]
|
||||
if lprev is not None and np.abs(l.xmean-lprev.xmean)<tol:
|
||||
lprev.xsum += l.xsum
|
||||
lprev.nsum += l.nsum
|
||||
lprev.xmean = lprev.xsum/lprev.nsum
|
||||
del layers[n]
|
||||
else:
|
||||
lprev = l
|
||||
#end if
|
||||
#end for
|
||||
xlayers = np.empty((len(layers),),dtype=float)
|
||||
i = 0
|
||||
for n in sorted(layers.keys()):
|
||||
l = layers[n]
|
||||
xlayers[i] = l.xmean
|
||||
i += 1
|
||||
#end for
|
||||
order = xlayers.argsort()
|
||||
xlayers = xlayers[order]
|
||||
return xlayers,xmin,xmax
|
||||
#end def xlayers
|
||||
def index_by_layer(xpoints,tol):
|
||||
xlayer,xmin,xmax = xlayers(xpoints,tol)
|
||||
dxlayer = xlayer[1:]-xlayer[:-1]
|
||||
dxmin = dxlayer.min()
|
||||
dxmax = dxlayer.max()
|
||||
if np.abs(dxmax-dxmin)>2*tol:
|
||||
error('Could not determine layer separation.\nLayers are not evenly spaced.\nMin layer spacing: {}\nMax layer spacing: {}\nSpread : {}\nTolerance: {}'.format(dxmin,dxmax,dxmax-dxmin,2*tol),'read_from_points')
|
||||
#end if
|
||||
dx = dxlayer.mean()
|
||||
ipoints = np.array(np.around((xpoints-xmin)/dx),dtype=int)
|
||||
return ipoints,xmin,xmax
|
||||
#end def index_by_layer
|
||||
|
||||
# create a grid consistent with the detected layer separations
|
||||
self.vlog('Initializing point index array',n=1,time=True)
|
||||
grid_shape = np.empty((D, ),dtype=int )
|
||||
grid_axes = np.zeros((D,D),dtype=float)
|
||||
grid_corner = np.empty((D, ),dtype=float)
|
||||
ipoints = np.empty((N,D),dtype=int)
|
||||
for d in range(D):
|
||||
self.vlog('Indexing points along dim {}'.format(d),n=2,time=True)
|
||||
ixpoints,xmin,xmax = index_by_layer(rpoints[:,d],tol)
|
||||
grid_shape[d] = ixpoints.max()+1
|
||||
grid_axes[d,d] = xmax-xmin
|
||||
grid_corner[d] = xmin
|
||||
ipoints[:,d] = ixpoints
|
||||
#end for
|
||||
grid_axes = np.dot(grid_axes,axes)
|
||||
grid_corner = np.dot(grid_corner,axes)
|
||||
grid_bconds = D*('o',) # assumed for now
|
||||
|
||||
self.vlog('Constructing regular bounding grid',n=1,time=True)
|
||||
grid = self.grid_class(
|
||||
shape = grid_shape,
|
||||
axes = grid_axes,
|
||||
corner = grid_corner,
|
||||
bconds = grid_bconds,
|
||||
centered = False,
|
||||
)
|
||||
|
||||
self.vlog('Checking grid point mapping',n=1,time=True)
|
||||
# check that the generated grid contains the inputted points
|
||||
ipflat = grid.flat_indices(ipoints)
|
||||
dmax = np.linalg.norm(points-grid.points[ipflat],axis=1).max()
|
||||
if dmax>tol:
|
||||
self.error('Generated grid points do not match those read in.\nMaximum deviation: {}\nTolerance : {}'.format(dmax,tol))
|
||||
#end if
|
||||
# count number of times each grid point is mapped to
|
||||
point_counts = np.bincount(ipflat,minlength=grid.npoints)
|
||||
# if not averaging, check for one-to-one mapping
|
||||
max_count = point_counts.max()
|
||||
if not average and max_count>1:
|
||||
self.error('Mapping to grid points is not one-to-one.\nMax no. of read points mapped to a grid point: {}'.format(max_count))
|
||||
#end if
|
||||
|
||||
# map the inputted values onto the generated grid
|
||||
self.vlog('Mapping data values onto grid',n=1,time=True)
|
||||
grid_values = np.zeros((grid.npoints,values.shape[1]),dtype=float)
|
||||
if not average or max_count==1:
|
||||
grid_values[ipflat] = values
|
||||
else:
|
||||
self.vlog('Averaging multi-valued points',n=2,time=True)
|
||||
for i,v in zip(ipflat,values):
|
||||
grid_values[i] += v
|
||||
#end for
|
||||
for i,c in enumerate(point_counts):
|
||||
if c>1:
|
||||
grid_values[i] /= c
|
||||
#end if
|
||||
#end for
|
||||
#end if
|
||||
|
||||
# initialize the GridFunction object
|
||||
self.vlog('Constructing GridFunction object',n=1,time=True)
|
||||
self.reset()
|
||||
self.initialize(
|
||||
grid = grid,
|
||||
values = grid_values,
|
||||
copy = False,
|
||||
)
|
||||
|
||||
self.vlog('Read complete',n=1,time=True)
|
||||
#end def read_from_points
|
||||
|
||||
#end class ParallelotopeGridFunction
|
||||
|
||||
|
||||
|
@ -3710,6 +4299,145 @@ class SpheroidSurfaceGridFunction(StructuredGridFunctionWithAxes):
|
|||
|
||||
|
||||
|
||||
# test needed
|
||||
def parallelotope_grid_function(
|
||||
loc = 'parallelotope_grid_function',
|
||||
**kwargs
|
||||
):
|
||||
if 'points' not in kwargs:
|
||||
gf = ParallelotopeGridFunction(**kwargs)
|
||||
else:
|
||||
required = set(('points','values','axes'))
|
||||
optional = set(('tol','average'))
|
||||
present = set(kwargs.keys())
|
||||
if len(required-present)>0:
|
||||
error('Grid function cannot be created.\nWhen "points" is provided, "axes" and "values" must also be given.\nInputs provided: {}'.format(sorted(present)),loc)
|
||||
elif len(present-required-optional)>0:
|
||||
error('Grid function cannot be created.\nUnrecognized inputs provided.\nUnrecognized inputs: {}\nValid options are: {}'.format(sorted(present-required-optional),sorted(required|optional)))
|
||||
#end if
|
||||
gf = ParallelotopeGridFunction()
|
||||
gf.read_from_points(**kwargs)
|
||||
#end if
|
||||
return gf
|
||||
#end def parallelotope_grid_function
|
||||
|
||||
|
||||
|
||||
# test needed
|
||||
def grid_function(
|
||||
type = 'parallelotope',
|
||||
loc = 'grid_function',
|
||||
**kwargs
|
||||
):
|
||||
filepath = kwargs.pop('filepath',None)
|
||||
if filepath is not None:
|
||||
return read_grid_function(filepath,loc=loc)
|
||||
#end if
|
||||
gf = None
|
||||
if type=='parallelotope':
|
||||
gf = parallelotope_grid_function(loc=loc,**kwargs)
|
||||
elif type=='spheroid':
|
||||
gf = SpheroidGridFunction(**kwargs)
|
||||
elif type=='spheroid_surface':
|
||||
gf = SpheroidSurfaceGridFunction(**kwargs)
|
||||
else:
|
||||
error('Grid function type "{}" is not recognized.\nValid options are: parallelotope, spheroid, or spheroid_surface'.format(type),loc)
|
||||
#end if
|
||||
return gf
|
||||
#end def grid_function
|
||||
|
||||
|
||||
def grid(
|
||||
type = 'parallelotope',
|
||||
loc = 'grid',
|
||||
**kwargs
|
||||
):
|
||||
filepath = kwargs.pop('filepath',None)
|
||||
if filepath is not None:
|
||||
return read_grid(filepath,loc=loc)
|
||||
#end if
|
||||
g = None
|
||||
if type=='parallelotope':
|
||||
g = ParallelotopeGrid(**kwargs)
|
||||
elif type=='spheroid':
|
||||
g = SpheroidGrid(**kwargs)
|
||||
elif type=='spheroid_surface':
|
||||
g = SpheroidSurfaceGrid(**kwargs)
|
||||
else:
|
||||
error('Grid type "{}" is not recognized.\nValid options are: parallelotope, spheroid, or spheroid_surface'.format(type),loc)
|
||||
#end if
|
||||
return g
|
||||
#end def grid
|
||||
|
||||
|
||||
|
||||
#test needed
|
||||
gf_file_type_map = obj(
|
||||
xsf = ParallelotopeGridFunction,
|
||||
)
|
||||
def read_grid_function(filepath,format=None,loc='read_grid_function'):
|
||||
filepath,format = process_file_format(filepath,format,loc)
|
||||
if format not in gf_file_type_map:
|
||||
error('Cannot read file.\nUnrecognized file format for grid function.\nFile format provided: {}\nAllowed formats include: {}'.format(format,sorted(gf_file_type_map.keys())),loc)
|
||||
#end if
|
||||
gf = gf_file_type_map[format]()
|
||||
gf.read(filepath)
|
||||
return gf
|
||||
#end def read_grid_function
|
||||
|
||||
|
||||
# test needed
|
||||
g_file_type_map = obj(
|
||||
xsf = ParallelotopeGrid,
|
||||
)
|
||||
def read_grid(filepath,format=None,loc='read_grid'):
|
||||
filepath,format = process_file_format(filepath,format,loc)
|
||||
if format not in g_file_type_map:
|
||||
error('Cannot read file.\nUnrecognized file format for grid.\nFile format provided: {}\nAllowed formats include: {}'.format(format,sorted(g_file_type_map.keys())),loc)
|
||||
#end if
|
||||
g = g_file_type_map[format]()
|
||||
g.read(filepath)
|
||||
return g
|
||||
#end def read_grid
|
||||
|
||||
|
||||
def process_file_format(filepath,format,loc):
|
||||
if isinstance(filepath,StandardFile):
|
||||
format = filepath.sftype
|
||||
elif not isinstance(filepath,str):
|
||||
error('Cannot read file.\nExpected a file path.\nInstead received type: {}\nWith value: {}\nPlease provide a file path and try again.'.format(filepath.__class__.__name__,filepath),loc)
|
||||
elif not os.path.exists(filepath):
|
||||
error('Cannot read file. File path does not exist.\nFile path provided: {}'.format(filepath),loc)
|
||||
#end if
|
||||
if format is None:
|
||||
format = filepath.rsplit('.',1)[1].lower()
|
||||
#end if
|
||||
return filepath,format
|
||||
#end def process_file_format
|
||||
|
||||
|
||||
|
||||
gfs = [
|
||||
ParallelotopeGridFunction,
|
||||
SpheroidGridFunction,
|
||||
SpheroidSurfaceGridFunction,
|
||||
]
|
||||
grid_to_grid_function = obj()
|
||||
for gf in gfs:
|
||||
grid_to_grid_function[gf.grid_class.__name__] = gf
|
||||
#end for
|
||||
del gfs
|
||||
|
||||
def grid_function_from_grid(grid):
|
||||
gname = grid.__class__.__name__
|
||||
if gname not in grid_to_grid_function:
|
||||
error('Cannot find matching grid function for grid "{}".'.format(gname))
|
||||
#end if
|
||||
return grid_to_grid_function[gname]
|
||||
#end def grid_function_from_grid
|
||||
|
||||
|
||||
|
||||
|
||||
if __name__=='__main__':
|
||||
|
||||
|
|
|
@ -1323,3 +1323,151 @@ def convex_hull(points,dimension=None,tol=None):
|
|||
verts = list(set(verts))
|
||||
return verts
|
||||
#end def convex_hull
|
||||
|
||||
|
||||
|
||||
|
||||
def layers_1d(xpoints,tol,xmin=None,xmax=None,merge=True,periodic=False,full_return=False):
|
||||
|
||||
# Update inputs to be consistent with periodic merge, if requested
|
||||
if merge and periodic:
|
||||
if xmax is None:
|
||||
error('"xmax" must be provided.','layers_1d')
|
||||
elif xmin is None:
|
||||
xmin = 0.0
|
||||
#end if
|
||||
#end if
|
||||
|
||||
# Setup a virtual fine grid along x with grid cell width of tol
|
||||
if xmin is None:
|
||||
xmin = xpoints.min()
|
||||
#end if
|
||||
if xmax is None:
|
||||
xmax = xpoints.max()
|
||||
#end if
|
||||
nbins = np.uint64(np.round(np.ceil((xmax-xmin+tol)/tol)))
|
||||
dx = (xmax-xmin+tol)/nbins
|
||||
|
||||
# Find the points belonging to each grid cell/layer
|
||||
layers = obj()
|
||||
for i,x in enumerate(xpoints):
|
||||
n = np.uint64(x/dx)
|
||||
if n not in layers:
|
||||
layers[n] = obj(ilist=[i],xsum=x,nsum=1)
|
||||
else:
|
||||
l = layers[n]
|
||||
l.ilist.append(i)
|
||||
l.xsum += x
|
||||
l.nsum += 1
|
||||
#end if
|
||||
#end for
|
||||
|
||||
# Find the mean of each set of points
|
||||
for l in layers:
|
||||
l.xmean = l.xsum/l.nsum
|
||||
#end for
|
||||
|
||||
# Merge neighboring layers if the means are within the tolerance
|
||||
if merge:
|
||||
lprev = None
|
||||
for n in sorted(layers.keys()):
|
||||
l = layers[n]
|
||||
if lprev is not None and np.abs(l.xmean-lprev.xmean)<tol:
|
||||
lprev.ilist.extend(l.ilist)
|
||||
lprev.xsum += l.xsum
|
||||
lprev.nsum += l.nsum
|
||||
lprev.xmean = lprev.xsum/lprev.nsum
|
||||
del layers[n]
|
||||
else:
|
||||
lprev = l
|
||||
#end if
|
||||
#end for
|
||||
|
||||
# Merge around periodic boundary
|
||||
if periodic:
|
||||
nleft = 0
|
||||
nright = nbins-1
|
||||
if nleft in layers and nright in layers:
|
||||
ll = layers[nleft]
|
||||
lr = layers[nright]
|
||||
L = xmax-xmin
|
||||
if np.abs(ll.xmean + L - lr.xmean)<tol:
|
||||
ll.ilist.extend(lr.ilist)
|
||||
ll.xsum += lr.xsum
|
||||
ll.nsum += lr.nsum
|
||||
ll.xmean = ll.xsum/ll.nsum
|
||||
del layers[nright]
|
||||
#end if
|
||||
#end if
|
||||
#end if
|
||||
#end if
|
||||
|
||||
if not full_return:
|
||||
return layers
|
||||
else:
|
||||
return layers,xmin,xmax
|
||||
#end if
|
||||
#end def layers_1d
|
||||
|
||||
|
||||
|
||||
def layer_means_1d(xpoints,tol,full_return=False):
|
||||
# Get layer data
|
||||
layers,xmin,xmax = layers_1d(xpoints,tol,full_return=True)
|
||||
|
||||
# Extract and sort layer means
|
||||
xlayers = np.empty((len(layers),),dtype=float)
|
||||
i = 0
|
||||
for n in sorted(layers.keys()):
|
||||
l = layers[n]
|
||||
xlayers[i] = l.xmean
|
||||
i += 1
|
||||
#end for
|
||||
xlayers.sort()
|
||||
|
||||
if not full_return:
|
||||
return xlayers
|
||||
else:
|
||||
return xlayers,xmin,xmax
|
||||
#end if
|
||||
#end def layer_means_1d
|
||||
|
||||
|
||||
|
||||
def index_by_layer_1d(xpoints,tol,uniform=True,check=True,full_return=False):
|
||||
# Get layer means
|
||||
xlayer,xmin,xmax = layer_means_1d(xpoints,tol,full_return=True)
|
||||
|
||||
# Get layer separations
|
||||
dxlayer = xlayer[1:]-xlayer[:-1]
|
||||
|
||||
# Find appropriate layer separation for indexing
|
||||
if uniform:
|
||||
dxmin = dxlayer.min()
|
||||
dxmax = dxlayer.max()
|
||||
if np.abs(dxmax-dxmin)>2*tol:
|
||||
error('Could not determine layer separation.\nLayers are not evenly spaced.\nMin layer spacing: {}\nMax layer spacing: {}\nSpread : {}\nTolerance: {}'.format(dxmin,dxmax,dxmax-dxmin,2*tol),'index_by_layer_1d')
|
||||
#end if
|
||||
dx = dxlayer.mean()
|
||||
else:
|
||||
dx = dxlayer.min()
|
||||
#end if
|
||||
|
||||
# Find indices for each layer
|
||||
ipoints = np.array(np.around((xpoints-xmin)/dx),dtype=int)
|
||||
|
||||
# Check the layer indices, if requested
|
||||
if check:
|
||||
if np.abs(ipoints*dx+xmin-xpoints).max()>3*tol: # Tolerance accounts for merge
|
||||
error('Layer indexing failed.','index_by_layer_1d')
|
||||
#end if
|
||||
#end if
|
||||
|
||||
if not full_return:
|
||||
return ipoints
|
||||
else:
|
||||
return ipoints,xmin,xmax
|
||||
#end if
|
||||
#end def index_by_layer
|
||||
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -2992,6 +2992,52 @@ class Structure(Sobj):
|
|||
#end def voronoi_neighbors
|
||||
|
||||
|
||||
def voronoi_vectors(self,indices=None,restrict=None):
|
||||
ni = self.voronoi_neighbors(indices,restrict)
|
||||
vt = self.vector_table()
|
||||
vv = obj()
|
||||
for i,vi in ni.items():
|
||||
vv[i] = vt[i,vi]
|
||||
#end for
|
||||
return vv
|
||||
#end def voronoi_vectors
|
||||
|
||||
|
||||
def voronoi_distances(self,indices=None,restrict=False):
|
||||
vv = self.voronoi_vectors(indices,restrict)
|
||||
vd = obj()
|
||||
for i,vvi in vv.items():
|
||||
vd[i] = np.linalg.norm(vvi,axis=1)
|
||||
#end for
|
||||
return vd
|
||||
#end def voronoi_distances
|
||||
|
||||
|
||||
def voronoi_radii(self,indices=None,restrict=None):
|
||||
vd = self.voronoi_distances(indices,restrict)
|
||||
vr = obj()
|
||||
for i,vdi in vd.items():
|
||||
vr[i] = vdi.min()/2
|
||||
#end for
|
||||
return vr
|
||||
#end def voronoi_radii
|
||||
|
||||
|
||||
def voronoi_species_radii(self):
|
||||
vr = self.voronoi_radii()
|
||||
vsr = obj()
|
||||
for i,r in vr.items():
|
||||
e = self.elem[i]
|
||||
if e not in vsr:
|
||||
vsr[e] = r
|
||||
else:
|
||||
vsr[e] = min(vsr[e],r)
|
||||
#end if
|
||||
#end for
|
||||
return vsr
|
||||
#end def voronoi_species_radii
|
||||
|
||||
|
||||
# test needed
|
||||
# get nearest neighbors according to constrants (voronoi, max distance, coord. number)
|
||||
def nearest_neighbors(self,indices=None,rmax=None,nmax=None,restrict=False,voronoi=False,distances=False,**spec_max):
|
||||
|
@ -4626,7 +4672,9 @@ class Structure(Sobj):
|
|||
|
||||
|
||||
def read_xsf(self,filepath):
|
||||
if os.path.exists(filepath):
|
||||
if isinstance(filepath,XsfFile):
|
||||
f = filepath
|
||||
elif os.path.exists(filepath):
|
||||
f = XsfFile(filepath)
|
||||
else:
|
||||
f = XsfFile()
|
||||
|
@ -4634,7 +4682,11 @@ class Structure(Sobj):
|
|||
#end if
|
||||
elem = []
|
||||
for n in f.elem:
|
||||
elem.append(pt.simple_elements[n].symbol)
|
||||
if isinstance(n,str):
|
||||
elem.append(n)
|
||||
else:
|
||||
elem.append(pt.simple_elements[n].symbol)
|
||||
#end if
|
||||
#end for
|
||||
self.dim = 3
|
||||
self.units = 'A'
|
||||
|
@ -5060,6 +5112,155 @@ class Structure(Sobj):
|
|||
cell = (lattice,positions,numbers)
|
||||
return cell
|
||||
#end def spglib_cell
|
||||
|
||||
|
||||
def get_symmetry(self,symprec=1e-5):
|
||||
cell = self.spglib_cell()
|
||||
return spglib.get_symmetry(cell,symprec=symprec)
|
||||
#end def get_symmetry
|
||||
|
||||
|
||||
def get_symmetry_dataset(self,symprec=1e-5, angle_tolerance=-1.0, hall_number=0):
|
||||
cell = self.spglib_cell()
|
||||
ds = spglib.get_symmetry_dataset(
|
||||
cell,
|
||||
symprec = symprec,
|
||||
angle_tolerance = angle_tolerance,
|
||||
hall_number = hall_number,
|
||||
)
|
||||
return ds
|
||||
#end def get_symmetry
|
||||
|
||||
|
||||
# functions based on direct spglib interface
|
||||
|
||||
def symmetry_data(self,*args,**kwargs):
|
||||
ds = self.get_symmetry_dataset(*args,**kwargs)
|
||||
ds = obj(ds)
|
||||
for k,v in ds.items():
|
||||
if isinstance(v,dict):
|
||||
ds[k] = obj(v)
|
||||
#end if
|
||||
#end for
|
||||
return ds
|
||||
#end def symmetry_data
|
||||
|
||||
|
||||
# test needed
|
||||
def space_group_operations(self,tol=1e-5,unit=False):
|
||||
ds = self.get_symmetry(symprec=tol)
|
||||
if ds is None:
|
||||
self.error('Symmetry search failed.\nspglib error message:\n{}'.format(spglib.get_error_message()))
|
||||
#end if
|
||||
ds = obj(ds)
|
||||
rotations = ds.rotations
|
||||
translations = ds.translations
|
||||
|
||||
if not unit:
|
||||
# Transform to Cartesian
|
||||
axes = self.axes
|
||||
axinv = np.linalg.inv(axes)
|
||||
for n,(R,t) in enumerate(zip(rotations,translations)):
|
||||
rotations[n] = np.dot(axinv,np.dot(R,axes))
|
||||
translations[n] = np.dot(t,axes)
|
||||
#end for
|
||||
#end if
|
||||
|
||||
return rotations,translations
|
||||
#end def space_group_operations
|
||||
|
||||
|
||||
def point_group_operations(self,tol=1e-5,unit=False):
|
||||
rotations,translations = self.space_group_operations(tol=tol,unit=unit)
|
||||
no_trans = translations.max(axis=1) < tol
|
||||
return rotations[no_trans]
|
||||
#end def point_group_operations
|
||||
|
||||
|
||||
def check_point_group_operations(self,rotations=None,tol=1e-5,unit=False,dtol=1e-5,ncheck=1,exit=False):
|
||||
if rotations is None:
|
||||
rotations = self.point_group_operations(tol=tol,unit=unit)
|
||||
#ned if
|
||||
r = self.pos
|
||||
if ncheck=='all':
|
||||
ncheck = len(r)
|
||||
#end if
|
||||
all_same = True
|
||||
for n in range(ncheck):
|
||||
rc = r[n]
|
||||
for R in rotations:
|
||||
rp = np.dot(r-rc,R)+rc
|
||||
dt = self.min_image_distances(r,rp)
|
||||
same = True
|
||||
for d in dt:
|
||||
same &= dt.min()<dtol
|
||||
#end for
|
||||
all_same &= same
|
||||
#end for
|
||||
#end for
|
||||
if not all_same and exit:
|
||||
self.error('Point group operators are not all symmetries of the structure.')
|
||||
#end if
|
||||
return all_same
|
||||
#end def check_point_group_operations
|
||||
|
||||
|
||||
def equivalent_atoms(self):
|
||||
ds = self.symmetry_data()
|
||||
|
||||
# collect sets of species labels
|
||||
species_by_specnum = obj()
|
||||
for e,sn in zip(self.elem,ds.equivalent_atoms):
|
||||
is_elem,es = is_element(e,symbol=True)
|
||||
if sn not in species_by_specnum:
|
||||
species_by_specnum[sn] = set()
|
||||
#end if
|
||||
species_by_specnum[sn].add(es)
|
||||
#end for
|
||||
for sn,sset in species_by_specnum.items():
|
||||
if len(sset)>1:
|
||||
self.error('Cannot find equivalent atoms.\nMultiple atomic species were marked as being equivalent.\nSpecies marked in this way: {}'.format(list(sset)))
|
||||
#end if
|
||||
species_by_specnum[sn] = list(sset)[0]
|
||||
#end for
|
||||
|
||||
# give each unique species a unique label
|
||||
labels_by_specnum = obj()
|
||||
species_list = list(species_by_specnum.values())
|
||||
species_set = set(species_list)
|
||||
species_counts = obj()
|
||||
for s in species_set:
|
||||
species_counts[s] = species_list.count(s)
|
||||
#end for
|
||||
spec_counts = obj()
|
||||
for sn,s in species_by_specnum.items():
|
||||
if species_counts[s]==1:
|
||||
labels_by_specnum[sn] = s
|
||||
else:
|
||||
if s not in spec_counts:
|
||||
spec_counts[s] = 1
|
||||
else:
|
||||
spec_counts[s] += 1
|
||||
#end if
|
||||
labels_by_specnum[sn] = s + str(spec_counts[s])
|
||||
#end if
|
||||
#end for
|
||||
|
||||
# find indices for each unique species
|
||||
equiv_indices = obj()
|
||||
for s in labels_by_specnum.values():
|
||||
equiv_indices[s] = list()
|
||||
#end for
|
||||
for i,sn in enumerate(ds.equivalent_atoms):
|
||||
equiv_indices[labels_by_specnum[sn]].append(i)
|
||||
#end for
|
||||
for s,indices in equiv_indices.items():
|
||||
equiv_indices[s] = np.array(indices,dtype=int)
|
||||
#end for
|
||||
|
||||
return equiv_indices
|
||||
#end def equivalent_atoms
|
||||
|
||||
#end class Structure
|
||||
Structure.set_operations()
|
||||
|
||||
|
@ -5728,8 +5929,8 @@ class Crystal(Structure):
|
|||
)
|
||||
|
||||
lattice_centerings = obj(
|
||||
triclinic = ['P'],
|
||||
monoclinic = ['P','A','B','C'],
|
||||
triclinic = ['P'],
|
||||
monoclinic = ['P','A','B','C'],
|
||||
orthorhombic = ['P','C','I','F'],
|
||||
tetragonal = ['P','I'],
|
||||
hexagonal = ['P','R'],
|
||||
|
|
|
@ -202,9 +202,10 @@ def text_diff(t1,t2,atol=def_atol,rtol=def_rtol,int_as_float=False,full=False,by
|
|||
# print the difference between two objects
|
||||
def print_diff(o1,o2,atol=def_atol,rtol=def_rtol,int_as_float=False,text=False,by_line=False): # used in debugging, not actual tests
|
||||
from generic import obj
|
||||
print(20*'=')
|
||||
hline = '========== {} =========='
|
||||
print(hline.format('left object'))
|
||||
print(o1)
|
||||
print(20*'=')
|
||||
print(hline.format('right object'))
|
||||
print(o2)
|
||||
if not text:
|
||||
diff,diff1,diff2 = object_diff(o1,o2,atol,rtol,int_as_float,full=True)
|
||||
|
@ -213,13 +214,24 @@ def print_diff(o1,o2,atol=def_atol,rtol=def_rtol,int_as_float=False,text=False,b
|
|||
#end if
|
||||
d1 = obj(diff1)
|
||||
d2 = obj(diff2)
|
||||
print(20*'=')
|
||||
print(hline.format('left diff'))
|
||||
print(d1)
|
||||
print(20*'=')
|
||||
print(hline.format('right diff'))
|
||||
print(d2)
|
||||
#end def print_diff
|
||||
|
||||
|
||||
# check for object equality and if different, print the difference
|
||||
def check_object_eq(o1,o2):
|
||||
same = object_eq(o1,o2)
|
||||
if not same and global_data['verbose']:
|
||||
print('\nObjects differ, please see below for details')
|
||||
print_diff(o1,o2)
|
||||
#end if
|
||||
return same
|
||||
#end def check_object_eq
|
||||
|
||||
|
||||
|
||||
# additional convenience functions to use value_diff and object_diff
|
||||
value_neq = value_diff
|
||||
|
@ -586,6 +598,7 @@ class FailedTest(Exception):
|
|||
|
||||
|
||||
global_data = dict(
|
||||
verbose = False,
|
||||
job_ref_table = False,
|
||||
)
|
||||
|
||||
|
|
|
@ -0,0 +1,404 @@
|
|||
|
||||
import testing
|
||||
from testing import value_eq,object_eq,text_eq,check_object_eq
|
||||
from testing import FailedTest,failed
|
||||
|
||||
|
||||
def test_import():
|
||||
import observables
|
||||
from observables import AttributeProperties,DefinedAttributeBase
|
||||
from observables import Observable
|
||||
from observables import MomentumDistribution
|
||||
#end def test_import
|
||||
|
||||
|
||||
|
||||
def test_defined_attribute_base():
|
||||
from generic import obj,NexusError
|
||||
from observables import AttributeProperties,DefinedAttributeBase
|
||||
|
||||
# empty init
|
||||
p = AttributeProperties()
|
||||
o = DefinedAttributeBase()
|
||||
|
||||
pref = obj(
|
||||
assigned = set(),
|
||||
deepcopy = False,
|
||||
default = None,
|
||||
dest = None,
|
||||
name = None,
|
||||
no_default = False,
|
||||
required = False,
|
||||
type = None,
|
||||
)
|
||||
assert(check_object_eq(p,pref))
|
||||
assert(len(o)==0)
|
||||
|
||||
# init
|
||||
p = AttributeProperties(
|
||||
default = 2,
|
||||
dest = 'nest',
|
||||
deepcopy = True,
|
||||
)
|
||||
|
||||
pref = obj(
|
||||
assigned = {'dest', 'default', 'deepcopy'},
|
||||
deepcopy = True,
|
||||
default = 2,
|
||||
dest = 'nest',
|
||||
name = None,
|
||||
no_default = False,
|
||||
required = False,
|
||||
type = None,
|
||||
)
|
||||
assert(check_object_eq(p,pref))
|
||||
|
||||
|
||||
# define attributes
|
||||
class DA(DefinedAttributeBase):
|
||||
None
|
||||
#end class DA
|
||||
|
||||
da_attributes = obj(
|
||||
a = obj(
|
||||
default = 1,
|
||||
),
|
||||
b = obj(
|
||||
default = 2,
|
||||
type = int,
|
||||
required = True,
|
||||
),
|
||||
c = obj(
|
||||
dest = 'nest',
|
||||
type = str,
|
||||
),
|
||||
d = obj(
|
||||
type = dict,
|
||||
deepcopy = True,
|
||||
no_default = True,
|
||||
),
|
||||
nest = obj(
|
||||
type = obj,
|
||||
default = obj,
|
||||
),
|
||||
)
|
||||
|
||||
DA.define_attributes(**da_attributes)
|
||||
|
||||
def get_class_dict(cls):
|
||||
o = obj()
|
||||
o.transfer_from(cls.__dict__)
|
||||
for k in list(o.keys()):
|
||||
if k.startswith('_'):
|
||||
del o[k]
|
||||
#end if
|
||||
#end for
|
||||
return o
|
||||
#end def get_class_dict
|
||||
|
||||
o = get_class_dict(DA)
|
||||
|
||||
oref = obj(
|
||||
deepcopy_attributes = {'d'},
|
||||
required_attributes = {'b'},
|
||||
sublevel_attributes = {'c'},
|
||||
toplevel_attributes = {'d', 'a', 'b', 'nest'},
|
||||
typed_attributes = {'d', 'c', 'b', 'nest'},
|
||||
attribute_definitions = obj(
|
||||
a = obj(
|
||||
assigned = {'default'},
|
||||
deepcopy = False,
|
||||
default = 1,
|
||||
dest = None,
|
||||
name = 'a',
|
||||
no_default = False,
|
||||
required = False,
|
||||
type = None,
|
||||
),
|
||||
b = obj(
|
||||
assigned = {'required', 'default', 'type'},
|
||||
deepcopy = False,
|
||||
default = 2,
|
||||
dest = None,
|
||||
name = 'b',
|
||||
no_default = False,
|
||||
required = True,
|
||||
type = int,
|
||||
),
|
||||
c = obj(
|
||||
assigned = {'dest', 'type'},
|
||||
deepcopy = False,
|
||||
default = None,
|
||||
dest = 'nest',
|
||||
name = 'c',
|
||||
no_default = False,
|
||||
required = False,
|
||||
type = str,
|
||||
),
|
||||
d = obj(
|
||||
assigned = {'deepcopy', 'no_default', 'type'},
|
||||
deepcopy = True,
|
||||
default = None,
|
||||
dest = None,
|
||||
name = 'd',
|
||||
no_default = True,
|
||||
required = False,
|
||||
type = dict,
|
||||
),
|
||||
nest = obj(
|
||||
assigned = {'default', 'type'},
|
||||
deepcopy = False,
|
||||
default = obj,
|
||||
no_default = False,
|
||||
dest = None,
|
||||
name = 'nest',
|
||||
required = False,
|
||||
type = obj,
|
||||
),
|
||||
),
|
||||
)
|
||||
|
||||
assert(check_object_eq(o,oref))
|
||||
|
||||
|
||||
class DA2(DA):
|
||||
None
|
||||
#end class DA2
|
||||
|
||||
DA2.define_attributes(
|
||||
DA,
|
||||
e = obj(
|
||||
required = True,
|
||||
)
|
||||
)
|
||||
|
||||
o = get_class_dict(DA)
|
||||
assert(check_object_eq(o,oref))
|
||||
|
||||
o2 = get_class_dict(DA2)
|
||||
oref.required_attributes.add('e')
|
||||
oref.toplevel_attributes.add('e')
|
||||
oref.attribute_definitions.e = obj(
|
||||
assigned = {'required'},
|
||||
deepcopy = False,
|
||||
default = None,
|
||||
dest = None,
|
||||
name = 'e',
|
||||
no_default = False,
|
||||
required = True,
|
||||
type = None,
|
||||
)
|
||||
assert(check_object_eq(o2,oref))
|
||||
|
||||
|
||||
# empty init
|
||||
da = DA()
|
||||
assert(len(da)==0)
|
||||
assert(not da.check_attributes())
|
||||
|
||||
|
||||
# set_default_attributes
|
||||
da.set_default_attributes()
|
||||
|
||||
da_ref = obj(
|
||||
a = 1,
|
||||
b = 2,
|
||||
nest = obj(
|
||||
c = None,
|
||||
),
|
||||
)
|
||||
|
||||
assert(check_object_eq(da,da_ref))
|
||||
assert(not da.check_attributes())
|
||||
|
||||
|
||||
# set_attributes/init
|
||||
da = DA(
|
||||
a = 2,
|
||||
b = 3,
|
||||
c = 'hi',
|
||||
d = dict(a=2),
|
||||
)
|
||||
|
||||
da_ref = obj(
|
||||
a = 2,
|
||||
b = 3,
|
||||
d = {'a': 2},
|
||||
nest = obj(
|
||||
c = 'hi',
|
||||
),
|
||||
)
|
||||
|
||||
assert(check_object_eq(da,da_ref))
|
||||
assert(da.check_attributes())
|
||||
|
||||
|
||||
# set_attribute
|
||||
da = DA()
|
||||
|
||||
assert('b' not in da)
|
||||
da.set_attribute('b',5)
|
||||
assert('b' in da)
|
||||
assert(da.b==5)
|
||||
|
||||
try:
|
||||
da.set_attribute('unknown',None)
|
||||
raise FailedTest
|
||||
except NexusError:
|
||||
None
|
||||
except FailedTest:
|
||||
failed()
|
||||
except Exception as e:
|
||||
failed(str(e))
|
||||
#end try
|
||||
|
||||
da.set_attribute('b',3)
|
||||
assert(da.b==3)
|
||||
|
||||
try:
|
||||
da.set_attribute('b',3.5)
|
||||
raise FailedTest
|
||||
except NexusError:
|
||||
None
|
||||
except FailedTest:
|
||||
failed()
|
||||
except Exception as e:
|
||||
failed(str(e))
|
||||
#end try
|
||||
|
||||
|
||||
# get_attribute
|
||||
da = DA()
|
||||
|
||||
da.set_attribute('b',5)
|
||||
assert('b' in da)
|
||||
assert(da.b==5)
|
||||
|
||||
assert(da.get_attribute('b')==5)
|
||||
assert(da.get_attribute('a',None) is None)
|
||||
assert(da.get_attribute('c',None) is None)
|
||||
|
||||
try:
|
||||
da.get_attribute('unknown')
|
||||
raise FailedTest
|
||||
except NexusError:
|
||||
None
|
||||
except FailedTest:
|
||||
failed()
|
||||
except Exception as e:
|
||||
failed(str(e))
|
||||
#end try
|
||||
|
||||
try:
|
||||
da.get_attribute('a')
|
||||
raise FailedTest
|
||||
except NexusError:
|
||||
None
|
||||
except FailedTest:
|
||||
failed()
|
||||
except Exception as e:
|
||||
failed(str(e))
|
||||
#end try
|
||||
|
||||
try:
|
||||
da.get_attribute('c')
|
||||
raise FailedTest
|
||||
except NexusError:
|
||||
None
|
||||
except FailedTest:
|
||||
failed()
|
||||
except Exception as e:
|
||||
failed(str(e))
|
||||
#end try
|
||||
|
||||
|
||||
# default values
|
||||
class DA_def(DefinedAttributeBase):
|
||||
None
|
||||
#end class DA_def
|
||||
DA_def.set_unassigned_default(None)
|
||||
|
||||
class DA_def2(DA_def):
|
||||
None
|
||||
#end class DA_def2
|
||||
|
||||
DA_def2.define_attributes(**da_attributes)
|
||||
|
||||
assert(not DefinedAttributeBase.class_has('unassigned_default'))
|
||||
assert(DA_def.class_has('unassigned_default'))
|
||||
assert(DA_def2.class_has('unassigned_default'))
|
||||
assert(DA_def.unassigned_default is None)
|
||||
assert(DA_def2.unassigned_default is None)
|
||||
|
||||
da = DA_def2()
|
||||
assert('a' not in da)
|
||||
assert('c' not in da)
|
||||
assert('nest' not in da)
|
||||
assert(da.get_attribute('a',None) is None)
|
||||
assert(da.get_attribute('c',None) is None)
|
||||
|
||||
try:
|
||||
da.get_attribute('a')
|
||||
raise FailedTest
|
||||
except NexusError:
|
||||
None
|
||||
except FailedTest:
|
||||
failed()
|
||||
except Exception as e:
|
||||
failed(str(e))
|
||||
#end try
|
||||
|
||||
try:
|
||||
da.get_attribute('c')
|
||||
raise FailedTest
|
||||
except NexusError:
|
||||
None
|
||||
except FailedTest:
|
||||
failed()
|
||||
except Exception as e:
|
||||
failed(str(e))
|
||||
#end try
|
||||
|
||||
da.set_default_attributes()
|
||||
|
||||
da_ref = obj(
|
||||
a = 1,
|
||||
b = 2,
|
||||
nest = obj(
|
||||
c = None,
|
||||
)
|
||||
)
|
||||
|
||||
assert(check_object_eq(da,da_ref))
|
||||
|
||||
da.b = None
|
||||
|
||||
assert(da.get_attribute('a')==1)
|
||||
assert(da.get_attribute('b',assigned=False) is None)
|
||||
assert(da.get_attribute('c',assigned=False) is None)
|
||||
assert(da.get_attribute('a',2)==1)
|
||||
assert(da.get_attribute('a',assigned=False)==1)
|
||||
|
||||
try:
|
||||
da.get_attribute('b')
|
||||
raise FailedTest
|
||||
except NexusError:
|
||||
None
|
||||
except FailedTest:
|
||||
failed()
|
||||
except Exception as e:
|
||||
failed(str(e))
|
||||
#end try
|
||||
|
||||
try:
|
||||
da.get_attribute('c')
|
||||
raise FailedTest
|
||||
except NexusError:
|
||||
None
|
||||
except FailedTest:
|
||||
failed()
|
||||
except Exception as e:
|
||||
failed(str(e))
|
||||
#end try
|
||||
|
||||
#end def test_defined_attribute_base
|
|
@ -1457,3 +1457,48 @@ def test_interpolate():
|
|||
assert(value_eq(Cr_positions,Cr_positions_ref))
|
||||
|
||||
#end def test_interpolate
|
||||
|
||||
|
||||
|
||||
if versions.spglib_available:
|
||||
def test_point_group_operations():
|
||||
from structure import generate_structure,Crystal
|
||||
|
||||
nrotations = dict(
|
||||
Ca2CuO3 = 8,
|
||||
CaO = 48,
|
||||
Cl2Ca2CuO2 = 16,
|
||||
CuO = 2,
|
||||
CuO2_plane = 16,
|
||||
La2CuO4 = 2,
|
||||
NaCl = 48,
|
||||
ZnO = 6,
|
||||
calcium = 48,
|
||||
copper = 48,
|
||||
diamond = 24,
|
||||
graphene = 12,
|
||||
oxygen = 4,
|
||||
rocksalt = 48,
|
||||
wurtzite = 6,
|
||||
)
|
||||
|
||||
for struct,cell in sorted(Crystal.known_crystals.keys()):
|
||||
if cell!='prim':
|
||||
continue
|
||||
#end if
|
||||
|
||||
s = generate_structure(
|
||||
structure = struct,
|
||||
cell = cell,
|
||||
)
|
||||
|
||||
rotations = s.point_group_operations()
|
||||
assert(struct in nrotations)
|
||||
assert(len(rotations)==nrotations[struct])
|
||||
|
||||
valid = s.check_point_group_operations(rotations,exit=False)
|
||||
assert(valid)
|
||||
#end for
|
||||
|
||||
#end def test_point_group_operations
|
||||
#end if
|
||||
|
|
|
@ -30,10 +30,6 @@
|
|||
#include "AFQMC/Drivers/DriverFactory.h"
|
||||
#include "AFQMC/Memory/buffer_allocators.h"
|
||||
|
||||
#include "AFQMC/Utilities/myTimer.h"
|
||||
|
||||
myTimer Timer;
|
||||
|
||||
namespace qmcplusplus
|
||||
{
|
||||
TimerList_t AFQMCTimers;
|
||||
|
@ -86,7 +82,7 @@ bool AFQMCFactory::parse(xmlNodePtr cur)
|
|||
cur = cur->next;
|
||||
}
|
||||
|
||||
// eventually read an inital buffer size from input
|
||||
// eventually read an initial buffer size from input
|
||||
// they are initialized now with 10 MBs
|
||||
// host_buffer_type->resize(buffer_size);
|
||||
// device_buffer_type->resize(buffer_size);
|
||||
|
|
|
@ -100,7 +100,6 @@ ELSEIF(ENABLE_HIP)
|
|||
)
|
||||
SET(AFQMC_SRCS ${AFQMC_SRCS}
|
||||
Memory/HIP/hip_utilities.cpp
|
||||
Numerics/detail/HIP/hipblas_utils.cpp
|
||||
Memory/HIP/hip_arch.cpp
|
||||
Memory/HIP/hip_init.cpp)
|
||||
ELSE(ENABLE_CUDA)
|
||||
|
@ -175,3 +174,4 @@ IF (BUILD_UNIT_TESTS)
|
|||
SUBDIRS(Estimators/tests)
|
||||
ENDIF()
|
||||
|
||||
SUBDIRS(Numerics/performance)
|
||||
|
|
|
@ -172,7 +172,7 @@ bool AFQMCDriver::checkpoint(WalkerSet& wset, int block, int step)
|
|||
|
||||
if (!dumpToHDF5(wset, dump))
|
||||
{
|
||||
app_error() << " Problems writting checkpoint file in Driver/AFQMCDriver::checkpoint(). \n";
|
||||
app_error() << " Problems writing checkpoint file in Driver/AFQMCDriver::checkpoint(). \n";
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
@ -31,6 +31,7 @@
|
|||
|
||||
#include "AFQMC/Utilities/type_conversion.hpp"
|
||||
#include "AFQMC/Utilities/taskgroup.h"
|
||||
#include "AFQMC/Utilities/myTimer.h"
|
||||
|
||||
namespace qmcplusplus
|
||||
{
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
#include "type_traits/scalar_traits.h"
|
||||
#include "AFQMC/Wavefunctions/Excitations.hpp"
|
||||
#include "AFQMC/Wavefunctions/phmsd_helpers.hpp"
|
||||
#include "AFQMC/Utilities/myTimer.h"
|
||||
|
||||
namespace qmcplusplus
|
||||
{
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
|
||||
#include "AFQMC/Utilities/type_conversion.hpp"
|
||||
#include "AFQMC/Utilities/taskgroup.h"
|
||||
#include "AFQMC/Utilities/myTimer.h"
|
||||
|
||||
namespace qmcplusplus
|
||||
{
|
||||
|
|
|
@ -32,6 +32,7 @@
|
|||
#include "AFQMC/Utilities/Utils.hpp"
|
||||
#include "AFQMC/Numerics/batched_operations.hpp"
|
||||
#include "AFQMC/Numerics/tensor_operations.hpp"
|
||||
#include "AFQMC/Utilities/myTimer.h"
|
||||
|
||||
namespace qmcplusplus
|
||||
{
|
||||
|
|
|
@ -71,7 +71,7 @@ HamiltonianOperations RealDenseHamiltonian::getHamiltonianOperations(bool pureSD
|
|||
ndown = PsiT[1].size(0);
|
||||
int NEL = nup + ndown;
|
||||
|
||||
// distribute work over equivalent nodes in TGprop.TG() accross TG.Global()
|
||||
// distribute work over equivalent nodes in TGprop.TG() across TG.Global()
|
||||
auto Qcomm(TG.Global().split(TGprop.getLocalGroupNumber(), TG.Global().rank()));
|
||||
#if defined(ENABLE_CUDA) || defined(ENABLE_HIP)
|
||||
auto distNode(TG.Node().split(TGprop.getLocalGroupNumber(), TG.Node().rank()));
|
||||
|
@ -234,7 +234,7 @@ HamiltonianOperations RealDenseHamiltonian::getHamiltonianOperations(bool pureSD
|
|||
CMatrix lak({nup, NMO});
|
||||
for (int nd = 0; nd < ndet; nd++)
|
||||
{
|
||||
// all nodes accross Qcomm share same segment {nc0,ncN}
|
||||
// all nodes across Qcomm share same segment {nc0,ncN}
|
||||
for (int nc = 0; nc < local_ncv; nc++)
|
||||
{
|
||||
if (nc % Qcomm.size() != Qcomm.rank())
|
||||
|
|
|
@ -69,7 +69,7 @@ HamiltonianOperations RealDenseHamiltonian_v2::getHamiltonianOperations(bool pur
|
|||
ndown = PsiT[1].size(0);
|
||||
int NEL = nup + ndown;
|
||||
|
||||
// distribute work over equivalent nodes in TGprop.TG() accross TG.Global()
|
||||
// distribute work over equivalent nodes in TGprop.TG() across TG.Global()
|
||||
auto Qcomm(TG.Global().split(TGprop.getLocalGroupNumber(), TG.Global().rank()));
|
||||
#if defined(ENABLE_CUDA) || defined(ENABLE_HIP)
|
||||
auto distNode(TG.Node().split(TGprop.getLocalGroupNumber(), TG.Node().rank()));
|
||||
|
@ -232,7 +232,7 @@ HamiltonianOperations RealDenseHamiltonian_v2::getHamiltonianOperations(bool pur
|
|||
CMatrix lak({nup, NMO});
|
||||
for (int nd = 0; nd < ndet; nd++)
|
||||
{
|
||||
// all nodes accross Qcomm share same segment {nc0,ncN}
|
||||
// all nodes across Qcomm share same segment {nc0,ncN}
|
||||
for (int nc = 0; nc < local_ncv; nc++)
|
||||
{
|
||||
if (nc % Qcomm.size() != Qcomm.rank())
|
||||
|
|
|
@ -28,6 +28,7 @@
|
|||
#include "AFQMC/Numerics/ma_operations.hpp"
|
||||
#include "AFQMC/SlaterDeterminantOperations/rotate.hpp"
|
||||
#include "AFQMC/Utilities/afqmc_TTI.hpp"
|
||||
#include "AFQMC/Utilities/myTimer.h"
|
||||
|
||||
#include "AFQMC/Hamiltonians/rotateHamiltonian_Helper2.hpp"
|
||||
|
||||
|
|
|
@ -89,28 +89,68 @@ void INIT(boost::mpi3::shared_communicator& node, unsigned long long int iseed)
|
|||
}
|
||||
}
|
||||
|
||||
void memcopy(void* dst, const void* src, size_t count, MEMCOPYKIND kind)
|
||||
void memcopy(void* dst, const void* src, size_t count, MEMCOPYKIND kind, const std::string& message)
|
||||
{
|
||||
if (cudaSuccess != cudaMemcpy(dst, src, count, tocudaMemcpyKind(kind)))
|
||||
cudaError_t status = cudaMemcpy(dst, src, count, tocudaMemcpyKind(kind));
|
||||
if (status != cudaSuccess)
|
||||
{
|
||||
if (message != "")
|
||||
{
|
||||
std::cerr << "Error: " << message << std::endl;
|
||||
}
|
||||
std::cerr << " Error when calling cudaMemcpy: " << cudaGetErrorString(status) << std::endl;
|
||||
throw std::runtime_error("Error: cudaMemcpy returned error code.");
|
||||
}
|
||||
}
|
||||
|
||||
void memcopy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, size_t height, MEMCOPYKIND kind)
|
||||
void memcopy2D(void* dst,
|
||||
size_t dpitch,
|
||||
const void* src,
|
||||
size_t spitch,
|
||||
size_t width,
|
||||
size_t height,
|
||||
MEMCOPYKIND kind,
|
||||
const std::string& message)
|
||||
{
|
||||
if (cudaSuccess != cudaMemcpy2D(dst, dpitch, src, spitch, width, height, tocudaMemcpyKind(kind)))
|
||||
cudaError_t status = cudaMemcpy2D(dst, dpitch, src, spitch, width, height, tocudaMemcpyKind(kind));
|
||||
if (status != cudaSuccess)
|
||||
{
|
||||
if (message != "")
|
||||
{
|
||||
std::cerr << "Error: " << message << std::endl;
|
||||
}
|
||||
std::cerr << " Error when calling cudaMemcpy2D: " << cudaGetErrorString(status) << std::endl;
|
||||
throw std::runtime_error("Error: cudaMemcpy2D returned error code.");
|
||||
}
|
||||
}
|
||||
|
||||
void malloc(void** devPtr, size_t size)
|
||||
void malloc(void** devPtr, size_t size, const std::string& message)
|
||||
{
|
||||
if (cudaSuccess != cudaMalloc(devPtr, size))
|
||||
cudaError_t status = cudaMalloc(devPtr, size);
|
||||
if (status != cudaSuccess)
|
||||
{
|
||||
std::cerr << " Error allocating " << size * 1024.0 / 1024.0 << " MBs on GPU." << std::endl;
|
||||
if (message != "")
|
||||
{
|
||||
std::cerr << " Error from: " << message << std::endl;
|
||||
}
|
||||
std::cerr << " Error when calling cudaMalloc: " << cudaGetErrorString(status) << std::endl;
|
||||
throw std::runtime_error("Error: cudaMalloc returned error code.");
|
||||
}
|
||||
}
|
||||
|
||||
void free(void* p) { cudaFree(p); }
|
||||
void free(void* p, const std::string& message)
|
||||
{
|
||||
cudaError_t status = cudaFree(p);
|
||||
if (status != cudaSuccess)
|
||||
{
|
||||
if (message != "")
|
||||
{
|
||||
std::cerr << " Error from: " << message << std::endl;
|
||||
}
|
||||
std::cerr << " Error from calling cudaFree: " << cudaGetErrorString(status) << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace arch
|
||||
|
||||
|
|
|
@ -64,7 +64,7 @@ cudaMemcpyKind tocudaMemcpyKind(MEMCOPYKIND v);
|
|||
|
||||
void INIT(boost::mpi3::shared_communicator& node, unsigned long long int iseed = 911ULL);
|
||||
|
||||
void memcopy(void* dst, const void* src, size_t count, MEMCOPYKIND kind = memcopyDefault);
|
||||
void memcopy(void* dst, const void* src, size_t count, MEMCOPYKIND kind = memcopyDefault, const std::string& message = "");
|
||||
|
||||
void memcopy2D(void* dst,
|
||||
size_t dpitch,
|
||||
|
@ -72,11 +72,12 @@ void memcopy2D(void* dst,
|
|||
size_t spitch,
|
||||
size_t width,
|
||||
size_t height,
|
||||
MEMCOPYKIND kind = memcopyDefault);
|
||||
MEMCOPYKIND kind = memcopyDefault,
|
||||
const std::string& message = "");
|
||||
|
||||
void malloc(void** devPtr, size_t size);
|
||||
void malloc(void** devPtr, size_t size, const std::string& message = "");
|
||||
|
||||
void free(void* p);
|
||||
void free(void* p, const std::string& message = "");
|
||||
|
||||
} // namespace arch
|
||||
|
||||
|
|
|
@ -58,6 +58,7 @@ void CUDA_INIT(boost::mpi3::shared_communicator& node, unsigned long long int is
|
|||
cudaDeviceProp dev;
|
||||
cuda_check(cudaGetDeviceProperties(&dev, 0), "cudaGetDeviceProperties");
|
||||
qmcplusplus::app_log() << " CUDA compute capability: " << dev.major << "." << dev.minor << std::endl;
|
||||
qmcplusplus::app_log() << " Device Name: " << dev.name << std::endl;
|
||||
if (dev.major <= 6)
|
||||
{
|
||||
qmcplusplus::app_log() << " Warning CUDA major compute capability < 6.0" << std::endl;
|
||||
|
|
|
@ -86,28 +86,68 @@ void INIT(boost::mpi3::shared_communicator& node, unsigned long long int iseed)
|
|||
}
|
||||
}
|
||||
|
||||
void memcopy(void* dst, const void* src, size_t count, MEMCOPYKIND kind)
|
||||
void memcopy(void* dst, const void* src, size_t count, MEMCOPYKIND kind, const std::string& message)
|
||||
{
|
||||
if (hipSuccess != hipMemcpy(dst, src, count, tohipMemcpyKind(kind)))
|
||||
hipError_t status = hipMemcpy(dst, src, count, tohipMemcpyKind(kind));
|
||||
if (status != hipSuccess)
|
||||
{
|
||||
if (message != "")
|
||||
{
|
||||
std::cerr << "Error: " << message << std::endl;
|
||||
}
|
||||
std::cerr << " Error when calling hipMemcpy: " << hipGetErrorString(status) << std::endl;
|
||||
throw std::runtime_error("Error: hipMemcpy returned error code.");
|
||||
}
|
||||
}
|
||||
|
||||
void memcopy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, size_t height, MEMCOPYKIND kind)
|
||||
void memcopy2D(void* dst,
|
||||
size_t dpitch,
|
||||
const void* src,
|
||||
size_t spitch,
|
||||
size_t width,
|
||||
size_t height,
|
||||
MEMCOPYKIND kind,
|
||||
const std::string& message)
|
||||
{
|
||||
if (hipSuccess != hipMemcpy2D(dst, dpitch, src, spitch, width, height, tohipMemcpyKind(kind)))
|
||||
hipError_t status = hipMemcpy2D(dst, dpitch, src, spitch, width, height, tohipMemcpyKind(kind));
|
||||
if (status != hipSuccess)
|
||||
{
|
||||
if (message != "")
|
||||
{
|
||||
std::cerr << "Error: " << message << std::endl;
|
||||
}
|
||||
std::cerr << " Error when calling hipMemcpy2D: " << hipGetErrorString(status) << std::endl;
|
||||
throw std::runtime_error("Error: hipMemcpy2D returned error code.");
|
||||
}
|
||||
}
|
||||
|
||||
void malloc(void** devPtr, size_t size)
|
||||
void malloc(void** devPtr, size_t size, const std::string& message)
|
||||
{
|
||||
if (hipSuccess != hipMalloc(devPtr, size))
|
||||
hipError_t status = hipMalloc(devPtr, size);
|
||||
if (status != hipSuccess)
|
||||
{
|
||||
std::cerr << " Error allocating " << size * 1024.0 / 1024.0 << " MBs on GPU." << std::endl;
|
||||
if (message != "")
|
||||
{
|
||||
std::cerr << " Error from : " << message << std::endl;
|
||||
}
|
||||
std::cerr << " Error when call hipMalloc " << < hipGetErrorString(status) << std::endl;
|
||||
throw std::runtime_error("Error: hipMalloc returned error code.");
|
||||
}
|
||||
}
|
||||
|
||||
void free(void* p) { hipFree(p); }
|
||||
void free(void* p, const std::string& message)
|
||||
{
|
||||
hipError_t status = hipFree(p);
|
||||
if (status != hipSuccess)
|
||||
{
|
||||
if (message != "")
|
||||
{
|
||||
std::cerr << " Error from : " << message << std::endl;
|
||||
}
|
||||
std::cerr << " Error when calling hipFree: " << hipGetErrorString(status) << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace arch
|
||||
|
||||
|
|
|
@ -62,7 +62,7 @@ hipMemcpyKind tohipMemcpyKind(MEMCOPYKIND v);
|
|||
|
||||
void INIT(boost::mpi3::shared_communicator& node, unsigned long long int iseed = 911ULL);
|
||||
|
||||
void memcopy(void* dst, const void* src, size_t count, MEMCOPYKIND kind = memcopyDefault);
|
||||
void memcopy(void* dst, const void* src, size_t count, MEMCOPYKIND kind = memcopyDefault, const std::string& message = "");
|
||||
|
||||
void memcopy2D(void* dst,
|
||||
size_t dpitch,
|
||||
|
@ -70,11 +70,12 @@ void memcopy2D(void* dst,
|
|||
size_t spitch,
|
||||
size_t width,
|
||||
size_t height,
|
||||
MEMCOPYKIND kind = memcopyDefault);
|
||||
MEMCOPYKIND kind = memcopyDefault,
|
||||
const std::string& message = "");
|
||||
|
||||
void malloc(void** devPtr, size_t size);
|
||||
void malloc(void** devPtr, size_t size, const std::string& message = "");
|
||||
|
||||
void free(void* p);
|
||||
void free(void* p, const std::string& message = "");
|
||||
|
||||
} // namespace arch
|
||||
|
||||
|
|
|
@ -59,6 +59,7 @@ void HIP_INIT(boost::mpi3::shared_communicator& node, unsigned long long int ise
|
|||
hipDeviceProp_t dev;
|
||||
hip_check(hipGetDeviceProperties(&dev, 0), "hipGetDeviceProperties");
|
||||
qmcplusplus::app_log() << " HIP compute capability: " << dev.major << "." << dev.minor << std::endl;
|
||||
qmcplusplus::app_log() << " Device Name: " << dev.name << std::endl;
|
||||
if (dev.major <= 6)
|
||||
{
|
||||
qmcplusplus::app_log() << " Warning HIP major compute capability < 6.0" << std::endl;
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
//#include "AFQMC/Memory/CUDA/cuda_gpu_pointer.hpp"
|
||||
#include "AFQMC/Utilities/type_conversion.hpp"
|
||||
#include "AFQMC/Memory/device_pointers.hpp"
|
||||
#include "AFQMC/Memory/arch.hpp"
|
||||
#include "AFQMC/Numerics/detail/CUDA/cublas_wrapper.hpp"
|
||||
//#include "AFQMC/Numerics/detail/CUDA/cublasXt_wrapper.hpp"
|
||||
// hand coded kernels for blas extensions
|
||||
|
@ -55,9 +56,7 @@ template<typename T, typename Q>
|
|||
inline static void copy(int n, T const* x, int incx, device_pointer<Q> y, int incy)
|
||||
{
|
||||
static_assert(std::is_same<typename std::decay<Q>::type, T>::value, "Wrong dispatch.\n");
|
||||
if (cudaSuccess !=
|
||||
cudaMemcpy2D(to_address(y), sizeof(Q) * incy, x, sizeof(T) * incx, sizeof(T), n, cudaMemcpyHostToDevice))
|
||||
throw std::runtime_error("Error: cudaMemcpy2D returned error code.");
|
||||
arch::memcopy2D(to_address(y), sizeof(Q) * incy, x, sizeof(T) * incx, sizeof(T), n, arch::memcopyH2D, "lapack_cuda_gpu_ptr::copy");
|
||||
}
|
||||
|
||||
template<typename T, typename Q>
|
||||
|
@ -65,9 +64,7 @@ inline static void copy(int n, device_pointer<Q> x, int incx, T* y, int incy)
|
|||
{
|
||||
static_assert(std::is_same<typename std::decay<Q>::type, T>::value, "Wrong dispatch.\n");
|
||||
assert(sizeof(Q) == sizeof(T));
|
||||
if (cudaSuccess !=
|
||||
cudaMemcpy2D(y, sizeof(T) * incy, to_address(x), sizeof(Q) * incx, sizeof(T), n, cudaMemcpyDeviceToHost))
|
||||
throw std::runtime_error("Error: cudaMemcpy2D returned error code.");
|
||||
arch::memcopy2D(y, sizeof(T) * incy, to_address(x), sizeof(Q) * incx, sizeof(T), n, arch::memcopyD2H, "lapack_cuda_gpu_ptr::copy");
|
||||
}
|
||||
|
||||
// scal Specializations
|
||||
|
@ -329,7 +326,7 @@ inline static void gemmBatched(char Atrans,
|
|||
{
|
||||
static_assert(std::is_same<typename std::decay<Q1>::type, T>::value, "Wrong dispatch.\n");
|
||||
static_assert(std::is_same<typename std::decay<Q2>::type, T>::value, "Wrong dispatch.\n");
|
||||
// replace with single call to cudaMalloc and cudaMemcpy
|
||||
// replace with single call to arch::malloc and arch::memcopy
|
||||
T **A_d, **B_d, **C_d;
|
||||
Q1** A_h;
|
||||
Q2** B_h;
|
||||
|
@ -343,17 +340,17 @@ inline static void gemmBatched(char Atrans,
|
|||
B_h[i] = to_address(B[i]);
|
||||
C_h[i] = to_address(C[i]);
|
||||
}
|
||||
cudaMalloc((void**)&A_d, batchSize * sizeof(*A_h));
|
||||
cudaMalloc((void**)&B_d, batchSize * sizeof(*B_h));
|
||||
cudaMalloc((void**)&C_d, batchSize * sizeof(*C_h));
|
||||
cudaMemcpy(A_d, A_h, batchSize * sizeof(*A_h), cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(B_d, B_h, batchSize * sizeof(*B_h), cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(C_d, C_h, batchSize * sizeof(*C_h), cudaMemcpyHostToDevice);
|
||||
arch::malloc((void**)&A_d, batchSize * sizeof(*A_h));
|
||||
arch::malloc((void**)&B_d, batchSize * sizeof(*B_h));
|
||||
arch::malloc((void**)&C_d, batchSize * sizeof(*C_h));
|
||||
arch::memcopy(A_d, A_h, batchSize * sizeof(*A_h), arch::memcopyH2D);
|
||||
arch::memcopy(B_d, B_h, batchSize * sizeof(*B_h), arch::memcopyH2D);
|
||||
arch::memcopy(C_d, C_h, batchSize * sizeof(*C_h), arch::memcopyH2D);
|
||||
cublas::cublas_gemmBatched(*(A[0]).handles.cublas_handle, Atrans, Btrans, M, N, K, alpha, A_d, lda, B_d, ldb, beta,
|
||||
C_d, ldc, batchSize);
|
||||
cudaFree(A_d);
|
||||
cudaFree(B_d);
|
||||
cudaFree(C_d);
|
||||
arch::free(A_d);
|
||||
arch::free(B_d);
|
||||
arch::free(C_d);
|
||||
delete[] A_h;
|
||||
delete[] B_h;
|
||||
delete[] C_h;
|
||||
|
@ -385,7 +382,7 @@ inline static void gemmBatched(char Atrans,
|
|||
static_assert(std::is_same<typename std::decay<Q1>::type, T2>::value, "Wrong dispatch.\n");
|
||||
static_assert(std::is_same<typename std::decay<Q2>::type, T>::value, "Wrong dispatch.\n");
|
||||
assert(Atrans == 'N' || Atrans == 'n');
|
||||
// replace with single call to cudaMalloc and cudaMemcpy
|
||||
// replace with single call to arch::malloc and arch::memcopy
|
||||
T2** A_d;
|
||||
T** B_d;
|
||||
T2** C_d;
|
||||
|
@ -401,17 +398,17 @@ inline static void gemmBatched(char Atrans,
|
|||
B_h[i] = to_address(B[i]);
|
||||
C_h[i] = to_address(C[i]);
|
||||
}
|
||||
cudaMalloc((void**)&A_d, batchSize * sizeof(*A_h));
|
||||
cudaMalloc((void**)&B_d, batchSize * sizeof(*B_h));
|
||||
cudaMalloc((void**)&C_d, batchSize * sizeof(*C_h));
|
||||
cudaMemcpy(A_d, A_h, batchSize * sizeof(*A_h), cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(B_d, B_h, batchSize * sizeof(*B_h), cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(C_d, C_h, batchSize * sizeof(*C_h), cudaMemcpyHostToDevice);
|
||||
arch::malloc((void**)&A_d, batchSize * sizeof(*A_h));
|
||||
arch::malloc((void**)&B_d, batchSize * sizeof(*B_h));
|
||||
arch::malloc((void**)&C_d, batchSize * sizeof(*C_h));
|
||||
arch::memcopy(A_d, A_h, batchSize * sizeof(*A_h), arch::memcopyH2D);
|
||||
arch::memcopy(B_d, B_h, batchSize * sizeof(*B_h), arch::memcopyH2D);
|
||||
arch::memcopy(C_d, C_h, batchSize * sizeof(*C_h), arch::memcopyH2D);
|
||||
cublas::cublas_gemmBatched(*(A[0]).handles.cublas_handle, Atrans, Btrans, M, N, K, alpha, A_d, lda, B_d, ldb, beta,
|
||||
C_d, ldc, batchSize);
|
||||
cudaFree(A_d);
|
||||
cudaFree(B_d);
|
||||
cudaFree(C_d);
|
||||
arch::free(A_d);
|
||||
arch::free(B_d);
|
||||
arch::free(C_d);
|
||||
delete[] A_h;
|
||||
delete[] B_h;
|
||||
delete[] C_h;
|
||||
|
@ -465,28 +462,21 @@ template<typename T, typename T2>
|
|||
inline static void copy2D(int N, int M, device_pointer<T> src, int lda, device_pointer<T2> dst, int ldb)
|
||||
{
|
||||
static_assert(std::is_same<typename std::decay<T>::type, T2>::value, "Wrong dispatch.\n");
|
||||
if (cudaSuccess !=
|
||||
cudaMemcpy2D(to_address(dst), sizeof(T2) * ldb, to_address(src), sizeof(T) * lda, M * sizeof(T), N,
|
||||
cudaMemcpyDeviceToDevice))
|
||||
throw std::runtime_error("Error: cudaMemcpy2D returned error code in copy2D.");
|
||||
arch::memcopy2D(to_address(dst), sizeof(T2) * ldb, to_address(src), sizeof(T) * lda, M * sizeof(T), N, arch::memcopyD2D, "blas_cuda_gpu_ptr::copy2D");
|
||||
}
|
||||
|
||||
template<typename T, typename T2>
|
||||
inline static void copy2D(int N, int M, T const* src, int lda, device_pointer<T2> dst, int ldb)
|
||||
{
|
||||
static_assert(std::is_same<typename std::decay<T>::type, T2>::value, "Wrong dispatch.\n");
|
||||
if (cudaSuccess !=
|
||||
cudaMemcpy2D(to_address(dst), sizeof(T2) * ldb, src, sizeof(T) * lda, M * sizeof(T), N, cudaMemcpyHostToDevice))
|
||||
throw std::runtime_error("Error: cudaMemcpy2D returned error code in copy2D.");
|
||||
arch::memcopy2D(to_address(dst), sizeof(T2) * ldb, src, sizeof(T) * lda, M * sizeof(T), N, arch::memcopyH2D, "blas_cuda_gpu_ptr::copy2D");
|
||||
}
|
||||
|
||||
template<typename T, typename T2>
|
||||
inline static void copy2D(int N, int M, device_pointer<T> src, int lda, T2* dst, int ldb)
|
||||
{
|
||||
static_assert(std::is_same<typename std::decay<T>::type, T2>::value, "Wrong dispatch.\n");
|
||||
if (cudaSuccess !=
|
||||
cudaMemcpy2D(dst, sizeof(T2) * ldb, to_address(src), sizeof(T) * lda, M * sizeof(T), N, cudaMemcpyDeviceToHost))
|
||||
throw std::runtime_error("Error: cudaMemcpy2D returned error code in copy2D.");
|
||||
arch::memcopy2D(dst, sizeof(T2) * ldb, to_address(src), sizeof(T) * lda, M * sizeof(T), N, arch::memcopyD2H, "blas_cuda_gpu_ptr::copy2D");
|
||||
}
|
||||
|
||||
template<typename T, typename T2>
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#include <cassert>
|
||||
#include "AFQMC/Utilities/type_conversion.hpp"
|
||||
#include "AFQMC/Memory/custom_pointers.hpp"
|
||||
#include "AFQMC/Memory/arch.hpp"
|
||||
#include "AFQMC/Numerics/detail/CUDA/cublas_wrapper.hpp"
|
||||
#include "AFQMC/Numerics/detail/CUDA/cusolver_wrapper.hpp"
|
||||
#include "AFQMC/Numerics/detail/CUDA/Kernels/setIdentity.cuh"
|
||||
|
@ -72,7 +73,7 @@ inline static void getrf(const int n,
|
|||
{
|
||||
cusolverStatus_t status = cusolver::cusolver_getrf(*a.handles.cusolverDn_handle, n, m, to_address(a), lda,
|
||||
to_address(work), to_address(piv), to_address(piv) + n);
|
||||
cudaMemcpy(&st, to_address(piv) + n, sizeof(int), cudaMemcpyDeviceToHost);
|
||||
arch::memcopy(&st, to_address(piv) + n, sizeof(int), arch::memcopyD2H);
|
||||
if (CUSOLVER_STATUS_SUCCESS != status)
|
||||
{
|
||||
std::cerr << " cublas_getrf status, info: " << status << " " << st << std::endl;
|
||||
|
@ -95,13 +96,13 @@ inline static void getrfBatched(const int n,
|
|||
A_h = new T*[batchSize];
|
||||
for (int i = 0; i < batchSize; i++)
|
||||
A_h[i] = to_address(a[i]);
|
||||
cudaMalloc((void**)&A_d, batchSize * sizeof(*A_h));
|
||||
cudaMemcpy(A_d, A_h, batchSize * sizeof(*A_h), cudaMemcpyHostToDevice);
|
||||
arch::malloc((void**)&A_d, batchSize * sizeof(*A_h));
|
||||
arch::memcopy(A_d, A_h, batchSize * sizeof(*A_h), arch::memcopyH2D);
|
||||
cublasStatus_t status = cublas::cublas_getrfBatched(*(a[0]).handles.cublas_handle, n, A_d, lda, to_address(piv),
|
||||
to_address(info), batchSize);
|
||||
if (CUBLAS_STATUS_SUCCESS != status)
|
||||
throw std::runtime_error("Error: cublas_getrf returned error code.");
|
||||
cudaFree(A_d);
|
||||
arch::free(A_d);
|
||||
delete[] A_h;
|
||||
}
|
||||
|
||||
|
@ -129,20 +130,16 @@ inline static void getri(int n,
|
|||
throw std::runtime_error("Error: getri<GPU_MEMORY_POINTER_TYPE> required lda = 1.");
|
||||
|
||||
int* info;
|
||||
if (cudaSuccess != cudaMalloc((void**)&info, sizeof(int)))
|
||||
{
|
||||
std::cerr << " Error getri: Error allocating on GPU." << std::endl;
|
||||
throw std::runtime_error("Error: cudaMalloc returned error code.");
|
||||
}
|
||||
arch::malloc((void**)&info, sizeof(int), "lapack_cuda_gpu_ptr::getri");
|
||||
|
||||
kernels::set_identity(n, n, to_address(work), n);
|
||||
if (CUSOLVER_STATUS_SUCCESS !=
|
||||
cusolver::cusolver_getrs(*a.handles.cusolverDn_handle, CUBLAS_OP_N, n, n, to_address(a), lda, to_address(piv),
|
||||
to_address(work), n, info))
|
||||
throw std::runtime_error("Error: cusolver_getrs returned error code.");
|
||||
cudaMemcpy(to_address(a), to_address(work), n * n * sizeof(T), cudaMemcpyDeviceToDevice);
|
||||
cudaMemcpy(&status, info, sizeof(int), cudaMemcpyDeviceToHost);
|
||||
cudaFree(info);
|
||||
arch::memcopy(to_address(a), to_address(work), n * n * sizeof(T), arch::memcopyD2D);
|
||||
arch::memcopy(&status, info, sizeof(int), arch::memcopyD2H);
|
||||
arch::free(info);
|
||||
}
|
||||
|
||||
// getriBatched
|
||||
|
@ -165,16 +162,16 @@ inline static void getriBatched(int n,
|
|||
A_h[i] = to_address(a[i]);
|
||||
C_h[i] = to_address(ainv[i]);
|
||||
}
|
||||
cudaMalloc((void**)&A_d, batchSize * sizeof(*A_h));
|
||||
cudaMalloc((void**)&C_d, batchSize * sizeof(*C_h));
|
||||
cudaMemcpy(A_d, A_h, batchSize * sizeof(*A_h), cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(C_d, C_h, batchSize * sizeof(*C_h), cudaMemcpyHostToDevice);
|
||||
arch::malloc((void**)&A_d, batchSize * sizeof(*A_h));
|
||||
arch::malloc((void**)&C_d, batchSize * sizeof(*C_h));
|
||||
arch::memcopy(A_d, A_h, batchSize * sizeof(*A_h), arch::memcopyH2D);
|
||||
arch::memcopy(C_d, C_h, batchSize * sizeof(*C_h), arch::memcopyH2D);
|
||||
cublasStatus_t status = cublas::cublas_getriBatched(*(a[0]).handles.cublas_handle, n, A_d, lda, to_address(piv), C_d,
|
||||
ldc, to_address(info), batchSize);
|
||||
if (CUBLAS_STATUS_SUCCESS != status)
|
||||
throw std::runtime_error("Error: cublas_getri returned error code.");
|
||||
cudaFree(A_d);
|
||||
cudaFree(C_d);
|
||||
arch::free(A_d);
|
||||
arch::free(C_d);
|
||||
delete[] A_h;
|
||||
delete[] C_h;
|
||||
}
|
||||
|
@ -198,16 +195,16 @@ inline static void matinvBatched(int n,
|
|||
A_h[i] = to_address(a[i]);
|
||||
C_h[i] = to_address(ainv[i]);
|
||||
}
|
||||
cudaMalloc((void**)&A_d, batchSize * sizeof(*A_h));
|
||||
cudaMalloc((void**)&C_d, batchSize * sizeof(*C_h));
|
||||
cudaMemcpy(A_d, A_h, batchSize * sizeof(*A_h), cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(C_d, C_h, batchSize * sizeof(*C_h), cudaMemcpyHostToDevice);
|
||||
arch::malloc((void**)&A_d, batchSize * sizeof(*A_h));
|
||||
arch::malloc((void**)&C_d, batchSize * sizeof(*C_h));
|
||||
arch::memcopy(A_d, A_h, batchSize * sizeof(*A_h), arch::memcopyH2D);
|
||||
arch::memcopy(C_d, C_h, batchSize * sizeof(*C_h), arch::memcopyH2D);
|
||||
cublasStatus_t status = cublas::cublas_matinvBatched(*(a[0]).handles.cublas_handle, n, A_d, lda, C_d, lda_inv,
|
||||
to_address(info), batchSize);
|
||||
if (CUBLAS_STATUS_SUCCESS != status)
|
||||
throw std::runtime_error("Error: cublas_matinv returned error code.");
|
||||
cudaFree(A_d);
|
||||
cudaFree(C_d);
|
||||
arch::free(A_d);
|
||||
arch::free(C_d);
|
||||
delete[] A_h;
|
||||
delete[] C_h;
|
||||
}
|
||||
|
@ -233,15 +230,11 @@ inline static void geqrf(int M,
|
|||
{
|
||||
// allocating here for now
|
||||
int* piv;
|
||||
if (cudaSuccess != cudaMalloc((void**)&piv, sizeof(int)))
|
||||
{
|
||||
std::cerr << " Error geqrf: Error allocating on GPU." << std::endl;
|
||||
throw std::runtime_error("Error: cudaMalloc returned error code.");
|
||||
}
|
||||
arch::malloc((void**)&piv, sizeof(int), "lapack_cuda_gpu_ptr::geqrf");
|
||||
|
||||
cusolverStatus_t status = cusolver::cusolver_geqrf(*A.handles.cusolverDn_handle, M, N, to_address(A), LDA,
|
||||
to_address(TAU), to_address(WORK), LWORK, piv);
|
||||
cudaMemcpy(&INFO, piv, sizeof(int), cudaMemcpyDeviceToHost);
|
||||
arch::memcopy(&INFO, piv, sizeof(int), arch::memcopyD2H);
|
||||
if (CUSOLVER_STATUS_SUCCESS != status)
|
||||
{
|
||||
int st;
|
||||
|
@ -249,7 +242,7 @@ inline static void geqrf(int M,
|
|||
std::cerr.flush();
|
||||
throw std::runtime_error("Error: cublas_geqrf returned error code.");
|
||||
}
|
||||
cudaFree(piv);
|
||||
arch::free(piv);
|
||||
}
|
||||
|
||||
// gelqf
|
||||
|
@ -294,15 +287,11 @@ void static gqr(int M,
|
|||
{
|
||||
// allocating here for now
|
||||
int* piv;
|
||||
if (cudaSuccess != cudaMalloc((void**)&piv, sizeof(int)))
|
||||
{
|
||||
std::cerr << " Error gqr: Error allocating on GPU." << std::endl;
|
||||
throw std::runtime_error("Error: cudaMalloc returned error code.");
|
||||
}
|
||||
arch::malloc((void**)&piv, sizeof(int), "lapack_cuda_gpu_ptr::gqr");
|
||||
|
||||
cusolverStatus_t status = cusolver::cusolver_gqr(*A.handles.cusolverDn_handle, M, N, K, to_address(A), LDA,
|
||||
to_address(TAU), to_address(WORK), LWORK, piv);
|
||||
cudaMemcpy(&INFO, piv, sizeof(int), cudaMemcpyDeviceToHost);
|
||||
arch::memcopy(&INFO, piv, sizeof(int), arch::memcopyD2H);
|
||||
if (CUSOLVER_STATUS_SUCCESS != status)
|
||||
{
|
||||
int st;
|
||||
|
@ -310,7 +299,7 @@ void static gqr(int M,
|
|||
std::cerr.flush();
|
||||
throw std::runtime_error("Error: cublas_gqr returned error code.");
|
||||
}
|
||||
cudaFree(piv);
|
||||
arch::free(piv);
|
||||
}
|
||||
|
||||
template<typename T, typename I>
|
||||
|
@ -378,15 +367,15 @@ inline static void geqrfBatched(int M,
|
|||
T_h[i] = to_address(TAU[i]);
|
||||
T** B_d;
|
||||
std::vector<int> inf(batchSize);
|
||||
cudaMalloc((void**)&B_d, 2 * batchSize * sizeof(*B_h));
|
||||
cudaMemcpy(B_d, B_h, 2 * batchSize * sizeof(*B_h), cudaMemcpyHostToDevice);
|
||||
arch::malloc((void**)&B_d, 2 * batchSize * sizeof(*B_h));
|
||||
arch::memcopy(B_d, B_h, 2 * batchSize * sizeof(*B_h), arch::memcopyH2D);
|
||||
T** A_d(B_d);
|
||||
T** T_d(B_d + batchSize);
|
||||
cublasStatus_t status = cublas::cublas_geqrfBatched(*(A[0]).handles.cublas_handle, M, N, A_d, LDA, T_d,
|
||||
to_address(inf.data()), batchSize);
|
||||
if (CUBLAS_STATUS_SUCCESS != status)
|
||||
throw std::runtime_error("Error: cublas_geqrfBatched returned error code.");
|
||||
cudaFree(B_d);
|
||||
arch::free(B_d);
|
||||
delete[] B_h;
|
||||
}
|
||||
|
||||
|
@ -410,8 +399,8 @@ inline static void geqrfStrided(int M,
|
|||
for(int i=0; i<batchSize; i++)
|
||||
T_h[i] = to_address(TAU)+i*Tstride;
|
||||
T **B_d;
|
||||
cudaMalloc((void **)&B_d, 2*batchSize*sizeof(*B_h));
|
||||
cudaMemcpy(B_d, B_h, 2*batchSize*sizeof(*B_h), cudaMemcpyHostToDevice);
|
||||
arch::malloc((void **)&B_d, 2*batchSize*sizeof(*B_h));
|
||||
arch::memcopy(B_d, B_h, 2*batchSize*sizeof(*B_h), arch::memcopyH2D);
|
||||
T **A_d(B_d);
|
||||
T **T_d(B_d+batchSize);
|
||||
*/
|
||||
|
@ -424,19 +413,19 @@ inline static void geqrfStrided(int M,
|
|||
for (int i = 0; i < batchSize; i++)
|
||||
T_h[i] = to_address(TAU) + i * Tstride;
|
||||
T **A_d, **T_d;
|
||||
cudaMalloc((void**)&A_d, batchSize * sizeof(*A_h));
|
||||
cudaMemcpy(A_d, A_h, batchSize * sizeof(*A_h), cudaMemcpyHostToDevice);
|
||||
cudaMalloc((void**)&T_d, batchSize * sizeof(*T_h));
|
||||
cudaMemcpy(T_d, T_h, batchSize * sizeof(*T_h), cudaMemcpyHostToDevice);
|
||||
arch::malloc((void**)&A_d, batchSize * sizeof(*A_h));
|
||||
arch::memcopy(A_d, A_h, batchSize * sizeof(*A_h), arch::memcopyH2D);
|
||||
arch::malloc((void**)&T_d, batchSize * sizeof(*T_h));
|
||||
arch::memcopy(T_d, T_h, batchSize * sizeof(*T_h), arch::memcopyH2D);
|
||||
cublasStatus_t status =
|
||||
cublas::cublas_geqrfBatched(*A.handles.cublas_handle, M, N, A_d, LDA, T_d, to_address(inf.data()), batchSize);
|
||||
for (int i = 0; i < batchSize; i++)
|
||||
assert(inf[i] == 0);
|
||||
if (CUBLAS_STATUS_SUCCESS != status)
|
||||
throw std::runtime_error("Error: cublas_geqrfBatched returned error code.");
|
||||
cudaFree(A_d);
|
||||
arch::free(A_d);
|
||||
delete[] A_h;
|
||||
cudaFree(T_d);
|
||||
arch::free(T_d);
|
||||
delete[] T_h;
|
||||
}
|
||||
|
||||
|
@ -464,18 +453,18 @@ inline static void gesvd(char jobU,
|
|||
int& st)
|
||||
{
|
||||
int* devSt;
|
||||
cudaMalloc((void**)&devSt, sizeof(int));
|
||||
arch::malloc((void**)&devSt, sizeof(int));
|
||||
cusolverStatus_t status =
|
||||
cusolver::cusolver_gesvd(*A.handles.cusolverDn_handle, jobU, jobVT, m, n, to_address(A), lda, to_address(S),
|
||||
to_address(U), ldu, to_address(VT), ldvt, to_address(W), lw, devSt);
|
||||
cudaMemcpy(&st, devSt, sizeof(int), cudaMemcpyDeviceToHost);
|
||||
arch::memcopy(&st, devSt, sizeof(int), arch::memcopyD2H);
|
||||
if (CUSOLVER_STATUS_SUCCESS != status)
|
||||
{
|
||||
std::cerr << " cublas_gesvd status, info: " << status << " " << st << std::endl;
|
||||
std::cerr.flush();
|
||||
throw std::runtime_error("Error: cublas_gesvd returned error code.");
|
||||
}
|
||||
cudaFree(devSt);
|
||||
arch::free(devSt);
|
||||
}
|
||||
|
||||
template<typename T, typename R>
|
||||
|
@ -496,18 +485,18 @@ inline static void gesvd(char jobU,
|
|||
int& st)
|
||||
{
|
||||
int* devSt;
|
||||
cudaMalloc((void**)&devSt, sizeof(int));
|
||||
arch::malloc((void**)&devSt, sizeof(int));
|
||||
cusolverStatus_t status =
|
||||
cusolver::cusolver_gesvd(*A.handles.cusolverDn_handle, jobU, jobVT, m, n, to_address(A), lda, to_address(S),
|
||||
to_address(U), ldu, to_address(VT), ldvt, to_address(W), lw, devSt);
|
||||
cudaMemcpy(&st, devSt, sizeof(int), cudaMemcpyDeviceToHost);
|
||||
arch::memcopy(&st, devSt, sizeof(int), arch::memcopyD2H);
|
||||
if (CUSOLVER_STATUS_SUCCESS != status)
|
||||
{
|
||||
std::cerr << " cublas_gesvd status, info: " << status << " " << st << std::endl;
|
||||
std::cerr.flush();
|
||||
throw std::runtime_error("Error: cublas_gesvd returned error code.");
|
||||
}
|
||||
cudaFree(devSt);
|
||||
arch::free(devSt);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -566,17 +566,17 @@ inline static void gemmBatched(char Atrans,
|
|||
B_h[i] = to_address(B[i]);
|
||||
C_h[i] = to_address(C[i]);
|
||||
}
|
||||
cudaMalloc((void**)&A_d, batchSize * sizeof(*A_h));
|
||||
cudaMalloc((void**)&B_d, batchSize * sizeof(*B_h));
|
||||
cudaMalloc((void**)&C_d, batchSize * sizeof(*C_h));
|
||||
cudaMemcpy(A_d, A_h, batchSize * sizeof(*A_h), cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(B_d, B_h, batchSize * sizeof(*B_h), cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(C_d, C_h, batchSize * sizeof(*C_h), cudaMemcpyHostToDevice);
|
||||
arch::malloc((void**)&A_d, batchSize * sizeof(*A_h));
|
||||
arch::malloc((void**)&B_d, batchSize * sizeof(*B_h));
|
||||
arch::malloc((void**)&C_d, batchSize * sizeof(*C_h));
|
||||
arch::memcopy(A_d, A_h, batchSize * sizeof(*A_h), arch::memcopyH2D);
|
||||
arch::memcopy(B_d, B_h, batchSize * sizeof(*B_h), arch::memcopyH2D);
|
||||
arch::memcopy(C_d, C_h, batchSize * sizeof(*C_h), arch::memcopyH2D);
|
||||
cublas::cublas_gemmBatched(*(A[0]).handles.cublas_handle, Atrans, Btrans, M, N, K, alpha, A_d, lda, B_d, ldb, beta,
|
||||
C_d, ldc, batchSize);
|
||||
cudaFree(A_d);
|
||||
cudaFree(B_d);
|
||||
cudaFree(C_d);
|
||||
arch::free(A_d);
|
||||
arch::free(B_d);
|
||||
arch::free(C_d);
|
||||
delete[] A_h;
|
||||
delete[] B_h;
|
||||
delete[] C_h;
|
||||
|
|
|
@ -49,10 +49,8 @@ void csrmv(const char transa,
|
|||
static_assert(std::is_same<typename std::decay<Q>::type, T>::value, "Wrong dispatch.\n");
|
||||
// somehow need to check if the matrix is compact!
|
||||
int pb, pe;
|
||||
if (cudaSuccess != cudaMemcpy(std::addressof(pb), to_address(pntrb), sizeof(int), cudaMemcpyDeviceToHost))
|
||||
throw std::runtime_error("Error: cudaMemcpy returned error code in csrmv.");
|
||||
if (cudaSuccess != cudaMemcpy(std::addressof(pe), to_address(pntre + (M - 1)), sizeof(int), cudaMemcpyDeviceToHost))
|
||||
throw std::runtime_error("Error: cudaMemcpy returned error code in csrmv.");
|
||||
arch::memcopy(std::addressof(pb), to_address(pntrb), sizeof(int), arch::memcopyD2H, "sparse_cuda_gpu_ptr::csrmv");
|
||||
arch::memcopy(std::addressof(pe), to_address(pntre + (M - 1)), sizeof(int), arch::memcopyD2H, "sparse_cuda_gpu_ptr::csrmv");
|
||||
int nnz = pe - pb;
|
||||
if (CUSPARSE_STATUS_SUCCESS !=
|
||||
cusparse::cusparse_csrmv(*A.handles.cusparse_handle, transa, M, K, nnz, alpha,
|
||||
|
@ -81,10 +79,8 @@ void csrmm(const char transa,
|
|||
static_assert(std::is_same<typename std::decay<Q>::type, T>::value, "Wrong dispatch.\n");
|
||||
// somehow need to check if the matrix is compact!
|
||||
int pb, pe;
|
||||
if (cudaSuccess != cudaMemcpy(std::addressof(pb), to_address(pntrb), sizeof(int), cudaMemcpyDeviceToHost))
|
||||
throw std::runtime_error("Error: cudaMemcpy returned error code in csrmm.");
|
||||
if (cudaSuccess != cudaMemcpy(std::addressof(pe), to_address(pntre + (M - 1)), sizeof(int), cudaMemcpyDeviceToHost))
|
||||
throw std::runtime_error("Error: cudaMemcpy returned error code in csrmm.");
|
||||
arch::memcopy(std::addressof(pb), to_address(pntrb), sizeof(int), arch::memcopyD2H, "lapack_sparse_gpu_ptr::csrmm");
|
||||
arch::memcopy(std::addressof(pe), to_address(pntre + (M - 1)), sizeof(int), arch::memcopyD2H, "lapack_sparse_gpu_ptr::csrmm");
|
||||
int nnz = pe - pb;
|
||||
if (transa == 'N')
|
||||
{
|
||||
|
|
|
@ -15,7 +15,7 @@
|
|||
#include <hip/hip_runtime.h>
|
||||
#include "uninitialized_array.hpp"
|
||||
#include "AFQMC/Numerics/detail/HIP/Kernels/hip_settings.h"
|
||||
#include "AFQMC/Memory/HIP/hip_utilities.h"
|
||||
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
|
||||
|
||||
namespace kernels
|
||||
{
|
||||
|
@ -281,8 +281,8 @@ void Auwn_Bun_Cuw(int nu,
|
|||
reinterpret_cast<thrust::complex<double> const*>(A),
|
||||
reinterpret_cast<thrust::complex<double> const*>(B),
|
||||
reinterpret_cast<thrust::complex<double>*>(C));
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
|
||||
|
@ -304,8 +304,8 @@ void Auwn_Bun_Cuw(int nu,
|
|||
static_cast<thrust::complex<float> const>(alpha),
|
||||
reinterpret_cast<thrust::complex<float> const*>(A),
|
||||
reinterpret_cast<thrust::complex<float> const*>(B), reinterpret_cast<thrust::complex<float>*>(C));
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
// C[u][w] = alpha * sum_i A[w][i][u] * B[i][u]
|
||||
|
@ -329,8 +329,8 @@ void Awiu_Biu_Cuw(int nu,
|
|||
static_cast<thrust::complex<double> const>(alpha),
|
||||
reinterpret_cast<thrust::complex<double> const*>(A), B, ldb,
|
||||
reinterpret_cast<thrust::complex<double>*>(C), ldc);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
|
||||
|
@ -354,8 +354,8 @@ void Awiu_Biu_Cuw(int nu,
|
|||
static_cast<thrust::complex<float> const>(alpha),
|
||||
reinterpret_cast<thrust::complex<float> const*>(A), B, ldb,
|
||||
reinterpret_cast<thrust::complex<float>*>(C), ldc);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void Awiu_Biu_Cuw(int nu,
|
||||
|
@ -379,8 +379,8 @@ void Awiu_Biu_Cuw(int nu,
|
|||
reinterpret_cast<thrust::complex<double> const*>(A),
|
||||
reinterpret_cast<thrust::complex<double> const*>(B), ldb,
|
||||
reinterpret_cast<thrust::complex<double>*>(C), ldc);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
|
||||
|
@ -405,8 +405,8 @@ void Awiu_Biu_Cuw(int nu,
|
|||
reinterpret_cast<thrust::complex<float> const*>(A),
|
||||
reinterpret_cast<thrust::complex<float> const*>(B), ldb,
|
||||
reinterpret_cast<thrust::complex<float>*>(C), ldc);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
|
||||
|
@ -428,8 +428,8 @@ void Aijk_Bkj_Cik(int ni,
|
|||
reinterpret_cast<thrust::complex<double> const*>(A), lda, stride,
|
||||
reinterpret_cast<thrust::complex<double> const*>(B), ldb,
|
||||
reinterpret_cast<thrust::complex<double>*>(C), ldc);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void Aijk_Bkj_Cik(int ni,
|
||||
|
@ -448,8 +448,8 @@ void Aijk_Bkj_Cik(int ni,
|
|||
hipLaunchKernelGGL(kernel_Aijk_Bkj_Cik, dim3(grid_dim), dim3(32), 0, 0, ni, nj, nk,
|
||||
reinterpret_cast<thrust::complex<double> const*>(A), lda, stride, B, ldb,
|
||||
reinterpret_cast<thrust::complex<double>*>(C), ldc);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void Aijk_Bkj_Cik(int ni,
|
||||
|
@ -469,8 +469,8 @@ void Aijk_Bkj_Cik(int ni,
|
|||
reinterpret_cast<thrust::complex<float> const*>(A), lda, stride,
|
||||
reinterpret_cast<thrust::complex<float> const*>(B), ldb,
|
||||
reinterpret_cast<thrust::complex<float>*>(C), ldc);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void Aijk_Bkj_Cik(int ni,
|
||||
|
@ -489,8 +489,8 @@ void Aijk_Bkj_Cik(int ni,
|
|||
hipLaunchKernelGGL(kernel_Aijk_Bkj_Cik, dim3(grid_dim), dim3(32), 0, 0, ni, nj, nk,
|
||||
reinterpret_cast<thrust::complex<float> const*>(A), lda, stride, B, ldb,
|
||||
reinterpret_cast<thrust::complex<float>*>(C), ldc);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
// v[w][i][j] = v[i][w][j]
|
||||
|
@ -501,8 +501,8 @@ void viwj_vwij(int nw, int ni, int i0, int iN, std::complex<double> const* B, st
|
|||
hipLaunchKernelGGL(kernel_viwj_vwij, dim3(grid_dim), dim3(MAX_THREADS_PER_DIM), 0, 0, nw, ni, i0, iN,
|
||||
reinterpret_cast<thrust::complex<double> const*>(B),
|
||||
reinterpret_cast<thrust::complex<double>*>(A));
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void viwj_vwij(int nw, int ni, int i0, int iN, std::complex<double> const* B, std::complex<float>* A)
|
||||
|
@ -511,8 +511,8 @@ void viwj_vwij(int nw, int ni, int i0, int iN, std::complex<double> const* B, st
|
|||
dim3 grid_dim(nw, (iN - i0), 1);
|
||||
hipLaunchKernelGGL(kernel_viwj_vwij, dim3(grid_dim), dim3(MAX_THREADS_PER_DIM), 0, 0, nw, ni, i0, iN,
|
||||
reinterpret_cast<thrust::complex<double> const*>(B), reinterpret_cast<thrust::complex<float>*>(A));
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void viwj_vwij(int nw, int ni, int i0, int iN, std::complex<float> const* B, std::complex<double>* A)
|
||||
|
@ -521,8 +521,8 @@ void viwj_vwij(int nw, int ni, int i0, int iN, std::complex<float> const* B, std
|
|||
dim3 grid_dim(nw, (iN - i0), 1);
|
||||
hipLaunchKernelGGL(kernel_viwj_vwij, dim3(grid_dim), dim3(MAX_THREADS_PER_DIM), 0, 0, nw, ni, i0, iN,
|
||||
reinterpret_cast<thrust::complex<float> const*>(B), reinterpret_cast<thrust::complex<double>*>(A));
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void viwj_vwij(int nw, int ni, int i0, int iN, std::complex<float> const* B, std::complex<float>* A)
|
||||
|
@ -531,8 +531,8 @@ void viwj_vwij(int nw, int ni, int i0, int iN, std::complex<float> const* B, std
|
|||
dim3 grid_dim(nw, (iN - i0), 1);
|
||||
hipLaunchKernelGGL(kernel_viwj_vwij, dim3(grid_dim), dim3(MAX_THREADS_PER_DIM), 0, 0, nw, ni, i0, iN,
|
||||
reinterpret_cast<thrust::complex<float> const*>(B), reinterpret_cast<thrust::complex<float>*>(A));
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
// element-wise C[k][i][j] = A[i][j] * B[j][k]
|
||||
|
@ -554,8 +554,8 @@ void element_wise_Aij_Bjk_Ckij(char transA,
|
|||
hipLaunchKernelGGL(kernel_element_wise_Aij_Bjk_Ckij, dim3(grid_dim), dim3(MAX_THREADS_PER_DIM), 0, 0, transA, ni, nj,
|
||||
nk, A, lda, reinterpret_cast<thrust::complex<double> const*>(B), ldb,
|
||||
reinterpret_cast<thrust::complex<double>*>(C), ldc1, ldc2);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void element_wise_Aij_Bjk_Ckij(char transA,
|
||||
|
@ -576,8 +576,8 @@ void element_wise_Aij_Bjk_Ckij(char transA,
|
|||
hipLaunchKernelGGL(kernel_element_wise_Aij_Bjk_Ckij, dim3(grid_dim), dim3(MAX_THREADS_PER_DIM), 0, 0, transA, ni, nj,
|
||||
nk, A, lda, reinterpret_cast<thrust::complex<float> const*>(B), ldb,
|
||||
reinterpret_cast<thrust::complex<float>*>(C), ldc1, ldc2);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void element_wise_Aij_Bjk_Ckij(char transA,
|
||||
|
@ -599,8 +599,8 @@ void element_wise_Aij_Bjk_Ckij(char transA,
|
|||
nk, reinterpret_cast<thrust::complex<double> const*>(A), lda,
|
||||
reinterpret_cast<thrust::complex<double> const*>(B), ldb,
|
||||
reinterpret_cast<thrust::complex<double>*>(C), ldc1, ldc2);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void element_wise_Aij_Bjk_Ckij(char transA,
|
||||
|
@ -622,8 +622,8 @@ void element_wise_Aij_Bjk_Ckij(char transA,
|
|||
nk, reinterpret_cast<thrust::complex<float> const*>(A), lda,
|
||||
reinterpret_cast<thrust::complex<float> const*>(B), ldb,
|
||||
reinterpret_cast<thrust::complex<float>*>(C), ldc1, ldc2);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
|
||||
|
@ -649,8 +649,8 @@ void element_wise_Aij_Bjk_Ckji(int ni,
|
|||
hipLaunchKernelGGL(kernel_element_wise_Aij_Bjk_Ckji, dim3(grid_dim), dim3(block_dim), 0, 0, ni, nj, nk, A, lda,
|
||||
reinterpret_cast<thrust::complex<double> const*>(B), ldb,
|
||||
reinterpret_cast<thrust::complex<double>*>(C), ldc, stride);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void element_wise_Aij_Bjk_Ckji(int ni,
|
||||
|
@ -676,8 +676,8 @@ void element_wise_Aij_Bjk_Ckji(int ni,
|
|||
reinterpret_cast<thrust::complex<double> const*>(A), lda,
|
||||
reinterpret_cast<thrust::complex<double> const*>(B), ldb,
|
||||
reinterpret_cast<thrust::complex<double>*>(C), ldc, stride);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void element_wise_Aij_Bjk_Ckji(int ni,
|
||||
|
@ -701,8 +701,8 @@ void element_wise_Aij_Bjk_Ckji(int ni,
|
|||
hipLaunchKernelGGL(kernel_element_wise_Aij_Bjk_Ckji, dim3(grid_dim), dim3(block_dim), 0, 0, ni, nj, nk, A, lda,
|
||||
reinterpret_cast<thrust::complex<float> const*>(B), ldb,
|
||||
reinterpret_cast<thrust::complex<float>*>(C), ldc, stride);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void element_wise_Aij_Bjk_Ckji(int ni,
|
||||
|
@ -727,8 +727,8 @@ void element_wise_Aij_Bjk_Ckji(int ni,
|
|||
reinterpret_cast<thrust::complex<float> const*>(A), lda,
|
||||
reinterpret_cast<thrust::complex<float> const*>(B), ldb,
|
||||
reinterpret_cast<thrust::complex<float>*>(C), ldc, stride);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -15,7 +15,7 @@
|
|||
#include <thrust/complex.h>
|
||||
#include <hip/hip_runtime.h>
|
||||
#include "AFQMC/Numerics/detail/HIP/Kernels/hip_settings.h"
|
||||
#include "AFQMC/Memory/HIP/hip_utilities.h"
|
||||
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
|
||||
|
||||
namespace kernels
|
||||
{
|
||||
|
@ -113,8 +113,8 @@ void KaKjw_to_KKwaj(int nwalk,
|
|||
dim3 grid_dim(nkpts, nkpts, 1);
|
||||
hipLaunchKernelGGL(kernel_KaKjw_to_KKwaj, dim3(grid_dim), dim3(block_dim), 0, 0, nwalk, nkpts, nmo_max, nmo_tot,
|
||||
nocc_max, nmo, nmo0, nocc, nocc0, A, B);
|
||||
qmc_hip::hip_check(hipGetLastError(), "KaKjw_to_KKwaj");
|
||||
qmc_hip::hip_check(hipDeviceSynchronize(), "KaKjw_to_KKwaj");
|
||||
qmc_hip::hip_kernel_check(hipGetLastError(), "KaKjw_to_KKwaj");
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "KaKjw_to_KKwaj");
|
||||
}
|
||||
|
||||
void KaKjw_to_KKwaj(int nwalk,
|
||||
|
@ -135,8 +135,8 @@ void KaKjw_to_KKwaj(int nwalk,
|
|||
dim3 grid_dim(nkpts, nkpts, 1);
|
||||
hipLaunchKernelGGL(kernel_KaKjw_to_KKwaj, dim3(grid_dim), dim3(block_dim), 0, 0, nwalk, nkpts, nmo_max, nmo_tot,
|
||||
nocc_max, nmo, nmo0, nocc, nocc0, A, B);
|
||||
qmc_hip::hip_check(hipGetLastError(), "KaKjw_to_KKwaj");
|
||||
qmc_hip::hip_check(hipDeviceSynchronize(), "KaKjw_to_KKwaj");
|
||||
qmc_hip::hip_kernel_check(hipGetLastError(), "KaKjw_to_KKwaj");
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "KaKjw_to_KKwaj");
|
||||
}
|
||||
|
||||
void KaKjw_to_KKwaj(int nwalk,
|
||||
|
@ -157,8 +157,8 @@ void KaKjw_to_KKwaj(int nwalk,
|
|||
dim3 grid_dim(nkpts, nkpts, 1);
|
||||
hipLaunchKernelGGL(kernel_KaKjw_to_KKwaj, dim3(grid_dim), dim3(block_dim), 0, 0, nwalk, nkpts, nmo_max, nmo_tot,
|
||||
nocc_max, nmo, nmo0, nocc, nocc0, A, B);
|
||||
qmc_hip::hip_check(hipGetLastError(), "KaKjw_to_KKwaj");
|
||||
qmc_hip::hip_check(hipDeviceSynchronize(), "KaKjw_to_KKwaj");
|
||||
qmc_hip::hip_kernel_check(hipGetLastError(), "KaKjw_to_KKwaj");
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "KaKjw_to_KKwaj");
|
||||
}
|
||||
|
||||
void KaKjw_to_KKwaj(int nwalk,
|
||||
|
@ -180,8 +180,8 @@ void KaKjw_to_KKwaj(int nwalk,
|
|||
hipLaunchKernelGGL(kernel_KaKjw_to_KKwaj, dim3(grid_dim), dim3(block_dim), 0, 0, nwalk, nkpts, nmo_max, nmo_tot,
|
||||
nocc_max, nmo, nmo0, nocc, nocc0, reinterpret_cast<thrust::complex<double> const*>(A),
|
||||
reinterpret_cast<thrust::complex<double>*>(B));
|
||||
qmc_hip::hip_check(hipGetLastError(), "KaKjw_to_KKwaj");
|
||||
qmc_hip::hip_check(hipDeviceSynchronize(), "KaKjw_to_KKwaj");
|
||||
qmc_hip::hip_kernel_check(hipGetLastError(), "KaKjw_to_KKwaj");
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "KaKjw_to_KKwaj");
|
||||
}
|
||||
|
||||
void KaKjw_to_KKwaj(int nwalk,
|
||||
|
@ -203,8 +203,8 @@ void KaKjw_to_KKwaj(int nwalk,
|
|||
hipLaunchKernelGGL(kernel_KaKjw_to_KKwaj, dim3(grid_dim), dim3(block_dim), 0, 0, nwalk, nkpts, nmo_max, nmo_tot,
|
||||
nocc_max, nmo, nmo0, nocc, nocc0, reinterpret_cast<thrust::complex<float> const*>(A),
|
||||
reinterpret_cast<thrust::complex<float>*>(B));
|
||||
qmc_hip::hip_check(hipGetLastError(), "KaKjw_to_KKwaj");
|
||||
qmc_hip::hip_check(hipDeviceSynchronize(), "KaKjw_to_KKwaj");
|
||||
qmc_hip::hip_kernel_check(hipGetLastError(), "KaKjw_to_KKwaj");
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "KaKjw_to_KKwaj");
|
||||
}
|
||||
|
||||
void KaKjw_to_KKwaj(int nwalk,
|
||||
|
@ -226,8 +226,8 @@ void KaKjw_to_KKwaj(int nwalk,
|
|||
hipLaunchKernelGGL(kernel_KaKjw_to_KKwaj, dim3(grid_dim), dim3(block_dim), 0, 0, nwalk, nkpts, nmo_max, nmo_tot,
|
||||
nocc_max, nmo, nmo0, nocc, nocc0, reinterpret_cast<thrust::complex<double> const*>(A),
|
||||
reinterpret_cast<thrust::complex<float>*>(B));
|
||||
qmc_hip::hip_check(hipGetLastError(), "KaKjw_to_KKwaj");
|
||||
qmc_hip::hip_check(hipDeviceSynchronize(), "KaKjw_to_KKwaj");
|
||||
qmc_hip::hip_kernel_check(hipGetLastError(), "KaKjw_to_KKwaj");
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "KaKjw_to_KKwaj");
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -16,7 +16,7 @@
|
|||
#include <thrust/complex.h>
|
||||
#include <hip/hip_runtime.h>
|
||||
#include "AFQMC/Numerics/detail/HIP/Kernels/hip_settings.h"
|
||||
#include "AFQMC/Memory/HIP/hip_utilities.h"
|
||||
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
|
||||
|
||||
namespace kernels
|
||||
{
|
||||
|
@ -120,8 +120,8 @@ void KaKjw_to_QKajw(int nwalk,
|
|||
dim3 grid_dim(nkpts, nkpts, 1);
|
||||
hipLaunchKernelGGL(kernel_KaKjw_to_QKajw, dim3(grid_dim), dim3(block_dim), 0, 0, nwalk, nkpts, nmo_max, nmo_tot,
|
||||
nocc_max, nmo, nmo0, nocc, nocc0, QKtok2, A, B);
|
||||
qmc_hip::hip_check(hipGetLastError(), "KaKjw_to_QKajw");
|
||||
qmc_hip::hip_check(hipDeviceSynchronize(), "KaKjw_to_QKajw");
|
||||
qmc_hip::hip_kernel_check(hipGetLastError(), "KaKjw_to_QKajw");
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "KaKjw_to_QKajw");
|
||||
}
|
||||
|
||||
void KaKjw_to_QKajw(int nwalk,
|
||||
|
@ -143,8 +143,8 @@ void KaKjw_to_QKajw(int nwalk,
|
|||
dim3 grid_dim(nkpts, nkpts, 1);
|
||||
hipLaunchKernelGGL(kernel_KaKjw_to_QKajw, dim3(grid_dim), dim3(block_dim), 0, 0, nwalk, nkpts, nmo_max, nmo_tot,
|
||||
nocc_max, nmo, nmo0, nocc, nocc0, QKtok2, A, B);
|
||||
qmc_hip::hip_check(hipGetLastError(), "KaKjw_to_QKajw");
|
||||
qmc_hip::hip_check(hipDeviceSynchronize(), "KaKjw_to_QKajw");
|
||||
qmc_hip::hip_kernel_check(hipGetLastError(), "KaKjw_to_QKajw");
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "KaKjw_to_QKajw");
|
||||
}
|
||||
|
||||
void KaKjw_to_QKajw(int nwalk,
|
||||
|
@ -166,8 +166,8 @@ void KaKjw_to_QKajw(int nwalk,
|
|||
dim3 grid_dim(nkpts, nkpts, 1);
|
||||
hipLaunchKernelGGL(kernel_KaKjw_to_QKajw, dim3(grid_dim), dim3(block_dim), 0, 0, nwalk, nkpts, nmo_max, nmo_tot,
|
||||
nocc_max, nmo, nmo0, nocc, nocc0, QKtok2, A, B);
|
||||
qmc_hip::hip_check(hipGetLastError(), "KaKjw_to_QKajw");
|
||||
qmc_hip::hip_check(hipDeviceSynchronize(), "KaKjw_to_QKajw");
|
||||
qmc_hip::hip_kernel_check(hipGetLastError(), "KaKjw_to_QKajw");
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "KaKjw_to_QKajw");
|
||||
}
|
||||
|
||||
void KaKjw_to_QKajw(int nwalk,
|
||||
|
@ -190,8 +190,8 @@ void KaKjw_to_QKajw(int nwalk,
|
|||
hipLaunchKernelGGL(kernel_KaKjw_to_QKajw, dim3(grid_dim), dim3(block_dim), 0, 0, nwalk, nkpts, nmo_max, nmo_tot,
|
||||
nocc_max, nmo, nmo0, nocc, nocc0, QKtok2, reinterpret_cast<thrust::complex<float> const*>(A),
|
||||
reinterpret_cast<thrust::complex<float>*>(B));
|
||||
qmc_hip::hip_check(hipGetLastError(), "KaKjw_to_QKajw");
|
||||
qmc_hip::hip_check(hipDeviceSynchronize(), "KaKjw_to_QKajw");
|
||||
qmc_hip::hip_kernel_check(hipGetLastError(), "KaKjw_to_QKajw");
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "KaKjw_to_QKajw");
|
||||
}
|
||||
|
||||
void KaKjw_to_QKajw(int nwalk,
|
||||
|
@ -214,8 +214,8 @@ void KaKjw_to_QKajw(int nwalk,
|
|||
hipLaunchKernelGGL(kernel_KaKjw_to_QKajw, dim3(grid_dim), dim3(block_dim), 0, 0, nwalk, nkpts, nmo_max, nmo_tot,
|
||||
nocc_max, nmo, nmo0, nocc, nocc0, QKtok2, reinterpret_cast<thrust::complex<double> const*>(A),
|
||||
reinterpret_cast<thrust::complex<double>*>(B));
|
||||
qmc_hip::hip_check(hipGetLastError(), "KaKjw_to_QKajw");
|
||||
qmc_hip::hip_check(hipDeviceSynchronize(), "KaKjw_to_QKajw");
|
||||
qmc_hip::hip_kernel_check(hipGetLastError(), "KaKjw_to_QKajw");
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "KaKjw_to_QKajw");
|
||||
}
|
||||
|
||||
void KaKjw_to_QKajw(int nwalk,
|
||||
|
@ -238,8 +238,8 @@ void KaKjw_to_QKajw(int nwalk,
|
|||
hipLaunchKernelGGL(kernel_KaKjw_to_QKajw, dim3(grid_dim), dim3(block_dim), 0, 0, nwalk, nkpts, nmo_max, nmo_tot,
|
||||
nocc_max, nmo, nmo0, nocc, nocc0, QKtok2, reinterpret_cast<thrust::complex<double> const*>(A),
|
||||
reinterpret_cast<thrust::complex<float>*>(B));
|
||||
qmc_hip::hip_check(hipGetLastError(), "KaKjw_to_QKajw");
|
||||
qmc_hip::hip_check(hipDeviceSynchronize(), "KaKjw_to_QKajw");
|
||||
qmc_hip::hip_kernel_check(hipGetLastError(), "KaKjw_to_QKajw");
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "KaKjw_to_QKajw");
|
||||
}
|
||||
|
||||
} // namespace kernels
|
||||
|
|
|
@ -16,7 +16,7 @@
|
|||
#include <thrust/complex.h>
|
||||
#include <hip/hip_runtime.h>
|
||||
#include "AFQMC/Numerics/detail/HIP/Kernels/hip_settings.h"
|
||||
#include "AFQMC/Memory/HIP/hip_utilities.h"
|
||||
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
|
||||
|
||||
namespace kernels
|
||||
{
|
||||
|
@ -74,8 +74,8 @@ void Tab_to_Kl(int nwalk, int nocc, int nchol, std::complex<double> const* Tab,
|
|||
hipLaunchKernelGGL(kernel_Tab_to_Kl, dim3(grid_dim), dim3(nthr), 0, 0, nwalk, nocc, nchol,
|
||||
reinterpret_cast<thrust::complex<double> const*>(Tab),
|
||||
reinterpret_cast<thrust::complex<double>*>(Kl));
|
||||
qmc_hip::hip_check(hipGetLastError(), "Tab_to_Kl");
|
||||
qmc_hip::hip_check(hipDeviceSynchronize(), "Tab_to_Kl");
|
||||
qmc_hip::hip_kernel_check(hipGetLastError(), "Tab_to_Kl");
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "Tab_to_Kl");
|
||||
}
|
||||
|
||||
void Tab_to_Kl(int nwalk, int nocc, int nchol, std::complex<float> const* Tab, std::complex<float>* Kl)
|
||||
|
@ -85,8 +85,8 @@ void Tab_to_Kl(int nwalk, int nocc, int nchol, std::complex<float> const* Tab, s
|
|||
hipLaunchKernelGGL(kernel_Tab_to_Kl, dim3(grid_dim), dim3(nthr), 0, 0, nwalk, nocc, nchol,
|
||||
reinterpret_cast<thrust::complex<float> const*>(Tab),
|
||||
reinterpret_cast<thrust::complex<float>*>(Kl));
|
||||
qmc_hip::hip_check(hipGetLastError(), "Tab_to_Kl");
|
||||
qmc_hip::hip_check(hipDeviceSynchronize(), "Tab_to_Kl");
|
||||
qmc_hip::hip_kernel_check(hipGetLastError(), "Tab_to_Kl");
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "Tab_to_Kl");
|
||||
}
|
||||
|
||||
|
||||
|
@ -102,8 +102,8 @@ void Tanb_to_Kl(int nwalk,
|
|||
hipLaunchKernelGGL(kernel_Tanb_to_Kl, dim3(grid_dim), dim3(nthr), 0, 0, nwalk, nocc, nchol, nchol_tot,
|
||||
reinterpret_cast<thrust::complex<double> const*>(Tab),
|
||||
reinterpret_cast<thrust::complex<double>*>(Kl));
|
||||
qmc_hip::hip_check(hipGetLastError(), "Tab_to_Kl");
|
||||
qmc_hip::hip_check(hipDeviceSynchronize(), "Tab_to_Kl");
|
||||
qmc_hip::hip_kernel_check(hipGetLastError(), "Tab_to_Kl");
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "Tab_to_Kl");
|
||||
}
|
||||
|
||||
void Tanb_to_Kl(int nwalk, int nocc, int nchol, int nchol_tot, std::complex<float> const* Tab, std::complex<float>* Kl)
|
||||
|
@ -113,8 +113,8 @@ void Tanb_to_Kl(int nwalk, int nocc, int nchol, int nchol_tot, std::complex<floa
|
|||
hipLaunchKernelGGL(kernel_Tanb_to_Kl, dim3(grid_dim), dim3(nthr), 0, 0, nwalk, nocc, nchol, nchol_tot,
|
||||
reinterpret_cast<thrust::complex<float> const*>(Tab),
|
||||
reinterpret_cast<thrust::complex<float>*>(Kl));
|
||||
qmc_hip::hip_check(hipGetLastError(), "Tab_to_Kl");
|
||||
qmc_hip::hip_check(hipDeviceSynchronize(), "Tab_to_Kl");
|
||||
qmc_hip::hip_kernel_check(hipGetLastError(), "Tab_to_Kl");
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "Tab_to_Kl");
|
||||
}
|
||||
|
||||
} // namespace kernels
|
||||
|
|
|
@ -15,7 +15,7 @@
|
|||
#include <hip/hip_runtime.h>
|
||||
#include <thrust/complex.h>
|
||||
#include <hip/hip_runtime.h>
|
||||
#include "AFQMC/Memory/HIP/hip_utilities.h"
|
||||
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
|
||||
|
||||
namespace kernels
|
||||
{
|
||||
|
@ -73,8 +73,8 @@ void acAxpbB(int m,
|
|||
dim3 block_dim(xblock_dim, yblock_dim);
|
||||
dim3 grid_dim(xgrid_dim, ygrid_dim);
|
||||
hipLaunchKernelGGL(kernel_acAxpbB, dim3(grid_dim), dim3(block_dim), 0, 0, m, n, alpha, A, lda, x, incx, beta, B, ldb);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void acAxpbB(int m,
|
||||
|
@ -95,8 +95,8 @@ void acAxpbB(int m,
|
|||
dim3 block_dim(xblock_dim, yblock_dim);
|
||||
dim3 grid_dim(xgrid_dim, ygrid_dim);
|
||||
hipLaunchKernelGGL(kernel_acAxpbB, dim3(grid_dim), dim3(block_dim), 0, 0, m, n, alpha, A, lda, x, incx, beta, B, ldb);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void acAxpbB(int m,
|
||||
|
@ -122,8 +122,8 @@ void acAxpbB(int m,
|
|||
reinterpret_cast<thrust::complex<double> const*>(x), incx,
|
||||
static_cast<thrust::complex<double> const>(beta), reinterpret_cast<thrust::complex<double>*>(B),
|
||||
ldb);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void acAxpbB(int m,
|
||||
|
@ -149,8 +149,8 @@ void acAxpbB(int m,
|
|||
reinterpret_cast<thrust::complex<float> const*>(x), incx,
|
||||
static_cast<thrust::complex<float> const>(beta), reinterpret_cast<thrust::complex<float>*>(B),
|
||||
ldb);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -15,7 +15,7 @@
|
|||
#include <hip/hip_runtime.h>
|
||||
#include <thrust/complex.h>
|
||||
#include <hip/hip_runtime.h>
|
||||
#include "AFQMC/Memory/HIP/hip_utilities.h"
|
||||
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
|
||||
|
||||
namespace kernels
|
||||
{
|
||||
|
@ -49,8 +49,8 @@ void adiagApy(int N, double const alpha, double const* A, int lda, double* y, in
|
|||
int block_dim = 256;
|
||||
int grid_dim = (N + block_dim - 1) / block_dim;
|
||||
hipLaunchKernelGGL(kernel_adiagApy, dim3(grid_dim), dim3(block_dim), 0, 0, N, alpha, A, lda, y, incy);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void adiagApy(int N,
|
||||
|
@ -66,8 +66,8 @@ void adiagApy(int N,
|
|||
static_cast<thrust::complex<double> const>(alpha),
|
||||
reinterpret_cast<thrust::complex<double> const*>(A), lda,
|
||||
reinterpret_cast<thrust::complex<double>*>(y), incy);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void adiagApy(int N, float const alpha, float const* A, int lda, float* y, int incy)
|
||||
|
@ -75,8 +75,8 @@ void adiagApy(int N, float const alpha, float const* A, int lda, float* y, int i
|
|||
int block_dim = 256;
|
||||
int grid_dim = (N + block_dim - 1) / block_dim;
|
||||
hipLaunchKernelGGL(kernel_adiagApy, dim3(grid_dim), dim3(block_dim), 0, 0, N, alpha, A, lda, y, incy);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void adiagApy(int N,
|
||||
|
@ -92,8 +92,8 @@ void adiagApy(int N,
|
|||
static_cast<thrust::complex<float> const>(alpha),
|
||||
reinterpret_cast<thrust::complex<float> const*>(A), lda,
|
||||
reinterpret_cast<thrust::complex<float>*>(y), incy);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
} // namespace kernels
|
||||
|
|
|
@ -15,7 +15,7 @@
|
|||
#include <thrust/complex.h>
|
||||
#include <hip/hip_runtime.h>
|
||||
#include "uninitialized_array.hpp"
|
||||
#include "AFQMC/Memory/HIP/hip_utilities.h"
|
||||
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
|
||||
|
||||
namespace kernels
|
||||
{
|
||||
|
@ -142,8 +142,8 @@ void adotpby(int N,
|
|||
double* res)
|
||||
{
|
||||
hipLaunchKernelGGL(kernel_adotpby, dim3(1), dim3(1024), 0, 0, N, alpha, x, incx, y, incy, beta, res);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void adotpby(int N,
|
||||
|
@ -159,8 +159,8 @@ void adotpby(int N,
|
|||
reinterpret_cast<thrust::complex<double> const*>(x), incx,
|
||||
reinterpret_cast<thrust::complex<double> const*>(y), incy,
|
||||
static_cast<thrust::complex<double> const>(beta), reinterpret_cast<thrust::complex<double>*>(res));
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void adotpby(int N,
|
||||
|
@ -173,8 +173,8 @@ void adotpby(int N,
|
|||
float* res)
|
||||
{
|
||||
hipLaunchKernelGGL(kernel_adotpby, dim3(1), dim3(1024), 0, 0, N, alpha, x, incx, y, incy, beta, res);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void adotpby(int N,
|
||||
|
@ -190,8 +190,8 @@ void adotpby(int N,
|
|||
reinterpret_cast<thrust::complex<float> const*>(x), incx,
|
||||
reinterpret_cast<thrust::complex<float> const*>(y), incy,
|
||||
static_cast<thrust::complex<float> const>(beta), reinterpret_cast<thrust::complex<float>*>(res));
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void adotpby(int N,
|
||||
|
@ -204,8 +204,8 @@ void adotpby(int N,
|
|||
double* res)
|
||||
{
|
||||
hipLaunchKernelGGL(kernel_adotpby, dim3(1), dim3(1024), 0, 0, N, alpha, x, incx, y, incy, beta, res);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void adotpby(int N,
|
||||
|
@ -221,8 +221,8 @@ void adotpby(int N,
|
|||
reinterpret_cast<thrust::complex<float> const*>(x), incx,
|
||||
reinterpret_cast<thrust::complex<float> const*>(y), incy,
|
||||
static_cast<thrust::complex<double> const>(beta), reinterpret_cast<thrust::complex<double>*>(res));
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void strided_adotpby(int NB,
|
||||
|
@ -242,8 +242,8 @@ void strided_adotpby(int NB,
|
|||
reinterpret_cast<thrust::complex<double> const*>(B), ldb,
|
||||
static_cast<thrust::complex<double> const>(beta), reinterpret_cast<thrust::complex<double>*>(C),
|
||||
ldc);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void strided_adotpby(int NB,
|
||||
|
@ -263,8 +263,8 @@ void strided_adotpby(int NB,
|
|||
reinterpret_cast<thrust::complex<float> const*>(B), ldb,
|
||||
static_cast<thrust::complex<double> const>(beta), reinterpret_cast<thrust::complex<double>*>(C),
|
||||
ldc);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
} // namespace kernels
|
||||
|
|
|
@ -16,7 +16,7 @@
|
|||
#include <thrust/complex.h>
|
||||
#include <hip/hip_runtime.h>
|
||||
#include "AFQMC/Numerics/detail/HIP/Kernels/hip_settings.h"
|
||||
#include "AFQMC/Memory/HIP/hip_utilities.h"
|
||||
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
|
||||
|
||||
namespace kernels
|
||||
{
|
||||
|
@ -82,15 +82,15 @@ __global__ void kernel_transpose_wabn_to_wban(int nwalk,
|
|||
void ajw_to_waj(int na, int nj, int nw, int inca, double const* A, double* B)
|
||||
{
|
||||
hipLaunchKernelGGL(kernel_ajw_to_waj, dim3(na), dim3(128), 0, 0, na, nj, nw, inca, A, B);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void ajw_to_waj(int na, int nj, int nw, int inca, float const* A, float* B)
|
||||
{
|
||||
hipLaunchKernelGGL(kernel_ajw_to_waj, dim3(na), dim3(128), 0, 0, na, nj, nw, inca, A, B);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void ajw_to_waj(int na, int nj, int nw, int inca, std::complex<double> const* A, std::complex<double>* B)
|
||||
|
@ -98,16 +98,16 @@ void ajw_to_waj(int na, int nj, int nw, int inca, std::complex<double> const* A,
|
|||
hipLaunchKernelGGL(kernel_ajw_to_waj, dim3(na), dim3(128), 0, 0, na, nj, nw, inca,
|
||||
reinterpret_cast<thrust::complex<double> const*>(A),
|
||||
reinterpret_cast<thrust::complex<double>*>(B));
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void ajw_to_waj(int na, int nj, int nw, int inca, std::complex<float> const* A, std::complex<float>* B)
|
||||
{
|
||||
hipLaunchKernelGGL(kernel_ajw_to_waj, dim3(na), dim3(128), 0, 0, na, nj, nw, inca,
|
||||
reinterpret_cast<thrust::complex<float> const*>(A), reinterpret_cast<thrust::complex<float>*>(B));
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void transpose_wabn_to_wban(int nwalk,
|
||||
|
@ -121,8 +121,8 @@ void transpose_wabn_to_wban(int nwalk,
|
|||
hipLaunchKernelGGL(kernel_transpose_wabn_to_wban, dim3(grid_dim), dim3(32), 0, 0, nwalk, na, nb, nchol,
|
||||
reinterpret_cast<thrust::complex<double> const*>(Tab),
|
||||
reinterpret_cast<thrust::complex<double>*>(Tba));
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void transpose_wabn_to_wban(int nwalk,
|
||||
|
@ -136,8 +136,8 @@ void transpose_wabn_to_wban(int nwalk,
|
|||
hipLaunchKernelGGL(kernel_transpose_wabn_to_wban, dim3(grid_dim), dim3(32), 0, 0, nwalk, na, nb, nchol,
|
||||
reinterpret_cast<thrust::complex<float> const*>(Tab),
|
||||
reinterpret_cast<thrust::complex<float>*>(Tba));
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void transpose_wabn_to_wban(int nwalk,
|
||||
|
@ -151,8 +151,8 @@ void transpose_wabn_to_wban(int nwalk,
|
|||
hipLaunchKernelGGL(kernel_transpose_wabn_to_wban, dim3(grid_dim), dim3(32), 0, 0, nwalk, na, nb, nchol,
|
||||
reinterpret_cast<thrust::complex<double> const*>(Tab),
|
||||
reinterpret_cast<thrust::complex<float>*>(Tba));
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void transpose_wabn_to_wban(int nwalk,
|
||||
|
@ -166,8 +166,8 @@ void transpose_wabn_to_wban(int nwalk,
|
|||
hipLaunchKernelGGL(kernel_transpose_wabn_to_wban, dim3(grid_dim), dim3(32), 0, 0, nwalk, na, nb, nchol,
|
||||
reinterpret_cast<thrust::complex<float> const*>(Tab),
|
||||
reinterpret_cast<thrust::complex<double>*>(Tba));
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
} // namespace kernels
|
||||
|
|
|
@ -15,7 +15,7 @@
|
|||
#include <hip/hip_runtime.h>
|
||||
#include <thrust/complex.h>
|
||||
#include <hip/hip_runtime.h>
|
||||
#include "AFQMC/Memory/HIP/hip_utilities.h"
|
||||
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
|
||||
|
||||
namespace kernels
|
||||
{
|
||||
|
@ -94,8 +94,8 @@ void axpy_batched_gpu(int n,
|
|||
hipMemcpy(b_, b, batchSize * sizeof(*b), hipMemcpyHostToDevice);
|
||||
hipMemcpy(x_, x, batchSize * sizeof(*x), hipMemcpyHostToDevice);
|
||||
hipLaunchKernelGGL(kernel_axpy_batched, dim3(batchSize), dim3(128), 0, 0, n, x_, a_, inca, b_, incb, batchSize);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
hipFree(a_);
|
||||
hipFree(b_);
|
||||
hipFree(x_);
|
||||
|
@ -120,8 +120,8 @@ void sumGw_batched_gpu(int n,
|
|||
hipMemcpy(x_, x, batchSize * sizeof(*x), hipMemcpyHostToDevice);
|
||||
int nb_(nw > batchSize ? batchSize : nw);
|
||||
hipLaunchKernelGGL(kernel_sumGw_batched, dim3(nb_), dim3(256), 0, 0, n, x_, a_, inca, b_, incb, b0, nw, batchSize);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
hipFree(a_);
|
||||
hipFree(b_);
|
||||
hipFree(x_);
|
||||
|
|
|
@ -15,7 +15,7 @@
|
|||
#include <thrust/device_ptr.h>
|
||||
#include <thrust/transform.h>
|
||||
#include <thrust/functional.h>
|
||||
#include "AFQMC/Memory/HIP/hip_utilities.h"
|
||||
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
|
||||
|
||||
namespace kernels
|
||||
{
|
||||
|
@ -47,26 +47,26 @@ inline static void kernel_axty(int n, std::complex<T> const alpha, std::complex<
|
|||
void axty(int n, float alpha, float const* x, float* y)
|
||||
{
|
||||
kernel_axty(n, alpha, x, y);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
void axty(int n, double alpha, double const* x, double* y)
|
||||
{
|
||||
kernel_axty(n, alpha, x, y);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
void axty(int n, std::complex<float> alpha, std::complex<float> const* x, std::complex<float>* y)
|
||||
{
|
||||
kernel_axty(n, alpha, x, y);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
void axty(int n, std::complex<double> alpha, std::complex<double> const* x, std::complex<double>* y)
|
||||
{
|
||||
kernel_axty(n, alpha, x, y);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -16,7 +16,7 @@
|
|||
#include <thrust/complex.h>
|
||||
#include "uninitialized_array.hpp"
|
||||
#include "AFQMC/Numerics/detail/HIP/Kernels/hip_settings.h"
|
||||
#include "AFQMC/Memory/HIP/hip_utilities.h"
|
||||
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
|
||||
|
||||
namespace kernels
|
||||
{
|
||||
|
@ -93,8 +93,8 @@ void batchedDot(int m,
|
|||
int incy)
|
||||
{
|
||||
hipLaunchKernelGGL(kernel_dot, dim3(m), dim3(DOT_BLOCK_SIZE), 0, 0, n, alpha, A, lda, B, ldb, beta, y, incy);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void batchedDot(int m,
|
||||
|
@ -109,8 +109,8 @@ void batchedDot(int m,
|
|||
int incy)
|
||||
{
|
||||
hipLaunchKernelGGL(kernel_dot, dim3(m), dim3(DOT_BLOCK_SIZE), 0, 0, n, alpha, A, lda, B, ldb, beta, y, incy);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void batchedDot(int m,
|
||||
|
@ -130,8 +130,8 @@ void batchedDot(int m,
|
|||
reinterpret_cast<thrust::complex<double> const*>(B), ldb,
|
||||
static_cast<thrust::complex<double> const>(beta), reinterpret_cast<thrust::complex<double>*>(y),
|
||||
incy);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void batchedDot(int m,
|
||||
|
@ -151,8 +151,8 @@ void batchedDot(int m,
|
|||
reinterpret_cast<thrust::complex<float> const*>(B), ldb,
|
||||
static_cast<thrust::complex<float> const>(beta), reinterpret_cast<thrust::complex<float>*>(y),
|
||||
incy);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
} // namespace kernels
|
||||
|
|
|
@ -16,7 +16,7 @@
|
|||
#include <thrust/complex.h>
|
||||
#include <hip/hip_runtime.h>
|
||||
#include "AFQMC/Numerics/detail/HIP/Kernels/hip_settings.h"
|
||||
#include "AFQMC/Memory/HIP/hip_utilities.h"
|
||||
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
|
||||
|
||||
namespace kernels
|
||||
{
|
||||
|
@ -159,8 +159,8 @@ void batched_Tab_to_Klr(int nterms,
|
|||
hipLaunchKernelGGL(kernel_batched_Tab_to_Klr, dim3(grid_dim), dim3(nthr), 0, 0, nterms, nwalk, nocc, nchol_max,
|
||||
nchol_tot, ncholQ, ncholQ0, kdiag, reinterpret_cast<thrust::complex<double> const*>(Tab),
|
||||
reinterpret_cast<thrust::complex<double>*>(Kl), reinterpret_cast<thrust::complex<double>*>(Kr));
|
||||
qmc_hip::hip_check(hipGetLastError(), "batched_Tab_to_Klr");
|
||||
qmc_hip::hip_check(hipDeviceSynchronize(), "batched_Tab_to_Klr");
|
||||
qmc_hip::hip_kernel_check(hipGetLastError(), "batched_Tab_to_Klr");
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "batched_Tab_to_Klr");
|
||||
}
|
||||
|
||||
void batched_Tab_to_Klr(int nterms,
|
||||
|
@ -180,8 +180,8 @@ void batched_Tab_to_Klr(int nterms,
|
|||
hipLaunchKernelGGL(kernel_batched_Tab_to_Klr, dim3(grid_dim), dim3(nthr), 0, 0, nterms, nwalk, nocc, nchol_max,
|
||||
nchol_tot, ncholQ, ncholQ0, kdiag, reinterpret_cast<thrust::complex<float> const*>(Tab),
|
||||
reinterpret_cast<thrust::complex<float>*>(Kl), reinterpret_cast<thrust::complex<float>*>(Kr));
|
||||
qmc_hip::hip_check(hipGetLastError(), "batched_Tab_to_Klr");
|
||||
qmc_hip::hip_check(hipDeviceSynchronize(), "batched_Tab_to_Klr");
|
||||
qmc_hip::hip_kernel_check(hipGetLastError(), "batched_Tab_to_Klr");
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "batched_Tab_to_Klr");
|
||||
}
|
||||
|
||||
void batched_Tanb_to_Klr(int nterms,
|
||||
|
@ -201,8 +201,8 @@ void batched_Tanb_to_Klr(int nterms,
|
|||
hipLaunchKernelGGL(kernel_batched_Tanb_to_Klr, dim3(grid_dim), dim3(nthr), 0, 0, nterms, nwalk, nocc, nchol_max,
|
||||
nchol_tot, ncholQ, ncholQ0, kdiag, reinterpret_cast<thrust::complex<double> const*>(Tab),
|
||||
reinterpret_cast<thrust::complex<double>*>(Kl), reinterpret_cast<thrust::complex<double>*>(Kr));
|
||||
qmc_hip::hip_check(hipGetLastError(), "batched_Tanb_to_Klr");
|
||||
qmc_hip::hip_check(hipDeviceSynchronize(), "batched_Tanb_to_Klr");
|
||||
qmc_hip::hip_kernel_check(hipGetLastError(), "batched_Tanb_to_Klr");
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "batched_Tanb_to_Klr");
|
||||
}
|
||||
|
||||
void batched_Tanb_to_Klr(int nterms,
|
||||
|
@ -222,8 +222,8 @@ void batched_Tanb_to_Klr(int nterms,
|
|||
hipLaunchKernelGGL(kernel_batched_Tanb_to_Klr, dim3(grid_dim), dim3(nthr), 0, 0, nterms, nwalk, nocc, nchol_max,
|
||||
nchol_tot, ncholQ, ncholQ0, kdiag, reinterpret_cast<thrust::complex<float> const*>(Tab),
|
||||
reinterpret_cast<thrust::complex<float>*>(Kl), reinterpret_cast<thrust::complex<float>*>(Kr));
|
||||
qmc_hip::hip_check(hipGetLastError(), "batched_Tanb_to_Klr");
|
||||
qmc_hip::hip_check(hipDeviceSynchronize(), "batched_Tanb_to_Klr");
|
||||
qmc_hip::hip_kernel_check(hipGetLastError(), "batched_Tanb_to_Klr");
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "batched_Tanb_to_Klr");
|
||||
}
|
||||
|
||||
} // namespace kernels
|
||||
|
|
|
@ -16,7 +16,7 @@
|
|||
#include <thrust/complex.h>
|
||||
#include "uninitialized_array.hpp"
|
||||
#include "AFQMC/Numerics/detail/HIP/Kernels/hip_settings.h"
|
||||
#include "AFQMC/Memory/HIP/hip_utilities.h"
|
||||
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
|
||||
|
||||
namespace kernels
|
||||
{
|
||||
|
@ -138,8 +138,8 @@ void batched_dot_wabn_wban(int nbatch,
|
|||
nchol, reinterpret_cast<thrust::complex<double> const*>(alpha),
|
||||
reinterpret_cast<thrust::complex<double> const*>(Tab),
|
||||
reinterpret_cast<thrust::complex<double>*>(y), incy);
|
||||
qmc_hip::hip_check(hipGetLastError(), "batched_dot_wabn_wban");
|
||||
qmc_hip::hip_check(hipDeviceSynchronize(), "batched_dot_wabn_wban");
|
||||
qmc_hip::hip_kernel_check(hipGetLastError(), "batched_dot_wabn_wban");
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "batched_dot_wabn_wban");
|
||||
}
|
||||
|
||||
void batched_dot_wabn_wban(int nbatch,
|
||||
|
@ -157,8 +157,8 @@ void batched_dot_wabn_wban(int nbatch,
|
|||
nchol, reinterpret_cast<thrust::complex<float> const*>(alpha),
|
||||
reinterpret_cast<thrust::complex<float> const*>(Tab), reinterpret_cast<thrust::complex<float>*>(y),
|
||||
incy);
|
||||
qmc_hip::hip_check(hipGetLastError(), "batched_dot_wabn_wban");
|
||||
qmc_hip::hip_check(hipDeviceSynchronize(), "batched_dot_wabn_wban");
|
||||
qmc_hip::hip_kernel_check(hipGetLastError(), "batched_dot_wabn_wban");
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "batched_dot_wabn_wban");
|
||||
}
|
||||
|
||||
void batched_dot_wabn_wban(int nbatch,
|
||||
|
@ -176,8 +176,8 @@ void batched_dot_wabn_wban(int nbatch,
|
|||
nchol, reinterpret_cast<thrust::complex<float> const*>(alpha),
|
||||
reinterpret_cast<thrust::complex<float> const*>(Tab),
|
||||
reinterpret_cast<thrust::complex<double>*>(y), incy);
|
||||
qmc_hip::hip_check(hipGetLastError(), "batched_dot_wabn_wban");
|
||||
qmc_hip::hip_check(hipDeviceSynchronize(), "batched_dot_wabn_wban");
|
||||
qmc_hip::hip_kernel_check(hipGetLastError(), "batched_dot_wabn_wban");
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "batched_dot_wabn_wban");
|
||||
}
|
||||
|
||||
// anb/bna
|
||||
|
@ -196,8 +196,8 @@ void batched_dot_wanb_wbna(int nbatch,
|
|||
nchol, reinterpret_cast<thrust::complex<double> const*>(alpha),
|
||||
reinterpret_cast<thrust::complex<double> const*>(Tab),
|
||||
reinterpret_cast<thrust::complex<double>*>(y), incy);
|
||||
qmc_hip::hip_check(hipGetLastError(), "batched_dot_wanb_wbna");
|
||||
qmc_hip::hip_check(hipDeviceSynchronize(), "batched_dot_wanb_wbna");
|
||||
qmc_hip::hip_kernel_check(hipGetLastError(), "batched_dot_wanb_wbna");
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "batched_dot_wanb_wbna");
|
||||
}
|
||||
|
||||
void batched_dot_wanb_wbna(int nbatch,
|
||||
|
@ -215,8 +215,8 @@ void batched_dot_wanb_wbna(int nbatch,
|
|||
nchol, reinterpret_cast<thrust::complex<float> const*>(alpha),
|
||||
reinterpret_cast<thrust::complex<float> const*>(Tab), reinterpret_cast<thrust::complex<float>*>(y),
|
||||
incy);
|
||||
qmc_hip::hip_check(hipGetLastError(), "batched_dot_wanb_wbna");
|
||||
qmc_hip::hip_check(hipDeviceSynchronize(), "batched_dot_wanb_wbna");
|
||||
qmc_hip::hip_kernel_check(hipGetLastError(), "batched_dot_wanb_wbna");
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "batched_dot_wanb_wbna");
|
||||
}
|
||||
|
||||
void batched_dot_wanb_wbna(int nbatch,
|
||||
|
@ -234,8 +234,8 @@ void batched_dot_wanb_wbna(int nbatch,
|
|||
nchol, reinterpret_cast<thrust::complex<float> const*>(alpha),
|
||||
reinterpret_cast<thrust::complex<float> const*>(Tab),
|
||||
reinterpret_cast<thrust::complex<double>*>(y), incy);
|
||||
qmc_hip::hip_check(hipGetLastError(), "batched_dot_wanb_wbna");
|
||||
qmc_hip::hip_check(hipDeviceSynchronize(), "batched_dot_wanb_wbna");
|
||||
qmc_hip::hip_kernel_check(hipGetLastError(), "batched_dot_wanb_wbna");
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "batched_dot_wanb_wbna");
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -16,7 +16,7 @@
|
|||
#include <hip/hip_runtime.h>
|
||||
#include "uninitialized_array.hpp"
|
||||
#include "AFQMC/Numerics/detail/HIP/Kernels/hip_settings.h"
|
||||
#include "AFQMC/Memory/HIP/hip_utilities.h"
|
||||
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
|
||||
|
||||
namespace kernels
|
||||
{
|
||||
|
@ -156,8 +156,8 @@ void construct_X(int nCV,
|
|||
reinterpret_cast<thrust::complex<double> const*>(vbias),
|
||||
reinterpret_cast<thrust::complex<double>*>(HW), reinterpret_cast<thrust::complex<double>*>(MF),
|
||||
reinterpret_cast<thrust::complex<double>*>(X));
|
||||
qmc_hip::hip_check(hipGetLastError(), "construct_X");
|
||||
qmc_hip::hip_check(hipDeviceSynchronize(), "construct_X");
|
||||
qmc_hip::hip_kernel_check(hipGetLastError(), "construct_X");
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "construct_X");
|
||||
}
|
||||
void construct_X(int nCV,
|
||||
int nsteps,
|
||||
|
@ -183,8 +183,8 @@ void construct_X(int nCV,
|
|||
reinterpret_cast<thrust::complex<float> const*>(vbias),
|
||||
reinterpret_cast<thrust::complex<double>*>(HW), reinterpret_cast<thrust::complex<double>*>(MF),
|
||||
reinterpret_cast<thrust::complex<float>*>(X));
|
||||
qmc_hip::hip_check(hipGetLastError(), "construct_X");
|
||||
qmc_hip::hip_check(hipDeviceSynchronize(), "construct_X");
|
||||
qmc_hip::hip_kernel_check(hipGetLastError(), "construct_X");
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "construct_X");
|
||||
}
|
||||
void construct_X(int nCV,
|
||||
int nsteps,
|
||||
|
@ -210,8 +210,8 @@ void construct_X(int nCV,
|
|||
reinterpret_cast<thrust::complex<double> const*>(vbias),
|
||||
reinterpret_cast<thrust::complex<double>*>(HW), reinterpret_cast<thrust::complex<double>*>(MF),
|
||||
reinterpret_cast<thrust::complex<float>*>(X));
|
||||
qmc_hip::hip_check(hipGetLastError(), "construct_X");
|
||||
qmc_hip::hip_check(hipDeviceSynchronize(), "construct_X");
|
||||
qmc_hip::hip_kernel_check(hipGetLastError(), "construct_X");
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "construct_X");
|
||||
}
|
||||
void construct_X(int nCV,
|
||||
int nsteps,
|
||||
|
@ -237,8 +237,8 @@ void construct_X(int nCV,
|
|||
reinterpret_cast<thrust::complex<float> const*>(vbias),
|
||||
reinterpret_cast<thrust::complex<double>*>(HW), reinterpret_cast<thrust::complex<double>*>(MF),
|
||||
reinterpret_cast<thrust::complex<double>*>(X));
|
||||
qmc_hip::hip_check(hipGetLastError(), "construct_X");
|
||||
qmc_hip::hip_check(hipDeviceSynchronize(), "construct_X");
|
||||
qmc_hip::hip_kernel_check(hipGetLastError(), "construct_X");
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "construct_X");
|
||||
}
|
||||
|
||||
} // namespace kernels
|
||||
|
|
|
@ -15,7 +15,7 @@
|
|||
#include <hip/hip_runtime.h>
|
||||
#include <thrust/complex.h>
|
||||
#include <hip/hip_runtime.h>
|
||||
#include "AFQMC/Memory/HIP/hip_utilities.h"
|
||||
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
|
||||
|
||||
namespace kernels
|
||||
{
|
||||
|
@ -40,16 +40,16 @@ void copy_n_cast(double const* A, int n, float* B)
|
|||
int block_dim = 256;
|
||||
int grid_dim = (n + block_dim - 1) / block_dim;
|
||||
hipLaunchKernelGGL(kernel_copy_n_cast, dim3(grid_dim), dim3(block_dim), 0, 0, A, n, B);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
void copy_n_cast(float const* A, int n, double* B)
|
||||
{
|
||||
int block_dim = 256;
|
||||
int grid_dim = (n + block_dim - 1) / block_dim;
|
||||
hipLaunchKernelGGL(kernel_copy_n_cast, dim3(grid_dim), dim3(block_dim), 0, 0, A, n, B);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
void copy_n_cast(std::complex<double> const* A, int n, std::complex<float>* B)
|
||||
{
|
||||
|
@ -58,8 +58,8 @@ void copy_n_cast(std::complex<double> const* A, int n, std::complex<float>* B)
|
|||
hipLaunchKernelGGL(kernel_copy_n_cast, dim3(grid_dim), dim3(block_dim), 0, 0,
|
||||
reinterpret_cast<thrust::complex<double> const*>(A), n,
|
||||
reinterpret_cast<thrust::complex<float>*>(B));
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
void copy_n_cast(std::complex<float> const* A, int n, std::complex<double>* B)
|
||||
{
|
||||
|
@ -68,8 +68,8 @@ void copy_n_cast(std::complex<float> const* A, int n, std::complex<double>* B)
|
|||
hipLaunchKernelGGL(kernel_copy_n_cast, dim3(grid_dim), dim3(block_dim), 0, 0,
|
||||
reinterpret_cast<thrust::complex<float> const*>(A), n,
|
||||
reinterpret_cast<thrust::complex<double>*>(B));
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
} // namespace kernels
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
#include <thrust/device_malloc.h>
|
||||
#include <thrust/device_free.h>
|
||||
#include "uninitialized_array.hpp"
|
||||
#include "AFQMC/Memory/HIP/hip_utilities.h"
|
||||
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
|
||||
|
||||
namespace kernels
|
||||
{
|
||||
|
@ -390,8 +390,8 @@ __global__ void kernel_scale_columns(int n, int m, T* A, int lda, T* scl)
|
|||
void determinant_from_getrf_gpu(int N, double* m, int lda, int* piv, double LogOverlapFactor, double* res)
|
||||
{
|
||||
hipLaunchKernelGGL(kernel_determinant_from_getrf, dim3(1), dim3(256), 0, 0, N, m, lda, piv, LogOverlapFactor, res);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void determinant_from_getrf_gpu(int N,
|
||||
|
@ -405,8 +405,8 @@ void determinant_from_getrf_gpu(int N,
|
|||
reinterpret_cast<thrust::complex<double>*>(m), lda, piv,
|
||||
static_cast<thrust::complex<double>>(LogOverlapFactor),
|
||||
reinterpret_cast<thrust::complex<double>*>(res));
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void strided_determinant_from_getrf_gpu(int N,
|
||||
|
@ -421,8 +421,8 @@ void strided_determinant_from_getrf_gpu(int N,
|
|||
{
|
||||
hipLaunchKernelGGL(kernel_strided_determinant_from_getrf, dim3(nbatch), dim3(64), 0, 0, N, m, lda, mstride, piv,
|
||||
pstride, LogOverlapFactor, res, nbatch);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void strided_determinant_from_getrf_gpu(int N,
|
||||
|
@ -439,8 +439,8 @@ void strided_determinant_from_getrf_gpu(int N,
|
|||
reinterpret_cast<thrust::complex<double>*>(m), lda, mstride, piv, pstride,
|
||||
static_cast<thrust::complex<double>>(LogOverlapFactor),
|
||||
reinterpret_cast<thrust::complex<double>*>(res), nbatch);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void batched_determinant_from_getrf_gpu(int N,
|
||||
|
@ -454,8 +454,8 @@ void batched_determinant_from_getrf_gpu(int N,
|
|||
{
|
||||
hipLaunchKernelGGL(kernel_batched_determinant_from_getrf, dim3(nbatch), dim3(64), 0, 0, N, m, lda, piv, pstride,
|
||||
LogOverlapFactor, res, nbatch);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void batched_determinant_from_getrf_gpu(int N,
|
||||
|
@ -471,8 +471,8 @@ void batched_determinant_from_getrf_gpu(int N,
|
|||
reinterpret_cast<thrust::complex<double>**>(m), lda, piv, pstride,
|
||||
static_cast<thrust::complex<double>>(LogOverlapFactor),
|
||||
reinterpret_cast<thrust::complex<double>*>(res), nbatch);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
std::complex<double> determinant_from_geqrf_gpu(int N, double* m, int lda, double* buff, double LogOverlapFactor)
|
||||
|
@ -480,10 +480,10 @@ std::complex<double> determinant_from_geqrf_gpu(int N, double* m, int lda, doubl
|
|||
thrust::device_ptr<thrust::complex<double>> d_ptr = thrust::device_malloc<thrust::complex<double>>(1);
|
||||
hipLaunchKernelGGL(kernel_determinant_from_geqrf, dim3(1), dim3(256), 0, 0, N, m, lda, buff, LogOverlapFactor,
|
||||
thrust::raw_pointer_cast(d_ptr));
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
std::complex<double> res;
|
||||
qmc_hip::hip_check(hipMemcpy(std::addressof(res), thrust::raw_pointer_cast(d_ptr), sizeof(std::complex<double>),
|
||||
qmc_hip::hip_kernel_check(hipMemcpy(std::addressof(res), thrust::raw_pointer_cast(d_ptr), sizeof(std::complex<double>),
|
||||
hipMemcpyDeviceToHost));
|
||||
thrust::device_free(d_ptr);
|
||||
return res;
|
||||
|
@ -500,10 +500,10 @@ std::complex<double> determinant_from_geqrf_gpu(int N,
|
|||
reinterpret_cast<thrust::complex<double>*>(m), lda,
|
||||
reinterpret_cast<thrust::complex<double>*>(buff),
|
||||
static_cast<thrust::complex<double>>(LogOverlapFactor), thrust::raw_pointer_cast(d_ptr));
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
std::complex<double> res;
|
||||
qmc_hip::hip_check(hipMemcpy(std::addressof(res), thrust::raw_pointer_cast(d_ptr), sizeof(std::complex<double>),
|
||||
qmc_hip::hip_kernel_check(hipMemcpy(std::addressof(res), thrust::raw_pointer_cast(d_ptr), sizeof(std::complex<double>),
|
||||
hipMemcpyDeviceToHost));
|
||||
thrust::device_free(d_ptr);
|
||||
return res;
|
||||
|
@ -512,8 +512,8 @@ std::complex<double> determinant_from_geqrf_gpu(int N,
|
|||
void determinant_from_geqrf_gpu(int N, double* m, int lda, double* buff)
|
||||
{
|
||||
hipLaunchKernelGGL(kernel_determinant_from_geqrf, dim3(1), dim3(256), 0, 0, N, m, lda, buff);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void determinant_from_geqrf_gpu(int N, std::complex<double>* m, int lda, std::complex<double>* buff)
|
||||
|
@ -521,8 +521,8 @@ void determinant_from_geqrf_gpu(int N, std::complex<double>* m, int lda, std::co
|
|||
hipLaunchKernelGGL(kernel_determinant_from_geqrf, dim3(1), dim3(256), 0, 0, N,
|
||||
reinterpret_cast<thrust::complex<double>*>(m), lda,
|
||||
reinterpret_cast<thrust::complex<double>*>(buff));
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
|
||||
|
@ -531,8 +531,8 @@ double determinant_from_getrf_gpu(int N, double* m, int lda, int* piv, double Lo
|
|||
thrust::device_ptr<double> d_ptr = thrust::device_malloc<double>(1);
|
||||
hipLaunchKernelGGL(kernel_determinant_from_getrf, dim3(1), dim3(256), 0, 0, N, m, lda, piv, LogOverlapFactor,
|
||||
thrust::raw_pointer_cast(d_ptr));
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
double res = *d_ptr;
|
||||
thrust::device_free(d_ptr);
|
||||
return res;
|
||||
|
@ -548,10 +548,10 @@ std::complex<double> determinant_from_getrf_gpu(int N,
|
|||
hipLaunchKernelGGL(kernel_determinant_from_getrf, dim3(1), dim3(256), 0, 0, N,
|
||||
reinterpret_cast<thrust::complex<double>*>(m), lda, piv,
|
||||
static_cast<thrust::complex<double>>(LogOverlapFactor), thrust::raw_pointer_cast(d_ptr));
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
std::complex<double> res;
|
||||
qmc_hip::hip_check(hipMemcpy(std::addressof(res), thrust::raw_pointer_cast(d_ptr), sizeof(std::complex<double>),
|
||||
qmc_hip::hip_kernel_check(hipMemcpy(std::addressof(res), thrust::raw_pointer_cast(d_ptr), sizeof(std::complex<double>),
|
||||
hipMemcpyDeviceToHost));
|
||||
thrust::device_free(d_ptr);
|
||||
return res;
|
||||
|
@ -562,8 +562,8 @@ void scale_columns(int n, int m, double* A, int lda, double* scl)
|
|||
int xblock_dim = 32;
|
||||
dim3 block_dim(xblock_dim, xblock_dim, 1);
|
||||
hipLaunchKernelGGL(kernel_scale_columns, dim3(1), dim3(block_dim), 0, 0, n, m, A, lda, scl);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
void scale_columns(int n, int m, std::complex<double>* A, int lda, std::complex<double>* scl)
|
||||
{
|
||||
|
@ -572,8 +572,8 @@ void scale_columns(int n, int m, std::complex<double>* A, int lda, std::complex<
|
|||
hipLaunchKernelGGL(kernel_scale_columns, dim3(1), dim3(block_dim), 0, 0, n, m,
|
||||
reinterpret_cast<thrust::complex<double>*>(A), lda,
|
||||
reinterpret_cast<thrust::complex<double>*>(scl));
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
} // namespace kernels
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
#include <hip/hip_runtime.h>
|
||||
#include "uninitialized_array.hpp"
|
||||
#include "AFQMC/Numerics/detail/HIP/Kernels/hip_settings.h"
|
||||
#include "AFQMC/Memory/HIP/hip_utilities.h"
|
||||
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
|
||||
|
||||
namespace kernels
|
||||
{
|
||||
|
@ -223,8 +223,8 @@ void dot_wabn(int nwalk,
|
|||
static_cast<thrust::complex<double> const>(alpha),
|
||||
reinterpret_cast<thrust::complex<double> const*>(Tab),
|
||||
reinterpret_cast<thrust::complex<double>*>(y), incy);
|
||||
qmc_hip::hip_check(hipGetLastError(), "dot_wabn");
|
||||
qmc_hip::hip_check(hipDeviceSynchronize(), "dot_wabn");
|
||||
qmc_hip::hip_kernel_check(hipGetLastError(), "dot_wabn");
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "dot_wabn");
|
||||
}
|
||||
|
||||
void dot_wabn(int nwalk,
|
||||
|
@ -241,8 +241,8 @@ void dot_wabn(int nwalk,
|
|||
static_cast<thrust::complex<float> const>(alpha),
|
||||
reinterpret_cast<thrust::complex<float> const*>(Tab), reinterpret_cast<thrust::complex<float>*>(y),
|
||||
incy);
|
||||
qmc_hip::hip_check(hipGetLastError(), "dot_wabn");
|
||||
qmc_hip::hip_check(hipDeviceSynchronize(), "dot_wabn");
|
||||
qmc_hip::hip_kernel_check(hipGetLastError(), "dot_wabn");
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "dot_wabn");
|
||||
}
|
||||
|
||||
void dot_wabn(int nwalk,
|
||||
|
@ -259,8 +259,8 @@ void dot_wabn(int nwalk,
|
|||
static_cast<thrust::complex<float> const>(alpha),
|
||||
reinterpret_cast<thrust::complex<float> const*>(Tab),
|
||||
reinterpret_cast<thrust::complex<double>*>(y), incy);
|
||||
qmc_hip::hip_check(hipGetLastError(), "dot_wabn");
|
||||
qmc_hip::hip_check(hipDeviceSynchronize(), "dot_wabn");
|
||||
qmc_hip::hip_kernel_check(hipGetLastError(), "dot_wabn");
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "dot_wabn");
|
||||
}
|
||||
|
||||
// v2
|
||||
|
@ -283,8 +283,8 @@ void dot_wanb(int nwalk,
|
|||
static_cast<thrust::complex<double> const>(alpha),
|
||||
reinterpret_cast<thrust::complex<double> const*>(Tab),
|
||||
reinterpret_cast<thrust::complex<double>*>(y), incy);
|
||||
qmc_hip::hip_check(hipGetLastError(), "dot_wanb");
|
||||
qmc_hip::hip_check(hipDeviceSynchronize(), "dot_wanb");
|
||||
qmc_hip::hip_kernel_check(hipGetLastError(), "dot_wanb");
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "dot_wanb");
|
||||
}
|
||||
|
||||
void dot_wanb(int nwalk,
|
||||
|
@ -306,8 +306,8 @@ void dot_wanb(int nwalk,
|
|||
static_cast<thrust::complex<float> const>(alpha),
|
||||
reinterpret_cast<thrust::complex<float> const*>(Tab), reinterpret_cast<thrust::complex<float>*>(y),
|
||||
incy);
|
||||
qmc_hip::hip_check(hipGetLastError(), "dot_wanb");
|
||||
qmc_hip::hip_check(hipDeviceSynchronize(), "dot_wanb");
|
||||
qmc_hip::hip_kernel_check(hipGetLastError(), "dot_wanb");
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "dot_wanb");
|
||||
}
|
||||
|
||||
void dot_wanb(int nwalk,
|
||||
|
@ -329,8 +329,8 @@ void dot_wanb(int nwalk,
|
|||
static_cast<thrust::complex<float> const>(alpha),
|
||||
reinterpret_cast<thrust::complex<float> const*>(Tab),
|
||||
reinterpret_cast<thrust::complex<double>*>(y), incy);
|
||||
qmc_hip::hip_check(hipGetLastError(), "dot_wanb");
|
||||
qmc_hip::hip_check(hipDeviceSynchronize(), "dot_wanb");
|
||||
qmc_hip::hip_kernel_check(hipGetLastError(), "dot_wanb");
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "dot_wanb");
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -345,8 +345,8 @@ void dot_wanb( int nwalk, int nocc, int nchol,
|
|||
static_cast<thrust::complex<double> const>(alpha),
|
||||
reinterpret_cast<thrust::complex<double> const*>(Tab),
|
||||
reinterpret_cast<thrust::complex<double> *>(y),incy);
|
||||
qmc_hip::hip_check(hipGetLastError(),"dot_wanb");
|
||||
qmc_hip::hip_check(hipDeviceSynchronize(),"dot_wanb");
|
||||
qmc_hip::hip_kernel_check(hipGetLastError(),"dot_wanb");
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize(),"dot_wanb");
|
||||
}
|
||||
|
||||
void dot_wanb( int nwalk, int nocc, int nchol,
|
||||
|
@ -359,8 +359,8 @@ void dot_wanb( int nwalk, int nocc, int nchol,
|
|||
static_cast<thrust::complex<float> const>(alpha),
|
||||
reinterpret_cast<thrust::complex<float> const*>(Tab),
|
||||
reinterpret_cast<thrust::complex<float> *>(y),incy);
|
||||
qmc_hip::hip_check(hipGetLastError(),"dot_wanb");
|
||||
qmc_hip::hip_check(hipDeviceSynchronize(),"dot_wanb");
|
||||
qmc_hip::hip_kernel_check(hipGetLastError(),"dot_wanb");
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize(),"dot_wanb");
|
||||
}
|
||||
|
||||
void dot_wanb( int nwalk, int nocc, int nchol,
|
||||
|
@ -373,8 +373,8 @@ void dot_wanb( int nwalk, int nocc, int nchol,
|
|||
static_cast<thrust::complex<float> const>(alpha),
|
||||
reinterpret_cast<thrust::complex<float> const*>(Tab),
|
||||
reinterpret_cast<thrust::complex<double> *>(y),incy);
|
||||
qmc_hip::hip_check(hipGetLastError(),"dot_wanb");
|
||||
qmc_hip::hip_check(hipDeviceSynchronize(),"dot_wanb");
|
||||
qmc_hip::hip_kernel_check(hipGetLastError(),"dot_wanb");
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize(),"dot_wanb");
|
||||
}
|
||||
*/
|
||||
|
||||
|
@ -390,8 +390,8 @@ void dot_wpan_waqn_Fwpq(int nwalk,
|
|||
static_cast<thrust::complex<double> const>(alpha),
|
||||
reinterpret_cast<thrust::complex<double> const*>(Tab),
|
||||
reinterpret_cast<thrust::complex<double>*>(F));
|
||||
qmc_hip::hip_check(hipGetLastError(), "dot_wpan_waqn_Fwpq");
|
||||
qmc_hip::hip_check(hipDeviceSynchronize(), "dot_wpan_waqn_Fwpq");
|
||||
qmc_hip::hip_kernel_check(hipGetLastError(), "dot_wpan_waqn_Fwpq");
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "dot_wpan_waqn_Fwpq");
|
||||
}
|
||||
|
||||
void dot_wpan_waqn_Fwpq(int nwalk,
|
||||
|
@ -406,8 +406,8 @@ void dot_wpan_waqn_Fwpq(int nwalk,
|
|||
static_cast<thrust::complex<float> const>(alpha),
|
||||
reinterpret_cast<thrust::complex<float> const*>(Tab),
|
||||
reinterpret_cast<thrust::complex<float>*>(F));
|
||||
qmc_hip::hip_check(hipGetLastError(), "dot_wpan_waqn_Fwpq");
|
||||
qmc_hip::hip_check(hipDeviceSynchronize(), "dot_wpan_waqn_Fwpq");
|
||||
qmc_hip::hip_kernel_check(hipGetLastError(), "dot_wpan_waqn_Fwpq");
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "dot_wpan_waqn_Fwpq");
|
||||
}
|
||||
|
||||
|
||||
|
@ -423,8 +423,8 @@ void dot_wpan_waqn_Fwpq(int nwalk,
|
|||
static_cast<thrust::complex<float> const>(alpha),
|
||||
reinterpret_cast<thrust::complex<float> const*>(Tab),
|
||||
reinterpret_cast<thrust::complex<double>*>(F));
|
||||
qmc_hip::hip_check(hipGetLastError(), "dot_wpan_waqn_Fwpq");
|
||||
qmc_hip::hip_check(hipDeviceSynchronize(), "dot_wpan_waqn_Fwpq");
|
||||
qmc_hip::hip_kernel_check(hipGetLastError(), "dot_wpan_waqn_Fwpq");
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "dot_wpan_waqn_Fwpq");
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -13,7 +13,7 @@
|
|||
#include <cassert>
|
||||
#include <complex>
|
||||
#include <type_traits>
|
||||
#include "AFQMC/Memory/HIP/hip_utilities.h"
|
||||
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
|
||||
|
||||
namespace kernels
|
||||
{
|
||||
|
@ -41,118 +41,118 @@ __global__ void kernel_fill2D_n(Size N, Size M, T* y, Size lda, T const a)
|
|||
void fill_n(char* first, int N, int incx, char const value)
|
||||
{
|
||||
hipLaunchKernelGGL(kernel_fill_n, dim3(1), dim3(256), 0, 0, N, first, incx, value);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
void fill_n(int* first, int N, int incx, int const value)
|
||||
{
|
||||
hipLaunchKernelGGL(kernel_fill_n, dim3(1), dim3(256), 0, 0, N, first, incx, value);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
void fill_n(float* first, int N, int incx, float const value)
|
||||
{
|
||||
hipLaunchKernelGGL(kernel_fill_n, dim3(1), dim3(256), 0, 0, N, first, incx, value);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
void fill_n(double* first, int N, int incx, double const value)
|
||||
{
|
||||
hipLaunchKernelGGL(kernel_fill_n, dim3(1), dim3(256), 0, 0, N, first, incx, value);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
void fill_n(std::complex<float>* first, int N, int incx, std::complex<float> const value)
|
||||
{
|
||||
hipLaunchKernelGGL(kernel_fill_n, dim3(1), dim3(256), 0, 0, N, first, incx, value);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
void fill_n(std::complex<double>* first, int N, int incx, std::complex<double> const value)
|
||||
{
|
||||
hipLaunchKernelGGL(kernel_fill_n, dim3(1), dim3(256), 0, 0, N, first, incx, value);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void fill_n(char* first, int N, char const value)
|
||||
{
|
||||
hipLaunchKernelGGL(kernel_fill_n, dim3(1), dim3(256), 0, 0, N, first, 1, value);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
void fill_n(long int* first, long unsigned int N, const long int value)
|
||||
{
|
||||
hipLaunchKernelGGL(kernel_fill_n, dim3(1), dim3(256), 0, 0, N, first, 1, value);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
void fill_n(long unsigned int* first, long unsigned int N, const long unsigned int value)
|
||||
{
|
||||
hipLaunchKernelGGL(kernel_fill_n, dim3(1), dim3(256), 0, 0, N, first, 1, value);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
void fill_n(int* first, int N, int const value)
|
||||
{
|
||||
hipLaunchKernelGGL(kernel_fill_n, dim3(1), dim3(256), 0, 0, N, first, 1, value);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
void fill_n(float* first, int N, float const value)
|
||||
{
|
||||
hipLaunchKernelGGL(kernel_fill_n, dim3(1), dim3(256), 0, 0, N, first, 1, value);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
void fill_n(double* first, int N, double const value)
|
||||
{
|
||||
hipLaunchKernelGGL(kernel_fill_n, dim3(1), dim3(256), 0, 0, N, first, 1, value);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
void fill_n(std::complex<float>* first, int N, std::complex<float> const value)
|
||||
{
|
||||
hipLaunchKernelGGL(kernel_fill_n, dim3(1), dim3(256), 0, 0, N, first, 1, value);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
void fill_n(std::complex<double>* first, int N, std::complex<double> const value)
|
||||
{
|
||||
hipLaunchKernelGGL(kernel_fill_n, dim3(1), dim3(256), 0, 0, N, first, 1, value);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void fill2D_n(int N, int M, int* A, int lda, int const value)
|
||||
{
|
||||
hipLaunchKernelGGL(kernel_fill2D_n, dim3(32), dim3(32), 0, 0, N, M, A, lda, value);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
void fill2D_n(int N, int M, float* A, int lda, float const value)
|
||||
{
|
||||
hipLaunchKernelGGL(kernel_fill2D_n, dim3(32), dim3(32), 0, 0, N, M, A, lda, value);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
void fill2D_n(int N, int M, double* A, int lda, double const value)
|
||||
{
|
||||
hipLaunchKernelGGL(kernel_fill2D_n, dim3(32), dim3(32), 0, 0, N, M, A, lda, value);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
void fill2D_n(int N, int M, std::complex<double>* A, int lda, std::complex<double> const value)
|
||||
{
|
||||
hipLaunchKernelGGL(kernel_fill2D_n, dim3(32), dim3(32), 0, 0, N, M, A, lda, value);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
void fill2D_n(int N, int M, std::complex<float>* A, int lda, std::complex<float> const value)
|
||||
{
|
||||
hipLaunchKernelGGL(kernel_fill2D_n, dim3(32), dim3(32), 0, 0, N, M, A, lda, value);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
} // namespace kernels
|
||||
|
|
|
@ -16,7 +16,7 @@
|
|||
//#include "hip_settings.h"
|
||||
//#include "hip_utilities.h"
|
||||
#include "AFQMC/Numerics/detail/HIP/Kernels/hip_settings.h"
|
||||
#include "AFQMC/Memory/HIP/hip_utilities.h"
|
||||
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
|
||||
|
||||
namespace kernels
|
||||
{
|
||||
|
@ -52,8 +52,8 @@ void get_diagonal_strided(int nk,
|
|||
hipLaunchKernelGGL(kernel_get_diagonal_strided, dim3(grid_dim), dim3(nthr), 0, 0, nk, ni,
|
||||
reinterpret_cast<thrust::complex<double> const*>(B), ldb, stride,
|
||||
reinterpret_cast<thrust::complex<double>*>(A), lda);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void get_diagonal_strided(int nk,
|
||||
|
@ -70,8 +70,8 @@ void get_diagonal_strided(int nk,
|
|||
hipLaunchKernelGGL(kernel_get_diagonal_strided, dim3(grid_dim), dim3(nthr), 0, 0, nk, ni,
|
||||
reinterpret_cast<thrust::complex<float> const*>(B), ldb, stride,
|
||||
reinterpret_cast<thrust::complex<float>*>(A), lda);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
} // namespace kernels
|
||||
|
|
|
@ -15,7 +15,7 @@
|
|||
#include <hip/hip_runtime.h>
|
||||
#include <thrust/complex.h>
|
||||
#include <hip/hip_runtime.h>
|
||||
#include "AFQMC/Memory/HIP/hip_utilities.h"
|
||||
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
|
||||
#include "AFQMC/Numerics/detail/HIP/Kernels/hip_settings.h"
|
||||
|
||||
namespace kernels
|
||||
|
@ -61,32 +61,32 @@ void inplace_cast(unsigned long n, std::complex<float>* A, std::complex<double>*
|
|||
{
|
||||
hipLaunchKernelGGL(kernel_inplace_cast, dim3(1), dim3(MAX_THREADS_PER_DIM), 0, 0, n,
|
||||
reinterpret_cast<thrust::complex<float>*>(A), reinterpret_cast<thrust::complex<double>*>(B));
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void inplace_cast(unsigned long n, std::complex<double>* A, std::complex<float>* B)
|
||||
{
|
||||
hipLaunchKernelGGL(kernel_inplace_cast, dim3(1), dim3(MAX_THREADS_PER_DIM), 0, 0, n,
|
||||
reinterpret_cast<thrust::complex<double>*>(A), reinterpret_cast<thrust::complex<float>*>(B));
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void inplace_cast(long n, std::complex<float>* A, std::complex<double>* B)
|
||||
{
|
||||
hipLaunchKernelGGL(kernel_inplace_cast, dim3(1), dim3(MAX_THREADS_PER_DIM), 0, 0, n,
|
||||
reinterpret_cast<thrust::complex<float>*>(A), reinterpret_cast<thrust::complex<double>*>(B));
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void inplace_cast(long n, std::complex<double>* A, std::complex<float>* B)
|
||||
{
|
||||
hipLaunchKernelGGL(kernel_inplace_cast, dim3(1), dim3(MAX_THREADS_PER_DIM), 0, 0, n,
|
||||
reinterpret_cast<thrust::complex<double>*>(A), reinterpret_cast<thrust::complex<float>*>(B));
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
} // namespace kernels
|
||||
|
|
|
@ -13,7 +13,7 @@
|
|||
#include <complex>
|
||||
#include <thrust/complex.h>
|
||||
#include <hip/hip_runtime.h>
|
||||
#include "AFQMC/Memory/HIP/hip_utilities.h"
|
||||
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
|
||||
#include "AFQMC/Numerics/detail/HIP/Kernels/hip_settings.h"
|
||||
|
||||
namespace kernels
|
||||
|
@ -38,8 +38,8 @@ void inplace_product(int nbatch, int n, int m, double const* B, int ldb, std::co
|
|||
dim3 block_dim(1, MAX_THREADS_PER_DIM, 1);
|
||||
hipLaunchKernelGGL(kernel_inplace_product, dim3(grid_dim), dim3(block_dim), 0, 0, nbatch, n, m, B, ldb,
|
||||
reinterpret_cast<thrust::complex<double>*>(A), lda);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void inplace_product(int nbatch, int n, int m, std::complex<double> const* B, int ldb, std::complex<double>* A, int lda)
|
||||
|
@ -50,8 +50,8 @@ void inplace_product(int nbatch, int n, int m, std::complex<double> const* B, in
|
|||
hipLaunchKernelGGL(kernel_inplace_product, dim3(grid_dim), dim3(block_dim), 0, 0, nbatch, n, m,
|
||||
reinterpret_cast<thrust::complex<double> const*>(B), ldb,
|
||||
reinterpret_cast<thrust::complex<double>*>(A), lda);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void inplace_product(int nbatch, int n, int m, float const* B, int ldb, std::complex<float>* A, int lda)
|
||||
|
@ -61,8 +61,8 @@ void inplace_product(int nbatch, int n, int m, float const* B, int ldb, std::com
|
|||
dim3 block_dim(1, MAX_THREADS_PER_DIM, 1);
|
||||
hipLaunchKernelGGL(kernel_inplace_product, dim3(grid_dim), dim3(block_dim), 0, 0, nbatch, n, m, B, ldb,
|
||||
reinterpret_cast<thrust::complex<float>*>(A), lda);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void inplace_product(int nbatch, int n, int m, std::complex<float> const* B, int ldb, std::complex<float>* A, int lda)
|
||||
|
@ -73,8 +73,8 @@ void inplace_product(int nbatch, int n, int m, std::complex<float> const* B, int
|
|||
hipLaunchKernelGGL(kernel_inplace_product, dim3(grid_dim), dim3(block_dim), 0, 0, nbatch, n, m,
|
||||
reinterpret_cast<thrust::complex<float> const*>(B), ldb,
|
||||
reinterpret_cast<thrust::complex<float>*>(A), lda);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
} // namespace kernels
|
||||
|
|
|
@ -12,7 +12,7 @@
|
|||
|
||||
#include <complex>
|
||||
#include <thrust/complex.h>
|
||||
#include "AFQMC/Memory/HIP/hip_utilities.h"
|
||||
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
|
||||
|
||||
namespace kernels
|
||||
{
|
||||
|
@ -79,112 +79,112 @@ __global__ void op_div__(thrust::complex<T>* x, thrust::complex<T> inc)
|
|||
void op_plus(double* x, double inc)
|
||||
{
|
||||
hipLaunchKernelGGL(op_plus__, dim3(1), dim3(1), 0, 0, x, inc);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
void op_plus(float* x, float inc)
|
||||
{
|
||||
hipLaunchKernelGGL(op_plus__, dim3(1), dim3(1), 0, 0, x, inc);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
void op_plus(std::complex<double>* x, std::complex<double> inc)
|
||||
{
|
||||
hipLaunchKernelGGL(op_plus__, dim3(1), dim3(1), 0, 0, reinterpret_cast<thrust::complex<double>*>(x),
|
||||
static_cast<thrust::complex<double>>(inc));
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
void op_plus(std::complex<float>* x, std::complex<float> inc)
|
||||
{
|
||||
hipLaunchKernelGGL(op_plus__, dim3(1), dim3(1), 0, 0, reinterpret_cast<thrust::complex<float>*>(x),
|
||||
static_cast<thrust::complex<float>>(inc));
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
// -=
|
||||
void op_minus(double* x, double inc)
|
||||
{
|
||||
hipLaunchKernelGGL(op_minus__, dim3(1), dim3(1), 0, 0, x, inc);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
void op_minus(float* x, float inc)
|
||||
{
|
||||
hipLaunchKernelGGL(op_minus__, dim3(1), dim3(1), 0, 0, x, inc);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
void op_minus(std::complex<double>* x, std::complex<double> inc)
|
||||
{
|
||||
hipLaunchKernelGGL(op_minus__, dim3(1), dim3(1), 0, 0, reinterpret_cast<thrust::complex<double>*>(x),
|
||||
static_cast<thrust::complex<double>>(inc));
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
void op_minus(std::complex<float>* x, std::complex<float> inc)
|
||||
{
|
||||
hipLaunchKernelGGL(op_minus__, dim3(1), dim3(1), 0, 0, reinterpret_cast<thrust::complex<float>*>(x),
|
||||
static_cast<thrust::complex<float>>(inc));
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
// *=
|
||||
void op_times(double* x, double inc)
|
||||
{
|
||||
hipLaunchKernelGGL(op_times__, dim3(1), dim3(1), 0, 0, x, inc);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
void op_times(float* x, float inc)
|
||||
{
|
||||
hipLaunchKernelGGL(op_times__, dim3(1), dim3(1), 0, 0, x, inc);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
void op_times(std::complex<double>* x, std::complex<double> inc)
|
||||
{
|
||||
hipLaunchKernelGGL(op_times__, dim3(1), dim3(1), 0, 0, reinterpret_cast<thrust::complex<double>*>(x),
|
||||
static_cast<thrust::complex<double>>(inc));
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
void op_times(std::complex<float>* x, std::complex<float> inc)
|
||||
{
|
||||
hipLaunchKernelGGL(op_times__, dim3(1), dim3(1), 0, 0, reinterpret_cast<thrust::complex<float>*>(x),
|
||||
static_cast<thrust::complex<float>>(inc));
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
// /=
|
||||
void op_div(double* x, double inc)
|
||||
{
|
||||
hipLaunchKernelGGL(op_div__, dim3(1), dim3(1), 0, 0, x, inc);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
void op_div(float* x, float inc)
|
||||
{
|
||||
hipLaunchKernelGGL(op_div__, dim3(1), dim3(1), 0, 0, x, inc);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
void op_div(std::complex<double>* x, std::complex<double> inc)
|
||||
{
|
||||
hipLaunchKernelGGL(op_div__, dim3(1), dim3(1), 0, 0, reinterpret_cast<thrust::complex<double>*>(x),
|
||||
static_cast<thrust::complex<double>>(inc));
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
void op_div(std::complex<float>* x, std::complex<float> inc)
|
||||
{
|
||||
hipLaunchKernelGGL(op_div__, dim3(1), dim3(1), 0, 0, reinterpret_cast<thrust::complex<float>*>(x),
|
||||
static_cast<thrust::complex<float>>(inc));
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -22,40 +22,40 @@ namespace kernels
|
|||
void sampleGaussianRNG(double* V, int n, rocrand_generator& gen)
|
||||
{
|
||||
qmc_hip::rocrand_check(rocrand_generate_normal_double(gen, V, n, 0.0, 1.0), "rocrand_generate_normal_double");
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
// Convert to double if really necessary
|
||||
void sampleGaussianRNG(float* V, int n, rocrand_generator& gen)
|
||||
{
|
||||
qmc_hip::rocrand_check(rocrand_generate_normal(gen, V, n, float(0.0), float(1.0)), "rocrand_generate_normal");
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void sampleGaussianRNG(std::complex<double>* V, int n, rocrand_generator& gen)
|
||||
{
|
||||
qmc_hip::rocrand_check(rocrand_generate_normal_double(gen, reinterpret_cast<double*>(V), 2 * n, 0.0, 1.0),
|
||||
"rocrand_generate_normal_double");
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
// hack hack hack!!!
|
||||
kernels::zero_complex_part(n, V);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void sampleGaussianRNG(std::complex<float>* V, int n, rocrand_generator& gen)
|
||||
{
|
||||
qmc_hip::rocrand_check(rocrand_generate_normal(gen, reinterpret_cast<float*>(V), 2 * n, float(0.0), float(1.0)),
|
||||
"rocrand_generate_normal");
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
// hack hack hack!!!
|
||||
kernels::zero_complex_part(n, V);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
} // namespace kernels
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
#include <hip/hip_runtime.h>
|
||||
#include <thrust/complex.h>
|
||||
#include <hip/hip_runtime.h>
|
||||
#include "AFQMC/Memory/HIP/hip_utilities.h"
|
||||
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
|
||||
|
||||
namespace kernels
|
||||
{
|
||||
|
@ -98,8 +98,8 @@ void set_identity(int m, int n, double* A, int lda)
|
|||
dim3 block_dim(xblock_dim, xblock_dim);
|
||||
dim3 grid_dim(xgrid_dim, ygrid_dim);
|
||||
hipLaunchKernelGGL(kernel_setIdentity, dim3(grid_dim), dim3(block_dim), 0, 0, m, n, A, lda);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void set_identity(int m, int n, float* A, int lda)
|
||||
|
@ -110,8 +110,8 @@ void set_identity(int m, int n, float* A, int lda)
|
|||
dim3 block_dim(xblock_dim, xblock_dim);
|
||||
dim3 grid_dim(xgrid_dim, ygrid_dim);
|
||||
hipLaunchKernelGGL(kernel_setIdentity, dim3(grid_dim), dim3(block_dim), 0, 0, m, n, A, lda);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void set_identity(int m, int n, std::complex<double>* A, int lda)
|
||||
|
@ -123,8 +123,8 @@ void set_identity(int m, int n, std::complex<double>* A, int lda)
|
|||
dim3 grid_dim(xgrid_dim, ygrid_dim);
|
||||
hipLaunchKernelGGL(kernel_setIdentity, dim3(grid_dim), dim3(block_dim), 0, 0, m, n,
|
||||
reinterpret_cast<thrust::complex<double>*>(A), lda);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void set_identity(int m, int n, std::complex<float>* A, int lda)
|
||||
|
@ -136,8 +136,8 @@ void set_identity(int m, int n, std::complex<float>* A, int lda)
|
|||
dim3 grid_dim(xgrid_dim, ygrid_dim);
|
||||
hipLaunchKernelGGL(kernel_setIdentity, dim3(grid_dim), dim3(block_dim), 0, 0, m, n,
|
||||
reinterpret_cast<thrust::complex<float>*>(A), lda);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void set_identity_strided(int nbatch, int stride, int m, int n, double* A, int lda)
|
||||
|
@ -148,8 +148,8 @@ void set_identity_strided(int nbatch, int stride, int m, int n, double* A, int l
|
|||
dim3 block_dim(xblock_dim, xblock_dim);
|
||||
dim3 grid_dim(xgrid_dim, ygrid_dim, nbatch);
|
||||
hipLaunchKernelGGL(kernel_setIdentity_strided, dim3(grid_dim), dim3(block_dim), 0, 0, nbatch, stride, m, n, A, lda);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void set_identity_strided(int nbatch, int stride, int m, int n, float* A, int lda)
|
||||
|
@ -160,8 +160,8 @@ void set_identity_strided(int nbatch, int stride, int m, int n, float* A, int ld
|
|||
dim3 block_dim(xblock_dim, xblock_dim);
|
||||
dim3 grid_dim(xgrid_dim, ygrid_dim, nbatch);
|
||||
hipLaunchKernelGGL(kernel_setIdentity_strided, dim3(grid_dim), dim3(block_dim), 0, 0, nbatch, stride, m, n, A, lda);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void set_identity_strided(int nbatch, int stride, int m, int n, std::complex<double>* A, int lda)
|
||||
|
@ -173,8 +173,8 @@ void set_identity_strided(int nbatch, int stride, int m, int n, std::complex<dou
|
|||
dim3 grid_dim(xgrid_dim, ygrid_dim, nbatch);
|
||||
hipLaunchKernelGGL(kernel_setIdentity_strided, dim3(grid_dim), dim3(block_dim), 0, 0, nbatch, stride, m, n,
|
||||
reinterpret_cast<thrust::complex<double>*>(A), lda);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void set_identity_strided(int nbatch, int stride, int m, int n, std::complex<float>* A, int lda)
|
||||
|
@ -186,8 +186,8 @@ void set_identity_strided(int nbatch, int stride, int m, int n, std::complex<flo
|
|||
dim3 grid_dim(xgrid_dim, ygrid_dim, nbatch);
|
||||
hipLaunchKernelGGL(kernel_setIdentity_strided, dim3(grid_dim), dim3(block_dim), 0, 0, nbatch, stride, m, n,
|
||||
reinterpret_cast<thrust::complex<float>*>(A), lda);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
} // namespace kernels
|
||||
|
|
|
@ -15,7 +15,7 @@
|
|||
#include <thrust/device_ptr.h>
|
||||
#include "AFQMC/Numerics/detail/HIP/Kernels/strided_range.hpp"
|
||||
#include "AFQMC/Numerics/detail/HIP/Kernels/strided_2Drange.hpp"
|
||||
#include "AFQMC/Memory/HIP/hip_utilities.h"
|
||||
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
|
||||
|
||||
namespace kernels
|
||||
{
|
||||
|
@ -32,8 +32,8 @@ double sum(int n, double const* x, int incx)
|
|||
thrust::device_ptr<double const> x_(x);
|
||||
strided_range<thrust::device_ptr<double const>> strided(x_, x_ + n * incx, incx);
|
||||
double res = thrust::reduce(strided.begin(), strided.end());
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
return res;
|
||||
}
|
||||
|
||||
|
@ -44,8 +44,8 @@ std::complex<double> sum(int n, std::complex<double> const* x, int incx)
|
|||
double R = thrust::reduce(Rstrided.begin(), Rstrided.end());
|
||||
strided_range<thrust::device_ptr<double const>> Istrided(x_ + 1, x_ + 1 + 2 * n * incx, 2 * incx);
|
||||
double I = thrust::reduce(Istrided.begin(), Istrided.end());
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
return std::complex<double>(R, I);
|
||||
}
|
||||
|
||||
|
@ -54,8 +54,8 @@ float sum(int n, float const* x, int incx)
|
|||
thrust::device_ptr<float const> x_(x);
|
||||
strided_range<thrust::device_ptr<float const>> strided(x_, x_ + n * incx, incx);
|
||||
float res = thrust::reduce(strided.begin(), strided.end());
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
return res;
|
||||
}
|
||||
|
||||
|
@ -66,8 +66,8 @@ std::complex<float> sum(int n, std::complex<float> const* x, int incx)
|
|||
float R = thrust::reduce(Rstrided.begin(), Rstrided.end());
|
||||
strided_range<thrust::device_ptr<float const>> Istrided(x_ + 1, x_ + 1 + 2 * n * incx, 2 * incx);
|
||||
float I = thrust::reduce(Istrided.begin(), Istrided.end());
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
return std::complex<float>(R, I);
|
||||
}
|
||||
|
||||
|
@ -76,8 +76,8 @@ double sum(int m, int n, double const* x, int lda)
|
|||
thrust::device_ptr<double const> x_(x);
|
||||
strided_2Drange<thrust::device_ptr<double const>> strided(x_, x_ + n * lda, lda, m);
|
||||
double res = thrust::reduce(strided.begin(), strided.end());
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
return res;
|
||||
}
|
||||
|
||||
|
@ -86,8 +86,8 @@ std::complex<double> sum(int m, int n, std::complex<double> const* x, int lda)
|
|||
std::complex<double> res;
|
||||
for (int i = 0; i < m; i++)
|
||||
res += sum(n, x + i, lda);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
return res;
|
||||
}
|
||||
|
||||
|
@ -103,8 +103,8 @@ std::complex<float> sum(int m, int n, std::complex<float> const* x, int lda)
|
|||
std::complex<float> res;
|
||||
for (int i = 0; i < m; i++)
|
||||
res += sum(n, x + i, lda);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
return res;
|
||||
}
|
||||
|
||||
|
|
|
@ -16,7 +16,7 @@
|
|||
#include <thrust/complex.h>
|
||||
#include <hip/hip_runtime.h>
|
||||
#include "AFQMC/Numerics/detail/HIP/Kernels/hip_settings.h"
|
||||
#include "AFQMC/Memory/HIP/hip_utilities.h"
|
||||
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
|
||||
|
||||
namespace kernels
|
||||
{
|
||||
|
@ -131,8 +131,8 @@ void term_by_term_mat_vec_plus(int dim,
|
|||
hipLaunchKernelGGL(kernel_tbt_mv_plus, dim3(1), dim3(block_dim), 0, 0, dim, nrow, ncol,
|
||||
reinterpret_cast<thrust::complex<double>*>(A), lda,
|
||||
reinterpret_cast<thrust::complex<double> const*>(x), incx);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void term_by_term_mat_vec_minus(int dim,
|
||||
|
@ -149,8 +149,8 @@ void term_by_term_mat_vec_minus(int dim,
|
|||
hipLaunchKernelGGL(kernel_tbt_mv_minus, dim3(1), dim3(block_dim), 0, 0, dim, nrow, ncol,
|
||||
reinterpret_cast<thrust::complex<double>*>(A), lda,
|
||||
reinterpret_cast<thrust::complex<double> const*>(x), incx);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void term_by_term_mat_vec_mult(int dim,
|
||||
|
@ -167,8 +167,8 @@ void term_by_term_mat_vec_mult(int dim,
|
|||
hipLaunchKernelGGL(kernel_tbt_mv_mult, dim3(1), dim3(block_dim), 0, 0, dim, nrow, ncol,
|
||||
reinterpret_cast<thrust::complex<double>*>(A), lda,
|
||||
reinterpret_cast<thrust::complex<double> const*>(x), incx);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void term_by_term_mat_vec_div(int dim,
|
||||
|
@ -185,8 +185,8 @@ void term_by_term_mat_vec_div(int dim,
|
|||
hipLaunchKernelGGL(kernel_tbt_mv_div, dim3(1), dim3(block_dim), 0, 0, dim, nrow, ncol,
|
||||
reinterpret_cast<thrust::complex<double>*>(A), lda,
|
||||
reinterpret_cast<thrust::complex<double> const*>(x), incx);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void term_by_term_mat_vec_plus(int dim, int nrow, int ncol, std::complex<double>* A, int lda, double* x, int incx)
|
||||
|
@ -196,8 +196,8 @@ void term_by_term_mat_vec_plus(int dim, int nrow, int ncol, std::complex<double>
|
|||
dim3 block_dim(xblock_dim, yblock_dim, 1);
|
||||
hipLaunchKernelGGL(kernel_tbt_mv_plus, dim3(1), dim3(block_dim), 0, 0, dim, nrow, ncol,
|
||||
reinterpret_cast<thrust::complex<double>*>(A), lda, x, incx);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void term_by_term_mat_vec_minus(int dim, int nrow, int ncol, std::complex<double>* A, int lda, double* x, int incx)
|
||||
|
@ -207,8 +207,8 @@ void term_by_term_mat_vec_minus(int dim, int nrow, int ncol, std::complex<double
|
|||
dim3 block_dim(xblock_dim, yblock_dim, 1);
|
||||
hipLaunchKernelGGL(kernel_tbt_mv_minus, dim3(1), dim3(block_dim), 0, 0, dim, nrow, ncol,
|
||||
reinterpret_cast<thrust::complex<double>*>(A), lda, x, incx);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void term_by_term_mat_vec_mult(int dim, int nrow, int ncol, std::complex<double>* A, int lda, double* x, int incx)
|
||||
|
@ -218,8 +218,8 @@ void term_by_term_mat_vec_mult(int dim, int nrow, int ncol, std::complex<double>
|
|||
dim3 block_dim(xblock_dim, yblock_dim, 1);
|
||||
hipLaunchKernelGGL(kernel_tbt_mv_mult, dim3(1), dim3(block_dim), 0, 0, dim, nrow, ncol,
|
||||
reinterpret_cast<thrust::complex<double>*>(A), lda, x, incx);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void term_by_term_mat_vec_div(int dim, int nrow, int ncol, std::complex<double>* A, int lda, double* x, int incx)
|
||||
|
@ -229,8 +229,8 @@ void term_by_term_mat_vec_div(int dim, int nrow, int ncol, std::complex<double>*
|
|||
dim3 block_dim(xblock_dim, yblock_dim, 1);
|
||||
hipLaunchKernelGGL(kernel_tbt_mv_div, dim3(1), dim3(block_dim), 0, 0, dim, nrow, ncol,
|
||||
reinterpret_cast<thrust::complex<double>*>(A), lda, x, incx);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
#include <complex>
|
||||
#include <type_traits>
|
||||
#include <thrust/complex.h>
|
||||
#include "AFQMC/Memory/HIP/hip_utilities.h"
|
||||
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
|
||||
//#include "AFQMC/Numerics/detail/HIP/Kernels/strided_range.hpp"
|
||||
|
||||
namespace kernels
|
||||
|
@ -32,44 +32,44 @@ __global__ void kernel_uninitialized_copy_n(Size N, T const* x, Size incx, T* ar
|
|||
void uninitialized_copy_n(int N, double const* first, int incx, double* array, int incy)
|
||||
{
|
||||
hipLaunchKernelGGL(kernel_uninitialized_copy_n, dim3(1), dim3(256), 0, 0, N, first, incx, array, incy);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void uninitialized_copy_n(int N, std::complex<double> const* first, int incx, std::complex<double>* array, int incy)
|
||||
{
|
||||
hipLaunchKernelGGL(kernel_uninitialized_copy_n, dim3(1), dim3(256), 0, 0, N, first, incx, array, incy);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void uninitialized_copy_n(int N, int const* first, int incx, int* array, int incy)
|
||||
{
|
||||
hipLaunchKernelGGL(kernel_uninitialized_copy_n, dim3(1), dim3(256), 0, 0, N, first, incx, array, incy);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
// long
|
||||
void uninitialized_copy_n(long N, double const* first, long incx, double* array, long incy)
|
||||
{
|
||||
hipLaunchKernelGGL(kernel_uninitialized_copy_n, dim3(1), dim3(256), 0, 0, N, first, incx, array, incy);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void uninitialized_copy_n(long N, std::complex<double> const* first, long incx, std::complex<double>* array, long incy)
|
||||
{
|
||||
hipLaunchKernelGGL(kernel_uninitialized_copy_n, dim3(1), dim3(256), 0, 0, N, first, incx, array, incy);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void uninitialized_copy_n(long N, int const* first, long incx, int* array, long incy)
|
||||
{
|
||||
hipLaunchKernelGGL(kernel_uninitialized_copy_n, dim3(1), dim3(256), 0, 0, N, first, incx, array, incy);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
} // namespace kernels
|
||||
|
|
|
@ -20,7 +20,7 @@
|
|||
#include <thrust/host_vector.h>
|
||||
#include <thrust/uninitialized_fill.h>
|
||||
*/
|
||||
#include "AFQMC/Memory/HIP/hip_utilities.h"
|
||||
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
|
||||
|
||||
namespace kernels
|
||||
{
|
||||
|
@ -39,99 +39,99 @@ __global__ void kernel_uninitialized_fill_n(Size N, T* x, T const a)
|
|||
void uninitialized_fill_n(bool* first, int N, bool const value)
|
||||
{
|
||||
hipLaunchKernelGGL(kernel_uninitialized_fill_n, dim3(1), dim3(256), 0, 0, N, first, value);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void uninitialized_fill_n(int* first, int N, int const value)
|
||||
{
|
||||
hipLaunchKernelGGL(kernel_uninitialized_fill_n, dim3(1), dim3(256), 0, 0, N, first, value);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void uninitialized_fill_n(float* first, int N, float const value)
|
||||
{
|
||||
hipLaunchKernelGGL(kernel_uninitialized_fill_n, dim3(1), dim3(256), 0, 0, N, first, value);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void uninitialized_fill_n(double* first, int N, double const value)
|
||||
{
|
||||
hipLaunchKernelGGL(kernel_uninitialized_fill_n, dim3(1), dim3(256), 0, 0, N, first, value);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void uninitialized_fill_n(std::complex<float>* first, int N, std::complex<float> const value)
|
||||
{
|
||||
hipLaunchKernelGGL(kernel_uninitialized_fill_n, dim3(1), dim3(256), 0, 0, N, first, value);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void uninitialized_fill_n(std::complex<double>* first, int N, std::complex<double> const value)
|
||||
{
|
||||
hipLaunchKernelGGL(kernel_uninitialized_fill_n, dim3(1), dim3(256), 0, 0, N, first, value);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void uninitialized_fill_n(double2* first, int N, double2 const value)
|
||||
{
|
||||
hipLaunchKernelGGL(kernel_uninitialized_fill_n, dim3(1), dim3(256), 0, 0, N, first, value);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void uninitialized_fill_n(bool* first, long N, bool const value)
|
||||
{
|
||||
hipLaunchKernelGGL(kernel_uninitialized_fill_n, dim3(1), dim3(256), 0, 0, N, first, value);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void uninitialized_fill_n(int* first, long N, int const value)
|
||||
{
|
||||
hipLaunchKernelGGL(kernel_uninitialized_fill_n, dim3(1), dim3(256), 0, 0, N, first, value);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void uninitialized_fill_n(float* first, long N, float const value)
|
||||
{
|
||||
hipLaunchKernelGGL(kernel_uninitialized_fill_n, dim3(1), dim3(256), 0, 0, N, first, value);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void uninitialized_fill_n(double* first, long N, double const value)
|
||||
{
|
||||
hipLaunchKernelGGL(kernel_uninitialized_fill_n, dim3(1), dim3(256), 0, 0, N, first, value);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void uninitialized_fill_n(std::complex<float>* first, long N, std::complex<float> const value)
|
||||
{
|
||||
hipLaunchKernelGGL(kernel_uninitialized_fill_n, dim3(1), dim3(256), 0, 0, N, first, value);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void uninitialized_fill_n(std::complex<double>* first, long N, std::complex<double> const value)
|
||||
{
|
||||
hipLaunchKernelGGL(kernel_uninitialized_fill_n, dim3(1), dim3(256), 0, 0, N, first, value);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void uninitialized_fill_n(double2* first, long N, double2 const value)
|
||||
{
|
||||
hipLaunchKernelGGL(kernel_uninitialized_fill_n, dim3(1), dim3(256), 0, 0, N, first, value);
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -16,7 +16,7 @@
|
|||
#include <thrust/complex.h>
|
||||
#include <hip/hip_runtime.h>
|
||||
#include "AFQMC/Numerics/detail/HIP/Kernels/hip_settings.h"
|
||||
#include "AFQMC/Memory/HIP/hip_utilities.h"
|
||||
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
|
||||
|
||||
namespace kernels
|
||||
{
|
||||
|
@ -188,8 +188,8 @@ void vKKwij_to_vwKiKj(int nwalk,
|
|||
dim3 grid_dim(nkpts, nkpts, nwalk);
|
||||
hipLaunchKernelGGL(kernel_vKKwij_to_vwKiKj, dim3(grid_dim), dim3(block_dim), 0, 0, nwalk, nkpts, nmo_max, nmo_tot, kk,
|
||||
nmo, nmo0, A, B);
|
||||
qmc_hip::hip_check(hipGetLastError(), "vKKwij_to_vwKiKj");
|
||||
qmc_hip::hip_check(hipDeviceSynchronize(), "vKKwij_to_vwKiKj");
|
||||
qmc_hip::hip_kernel_check(hipGetLastError(), "vKKwij_to_vwKiKj");
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "vKKwij_to_vwKiKj");
|
||||
}
|
||||
|
||||
void vKKwij_to_vwKiKj(int nwalk,
|
||||
|
@ -208,8 +208,8 @@ void vKKwij_to_vwKiKj(int nwalk,
|
|||
dim3 grid_dim(nkpts, nkpts, nwalk);
|
||||
hipLaunchKernelGGL(kernel_vKKwij_to_vwKiKj, dim3(grid_dim), dim3(block_dim), 0, 0, nwalk, nkpts, nmo_max, nmo_tot, kk,
|
||||
nmo, nmo0, A, B);
|
||||
qmc_hip::hip_check(hipGetLastError(), "vKKwij_to_vwKiKj");
|
||||
qmc_hip::hip_check(hipDeviceSynchronize(), "vKKwij_to_vwKiKj");
|
||||
qmc_hip::hip_kernel_check(hipGetLastError(), "vKKwij_to_vwKiKj");
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "vKKwij_to_vwKiKj");
|
||||
}
|
||||
|
||||
void vKKwij_to_vwKiKj(int nwalk,
|
||||
|
@ -228,8 +228,8 @@ void vKKwij_to_vwKiKj(int nwalk,
|
|||
dim3 grid_dim(nkpts, nkpts, nwalk);
|
||||
hipLaunchKernelGGL(kernel_vKKwij_to_vwKiKj, dim3(grid_dim), dim3(block_dim), 0, 0, nwalk, nkpts, nmo_max, nmo_tot, kk,
|
||||
nmo, nmo0, A, B);
|
||||
qmc_hip::hip_check(hipGetLastError(), "vKKwij_to_vwKiKj");
|
||||
qmc_hip::hip_check(hipDeviceSynchronize(), "vKKwij_to_vwKiKj");
|
||||
qmc_hip::hip_kernel_check(hipGetLastError(), "vKKwij_to_vwKiKj");
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "vKKwij_to_vwKiKj");
|
||||
}
|
||||
|
||||
void vKKwij_to_vwKiKj(int nwalk,
|
||||
|
@ -249,8 +249,8 @@ void vKKwij_to_vwKiKj(int nwalk,
|
|||
hipLaunchKernelGGL(kernel_vKKwij_to_vwKiKj, dim3(grid_dim), dim3(block_dim), 0, 0, nwalk, nkpts, nmo_max, nmo_tot, kk,
|
||||
nmo, nmo0, reinterpret_cast<thrust::complex<double> const*>(A),
|
||||
reinterpret_cast<thrust::complex<double>*>(B));
|
||||
qmc_hip::hip_check(hipGetLastError(), "vKKwij_to_vwKiKj");
|
||||
qmc_hip::hip_check(hipDeviceSynchronize(), "vKKwij_to_vwKiKj");
|
||||
qmc_hip::hip_kernel_check(hipGetLastError(), "vKKwij_to_vwKiKj");
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "vKKwij_to_vwKiKj");
|
||||
}
|
||||
|
||||
void vKKwij_to_vwKiKj(int nwalk,
|
||||
|
@ -270,8 +270,8 @@ void vKKwij_to_vwKiKj(int nwalk,
|
|||
hipLaunchKernelGGL(kernel_vKKwij_to_vwKiKj, dim3(grid_dim), dim3(block_dim), 0, 0, nwalk, nkpts, nmo_max, nmo_tot, kk,
|
||||
nmo, nmo0, reinterpret_cast<thrust::complex<float> const*>(A),
|
||||
reinterpret_cast<thrust::complex<float>*>(B));
|
||||
qmc_hip::hip_check(hipGetLastError(), "vKKwij_to_vwKiKj");
|
||||
qmc_hip::hip_check(hipDeviceSynchronize(), "vKKwij_to_vwKiKj");
|
||||
qmc_hip::hip_kernel_check(hipGetLastError(), "vKKwij_to_vwKiKj");
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "vKKwij_to_vwKiKj");
|
||||
}
|
||||
|
||||
void vKKwij_to_vwKiKj(int nwalk,
|
||||
|
@ -291,8 +291,8 @@ void vKKwij_to_vwKiKj(int nwalk,
|
|||
hipLaunchKernelGGL(kernel_vKKwij_to_vwKiKj, dim3(grid_dim), dim3(block_dim), 0, 0, nwalk, nkpts, nmo_max, nmo_tot, kk,
|
||||
nmo, nmo0, reinterpret_cast<thrust::complex<float> const*>(A),
|
||||
reinterpret_cast<thrust::complex<double>*>(B));
|
||||
qmc_hip::hip_check(hipGetLastError(), "vKKwij_to_vwKiKj");
|
||||
qmc_hip::hip_check(hipDeviceSynchronize(), "vKKwij_to_vwKiKj");
|
||||
qmc_hip::hip_kernel_check(hipGetLastError(), "vKKwij_to_vwKiKj");
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "vKKwij_to_vwKiKj");
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -16,7 +16,7 @@
|
|||
#include <thrust/complex.h>
|
||||
#include <hip/hip_runtime.h>
|
||||
#include "AFQMC/Numerics/detail/HIP/Kernels/hip_settings.h"
|
||||
#include "AFQMC/Memory/HIP/hip_utilities.h"
|
||||
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
|
||||
|
||||
namespace kernels
|
||||
{
|
||||
|
@ -113,8 +113,8 @@ void vbias_from_v1(int nwalk,
|
|||
ncholpQ, ncholpQ0, static_cast<thrust::complex<double> const>(alpha),
|
||||
reinterpret_cast<thrust::complex<double> const*>(v1),
|
||||
reinterpret_cast<thrust::complex<double>*>(vb));
|
||||
qmc_hip::hip_check(hipGetLastError(), "vbias_from_v1");
|
||||
qmc_hip::hip_check(hipDeviceSynchronize(), "vbias_from_v1");
|
||||
qmc_hip::hip_kernel_check(hipGetLastError(), "vbias_from_v1");
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "vbias_from_v1");
|
||||
}
|
||||
|
||||
void vbias_from_v1(int nwalk,
|
||||
|
@ -136,8 +136,8 @@ void vbias_from_v1(int nwalk,
|
|||
ncholpQ, ncholpQ0, static_cast<thrust::complex<float> const>(alpha),
|
||||
reinterpret_cast<thrust::complex<float> const*>(v1),
|
||||
reinterpret_cast<thrust::complex<float>*>(vb));
|
||||
qmc_hip::hip_check(hipGetLastError(), "vbias_from_v1");
|
||||
qmc_hip::hip_check(hipDeviceSynchronize(), "vbias_from_v1");
|
||||
qmc_hip::hip_kernel_check(hipGetLastError(), "vbias_from_v1");
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "vbias_from_v1");
|
||||
}
|
||||
|
||||
void vbias_from_v1(int nwalk,
|
||||
|
@ -159,8 +159,8 @@ void vbias_from_v1(int nwalk,
|
|||
ncholpQ, ncholpQ0, static_cast<thrust::complex<double> const>(alpha),
|
||||
reinterpret_cast<thrust::complex<float> const*>(v1),
|
||||
reinterpret_cast<thrust::complex<double>*>(vb));
|
||||
qmc_hip::hip_check(hipGetLastError(), "vbias_from_v1");
|
||||
qmc_hip::hip_check(hipDeviceSynchronize(), "vbias_from_v1");
|
||||
qmc_hip::hip_kernel_check(hipGetLastError(), "vbias_from_v1");
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize(), "vbias_from_v1");
|
||||
}
|
||||
|
||||
} // namespace kernels
|
||||
|
|
|
@ -15,7 +15,7 @@
|
|||
#include <hip/hip_runtime.h>
|
||||
#include <thrust/complex.h>
|
||||
#include <hip/hip_runtime.h>
|
||||
#include "AFQMC/Memory/HIP/hip_utilities.h"
|
||||
#include "AFQMC/Numerics/detail/HIP/hip_kernel_utils.h"
|
||||
|
||||
namespace kernels
|
||||
{
|
||||
|
@ -33,8 +33,8 @@ void zero_complex_part(int n, std::complex<double>* x)
|
|||
int grid_dim = (n + block_dim - 1) / block_dim;
|
||||
hipLaunchKernelGGL(kernel_zero_complex_part, dim3(grid_dim), dim3(block_dim), 0, 0, n,
|
||||
reinterpret_cast<thrust::complex<double>*>(x));
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void zero_complex_part(int n, std::complex<float>* x)
|
||||
|
@ -43,8 +43,8 @@ void zero_complex_part(int n, std::complex<float>* x)
|
|||
int grid_dim = (n + block_dim - 1) / block_dim;
|
||||
hipLaunchKernelGGL(kernel_zero_complex_part, dim3(grid_dim), dim3(block_dim), 0, 0, n,
|
||||
reinterpret_cast<thrust::complex<float>*>(x));
|
||||
qmc_hip::hip_check(hipGetLastError());
|
||||
qmc_hip::hip_check(hipDeviceSynchronize());
|
||||
qmc_hip::hip_kernel_check(hipGetLastError());
|
||||
qmc_hip::hip_kernel_check(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
void zero_complex_part(int n, double* x) { return; }
|
||||
|
|
|
@ -50,9 +50,7 @@ template<typename T, typename Q>
|
|||
inline static void copy(int n, T const* x, int incx, device_pointer<Q> y, int incy)
|
||||
{
|
||||
static_assert(std::is_same<typename std::decay<Q>::type, T>::value, "Wrong dispatch.\n");
|
||||
if (hipSuccess !=
|
||||
hipMemcpy2D(to_address(y), sizeof(Q) * incy, x, sizeof(T) * incx, sizeof(T), n, hipMemcpyHostToDevice))
|
||||
throw std::runtime_error("Error: hipMemcpy2D returned error code.");
|
||||
arch::memcopy2D(to_address(y), sizeof(Q) * incy, x, sizeof(T) * incx, sizeof(T), n, arch::memcopyH2D, "blas_hip_gpu_ptr::copy");
|
||||
}
|
||||
|
||||
template<typename T, typename Q>
|
||||
|
@ -60,9 +58,7 @@ inline static void copy(int n, device_pointer<Q> x, int incx, T* y, int incy)
|
|||
{
|
||||
static_assert(std::is_same<typename std::decay<Q>::type, T>::value, "Wrong dispatch.\n");
|
||||
assert(sizeof(Q) == sizeof(T));
|
||||
if (hipSuccess !=
|
||||
hipMemcpy2D(y, sizeof(T) * incy, to_address(x), sizeof(Q) * incx, sizeof(T), n, hipMemcpyDeviceToHost))
|
||||
throw std::runtime_error("Error: hipMemcpy2D returned error code.");
|
||||
arch::memcopy2D(y, sizeof(T) * incy, to_address(x), sizeof(Q) * incx, sizeof(T), n, arch::memcopyD2H, "blas_hip_gpu_ptr::copy");
|
||||
}
|
||||
|
||||
// scal Specializations
|
||||
|
@ -325,7 +321,7 @@ inline static void gemmBatched(char Atrans,
|
|||
{
|
||||
static_assert(std::is_same<typename std::decay<Q1>::type, T>::value, "Wrong dispatch.\n");
|
||||
static_assert(std::is_same<typename std::decay<Q2>::type, T>::value, "Wrong dispatch.\n");
|
||||
// replace with single call to hipMalloc and hipMemcpy
|
||||
// replace with single call to arch::malloc and arch::memcopy
|
||||
T **A_d, **B_d, **C_d;
|
||||
Q1** A_h;
|
||||
Q2** B_h;
|
||||
|
@ -339,17 +335,17 @@ inline static void gemmBatched(char Atrans,
|
|||
B_h[i] = to_address(B[i]);
|
||||
C_h[i] = to_address(C[i]);
|
||||
}
|
||||
hipMalloc((void**)&A_d, batchSize * sizeof(*A_h));
|
||||
hipMalloc((void**)&B_d, batchSize * sizeof(*B_h));
|
||||
hipMalloc((void**)&C_d, batchSize * sizeof(*C_h));
|
||||
hipMemcpy(A_d, A_h, batchSize * sizeof(*A_h), hipMemcpyHostToDevice);
|
||||
hipMemcpy(B_d, B_h, batchSize * sizeof(*B_h), hipMemcpyHostToDevice);
|
||||
hipMemcpy(C_d, C_h, batchSize * sizeof(*C_h), hipMemcpyHostToDevice);
|
||||
arch::malloc((void**)&A_d, batchSize * sizeof(*A_h));
|
||||
arch::malloc((void**)&B_d, batchSize * sizeof(*B_h));
|
||||
arch::malloc((void**)&C_d, batchSize * sizeof(*C_h));
|
||||
arch::memcopy(A_d, A_h, batchSize * sizeof(*A_h), arch::memcopyH2D);
|
||||
arch::memcopy(B_d, B_h, batchSize * sizeof(*B_h), arch::memcopyH2D);
|
||||
arch::memcopy(C_d, C_h, batchSize * sizeof(*C_h), arch::memcopyH2D);
|
||||
hipblas::hipblas_gemmBatched(*(A[0]).handles.hipblas_handle, Atrans, Btrans, M, N, K, alpha, A_d, lda, B_d, ldb, beta,
|
||||
C_d, ldc, batchSize);
|
||||
hipFree(A_d);
|
||||
hipFree(B_d);
|
||||
hipFree(C_d);
|
||||
arch::free(A_d);
|
||||
arch::free(B_d);
|
||||
arch::free(C_d);
|
||||
delete[] A_h;
|
||||
delete[] B_h;
|
||||
delete[] C_h;
|
||||
|
@ -381,7 +377,7 @@ inline static void gemmBatched(char Atrans,
|
|||
static_assert(std::is_same<typename std::decay<Q1>::type, T2>::value, "Wrong dispatch.\n");
|
||||
static_assert(std::is_same<typename std::decay<Q2>::type, T>::value, "Wrong dispatch.\n");
|
||||
assert(Atrans == 'N' || Atrans == 'n');
|
||||
// replace with single call to hipMalloc and hipMemcpy
|
||||
// replace with single call to arch::malloc and arch::memcopy
|
||||
T2** A_d;
|
||||
T** B_d;
|
||||
T2** C_d;
|
||||
|
@ -397,17 +393,17 @@ inline static void gemmBatched(char Atrans,
|
|||
B_h[i] = to_address(B[i]);
|
||||
C_h[i] = to_address(C[i]);
|
||||
}
|
||||
hipMalloc((void**)&A_d, batchSize * sizeof(*A_h));
|
||||
hipMalloc((void**)&B_d, batchSize * sizeof(*B_h));
|
||||
hipMalloc((void**)&C_d, batchSize * sizeof(*C_h));
|
||||
hipMemcpy(A_d, A_h, batchSize * sizeof(*A_h), hipMemcpyHostToDevice);
|
||||
hipMemcpy(B_d, B_h, batchSize * sizeof(*B_h), hipMemcpyHostToDevice);
|
||||
hipMemcpy(C_d, C_h, batchSize * sizeof(*C_h), hipMemcpyHostToDevice);
|
||||
arch::malloc((void**)&A_d, batchSize * sizeof(*A_h));
|
||||
arch::malloc((void**)&B_d, batchSize * sizeof(*B_h));
|
||||
arch::malloc((void**)&C_d, batchSize * sizeof(*C_h));
|
||||
arch::memcopy(A_d, A_h, batchSize * sizeof(*A_h), arch::memcopyH2D);
|
||||
arch::memcopy(B_d, B_h, batchSize * sizeof(*B_h), arch::memcopyH2D);
|
||||
arch::memcopy(C_d, C_h, batchSize * sizeof(*C_h), arch::memcopyH2D);
|
||||
hipblas::hipblas_gemmBatched(*(A[0]).handles.hipblas_handle, Atrans, Btrans, M, N, K, alpha, A_d, lda, B_d, ldb, beta,
|
||||
C_d, ldc, batchSize);
|
||||
hipFree(A_d);
|
||||
hipFree(B_d);
|
||||
hipFree(C_d);
|
||||
arch::free(A_d);
|
||||
arch::free(B_d);
|
||||
arch::free(C_d);
|
||||
delete[] A_h;
|
||||
delete[] B_h;
|
||||
delete[] C_h;
|
||||
|
@ -461,28 +457,24 @@ template<typename T, typename T2>
|
|||
inline static void copy2D(int N, int M, device_pointer<T> src, int lda, device_pointer<T2> dst, int ldb)
|
||||
{
|
||||
static_assert(std::is_same<typename std::decay<T>::type, T2>::value, "Wrong dispatch.\n");
|
||||
if (hipSuccess !=
|
||||
hipMemcpy2D(to_address(dst), sizeof(T2) * ldb, to_address(src), sizeof(T) * lda, M * sizeof(T), N,
|
||||
hipMemcpyDeviceToDevice))
|
||||
throw std::runtime_error("Error: hipMemcpy2D returned error code in copy2D.");
|
||||
arch::memcopy2D(to_address(dst), sizeof(T2) * ldb, to_address(src), sizeof(T) * lda, M * sizeof(T), N,
|
||||
arch::memcopyD2D, "blas_hip_gpu_ptr::copy2D");
|
||||
}
|
||||
|
||||
template<typename T, typename T2>
|
||||
inline static void copy2D(int N, int M, T const* src, int lda, device_pointer<T2> dst, int ldb)
|
||||
{
|
||||
static_assert(std::is_same<typename std::decay<T>::type, T2>::value, "Wrong dispatch.\n");
|
||||
if (hipSuccess !=
|
||||
hipMemcpy2D(to_address(dst), sizeof(T2) * ldb, src, sizeof(T) * lda, M * sizeof(T), N, hipMemcpyHostToDevice))
|
||||
throw std::runtime_error("Error: hipMemcpy2D returned error code in copy2D.");
|
||||
arch::memcopy2D(to_address(dst), sizeof(T2) * ldb, src, sizeof(T) * lda, M * sizeof(T), N,
|
||||
arch::memcopyH2D, "blas_hip_gpu_ptr::copy2D");
|
||||
}
|
||||
|
||||
template<typename T, typename T2>
|
||||
inline static void copy2D(int N, int M, device_pointer<T> src, int lda, T2* dst, int ldb)
|
||||
{
|
||||
static_assert(std::is_same<typename std::decay<T>::type, T2>::value, "Wrong dispatch.\n");
|
||||
if (hipSuccess !=
|
||||
hipMemcpy2D(dst, sizeof(T2) * ldb, to_address(src), sizeof(T) * lda, M * sizeof(T), N, hipMemcpyDeviceToHost))
|
||||
throw std::runtime_error("Error: hipMemcpy2D returned error code in copy2D.");
|
||||
arch::memcopy2D(dst, sizeof(T2) * ldb, to_address(src), sizeof(T) * lda, M * sizeof(T), N,
|
||||
arch::memcopyD2H, "blas_hip_gpu_ptr::copy2D");
|
||||
}
|
||||
|
||||
template<typename T, typename T2>
|
||||
|
|
|
@ -18,7 +18,7 @@
|
|||
|
||||
namespace qmc_hip
|
||||
{
|
||||
void hip_check(hipError_t sucess, std::string message)
|
||||
void hip_kernel_check(hipError_t sucess, std::string message)
|
||||
{
|
||||
if (hipSuccess != sucess)
|
||||
{
|
||||
|
|
|
@ -20,7 +20,7 @@
|
|||
|
||||
namespace qmc_hip
|
||||
{
|
||||
void hip_check(hipError_t sucess, std::string message = "");
|
||||
void hip_kernel_check(hipError_t sucess, std::string message = "");
|
||||
void rocrand_check(rocrand_status sucess, std::string message = "");
|
||||
} // namespace qmc_hip
|
||||
|
||||
|
|
|
@ -1,29 +0,0 @@
|
|||
#include "hipblas.h"
|
||||
#include "AFQMC/Numerics/detail/HIP/hipblas_utils.h"
|
||||
|
||||
|
||||
namespace hipblas
|
||||
{
|
||||
hipblasStatus_t rocBLASStatusToHIPStatusAFQMC(rocblas_status_ error)
|
||||
{
|
||||
switch (error)
|
||||
{
|
||||
case rocblas_status_success:
|
||||
return HIPBLAS_STATUS_SUCCESS;
|
||||
case rocblas_status_invalid_handle:
|
||||
return HIPBLAS_STATUS_NOT_INITIALIZED;
|
||||
case rocblas_status_not_implemented:
|
||||
return HIPBLAS_STATUS_NOT_SUPPORTED;
|
||||
case rocblas_status_invalid_pointer:
|
||||
return HIPBLAS_STATUS_INVALID_VALUE;
|
||||
case rocblas_status_invalid_size:
|
||||
return HIPBLAS_STATUS_INVALID_VALUE;
|
||||
case rocblas_status_memory_error:
|
||||
return HIPBLAS_STATUS_ALLOC_FAILED;
|
||||
case rocblas_status_internal_error:
|
||||
return HIPBLAS_STATUS_INTERNAL_ERROR;
|
||||
default:
|
||||
throw "Unimplemented status";
|
||||
}
|
||||
}
|
||||
} // namespace hipblas
|
|
@ -1,28 +0,0 @@
|
|||
//////////////////////////////////////////////////////////////////////
|
||||
// This file is distributed under the University of Illinois/NCSA Open Source
|
||||
// License. See LICENSE file in top directory for details.
|
||||
//
|
||||
// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
|
||||
//
|
||||
// File developed by:
|
||||
// Lawrence Livermore National Laboratory
|
||||
//
|
||||
// File created by:
|
||||
// Miguel A. Morales, moralessilva2@llnl.gov
|
||||
// Lawrence Livermore National Laboratory
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef AFQMC_HIPBLAS_UTILS_H
|
||||
#define AFQMC_HIPBLAS_UTILS_H
|
||||
|
||||
#include "hipblas.h"
|
||||
#include "rocblas.h"
|
||||
|
||||
namespace hipblas
|
||||
{
|
||||
// TODO: Temporary hack waiting for upstream version of hipblas
|
||||
hipblasStatus_t rocBLASStatusToHIPStatusAFQMC(rocblas_status_ error);
|
||||
|
||||
} // namespace hipblas
|
||||
|
||||
#endif
|
|
@ -17,12 +17,10 @@
|
|||
#include "hipblas.h"
|
||||
#include "rocsolver.h"
|
||||
#include "AFQMC/Memory/HIP/hip_utilities.h"
|
||||
#include "AFQMC/Numerics/detail/HIP/hipblas_utils.h"
|
||||
|
||||
namespace hipblas
|
||||
{
|
||||
using qmc_hip::hipblasOperation;
|
||||
using qmc_hip::rocblasOperation;
|
||||
|
||||
// Level-1
|
||||
inline hipblasStatus_t hipblas_copy(hipblasHandle_t handle, int n, float* x, int incx, float* y, int incy)
|
||||
|
@ -821,13 +819,11 @@ inline hipblasStatus_t hipblas_geam(hipblasHandle_t handle,
|
|||
std::complex<float>* C,
|
||||
int ldc)
|
||||
{
|
||||
hipblasStatus_t success = rocBLASStatusToHIPStatusAFQMC(
|
||||
rocblas_cgeam((rocblas_handle)handle, rocblasOperation(Atrans), rocblasOperation(Btrans), M, N,
|
||||
reinterpret_cast<rocblas_float_complex const*>(&alpha),
|
||||
reinterpret_cast<rocblas_float_complex const*>(A), lda,
|
||||
reinterpret_cast<rocblas_float_complex const*>(&beta),
|
||||
reinterpret_cast<rocblas_float_complex const*>(B), ldb, reinterpret_cast<rocblas_float_complex*>(C),
|
||||
ldc));
|
||||
hipblasStatus_t success =
|
||||
hipblasCgeam(handle, hipblasOperation(Atrans), hipblasOperation(Btrans), M, N,
|
||||
reinterpret_cast<hipblasComplex const*>(&alpha), reinterpret_cast<hipblasComplex const*>(A), lda,
|
||||
reinterpret_cast<hipblasComplex const*>(&beta), reinterpret_cast<hipblasComplex const*>(B), ldb,
|
||||
reinterpret_cast<hipblasComplex*>(C), ldc);
|
||||
hipDeviceSynchronize();
|
||||
return success;
|
||||
}
|
||||
|
@ -846,13 +842,12 @@ inline hipblasStatus_t hipblas_geam(hipblasHandle_t handle,
|
|||
std::complex<double>* C,
|
||||
int ldc)
|
||||
{
|
||||
hipblasStatus_t success = rocBLASStatusToHIPStatusAFQMC(
|
||||
rocblas_zgeam((rocblas_handle)handle, rocblasOperation(Atrans), rocblasOperation(Btrans), M, N,
|
||||
reinterpret_cast<rocblas_double_complex const*>(&alpha),
|
||||
reinterpret_cast<rocblas_double_complex const*>(A), lda,
|
||||
reinterpret_cast<rocblas_double_complex const*>(&beta),
|
||||
reinterpret_cast<rocblas_double_complex const*>(B), ldb,
|
||||
reinterpret_cast<rocblas_double_complex*>(C), ldc));
|
||||
hipblasStatus_t success = hipblasZgeam(handle, hipblasOperation(Atrans), hipblasOperation(Btrans), M, N,
|
||||
reinterpret_cast<hipblasDoubleComplex const*>(&alpha),
|
||||
reinterpret_cast<hipblasDoubleComplex const*>(A), lda,
|
||||
reinterpret_cast<hipblasDoubleComplex const*>(&beta),
|
||||
reinterpret_cast<hipblasDoubleComplex const*>(B), ldb,
|
||||
reinterpret_cast<hipblasDoubleComplex*>(C), ldc);
|
||||
hipDeviceSynchronize();
|
||||
return success;
|
||||
}
|
||||
|
|
|
@ -69,21 +69,15 @@ inline static void getrf(const int n,
|
|||
int& st,
|
||||
device_pointer<R> work)
|
||||
{
|
||||
//rocsolver_handle handle;
|
||||
//rocsolver_create_handle(&handle);
|
||||
//std::cout << (*a.handles.rocsolver_handle_)->device_arch_id() << std::endl;
|
||||
rocsolverStatus_t status = rocsolver::rocsolver_getrf(*a.handles.rocsolver_handle_, n, m, to_address(a), lda,
|
||||
to_address(work), to_address(piv), to_address(piv) + n);
|
||||
//rocsolverStatus_t status = rocsolver::rocsolver_getrf(handle, n, m,
|
||||
//to_address(a), lda, to_address(work), to_address(piv), to_address(piv)+n);
|
||||
hipMemcpy(&st, to_address(piv) + n, sizeof(int), hipMemcpyDeviceToHost);
|
||||
arch::memcopy(&st, to_address(piv) + n, sizeof(int), arch::memcopyD2H);
|
||||
if (rocblas_status_success != status)
|
||||
{
|
||||
std::cerr << " hipblas_getrf status, info: " << status << " " << st << std::endl;
|
||||
std::cerr.flush();
|
||||
throw std::runtime_error("Error: hipblas_getrf returned error code.");
|
||||
}
|
||||
//rocsolver_destroy_handle(handle);
|
||||
}
|
||||
|
||||
// getrfBatched
|
||||
|
@ -100,13 +94,13 @@ inline static void getrfBatched(const int n,
|
|||
A_h = new T*[batchSize];
|
||||
for (int i = 0; i < batchSize; i++)
|
||||
A_h[i] = to_address(a[i]);
|
||||
hipMalloc((void**)&A_d, batchSize * sizeof(*A_h));
|
||||
hipMemcpy(A_d, A_h, batchSize * sizeof(*A_h), hipMemcpyHostToDevice);
|
||||
arch::malloc((void**)&A_d, batchSize * sizeof(*A_h));
|
||||
arch::memcopy(A_d, A_h, batchSize * sizeof(*A_h), arch::memcopyH2D);
|
||||
hipblasStatus_t status = hipblas::hipblas_getrfBatched(*(a[0]).handles.hipblas_handle, n, A_d, lda, to_address(piv),
|
||||
to_address(info), batchSize);
|
||||
if (HIPBLAS_STATUS_SUCCESS != status)
|
||||
throw std::runtime_error("Error: hipblas_getrf returned error code.");
|
||||
hipFree(A_d);
|
||||
arch::free(A_d);
|
||||
delete[] A_h;
|
||||
}
|
||||
|
||||
|
@ -135,11 +129,7 @@ inline static void getri(int n,
|
|||
|
||||
// info isn't returned from ?getrs.
|
||||
int* info;
|
||||
if (hipSuccess != hipMalloc((void**)&info, sizeof(int)))
|
||||
{
|
||||
std::cerr << " Error getri: Error allocating on GPU." << std::endl;
|
||||
throw std::runtime_error("Error: hipMalloc returned error code.");
|
||||
}
|
||||
arch::malloc((void**)&info, sizeof(int), "lapack_hip_gpu_ptr::getri");
|
||||
|
||||
kernels::set_identity(n, n, to_address(work), n);
|
||||
//std::cout << (*work).num_elements() << " " << (*a).num_elements() << std::endl;
|
||||
|
@ -147,10 +137,10 @@ inline static void getri(int n,
|
|||
rocsolver::rocsolver_getrs(*a.handles.rocsolver_handle_, rocblas_operation_none, n, n, to_address(a), lda,
|
||||
to_address(piv), to_address(work), n, info))
|
||||
throw std::runtime_error("Error: rocsolver_getrs returned error code.");
|
||||
hipMemcpy(to_address(a), to_address(work), n * n * sizeof(T), hipMemcpyDeviceToDevice);
|
||||
//hipMemcpy(&status,info,sizeof(int),hipMemcpyDeviceToHost);
|
||||
arch::memcopy(to_address(a), to_address(work), n * n * sizeof(T), arch::memcopyD2D);
|
||||
//arch::memcopy(&status,info,sizeof(int),arch::memcopyD2H);
|
||||
status = 0;
|
||||
hipFree(info);
|
||||
arch::free(info);
|
||||
}
|
||||
|
||||
// getriBatched
|
||||
|
@ -174,16 +164,16 @@ inline static void getriBatched(int n,
|
|||
A_h[i] = to_address(a[i]);
|
||||
C_h[i] = to_address(ainv[i]);
|
||||
}
|
||||
hipMalloc((void**)&A_d, batchSize * sizeof(*A_h));
|
||||
hipMalloc((void**)&C_d, batchSize * sizeof(*C_h));
|
||||
hipMemcpy(A_d, A_h, batchSize * sizeof(*A_h), hipMemcpyHostToDevice);
|
||||
hipMemcpy(C_d, C_h, batchSize * sizeof(*C_h), hipMemcpyHostToDevice);
|
||||
arch::malloc((void**)&A_d, batchSize * sizeof(*A_h));
|
||||
arch::malloc((void**)&C_d, batchSize * sizeof(*C_h));
|
||||
arch::memcopy(A_d, A_h, batchSize * sizeof(*A_h), arch::memcopyH2D);
|
||||
arch::memcopy(C_d, C_h, batchSize * sizeof(*C_h), arch::memcopyH2D);
|
||||
hipblasStatus_t status = hipblas::hipblas_getriBatched(*(a[0]).handles.hipblas_handle, HIPBLAS_OP_N, n, n, A_d, lda,
|
||||
to_address(piv), C_d, ldc, to_address(info), batchSize);
|
||||
if (HIPBLAS_STATUS_SUCCESS != status)
|
||||
throw std::runtime_error("Error: hipblas_getri returned error code.");
|
||||
hipFree(A_d);
|
||||
hipFree(C_d);
|
||||
arch::free(A_d);
|
||||
arch::free(C_d);
|
||||
delete[] A_h;
|
||||
delete[] C_h;
|
||||
}
|
||||
|
@ -207,16 +197,16 @@ inline static void matinvBatched(int n,
|
|||
A_h[i] = to_address(a[i]);
|
||||
C_h[i] = to_address(ainv[i]);
|
||||
}
|
||||
hipMalloc((void**)&A_d, batchSize * sizeof(*A_h));
|
||||
hipMalloc((void**)&C_d, batchSize * sizeof(*C_h));
|
||||
hipMemcpy(A_d, A_h, batchSize * sizeof(*A_h), hipMemcpyHostToDevice);
|
||||
hipMemcpy(C_d, C_h, batchSize * sizeof(*C_h), hipMemcpyHostToDevice);
|
||||
arch::malloc((void**)&A_d, batchSize * sizeof(*A_h));
|
||||
arch::malloc((void**)&C_d, batchSize * sizeof(*C_h));
|
||||
arch::memcopy(A_d, A_h, batchSize * sizeof(*A_h), arch::memcopyH2D);
|
||||
arch::memcopy(C_d, C_h, batchSize * sizeof(*C_h), arch::memcopyH2D);
|
||||
hipblasStatus_t status = hipblas::hipblas_matinvBatched(*(a[0]).handles.hipblas_handle, n, A_d, lda, C_d, lda_inv,
|
||||
to_address(info), batchSize);
|
||||
if (HIPBLAS_STATUS_SUCCESS != status)
|
||||
throw std::runtime_error("Error: hipblas_matinv returned error code.");
|
||||
hipFree(A_d);
|
||||
hipFree(C_d);
|
||||
arch::free(A_d);
|
||||
arch::free(C_d);
|
||||
delete[] A_h;
|
||||
delete[] C_h;
|
||||
}
|
||||
|
@ -244,20 +234,20 @@ inline static void geqrf(int M,
|
|||
//size_t dim = std::min(M,N);
|
||||
//std::vector<T> piv(dim,0.0);
|
||||
//T* dpiv;
|
||||
//if(hipSuccess != hipMalloc(&dpiv,sizeof(T)*dim)) {
|
||||
//if(hipSuccess != arch::malloc(&dpiv,sizeof(T)*dim)) {
|
||||
//std::cerr << " Error gqr: Error allocating piv on GPU." << std::endl;
|
||||
//throw std::runtime_error("Error: hipMalloc returned error code.");
|
||||
//throw std::runtime_error("Error: arch::malloc returned error code.");
|
||||
//}
|
||||
rocsolverStatus_t status =
|
||||
rocsolver::rocsolver_geqrf(*A.handles.rocsolver_handle_, M, N, to_address(A), LDA, to_address(TAU));
|
||||
//hipMemcpy(piv.data(),dpiv,sizeof(T)*dim,hipMemcpyDeviceToHost);
|
||||
//arch::memcopy(piv.data(),dpiv,sizeof(T)*dim,arch::memcopyD2H);
|
||||
if (rocblas_status_success != status)
|
||||
{
|
||||
std::cerr << " hipblas_geqrf status, info: " << status << std::endl;
|
||||
std::cerr.flush();
|
||||
throw std::runtime_error("Error: hipblas_geqrf returned error code.");
|
||||
}
|
||||
//hipFree(dpiv);
|
||||
//arch::free(dpiv);
|
||||
INFO = 0;
|
||||
}
|
||||
|
||||
|
@ -313,7 +303,7 @@ void static gqr(int M,
|
|||
}
|
||||
// Not returned from rocm
|
||||
INFO = 0;
|
||||
//hipFree(piv);
|
||||
//arch::free(piv);
|
||||
}
|
||||
|
||||
template<typename T, typename I>
|
||||
|
@ -375,8 +365,8 @@ void static glq(int M,
|
|||
//T_h[i] = to_address(TAU[i]);
|
||||
//T **B_d;
|
||||
//std::vector<int> inf(batchSize);
|
||||
//hipMalloc((void **)&B_d, 2*batchSize*sizeof(*B_h));
|
||||
//hipMemcpy(B_d, B_h, 2*batchSize*sizeof(*B_h), hipMemcpyHostToDevice);
|
||||
//arch::malloc((void **)&B_d, 2*batchSize*sizeof(*B_h));
|
||||
//arch::memcopy(B_d, B_h, 2*batchSize*sizeof(*B_h), arch::memcopyH2D);
|
||||
//T **A_d(B_d);
|
||||
//T **T_d(B_d+batchSize);
|
||||
//int pstride = std::min(M,N);
|
||||
|
@ -385,7 +375,7 @@ void static glq(int M,
|
|||
//pstride, batchSize);
|
||||
//if(rocblas_status_success != status)
|
||||
//throw std::runtime_error("Error: hipblas_geqrfBatched returned error code.");
|
||||
//hipFree(B_d);
|
||||
//arch::free(B_d);
|
||||
//delete [] B_h;
|
||||
//}
|
||||
|
||||
|
@ -404,13 +394,13 @@ inline static void geqrfStrided(int M,
|
|||
for (int i = 0; i < batchSize; i++)
|
||||
A_h[i] = to_address(A) + i * Astride;
|
||||
T** A_d;
|
||||
hipMalloc((void**)&A_d, batchSize * sizeof(*A_h));
|
||||
hipMemcpy(A_d, A_h, batchSize * sizeof(*A_h), hipMemcpyHostToDevice);
|
||||
arch::malloc((void**)&A_d, batchSize * sizeof(*A_h));
|
||||
arch::memcopy(A_d, A_h, batchSize * sizeof(*A_h), arch::memcopyH2D);
|
||||
rocsolverStatus_t status = rocsolver::rocsolver_geqrf_batched(*A.handles.rocsolver_handle_, M, N, A_d, LDA,
|
||||
to_address(TAU), Tstride, batchSize);
|
||||
if (rocblas_status_success != status)
|
||||
throw std::runtime_error("Error: hipblas_geqrfStrided returned error code.");
|
||||
hipFree(A_d);
|
||||
arch::free(A_d);
|
||||
delete[] A_h;
|
||||
}
|
||||
|
||||
|
@ -438,18 +428,18 @@ inline static void gesvd(char jobU,
|
|||
int& st)
|
||||
{
|
||||
int* devSt;
|
||||
hipMalloc((void**)&devSt, sizeof(int));
|
||||
arch::malloc((void**)&devSt, sizeof(int));
|
||||
rocsolverStatus_t status =
|
||||
rocsolver::rocsolver_gesvd(*A.handles.rocsolver_handle_, jobU, jobVT, m, n, to_address(A), lda, to_address(S),
|
||||
to_address(U), ldu, to_address(VT), ldvt, to_address(W), lw, devSt);
|
||||
hipMemcpy(&st, devSt, sizeof(int), hipMemcpyDeviceToHost);
|
||||
arch::memcopy(&st, devSt, sizeof(int), arch::memcopyD2H);
|
||||
if (rocblas_status_success != status)
|
||||
{
|
||||
std::cerr << " hipblas_gesvd status, info: " << status << " " << st << std::endl;
|
||||
std::cerr.flush();
|
||||
throw std::runtime_error("Error: hipblas_gesvd returned error code.");
|
||||
}
|
||||
hipFree(devSt);
|
||||
arch::free(devSt);
|
||||
}
|
||||
|
||||
template<typename T, typename R>
|
||||
|
@ -470,18 +460,18 @@ inline static void gesvd(char jobU,
|
|||
int& st)
|
||||
{
|
||||
int* devSt;
|
||||
hipMalloc((void**)&devSt, sizeof(int));
|
||||
arch::malloc((void**)&devSt, sizeof(int));
|
||||
rocsolverStatus_t status =
|
||||
rocsolver::rocsolver_gesvd(*A.handles.rocsolver_handle_, jobU, jobVT, m, n, to_address(A), lda, to_address(S),
|
||||
to_address(U), ldu, to_address(VT), ldvt, to_address(W), lw, devSt);
|
||||
hipMemcpy(&st, devSt, sizeof(int), hipMemcpyDeviceToHost);
|
||||
arch::memcopy(&st, devSt, sizeof(int), arch::memcopyD2H);
|
||||
if (rocblas_status_success != status)
|
||||
{
|
||||
std::cerr << " hipblas_gesvd status, info: " << status << " " << st << std::endl;
|
||||
std::cerr.flush();
|
||||
throw std::runtime_error("Error: hipblas_gesvd returned error code.");
|
||||
}
|
||||
hipFree(devSt);
|
||||
arch::free(devSt);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -467,7 +467,7 @@ inline rocsolverStatus_t rocsolver_gqr_strided(rocsolverHandle_t handle,
|
|||
qmc_hip::hip_check(hipDeviceSynchronize(), "rocsolver_gqr_strided");
|
||||
qmc_hip::hipsolver_check(success, "rocsolver_gqr_strided");
|
||||
}
|
||||
//qmc_hip::hip_check(hipDeviceSynchronize(),"rocsolver_gqr_strided");
|
||||
qmc_hip::hip_check(hipDeviceSynchronize(),"rocsolver_gqr_strided");
|
||||
qmc_hip::hip_check(hipGetLastError(), "rocsolver_gqr_strided");
|
||||
//qmc_hip::hipsolver_check(rocsolver_set_stream(handle,s0), "rocsolver_setStream");
|
||||
|
||||
|
|
|
@ -46,10 +46,8 @@ void csrmv(const char transa,
|
|||
static_assert(std::is_same<typename std::decay<Q>::type, T>::value, "Wrong dispatch.\n");
|
||||
// somehow need to check if the matrix is compact!
|
||||
int pb, pe;
|
||||
if (hipSuccess != hipMemcpy(std::addressof(pb), to_address(pntrb), sizeof(int), hipMemcpyDeviceToHost))
|
||||
throw std::runtime_error("Error: hipMemcpy returned error code in csrmv.");
|
||||
if (hipSuccess != hipMemcpy(std::addressof(pe), to_address(pntre + (M - 1)), sizeof(int), hipMemcpyDeviceToHost))
|
||||
throw std::runtime_error("Error: hipMemcpy returned error code in csrmv.");
|
||||
arch::memcopy(std::addressof(pb), to_address(pntrb), sizeof(int), arch::memcopyD2H, "sparse_hip_gpu_ptr::csrmv");
|
||||
arch::memcopy(std::addressof(pe), to_address(pntre + (M - 1)), sizeof(int), arch::memcopyD2H, "sparse_hip_gpu_ptr::csrmv");
|
||||
int nnz = pe - pb;
|
||||
if (HIPSPARSE_STATUS_SUCCESS !=
|
||||
hipsparse::hipsparse_csrmv(*A.handles.hipsparse_handle, transa, M, K, nnz, alpha,
|
||||
|
@ -78,10 +76,8 @@ void csrmm(const char transa,
|
|||
static_assert(std::is_same<typename std::decay<Q>::type, T>::value, "Wrong dispatch.\n");
|
||||
// somehow need to check if the matrix is compact!
|
||||
int pb, pe;
|
||||
if (hipSuccess != hipMemcpy(std::addressof(pb), to_address(pntrb), sizeof(int), hipMemcpyDeviceToHost))
|
||||
throw std::runtime_error("Error: hipMemcpy returned error code in csrmm.");
|
||||
if (hipSuccess != hipMemcpy(std::addressof(pe), to_address(pntre + (M - 1)), sizeof(int), hipMemcpyDeviceToHost))
|
||||
throw std::runtime_error("Error: hipMemcpy returned error code in csrmm.");
|
||||
arch::memcopy(std::addressof(pb), to_address(pntrb), sizeof(int), arch::memcopyD2H, "sparse_hip_gpu_ptr::csrmm");
|
||||
arch::memcopy(std::addressof(pe), to_address(pntre + (M - 1)), sizeof(int), arch::memcopyD2H, "sparse_hip_gpu_ptr::csrmm");
|
||||
int nnz = pe - pb;
|
||||
if (transa == 'N')
|
||||
{
|
||||
|
|
|
@ -0,0 +1,17 @@
|
|||
#////////////////////////////////////////////////////////////////////////////////////////////
|
||||
#// This file is distributed under the University of Illinois/NCSA Open Source License.
|
||||
#// See LICENSE file in top directory for details.
|
||||
#//
|
||||
#// Copyright (c) 2020 QMCPACK developers.
|
||||
#//
|
||||
#// File developed by: Fionn Malone malone14@llnl.gov, Lawrence Livermore National Laboratory
|
||||
#//
|
||||
#// File created by: Fionn Malone malone14@llnl.gov, Lawrence Livermore National Laboratory
|
||||
#////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
MESSAGE("Building AFQMC performance executable ")
|
||||
|
||||
SET(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${qmcpack_BINARY_DIR}/bin)
|
||||
|
||||
ADD_EXECUTABLE(qmc-afqmc-performance performance.cpp)
|
||||
TARGET_LINK_LIBRARIES(qmc-afqmc-performance afqmc Math::BLAS_LAPACK)
|
|
@ -0,0 +1,424 @@
|
|||
///////////////////////////////////////////////////////////////////////////////
|
||||
// This file is distributed under the University of Illinois/NCSA Open Source
|
||||
// License. See LICENSE file in top directory for details.
|
||||
//
|
||||
// Copyright (c) 2020 QMCPACK developers.
|
||||
//
|
||||
// File developed by: Fionn Malone, malone14@llnl.gov, LLNL
|
||||
//
|
||||
// File created by: Fionn Malone, malone14@llnl.gov, LLNL
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
#include "Configuration.h"
|
||||
|
||||
#include <vector>
|
||||
#include <random>
|
||||
|
||||
#include "Utilities/Timer.h"
|
||||
|
||||
#include "AFQMC/config.h"
|
||||
#include "AFQMC/config.0.h"
|
||||
#include "AFQMC/Numerics/ma_blas.hpp"
|
||||
#include "AFQMC/Numerics/batched_operations.hpp"
|
||||
#include "AFQMC/Numerics/ma_operations.hpp"
|
||||
#include "AFQMC/Matrix/tests/matrix_helpers.h"
|
||||
#include "AFQMC/Memory/buffer_allocators.h"
|
||||
#include "AFQMC/Memory/arch.hpp"
|
||||
|
||||
#include "multi/array.hpp"
|
||||
#include "multi/array_ref.hpp"
|
||||
|
||||
using namespace qmcplusplus;
|
||||
using namespace afqmc;
|
||||
|
||||
using std::copy_n;
|
||||
|
||||
#if defined(ENABLE_CUDA) || defined(ENABLE_HIP)
|
||||
template<typename T>
|
||||
using Alloc = device::device_allocator<T>;
|
||||
#else
|
||||
template<typename T>
|
||||
using Alloc = std::allocator<T>;
|
||||
#endif
|
||||
template<typename T>
|
||||
using pointer = typename Alloc<T>::pointer;
|
||||
|
||||
|
||||
//template<typename T>
|
||||
//using buffer_alloc_type = device_buffer_type<T>;
|
||||
|
||||
//using buffer_ialloc_type = device_buffer_type<int>;
|
||||
|
||||
template<typename T>
|
||||
using Tensor1D = boost::multi::array<T, 1, Alloc<T>>;
|
||||
template<typename T>
|
||||
using Tensor2D = boost::multi::array<T, 2, Alloc<T>>;
|
||||
template<typename T>
|
||||
using Tensor3D = boost::multi::array<T, 3, Alloc<T>>;
|
||||
|
||||
template<typename T>
|
||||
using Tensor3D_ref = boost::multi::array_ref<T, 3, pointer<T>>;
|
||||
template<typename T>
|
||||
using Tensor2D_ref = boost::multi::array_ref<T, 2, pointer<T>>;
|
||||
template<typename T>
|
||||
using Tensor1D_ref = boost::multi::array_ref<T, 1, pointer<T>>;
|
||||
|
||||
template<typename T>
|
||||
void fillRandomMatrix(std::vector<T>& vec)
|
||||
{
|
||||
std::mt19937 generator(0);
|
||||
std::normal_distribution<T> distribution(0.0, 1.0);
|
||||
// avoid uninitialized warning
|
||||
T tmp = distribution(generator);
|
||||
for (int i = 0; i < vec.size(); i++)
|
||||
{
|
||||
T val = distribution(generator);
|
||||
vec[i] = val;
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void fillRandomMatrix(std::vector<std::complex<T>>& vec)
|
||||
{
|
||||
std::mt19937 generator(0);
|
||||
std::normal_distribution<T> distribution(0.0, 1.0);
|
||||
T tmp = distribution(generator);
|
||||
for (int i = 0; i < vec.size(); i++)
|
||||
{
|
||||
T re = distribution(generator);
|
||||
T im = distribution(generator);
|
||||
vec[i] = std::complex<T>(re, im);
|
||||
}
|
||||
}
|
||||
|
||||
template<class Allocator, class Buff>
|
||||
void timeBatchedQR(std::ostream& out, Allocator& alloc, Buff& buffer, int nbatch, int m, int n)
|
||||
{
|
||||
using T = typename Allocator::value_type;
|
||||
int offset = 0;
|
||||
Tensor3D_ref<T> A(buffer.origin(), {nbatch, m, n});
|
||||
offset += A.num_elements();
|
||||
Tensor3D_ref<T> AT(buffer.origin() + offset, {nbatch, n, m});
|
||||
offset += AT.num_elements();
|
||||
Tensor2D_ref<T> T_(buffer.origin() + offset, {nbatch, m});
|
||||
offset += T_.num_elements();
|
||||
Tensor2D_ref<T> scl(buffer.origin() + offset, {nbatch, m});
|
||||
offset += T_.num_elements();
|
||||
int sz = ma::gqr_optimal_workspace_size(AT[0]);
|
||||
//std::cout << buffer.num_elements() << " " << 2*nbatch*m*n + 2*nbatch*m + nbatch*sz << " " << offset << std::endl;
|
||||
Tensor1D_ref<T> WORK(buffer.origin() + offset, boost::multi::iextensions<1u>{nbatch * sz});
|
||||
Alloc<int> ialloc{};
|
||||
std::vector<pointer<T>> Aarray;
|
||||
Tensor1D<int> IWORK(boost::multi::iextensions<1u>{nbatch * (m + 1)}, ialloc);
|
||||
using std::copy_n;
|
||||
for (int i = 0; i < nbatch; i++)
|
||||
{
|
||||
Aarray.emplace_back(A[i].origin());
|
||||
}
|
||||
// Actual profile.
|
||||
Timer timer;
|
||||
for (int i = 0; i < nbatch; i++)
|
||||
ma::transpose(A[i], AT[i]);
|
||||
double ttrans = timer.elapsed();
|
||||
timer.restart();
|
||||
geqrfStrided(m, n, AT.origin(), m, m * n, T_.origin(), m, IWORK.origin(), nbatch);
|
||||
double tgeqrf = timer.elapsed();
|
||||
timer.restart();
|
||||
for (int i = 0; i < nbatch; i++)
|
||||
determinant_from_geqrf(n, AT[i].origin(), m, scl[i].origin(), T(0.0));
|
||||
double tdet = timer.elapsed();
|
||||
timer.restart();
|
||||
gqrStrided(m, n, n, AT.origin(), m, m * n, T_.origin(), m, WORK.origin(), sz, IWORK.origin(), nbatch);
|
||||
double tgqr = timer.elapsed();
|
||||
out << " " << std::setw(5) << nbatch << " " << std::setw(5) << m << " " << std::setw(5) << n << " "
|
||||
<< std::scientific << ttrans << " " << tgeqrf << " " << tdet << " " << tgqr << "\n";
|
||||
}
|
||||
|
||||
template<class Allocator, class Buff>
|
||||
void timeQR(std::ostream& out, Allocator& alloc, Buff& buffer, int m)
|
||||
{
|
||||
using T = typename Allocator::value_type;
|
||||
int offset = 0;
|
||||
Tensor2D_ref<T> A(buffer.origin(), {m, m});
|
||||
offset += A.num_elements();
|
||||
Tensor1D_ref<T> TAU(buffer.origin() + offset, {m});
|
||||
offset += TAU.num_elements();
|
||||
int sz = ma::gqr_optimal_workspace_size(A);
|
||||
Tensor1D_ref<T> WORK(buffer.origin() + offset, boost::multi::iextensions<1u>{sz});
|
||||
Timer timer;
|
||||
using ma::geqrf;
|
||||
geqrf(A, TAU, WORK);
|
||||
double tgeqrf = timer.elapsed();
|
||||
using ma::gqr;
|
||||
timer.restart();
|
||||
gqr(A, TAU, WORK);
|
||||
double tgqr = timer.elapsed();
|
||||
out << " " << std::setw(5) << m << " " << std::setw(5) << m << " " << std::scientific << tgeqrf << " "
|
||||
<< " " << tgqr << "\n";
|
||||
}
|
||||
|
||||
template<class Allocator, class Buff>
|
||||
void timeExchangeKernel(std::ostream& out, Allocator& alloc, Buff& buffer, int nbatch, int nwalk, int nocc, int nchol)
|
||||
{
|
||||
using T = typename Allocator::value_type;
|
||||
int offset = 0;
|
||||
Tensor3D_ref<T> Twabn(buffer.origin(), {2 * nbatch, nwalk * nocc, nocc * nchol});
|
||||
offset += Twabn.num_elements();
|
||||
Tensor1D_ref<T> scal(buffer.origin() + offset, boost::multi::iextensions<1u>{nbatch});
|
||||
offset += scal.num_elements();
|
||||
Tensor1D_ref<T> result(buffer.origin() + offset, boost::multi::iextensions<1u>{nwalk});
|
||||
using ma::batched_dot_wabn_wban;
|
||||
Timer timer;
|
||||
batched_dot_wabn_wban(nbatch, nwalk, nocc, nchol, scal.origin(), Twabn.origin(), to_address(result.data()), 1);
|
||||
double time = timer.elapsed();
|
||||
out << " " << std::setw(5) << nbatch << " " << std::setw(5) << nwalk << " " << std::setw(5) << nocc << " "
|
||||
<< std::setw(5) << nchol << " " << std::scientific << time << "\n";
|
||||
}
|
||||
|
||||
template<class Allocator, class Buff>
|
||||
void timeBatchedGemm(std::ostream& out, Allocator& alloc, Buff& buffer, int nbatch, int m)
|
||||
{
|
||||
using T = typename Allocator::value_type;
|
||||
int offset = 0;
|
||||
Tensor2D_ref<T> a(buffer.origin(), {m, m});
|
||||
offset += a.num_elements();
|
||||
Tensor2D_ref<T> b(buffer.origin() + offset, {m, m});
|
||||
offset += b.num_elements();
|
||||
Tensor3D_ref<T> c(buffer.origin() + offset, {nbatch, m, m});
|
||||
//float scale = float(100.0);
|
||||
std::vector<pointer<T>> A_array;
|
||||
std::vector<pointer<T>> B_array;
|
||||
std::vector<pointer<T>> C_array;
|
||||
float alpha = 1.0;
|
||||
float beta = 0.0;
|
||||
for (int i = 0; i < nbatch; i++)
|
||||
{
|
||||
A_array.emplace_back(a.origin());
|
||||
B_array.emplace_back(b.origin());
|
||||
C_array.emplace_back(c[i].origin());
|
||||
}
|
||||
using ma::gemmBatched;
|
||||
Timer timer;
|
||||
gemmBatched('N', 'N', m, m, m, alpha, A_array.data(), m, B_array.data(), m, beta, C_array.data(), m, nbatch);
|
||||
double tgemm = timer.elapsed();
|
||||
out << " " << std::setw(6) << nbatch << " " << std::setw(5) << m << " " << std::scientific << tgemm
|
||||
<< "\n";
|
||||
}
|
||||
|
||||
template<class Allocator, class Buff>
|
||||
void timeGemm(std::ostream& out, Allocator& alloc, Buff& buffer, int m, int n)
|
||||
{
|
||||
using T = typename Allocator::value_type;
|
||||
int offset = 0;
|
||||
Tensor2D_ref<T> a(buffer.origin(), {m, m});
|
||||
offset += a.num_elements();
|
||||
Tensor2D_ref<T> b(buffer.origin() + offset, {m, n});
|
||||
offset += b.num_elements();
|
||||
Tensor2D_ref<T> c(buffer.origin() + offset, {m, n});
|
||||
using ma::product;
|
||||
Timer timer;
|
||||
product(a, b, c);
|
||||
double tproduct = timer.elapsed();
|
||||
out << " " << std::setw(6) << m << " " << std::setw(5) << n << " " << std::scientific << tproduct
|
||||
<< "\n";
|
||||
}
|
||||
|
||||
template<class Allocator, class Buff>
|
||||
void timeBatchedMatrixInverse(std::ostream& out, Allocator& alloc, Buff& buffer, int nbatch, int m)
|
||||
{
|
||||
using T = typename Allocator::value_type;
|
||||
int offset = 0;
|
||||
Tensor3D_ref<T> a(buffer.origin(), {nbatch, m, m});
|
||||
Tensor3D_ref<T> b(buffer.origin() + a.num_elements(), {nbatch, m, m});
|
||||
Alloc<int> ialloc{};
|
||||
Tensor1D<int> IWORK(boost::multi::iextensions<1u>{nbatch * (m + 1)}, ialloc);
|
||||
std::vector<pointer<T>> A_array, B_array;
|
||||
A_array.reserve(nbatch);
|
||||
B_array.reserve(nbatch);
|
||||
for (int i = 0; i < nbatch; i++)
|
||||
{
|
||||
A_array.emplace_back(a[i].origin());
|
||||
B_array.emplace_back(b[i].origin());
|
||||
}
|
||||
using ma::getrfBatched;
|
||||
Timer timer;
|
||||
getrfBatched(m, A_array.data(), m, ma::pointer_dispatch(IWORK.origin()),
|
||||
ma::pointer_dispatch(IWORK.origin()) + nbatch * m, nbatch);
|
||||
double tgetrf = timer.elapsed();
|
||||
using ma::getriBatched;
|
||||
timer.restart();
|
||||
getriBatched(m, A_array.data(), m, ma::pointer_dispatch(IWORK.origin()), B_array.data(), m,
|
||||
ma::pointer_dispatch(IWORK.origin()) + nbatch * m, nbatch);
|
||||
double tgetri = timer.elapsed();
|
||||
out << " " << std::setw(6) << nbatch << " " << std::setw(5) << m << " " << std::scientific
|
||||
<< tgetrf << " " << tgetri << "\n";
|
||||
}
|
||||
|
||||
template<class Allocator, class Buff>
|
||||
void timeMatrixInverse(std::ostream& out, Allocator& alloc, Buff& buffer, int m)
|
||||
{
|
||||
using T = typename Allocator::value_type;
|
||||
int offset = 0;
|
||||
Tensor2D_ref<T> a(buffer.origin(), {m, m});
|
||||
Tensor1D_ref<T> WORK(buffer.origin() + a.num_elements(), boost::multi::iextensions<1u>{m * m});
|
||||
Alloc<int> ialloc{};
|
||||
Tensor1D<int> IWORK(boost::multi::iextensions<1u>{m + 1}, ialloc);
|
||||
using ma::getrf;
|
||||
Timer timer;
|
||||
getrf(a, IWORK, WORK);
|
||||
double tgetrf = timer.elapsed();
|
||||
using ma::getri;
|
||||
timer.restart();
|
||||
getri(a, IWORK, WORK);
|
||||
double tgetri = timer.elapsed();
|
||||
out << " " << std::setw(6) << m << " " << std::scientific << tgetrf << " " << tgetri
|
||||
<< "\n";
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
boost::mpi3::environment env(argc, argv);
|
||||
auto world = boost::mpi3::environment::get_world_instance();
|
||||
auto node = world.split_shared(world.rank());
|
||||
#if defined(ENABLE_CUDA) || defined(ENABLE_HIP)
|
||||
arch::INIT(node);
|
||||
#endif
|
||||
#if defined(ENABLE_CUDA) || defined(ENABLE_HIP)
|
||||
{
|
||||
std::ofstream out;
|
||||
out.open("time_batched_zqr.dat");
|
||||
std::cout << " - Batched zQR (nbatch, MxN)" << std::endl;
|
||||
out << " nbatch M N ttrans tgeqrf tgetdet tgqr\n";
|
||||
std::vector<int> batches = {1, 5, 10, 20};
|
||||
std::vector<int> num_rows = {200, 400, 800};
|
||||
int max_batch = batches[batches.size() - 1];
|
||||
int max_rows = num_rows[num_rows.size() - 1];
|
||||
int size = (2 * max_batch * max_rows * (max_rows / 2.0) + 3 * max_batch * max_rows);
|
||||
Alloc<std::complex<double>> alloc{};
|
||||
Tensor1D<std::complex<double>> buffer(iextensions<1u>{size}, 1.0, alloc);
|
||||
for (auto nb : batches)
|
||||
{
|
||||
for (auto m : num_rows)
|
||||
{
|
||||
timeBatchedQR(out, alloc, buffer, nb, m, m / 2);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
{
|
||||
std::ofstream out;
|
||||
out.open("time_zqr.dat");
|
||||
std::cout << " - zQR (MxM)" << std::endl;
|
||||
out << " M M tzgeqrf tzungqr\n";
|
||||
int size = 3 * 1000 * 1000;
|
||||
Alloc<std::complex<double>> alloc{};
|
||||
Tensor1D<std::complex<double>> buffer(iextensions<1u>{size}, 1.0, alloc);
|
||||
std::vector<int> dims = {100, 200, 500, 800, 1000};
|
||||
for (auto d : dims)
|
||||
{
|
||||
timeQR(out, alloc, buffer, d);
|
||||
}
|
||||
}
|
||||
{
|
||||
std::ofstream out;
|
||||
out.open("time_sgemm.dat");
|
||||
std::cout << " - sgemm (MxM)" << std::endl;
|
||||
out << " M M tsgemm\n";
|
||||
int size = 3 * 8000 * 8000;
|
||||
Alloc<float> alloc{};
|
||||
Tensor1D<float> buffer(iextensions<1u>{size}, 1.0, alloc);
|
||||
std::vector<int> dims = {200, 500, 800, 1000, 2000, 3000, 4000, 8000};
|
||||
for (auto d : dims)
|
||||
{
|
||||
timeGemm(out, alloc, buffer, d, d);
|
||||
}
|
||||
}
|
||||
{
|
||||
std::ofstream out;
|
||||
out.open("time_batched_sgemm.dat");
|
||||
std::cout << " - batched sgemm (nbatch, MxM)" << std::endl;
|
||||
out << " nbatch M tsgemm\n";
|
||||
Alloc<float> alloc{};
|
||||
std::vector<int> num_rows = {100, 200, 300, 400, 500, 600};
|
||||
std::vector<int> batches = {128, 256, 512, 1024};
|
||||
int max_batch = batches[batches.size() - 1];
|
||||
int max_rows = num_rows[num_rows.size() - 1];
|
||||
int size = 3 * max_batch * max_rows * max_rows;
|
||||
Tensor1D<float> buffer(iextensions<1u>{size}, 1.0, alloc);
|
||||
for (auto nb : batches)
|
||||
{
|
||||
for (auto m : num_rows)
|
||||
{
|
||||
timeBatchedGemm(out, alloc, buffer, nb, m);
|
||||
}
|
||||
}
|
||||
}
|
||||
{
|
||||
std::ofstream out;
|
||||
out.open("time_exchange_kernel.dat");
|
||||
std::cout << " - exchange kernel (E[w] = sum_{abn} Twabn Twanb)" << std::endl;
|
||||
out << " nbatch nwalk nocc nchol tExchangeKernel\n";
|
||||
Alloc<std::complex<double>> alloc{};
|
||||
int nwalk = 5;
|
||||
int nocc = 20;
|
||||
int nchol = 270;
|
||||
std::vector<int> batches = {100, 200, 400, 800};
|
||||
int nbatch_max = batches[batches.size() - 1];
|
||||
int size = 2 * nbatch_max * nwalk * nocc * nocc * nchol + nbatch_max + nwalk;
|
||||
Tensor1D<std::complex<double>> buffer(iextensions<1u>{size}, 1.0, alloc);
|
||||
for (auto b : batches)
|
||||
{
|
||||
timeExchangeKernel(out, alloc, buffer, b, nwalk, nocc, nchol);
|
||||
}
|
||||
}
|
||||
#if defined(ENABLE_CUDA) || defined(ENABLE_HIP)
|
||||
{
|
||||
std::ofstream out;
|
||||
out.open("time_batched_matrix_inverse.dat");
|
||||
std::cout << " - batched matrix inverse (nbatch, MxM)" << std::endl;
|
||||
out << " nbatch M tgetrf tgetri\n";
|
||||
Alloc<std::complex<double>> alloc{};
|
||||
std::vector<int> batches = {1, 5, 10, 20};
|
||||
std::vector<int> num_rows = {100, 110, 120, 200, 210, 300, 400, 500, 600, 700};
|
||||
int max_batch = batches[batches.size() - 1];
|
||||
int max_rows = num_rows[num_rows.size() - 1];
|
||||
int size = 2 * max_batch * max_rows * max_rows;
|
||||
Tensor1D<std::complex<double>> buffer(iextensions<1u>{size}, alloc);
|
||||
{
|
||||
std::vector<std::complex<double>> tmp(size);
|
||||
fillRandomMatrix(tmp);
|
||||
using std::copy_n;
|
||||
copy_n(tmp.data(), tmp.size(), buffer.origin());
|
||||
}
|
||||
for (auto b : batches)
|
||||
{
|
||||
for (auto m : num_rows)
|
||||
{
|
||||
timeBatchedMatrixInverse(out, alloc, buffer, b, m);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
{
|
||||
std::ofstream out;
|
||||
out.open("time_matrix_inverse.dat");
|
||||
std::cout << " - matrix inverse (nbatch, MxM)" << std::endl;
|
||||
out << " M tgetrf tgetri\n";
|
||||
Alloc<std::complex<double>> alloc{};
|
||||
std::vector<int> num_rows = {100, 110, 120, 200, 210, 300, 400, 500, 600, 700, 800, 1000, 2000, 4000};
|
||||
int max_rows = num_rows[num_rows.size() - 1];
|
||||
int size = 2 * max_rows * max_rows;
|
||||
Tensor1D<std::complex<double>> buffer(iextensions<1u>{size}, alloc);
|
||||
{
|
||||
std::vector<std::complex<double>> tmp(size);
|
||||
fillRandomMatrix(tmp);
|
||||
using std::copy_n;
|
||||
copy_n(tmp.data(), tmp.size(), buffer.origin());
|
||||
}
|
||||
for (auto m : num_rows)
|
||||
{
|
||||
timeMatrixInverse(out, alloc, buffer, m);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -74,8 +74,7 @@ void test_dense_matrix_mult()
|
|||
{
|
||||
vector<double> m = {
|
||||
9., 24., 30., 4., 10.,
|
||||
12., 14., 16., 36. //,
|
||||
// 9., 6., 1.
|
||||
12., 14., 16., 36.
|
||||
};
|
||||
array_ref<double, 2> M(m.data(), {3, 3});
|
||||
REQUIRE(M.num_elements() == m.size());
|
||||
|
@ -602,6 +601,7 @@ void test_dense_gerf_gqr_device(Allocator& alloc)
|
|||
verify_approx(Id, Id2);
|
||||
}
|
||||
}
|
||||
|
||||
template<class Allocator>
|
||||
void test_dense_gerf_gqr_strided_device(Allocator& alloc)
|
||||
{
|
||||
|
@ -633,6 +633,38 @@ void test_dense_gerf_gqr_strided_device(Allocator& alloc)
|
|||
}
|
||||
}
|
||||
|
||||
template<class Allocator>
|
||||
void test_dense_batched_gemm(Allocator& alloc)
|
||||
{
|
||||
using T = typename Allocator::value_type;
|
||||
using pointer = typename Allocator::pointer;
|
||||
{
|
||||
int nbatch = 3;
|
||||
array<T, 2, Allocator> a = {{0.0, 1.0, 2.0}, {3.0, 4.0, 5.0}, {6.0, 7.0, 8.0}};
|
||||
array<T, 2, Allocator> b = {{0.0, 1.0, 2.0}, {3.0, 4.0, 5.0}, {6.0, 7.0, 8.0}};
|
||||
array<T, 2, Allocator> res = {{15.0, 18.0, 21.0}, {42.0, 54.0, 66.0}, {69.0, 90.0, 111.0}};
|
||||
array<T, 3, Allocator> c({3, 3, 3}, 0.0, alloc);
|
||||
T alpha = 1.0;
|
||||
T beta = 0.0;
|
||||
|
||||
std::vector<pointer> A_array;
|
||||
std::vector<pointer> B_array;
|
||||
std::vector<pointer> C_array;
|
||||
for (int i = 0; i < nbatch; i++)
|
||||
{
|
||||
A_array.emplace_back(a.origin());
|
||||
B_array.emplace_back(b.origin());
|
||||
C_array.emplace_back(c[i].origin());
|
||||
}
|
||||
using ma::gemmBatched;
|
||||
gemmBatched('N', 'N', 3, 3, 3, alpha, A_array.data(), 3, B_array.data(), 3, beta, C_array.data(), 3, nbatch);
|
||||
for (int i = 0; i < nbatch; i++)
|
||||
{
|
||||
verify_approx(c[i], res);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<class Allocator>
|
||||
void test_dense_geqrf_getri_batched_device(Allocator& alloc)
|
||||
{
|
||||
|
@ -719,6 +751,7 @@ TEST_CASE("dense_ma_operations_device_double", "[matrix_operations]")
|
|||
test_dense_mat_vec_device<Alloc>(alloc);
|
||||
test_dense_mat_mul_device<Alloc>(alloc);
|
||||
test_dense_gerf_gqr_device<Alloc>(alloc);
|
||||
test_dense_batched_gemm<Alloc>(alloc);
|
||||
}
|
||||
}
|
||||
TEST_CASE("dense_ma_operations_device_complex", "[matrix_operations]")
|
||||
|
@ -734,6 +767,8 @@ TEST_CASE("dense_ma_operations_device_complex", "[matrix_operations]")
|
|||
test_dense_gerf_gqr_device<Alloc>(alloc);
|
||||
test_dense_gerf_gqr_strided_device<Alloc>(alloc);
|
||||
test_dense_geqrf_getri_batched_device<Alloc>(alloc);
|
||||
test_dense_geqrf_getri_batched_device<Alloc>(alloc);
|
||||
test_dense_batched_gemm<Alloc>(alloc);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -183,7 +183,7 @@ protected:
|
|||
|
||||
Wavefunction& wfn;
|
||||
|
||||
// P1 = exp(-0.5*dt*H1), so H1 includes terms from MF substraction
|
||||
// P1 = exp(-0.5*dt*H1), so H1 includes terms from MF subtraction
|
||||
// and the exchange term from the cholesky decomposition (e.g. vn0)
|
||||
mpi3CMatrix H1;
|
||||
mpi3CTensor H1ext;
|
||||
|
|
|
@ -64,7 +64,7 @@ Propagator PropagatorFactory::buildAFQMCPropagator(TaskGroup_& TG,
|
|||
substractMF = false;
|
||||
|
||||
if (substractMF)
|
||||
app_log() << " Using mean-field substraction in propagator: " << name << "\n";
|
||||
app_log() << " Using mean-field subtraction in propagator: " << name << "\n";
|
||||
|
||||
// buld mean field expectation value of the Cholesky matrix
|
||||
CVector vMF(iextensions<1u>{wfn.local_number_of_cholesky_vectors()}, allocator{});
|
||||
|
@ -89,9 +89,9 @@ Propagator PropagatorFactory::buildAFQMCPropagator(TaskGroup_& TG,
|
|||
for (int i = 0; i < vMF_.size(); i++)
|
||||
v_ = std::max(v_, std::abs(vMF_[i]));
|
||||
TG.Global().all_reduce_n(&v_, 1, &vmax, boost::mpi3::max<>());
|
||||
app_log() << " Largest component of Mean-field substraction potential: " << vmax << std::endl;
|
||||
app_log() << " Largest component of Mean-field subtraction potential: " << vmax << std::endl;
|
||||
if (vmax > vbias_bound)
|
||||
app_log() << " WARNING: Mean-field substraction potential has components outside vbias_bound.\n"
|
||||
app_log() << " WARNING: Mean-field subtraction potential has components outside vbias_bound.\n"
|
||||
<< " Consider increasing vbias_bound. max(vMF[n]), vbias_bound: " << vmax << " " << vbias_bound
|
||||
<< std::endl;
|
||||
|
||||
|
|
|
@ -28,8 +28,6 @@
|
|||
#include "multi/memory/fallback.hpp"
|
||||
|
||||
#include "Utilities/TimerManager.h"
|
||||
#include "AFQMC/Utilities/myTimer.h"
|
||||
extern myTimer Timer;
|
||||
|
||||
namespace qmcplusplus
|
||||
{
|
||||
|
|
|
@ -66,7 +66,7 @@ inline std::ostream& app_log() { return infoLog.getStream(); }
|
|||
|
||||
inline std::ostream& app_error() { return infoError.getStream() << "ERROR "; }
|
||||
|
||||
inline std::ostream& app_warning() { return infoError.getStream() << "WARNING "; }
|
||||
inline std::ostream& app_warning() { return infoLog.getStream() << "WARNING "; }
|
||||
|
||||
inline std::ostream& app_debug_stream() { return infoDebug.getStream(); }
|
||||
|
||||
|
|
|
@ -99,7 +99,7 @@ public:
|
|||
private:
|
||||
/** @name Walker Vectors
|
||||
*
|
||||
* A single index into these ordered lists constitue a complete
|
||||
* A single index into these ordered lists constitutes a complete
|
||||
* walker context.
|
||||
* @{
|
||||
*/
|
||||
|
|
|
@ -70,7 +70,7 @@ private:
|
|||
// std::shared_ptr<QMCHamiltonian> hamiltonian_;
|
||||
|
||||
// This is necessary MCPopulation is constructed in a simple call scope in QMCDriverFactory from the legacy MCWalkerConfiguration
|
||||
// MCPopulation should have QMCMain scope eventually and the driver will just have a refrence to it.
|
||||
// MCPopulation should have QMCMain scope eventually and the driver will just have a reference to it.
|
||||
TrialWaveFunction* trial_wf_;
|
||||
ParticleSet* elec_particle_set_;
|
||||
QMCHamiltonian* hamiltonian_;
|
||||
|
|
|
@ -83,7 +83,7 @@ protected:
|
|||
IndexType walkers_per_rank_ = 0;
|
||||
IndexType requested_samples_ = 0;
|
||||
IndexType sub_steps_ = 1;
|
||||
// max unecessary in this context
|
||||
// max unnecessary in this context
|
||||
IndexType max_blocks_ = 1;
|
||||
IndexType max_steps_ = 1;
|
||||
IndexType warmup_steps_ = 0;
|
||||
|
|
|
@ -356,7 +356,7 @@ protected:
|
|||
|
||||
/** Observables manager
|
||||
* Has very problematic owner ship and life cycle.
|
||||
* Can be transfered via branch manager one driver to the next indefinitely
|
||||
* Can be transferred via branch manager one driver to the next indefinitely
|
||||
* TODO: Modify Branch manager and others to clear this up.
|
||||
*/
|
||||
EstimatorManagerNew* estimator_manager_;
|
||||
|
|
|
@ -181,7 +181,7 @@ public:
|
|||
|
||||
/** unified: do the actual adjustment
|
||||
*
|
||||
* unfortunately right now this requires knowledge of the global context, seems unecessary
|
||||
* unfortunately right now this requires knowledge of the global context, seems unnecessary
|
||||
* but this is why MCPopulation is handed in.
|
||||
* This does was applyNmaxNmin used to.
|
||||
*/
|
||||
|
|
|
@ -71,7 +71,7 @@ app_log() << "Descent engine test of parameter update" << std::endl;
|
|||
app_log() << "First parameter: " << results[0] << std::endl;
|
||||
app_log() << "Second parameter: " << results[1] << std::endl;
|
||||
|
||||
//The engine should update the parameters using the generic defualt step size of .001 and obtain these values.
|
||||
//The engine should update the parameters using the generic default step size of .001 and obtain these values.
|
||||
REQUIRE(std::real(results[0]) == Approx(.995));
|
||||
REQUIRE(std::real(results[1]) == Approx(-2.001));
|
||||
|
||||
|
|
|
@ -173,7 +173,7 @@ private:
|
|||
/** mark all the electrons affected by Tmoves and update ElecNeighborIons and IonNeighborElecs
|
||||
* @param myTable electron ion distance table
|
||||
* @param iel reference electron
|
||||
* Note this funtion should be called before acceptMove for a Tmove
|
||||
* Note this function should be called before acceptMove for a Tmove
|
||||
*/
|
||||
void markAffectedElecs(const DistanceTableData& myTable, int iel);
|
||||
};
|
||||
|
|
|
@ -192,7 +192,7 @@ void GamesXmlParser::getGeometry(std::vector<xmlNodePtr>& aPtrList)
|
|||
while (tcur != NULL)
|
||||
{
|
||||
std::string tname((const char*)tcur->name);
|
||||
double x, y, z;
|
||||
double x(0), y(0), z(0);
|
||||
if (tname == "XCOORD")
|
||||
putContent(x, tcur);
|
||||
else if (tname == "YCOORD")
|
||||
|
|
|
@ -934,7 +934,7 @@ void EshdfFile::handleKpt(int kpt_num,
|
|||
}
|
||||
}
|
||||
|
||||
// now all the states are writen, so write out eigenvalues and number of states
|
||||
// now all the states are written, so write out eigenvalues and number of states
|
||||
vector<double> eigval = eigenvalues;
|
||||
if (spinpol == 0)
|
||||
{
|
||||
|
|
|
@ -61,7 +61,7 @@ int main(int argc, char** argv)
|
|||
bool show_usage = false;
|
||||
bool show_warn = false;
|
||||
/* For a successful execution of the code, atleast 3 arguments will need to be
|
||||
* provided along with the exectuable. Therefore, print usage information if
|
||||
* provided along with the executable. Therefore, print usage information if
|
||||
* argc is less than 4.
|
||||
*/
|
||||
if (argc < 4)
|
||||
|
|
|
@ -87,9 +87,6 @@ void AGPDeterminant::reportStatus(std::ostream& os)
|
|||
//do nothing
|
||||
}
|
||||
|
||||
|
||||
void AGPDeterminant::resetTargetParticleSet(ParticleSet& P) { GeminalBasis->resetTargetParticleSet(P); }
|
||||
|
||||
/** Calculate the log value of the Dirac determinant for particles
|
||||
*@param P input configuration containing N particles
|
||||
*@param G a vector containing N gradients
|
||||
|
@ -418,7 +415,6 @@ WaveFunctionComponentPtr AGPDeterminant::makeClone(ParticleSet& tqp) const
|
|||
{
|
||||
AGPDeterminant* myclone = new AGPDeterminant(0);
|
||||
myclone->GeminalBasis = GeminalBasis->makeClone();
|
||||
myclone->GeminalBasis->resetTargetParticleSet(tqp);
|
||||
myclone->resize(Nup, Ndown);
|
||||
myclone->Lambda = Lambda;
|
||||
if (Nup != Ndown)
|
||||
|
|
|
@ -51,8 +51,6 @@ public:
|
|||
void resetParameters(const opt_variables_type& active);
|
||||
void reportStatus(std::ostream& os);
|
||||
|
||||
void resetTargetParticleSet(ParticleSet& P);
|
||||
|
||||
///reset the size: with the number of particles and number of orbtials
|
||||
void resize(int nup, int ndown);
|
||||
|
||||
|
|
|
@ -117,8 +117,6 @@ struct BasisSetBase : public OrbitalSetTraits<T>
|
|||
/**@}*/
|
||||
///resize the basis set
|
||||
virtual void setBasisSetSize(int nbs) = 0;
|
||||
///reset the target particle set
|
||||
virtual void resetTargetParticleSet(ParticleSet& P) = 0;
|
||||
|
||||
virtual void evaluateWithHessian(const ParticleSet& P, int iat) = 0;
|
||||
virtual void evaluateWithThirdDeriv(const ParticleSet& P, int iat) = 0;
|
||||
|
|
|
@ -113,8 +113,6 @@ public:
|
|||
|
||||
void resetParameters(const opt_variables_type& active) override {}
|
||||
|
||||
void resetTargetParticleSet(ParticleSet& e) override {}
|
||||
|
||||
void setOrbitalSetSize(int norbs) override { OrbitalSetSize = norbs; }
|
||||
|
||||
virtual void evaluate_notranspose(const ParticleSet& P,
|
||||
|
|
|
@ -94,13 +94,6 @@ void CompositeSPOSet::report()
|
|||
}
|
||||
|
||||
|
||||
void CompositeSPOSet::resetTargetParticleSet(ParticleSet& P)
|
||||
{
|
||||
for (int c = 0; c < components.size(); ++c)
|
||||
components[c]->resetTargetParticleSet(P);
|
||||
}
|
||||
|
||||
|
||||
SPOSet* CompositeSPOSet::makeClone() const
|
||||
{
|
||||
// base class and shallow copy
|
||||
|
|
|
@ -48,8 +48,6 @@ public:
|
|||
///size is determined by component sposets and nothing else
|
||||
inline void setOrbitalSetSize(int norbs) {}
|
||||
|
||||
void resetTargetParticleSet(ParticleSet& P);
|
||||
|
||||
SPOSet* makeClone() const;
|
||||
|
||||
/** add sposet clones from another Composite SPOSet
|
||||
|
|
|
@ -24,7 +24,6 @@ public:
|
|||
virtual void checkOutVariables(const opt_variables_type& active) override {}
|
||||
virtual void resetParameters(const opt_variables_type& active) override {}
|
||||
virtual void reportStatus(std::ostream& os) override {}
|
||||
virtual void resetTargetParticleSet(ParticleSet& P) override {}
|
||||
|
||||
PsiValueType FakeGradRatio;
|
||||
|
||||
|
|
|
@ -39,17 +39,6 @@ void DiffWaveFunctionComponent::evaluateDerivRatios(ParticleSet& VP,
|
|||
APP_ABORT("Implement DiffWaveFunctionComponent::evaluateDerivRatios for this orbital");
|
||||
}
|
||||
|
||||
void NumericalDiffOrbital::resetTargetParticleSet(ParticleSet& P)
|
||||
{
|
||||
int nptcls = P.getTotalNum();
|
||||
dg_p.resize(nptcls);
|
||||
dl_p.resize(nptcls);
|
||||
dg_m.resize(nptcls);
|
||||
dl_m.resize(nptcls);
|
||||
gradLogPsi.resize(nptcls);
|
||||
lapLogPsi.resize(nptcls);
|
||||
}
|
||||
|
||||
void NumericalDiffOrbital::checkOutVariables(const opt_variables_type& optvars)
|
||||
{
|
||||
//do nothing
|
||||
|
@ -117,20 +106,6 @@ void AnalyticDiffOrbital::resetParameters(const opt_variables_type& optvars)
|
|||
refOrbital[i]->resetParameters(optvars);
|
||||
}
|
||||
|
||||
void AnalyticDiffOrbital::resetTargetParticleSet(ParticleSet& P)
|
||||
{
|
||||
if (MyIndex < 0)
|
||||
return;
|
||||
for (int i = 0; i < refOrbital.size(); ++i)
|
||||
refOrbital[i]->resetTargetParticleSet(P);
|
||||
int nptcls = P.getTotalNum();
|
||||
if (gradLogPsi.size() != nptcls)
|
||||
{
|
||||
gradLogPsi.resize(nptcls);
|
||||
lapLogPsi.resize(nptcls);
|
||||
}
|
||||
}
|
||||
|
||||
void AnalyticDiffOrbital::checkOutVariables(const opt_variables_type& optvars)
|
||||
{
|
||||
MyIndex = -1;
|
||||
|
|
|
@ -69,9 +69,6 @@ struct DiffWaveFunctionComponent
|
|||
*/
|
||||
virtual void initialize() {}
|
||||
|
||||
///prepare internal data for a new particle sets
|
||||
virtual void resetTargetParticleSet(ParticleSet& P) = 0;
|
||||
|
||||
/** evaluate derivatives at \f$\{R\}\f$
|
||||
* @param P current configuration
|
||||
* @param optvars optimizable variables
|
||||
|
@ -130,7 +127,6 @@ struct NumericalDiffOrbital : public DiffWaveFunctionComponent
|
|||
{
|
||||
NumericalDiffOrbital(WaveFunctionComponent* orb = 0) : DiffWaveFunctionComponent(orb) {}
|
||||
|
||||
void resetTargetParticleSet(ParticleSet& P);
|
||||
void evaluateDerivatives(ParticleSet& P,
|
||||
const opt_variables_type& optvars,
|
||||
std::vector<RealType>& dlogpsi,
|
||||
|
@ -150,7 +146,6 @@ struct AnalyticDiffOrbital : public DiffWaveFunctionComponent
|
|||
{
|
||||
AnalyticDiffOrbital(WaveFunctionComponent* orb = 0) : DiffWaveFunctionComponent(orb) {}
|
||||
|
||||
void resetTargetParticleSet(ParticleSet& P);
|
||||
void evaluateDerivatives(ParticleSet& P,
|
||||
const opt_variables_type& optvars,
|
||||
std::vector<RealType>& dlogpsi,
|
||||
|
|
|
@ -47,8 +47,6 @@ inline void EinsplineSetExtended<StorageType>::computePhaseFactors(const TinyVec
|
|||
|
||||
EinsplineSet::UnitCellType EinsplineSet::GetLattice() { return SuperLattice; }
|
||||
|
||||
void EinsplineSet::resetTargetParticleSet(ParticleSet& e) {}
|
||||
|
||||
void EinsplineSet::resetSourceParticleSet(ParticleSet& ions) {}
|
||||
|
||||
void EinsplineSet::setOrbitalSetSize(int norbs) { OrbitalSetSize = norbs; }
|
||||
|
@ -139,10 +137,6 @@ template<typename StorageType>
|
|||
void EinsplineSetExtended<StorageType>::resetParameters(const opt_variables_type& active)
|
||||
{}
|
||||
|
||||
template<typename StorageType>
|
||||
void EinsplineSetExtended<StorageType>::resetTargetParticleSet(ParticleSet& e)
|
||||
{}
|
||||
|
||||
template<typename StorageType>
|
||||
void EinsplineSetExtended<StorageType>::setOrbitalSetSize(int norbs)
|
||||
{
|
||||
|
|
|
@ -78,7 +78,6 @@ public:
|
|||
public:
|
||||
UnitCellType GetLattice();
|
||||
virtual void resetParameters(const opt_variables_type& active) {}
|
||||
void resetTargetParticleSet(ParticleSet& e);
|
||||
void resetSourceParticleSet(ParticleSet& ions);
|
||||
void setOrbitalSetSize(int norbs);
|
||||
inline std::string Type() { return "EinsplineSet"; }
|
||||
|
@ -486,7 +485,6 @@ public:
|
|||
#endif
|
||||
|
||||
void resetParameters(const opt_variables_type& active);
|
||||
void resetTargetParticleSet(ParticleSet& e);
|
||||
void setOrbitalSetSize(int norbs);
|
||||
std::string Type();
|
||||
|
||||
|
|
|
@ -37,7 +37,6 @@ struct EGOSet : public SPOSet
|
|||
SPOSet* makeClone() const override { return new EGOSet(*this); }
|
||||
|
||||
void resetParameters(const opt_variables_type& optVariables) override {}
|
||||
inline void resetTargetParticleSet(ParticleSet& P) override {}
|
||||
void setOrbitalSetSize(int norbs) override {}
|
||||
|
||||
inline void evaluateValue(const ParticleSet& P, int iat, ValueVector_t& psi) override
|
||||
|
|
|
@ -146,7 +146,6 @@ WaveFunctionComponent* ElectronGasOrbitalBuilder::buildComponent(xmlNodePtr cur)
|
|||
sdet->setBF(BFTrans);
|
||||
if (BFTrans->isOptimizable())
|
||||
sdet->Optimizable = true;
|
||||
sdet->resetTargetParticleSet(targetPtcl);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue