mirror of https://github.com/QMCPACK/qmcpack.git
Build scripts and nexus machine config update for archer2 (#3999)
* Modified nexus/lib/physical_system.py to read poscar files with a different name, specifying "format" * machine archer2 * config for archer2 * corrected error in num. cpus per node * config archer2 updates, right mpi libs * config for archer2 https://docs.archer2.ac.uk/user-guide/hardware/ * more efficient mpi modules in Archer2 * build script for archer2, some modifications * update config/build_archer2.sh * undo change to "physical_system.py" * changes to machines and test_machines for Archer2 * fix error test on archer2 machine Co-authored-by: Andrea Zen <andrea.zen@unina.it> Co-authored-by: Ye Luo <yeluo@anl.gov>
This commit is contained in:
parent
e6e5c80c9b
commit
c7fabc875b
|
@ -0,0 +1,63 @@
|
|||
#!/bin/bash

# Build script for QMCPACK on ARCHER2 (HPE Cray EX).
# Builds the real ("cpu") and complex ("complex_cpu") flavors and links the
# resulting executables into ./bin/qmcpack_<flavor>.
echo "ARCHER2: Information on hardware and software"
echo "https://www.archer2.ac.uk/about/hardware.html"
echo "and documentation:"
echo "https://docs.archer2.ac.uk"
echo

echo "Loading QMCPACK dependency modules for archer2"
echo
module restore
module load PrgEnv-gnu
module load cray-hdf5-parallel
module load cray-fftw
export FFTW_ROOT=$FFTW_DIR/..
module load libxml2
module load cmake
module load boost
module load cray-python
echo
echo "Loaded modules:"
module list

echo
echo "In the running script (but not in compilation) also load the following two modules:"
echo " module load craype-network-ucx"
echo " module load cray-mpich-ucx"
echo "which improves a lot the scaling efficiency. "
echo
echo

# Map: build-flavor name -> extra CMake flags for that flavor.
declare -A builds=( ["cpu"]="-DBUILD_PPCONVERT=1" \
                    ["complex_cpu"]="-DQMC_COMPLEX=1" \
                  )

# -p: tolerate an existing bin/ so the script can be re-run.
mkdir -p bin

for build in "${!builds[@]}"
do
    echo "building: $build with ${builds[$build]}"
    # -f: the symlink does not exist yet on a first build.
    rm -f "bin/qmcpack_${build}"
    mkdir -p "build_${build}"
    # Abort rather than configure/build in the wrong directory.
    cd "build_${build}" || exit 1
    cmake -DCMAKE_C_COMPILER="cc" \
          -DCMAKE_CXX_COMPILER="CC" \
          -DCMAKE_SYSTEM_NAME=CrayLinuxEnvironment \
          -D LibXml2_ROOT=$LIBXML2_ROOT \
          -DBUILD_LMYENGINE_INTERFACE=0 \
          ${builds[$build]} \
          ..
    # Link the executable only if the build succeeded.
    if make -j 20; then
        build_dir=$(pwd)
        # The complex build produces qmcpack_complex; the real build, qmcpack.
        if [ -e "${build_dir}/bin/qmcpack_complex" ]; then
            ln -sf "${build_dir}/bin/qmcpack_complex" "${build_dir}/../bin/qmcpack_${build}"
        else
            ln -sf "${build_dir}/bin/qmcpack" "${build_dir}/../bin/qmcpack_${build}"
        fi
    fi
    cd ..
done
|
||||
|
|
@ -3160,10 +3160,6 @@ class Andes(Supercomputer):
|
|||
errfile_extension = '.error'
|
||||
|
||||
def post_process_job(self,job):
|
||||
job.run_options.add(
|
||||
N='-N {}'.format(job.nodes),
|
||||
n='-n {}'.format(job.processes),
|
||||
)
|
||||
if job.threads>1:
|
||||
job.run_options.add(
|
||||
c = '-c {}'.format(job.threads),
|
||||
|
@ -3179,6 +3175,10 @@ class Andes(Supercomputer):
|
|||
)
|
||||
#end if
|
||||
#end if
|
||||
job.run_options.add(
|
||||
N='-N {}'.format(job.nodes),
|
||||
n='-n {}'.format(job.processes),
|
||||
)
|
||||
#end def post_process_job
|
||||
|
||||
def write_job_header(self,job):
|
||||
|
@ -3225,6 +3225,97 @@ class Andes(Supercomputer):
|
|||
#end class Andes
|
||||
|
||||
|
||||
## Added 05/04/2022 by A Zen
class Archer2(Supercomputer):
    # Machine definition for ARCHER2 (UK national supercomputer, HPE Cray EX).
    # https://docs.archer2.ac.uk/user-guide/hardware/

    name             = 'archer2'
    requires_account = True
    batch_capable    = True
    #executable_subfile = True
    prefixed_output    = True
    outfile_extension  = '.output'
    errfile_extension  = '.error'

    def post_process_job(self,job):
        # Pin ranks and threads as recommended by the ARCHER2 docs for
        # MPI(+OpenMP) jobs: block distribution, no hyperthreads.
        job.run_options.add(
            distribution='--distribution=block:block',
            hint='--hint=nomultithread',
            N='-N {}'.format(job.nodes),
            n='-n {}'.format(job.processes),
            )
        if job.threads>1:
            # Reserve one core per OpenMP thread for hybrid jobs.
            job.run_options.add(
                c = '-c {}'.format(job.threads),
                )
        #end if
    #end def post_process_job

    def write_job_header(self,job):
        """Return the SLURM batch-script header for an ARCHER2 job."""
        if job.qos is None:
            job.qos='standard'
        #end if
        # Walltime and node-count limits per QOS.
        # NOTE(review): values taken from the ARCHER2 scheduler docs at the
        # time of writing -- confirm against current limits.
        if job.qos == 'long':
            max_time      = 48
            max_partition = 64
        elif 'short' in job.qos:
            max_time      = 20.0/60.0
            max_partition = 32
        else:
            max_time      = 24
            max_partition = 1024
        #end if
        job.total_hours = job.days*24 + job.hours + job.minutes/60.0 + job.seconds/3600.0
        if job.total_hours > max_time:
            self.warn('!!! ATTENTION !!!\n the maximum runtime on {0} should not be more than {1}\n you requested: {2}'.format(job.queue,max_time,job.total_hours))
            # Clamp the request to the QOS limit.  Overwrite days/minutes as
            # well: previously only job.hours was set, so multi-day requests
            # (via job.days) or fractional-hour limits (20.0/60.0 for the
            # short QOS) could still emit a walltime above the limit.
            job.days    = int(max_time)//24
            job.hours   = int(max_time)%24
            job.minutes = int(round((max_time-int(max_time))*60))
            job.seconds = 0
        #end if
        if job.nodes > max_partition:
            self.warn('!!! ATTENTION !!!\n the maximum nodes on {0} should not be more than {1}\n you requested: {2}'.format(job.queue,max_partition,job.nodes))
            job.nodes = max_partition
        #end if

        c='#!/bin/bash\n'
        c+='#SBATCH --job-name '+str(job.name)+'\n'
        c+='#SBATCH --account='+str(job.account)+'\n'
        c+='#SBATCH -N '+str(job.nodes)+'\n'
        c+='#SBATCH --ntasks-per-node={0}\n'.format(job.processes_per_node)
        c+='#SBATCH --cpus-per-task={0}\n'.format(job.threads)
        c+='#SBATCH -t {0}:{1}:{2}\n'.format(str(job.hours+24*job.days).zfill(2),str(job.minutes).zfill(2),str(job.seconds).zfill(2))
        c+='#SBATCH -o {0}\n'.format(job.outfile)
        c+='#SBATCH -e {0}\n'.format(job.errfile)
        c+='#SBATCH --partition=standard\n'
        c+='#SBATCH --qos={0}\n'.format(job.qos)
        if job.email is not None:
            c+='#SBATCH --mail-user {}\n'.format(job.email)
            c+='#SBATCH --mail-type ALL\n'
            #c+='#SBATCH --mail-type FAIL\n'
        #end if
        c+='\n'
        #c+='cd $SLURM_SUBMIT_DIR\n'
        #c+='\n'
        c+='echo JobID : $SLURM_JOBID\n'
        c+='echo Number of nodes requested: $SLURM_JOB_NUM_NODES\n'
        c+='echo List of nodes assigned to the job: $SLURM_NODELIST\n'
        c+='\n'
        return c
    #end def write_job_header
#end class Archer2
|
||||
|
||||
|
||||
class Tomcat3(Supercomputer):
|
||||
name = 'tomcat3'
|
||||
requires_account = False
|
||||
|
@ -3297,6 +3388,7 @@ Rhea( 512, 2, 8, 128, 1000, 'srun', 'sbatch', 'squeue', 'sc
|
|||
Andes( 704, 2, 16, 256, 1000, 'srun', 'sbatch', 'squeue', 'scancel')
|
||||
Tomcat3( 8, 1, 64, 192, 1000, 'mpirun', 'sbatch', 'sacct', 'scancel')
|
||||
SuperMUC_NG( 6336, 1, 48, 96, 1000,'mpiexec', 'sbatch', 'sacct', 'scancel')
|
||||
Archer2( 5860, 2, 64, 512, 1000, 'srun', 'sbatch', 'squeue', 'scancel')
|
||||
|
||||
|
||||
#machine accessor functions
|
||||
|
|
|
@ -1033,6 +1033,12 @@ def test_job_run_command():
|
|||
('andes' , 'n2_t2' ) : 'srun -N 2 -c 2 --cpu-bind=cores -n 32 test.x',
|
||||
('andes' , 'n2_t2_e' ) : 'srun -N 2 -c 2 --cpu-bind=cores -n 32 test.x',
|
||||
('andes' , 'n2_t2_p2' ) : 'srun -N 2 -c 2 --cpu-bind=cores -n 4 test.x',
|
||||
('archer2' , 'n1' ) : 'srun --distribution=block:block --hint=nomultithread -N 1 -n 128 test.x',
|
||||
('archer2' , 'n1_p1' ) : 'srun --distribution=block:block --hint=nomultithread -N 1 -n 1 test.x',
|
||||
('archer2' , 'n2' ) : 'srun --distribution=block:block --hint=nomultithread -N 2 -n 256 test.x',
|
||||
('archer2' , 'n2_t2' ) : 'srun --distribution=block:block --hint=nomultithread -N 2 -c 2 -n 128 test.x',
|
||||
('archer2' , 'n2_t2_e' ) : 'srun --distribution=block:block --hint=nomultithread -N 2 -c 2 -n 128 test.x',
|
||||
('archer2' , 'n2_t2_p2' ) : 'srun --distribution=block:block --hint=nomultithread -N 2 -c 2 -n 4 test.x',
|
||||
('attaway' , 'n1' ) : 'srun test.x',
|
||||
('attaway' , 'n1_p1' ) : 'srun test.x',
|
||||
('attaway' , 'n2' ) : 'srun test.x',
|
||||
|
@ -1382,6 +1388,26 @@ echo List of nodes assigned to the job: $SLURM_NODELIST
|
|||
export ENV_VAR=1
|
||||
export OMP_NUM_THREADS=1
|
||||
srun -N 2 -n 64 test.x''',
|
||||
archer2 = '''#!/bin/bash
|
||||
#SBATCH --job-name jobname
|
||||
#SBATCH --account=ABC123
|
||||
#SBATCH -N 2
|
||||
#SBATCH --ntasks-per-node=128
|
||||
#SBATCH --cpus-per-task=1
|
||||
#SBATCH -t 06:30:00
|
||||
#SBATCH -o test.out
|
||||
#SBATCH -e test.err
|
||||
#SBATCH --partition=standard
|
||||
#SBATCH --qos=standard
|
||||
|
||||
echo JobID : $SLURM_JOBID
|
||||
echo Number of nodes requested: $SLURM_JOB_NUM_NODES
|
||||
echo List of nodes assigned to the job: $SLURM_NODELIST
|
||||
|
||||
export ENV_VAR=1
|
||||
export OMP_NUM_THREADS=1
|
||||
|
||||
srun --distribution=block:block --hint=nomultithread -N 2 -n 256 test.x''',
|
||||
attaway = '''#!/bin/bash
|
||||
#SBATCH -p batch
|
||||
#SBATCH --job-name jobname
|
||||
|
@ -1893,6 +1919,7 @@ runjob --np 32 -p 16 $LOCARGS --verbose=INFO --envs OMP_NUM_THREADS=1 ENV_VAR=1
|
|||
def job_files_same(jf1,jf2):
    # Compare two job files after normalization via process_job_file.
    # On mismatch, print both normalized forms to aid test debugging.
    jf1 = process_job_file(jf1)
    jf2 = process_job_file(jf2)
    # Compute the (potentially deep) comparison once instead of twice.
    same = object_eq(jf1,jf2)
    if not same: print(f"compare --------------------\n * wj *\n{jf1}\n * ref_wj *\n{jf2}\n")
    return same
#end def job_files_same
|
||||
|
||||
|
|
Loading…
Reference in New Issue