# make.sys. Edited by Filippo Spiga (filippo.spiga@quantum-espresso.org)
# based on ORNL OLCF CRAY XK7 TITAN (March 22nd, 2014)
#
# First do
# $ cd GPU
# $ ./configure --enable-parallel --enable-openmp --enable-cuda --with-gpu-arch=35 --with-cuda-dir=${CRAY_CUDATOOLKIT_DIR} --without-magma --with-phigemm --with-scalapack ARCH=crayxt
# $ cd ..
#
# then replace the make.sys
#
# Modules:
#
# Currently Loaded Modulefiles:
#   1) modules/3.2.6.7                       11) xpmem/0.1-2.0402.45248.1.5.gem            21) pdsh/2.26-1.0402.45278.1.1.gem           31) moab/7.1.3
#   2) craype-network-gemini                 12) job/1.5.5-0.1_2.0402.45272.1.5.gem        22) shared-root/1.0-1.0402.46893.3.17.gem    32) lustredu/1.3
#   3) xt-asyncpe/5.24                       13) csa/3.0.0-1_2.0402.45268.1.90.gem         23) switch/1.0-1.0402.45840.2.63.gem         33) DefApps
#   4) pgi/13.10.0                           14) rca/1.0.0-2.0402.47290.7.1.gem            24) xe-sysroot/4.2.34                        34) site-aprun/1.0
#   5) cray-libsci/12.1.3                    15) audit/1.0.0-1.0402.45273.1.86.gem         25) atp/1.7.1                                35) aprun-usage/1.0
#   6) udreg/2.3.2-1.0402.7546.1.5.gem       16) ccm/2.2.0-1.0402.46086.4.120              26) PrgEnv-pgi/4.2.34                        36) altd/1.0
#   7) ugni/5.0-1.0402.7551.1.10.gem         17) configuration/1.0-1.0402.45284.1.2.gem    27) cray-mpich/6.2.0                         37) cudatoolkit/5.5.20-1.0402.7700.8.1
#   8) pmi/5.0.1-1.0000.9799.94.9.gem        18) hosts/1.0-1.0402.45251.1.86.gem           28) craype-interlagos
#   9) dmapp/4.0.1-1.0402.7784.4.1.gem       19) lbcd/2.1-1.0402.45245.1.2.gem             29) eswrap/1.0.15
#  10) gni-headers/2.1-1.0402.7541.1.5.gem   20) nodehealth/5.1-1.0402.45895.3.76.gem      30) torque/4.2.5-snap.201308291703
#
# make.sys.  Generated from make.sys.in by configure.

# compilation rules

# The first (empty) declaration clears the current suffix list; the second
# one registers exactly the suffixes used by the suffix rules of this file.
.SUFFIXES :
.SUFFIXES : .o .c .f .f90 .cu

# most fortran compilers can directly preprocess c-like directives: use
#     $(MPIF90) $(F90FLAGS) -c $<
# if explicit preprocessing by the C preprocessor is needed, use:
#     $(CPP) $(CPPFLAGS) $< -o $*.F90
#     $(MPIF90) $(F90FLAGS) -c $*.F90 -o $*.o
# remember the tabulator in the first column !!!
# Suffix rules. Every recipe line below starts with a hard TAB; the rule
# header carries no prerequisites, the source file is supplied as $<.

.f90.o:
	$(MPIF90) $(F90FLAGS) -c $<

# .f.o and .c.o: do not modify

.f.o:
	$(F77) $(FFLAGS) -c $<

.c.o:
	$(CC) $(CFLAGS) -c $<

# CUDA files
.cu.o:
	$(NVCC) $(NVCCFLAGS) -I../../include $(IFLAGS) $(DFLAGS) -c $<

# topdir for linking espresso libs with plugins
TOPDIR         = /ccs/home/spigafi/espresso/GPU/../

# DFLAGS  = precompilation options (possible arguments to -D and -U)
#           used by the C compiler and preprocessor
# FDFLAGS = as DFLAGS, for the f90 compiler
# See include/defs.h.README for a list of options and their meaning
# With the exception of IBM xlf, FDFLAGS = $(DFLAGS)
# For IBM xlf, FDFLAGS is the same as DFLAGS with separating commas

# MANUAL_DFLAGS  = additional precompilation option(s), if desired
#                  You may use this instead of tweaking DFLAGS and FDFLAGS
#                  BEWARE: will not work for IBM xlf! Manually edit FDFLAGS
MANUAL_DFLAGS  =
DFLAGS         = -D__PGI -D__IOTK_SAFEST -D__FFTW -D__MPI -D__PARA -D__SCALAPACK -D__CUDA -D__OPENMP -D__PHIGEMM $(MANUAL_DFLAGS)
# NOTE(review): DFLAGS already ends with $(MANUAL_DFLAGS), so any manual
# option expands twice in FDFLAGS; left as produced by configure.
FDFLAGS        = $(DFLAGS) $(MANUAL_DFLAGS)

# IFLAGS = how to locate directories where files to be included are
# In most cases, IFLAGS = -I../include
# MPICH_DIR is not defined in this file; it has to come from the environment.
IFLAGS         = -I$(MPICH_DIR)/include -I../include -I/ccs/home/spigafi/espresso/GPU/..//phiGEMM/include -I/opt/nvidia/cudatoolkit/5.5.20-1.0402.7700.8.1/include

# MOD_FLAG = flag used by f90 compiler to locate modules
# Each Makefile defines the list of needed modules in MODFLAGS
MOD_FLAG      = -I

# Compilers: fortran-90, fortran-77, C
# If a parallel compilation is desired, MPIF90 should be a fortran-90
# compiler that produces executables for parallel execution using MPI
# (such as for instance mpif90, mpf90, mpxlf90,...);
# otherwise, an ordinary fortran-90 compiler (f90, g95, xlf90, ifort,...)
# If you have a parallel machine but no suitable candidate for MPIF90,
# try to specify the directory containing "mpif.h" in IFLAGS
# and to specify the location of MPI libraries in MPI_LIBS

MPIF90         = pgf90
#F90           = pgf90
CC             = pgcc
F77            = pgf77

# C preprocessor and preprocessing flags - for explicit preprocessing,
# if needed (see the compilation rules above)
# preprocessing flags must include DFLAGS and IFLAGS

CPP            = cpp
CPPFLAGS       = -P -traditional $(DFLAGS) $(IFLAGS)

# compiler flags: C, F90, F77
# C flags must include DFLAGS and IFLAGS
# F90 flags must include MODFLAGS, IFLAGS, and FDFLAGS with appropriate syntax

# The backslashes in CFLAGS keep the parentheses of the -D macro definitions
# from being interpreted by the shell that runs the recipe.
CFLAGS         = -O3 -D__align__\(n\)=__attribute__\(\(aligned\(n\)\)\) -D__location__\(a\)=__annotate__\(a\) -DCUDARTAPI= -D__x86_64 $(DFLAGS) $(IFLAGS)
F90FLAGS       = -O3 -Mcache_align -r8 -Mpreprocess -mp=nonuma $(FDFLAGS) $(IFLAGS) $(MODFLAGS)
FFLAGS         = -O3 -r8 -mp=nonuma

# compiler flags without optimization for fortran-77
# the latter is NEEDED to properly compile dlamch.f, used by lapack

FFLAGS_NOOPT   = -O0

# compiler flag needed by some compilers when the main is not fortran
# Currently used for Yambo

FFLAGS_NOMAIN   = -Mnomain

# Linker, linker-specific flags (if any)
# Typically LD coincides with F90 or MPIF90, LD_LIBS is empty

LD             = pgf90
LDFLAGS        = -v -mp=nonuma
# MPICH_DIR and CRAY_CUDATOOLKIT_POST_LINK_OPTS are not defined in this file;
# they have to come from the environment.
# NOTE(review): the libsci path pins release 12.0.00 while the module list in
# the file header shows cray-libsci/12.1.3 - confirm which one is intended.
LD_LIBS        = $(MPICH_DIR)/lib/libmpich_pgi.so /opt/cray/libsci/12.0.00/pgi/119/interlagos/lib/libsci_pgi_mp.a $(CRAY_CUDATOOLKIT_POST_LINK_OPTS) -lcublas -lcufft

# External Libraries (if any) : blas, lapack, fft, MPI

# If you have nothing better, use the local copy :
# BLAS_LIBS = /your/path/to/espresso/BLAS/blas.a
# BLAS_LIBS_SWITCH = internal

BLAS_LIBS      = /ccs/home/spigafi/espresso/GPU/..//phiGEMM/lib/libphigemm.a
BLAS_LIBS_SWITCH = external

# OpenBLAS is used to exploit multi-core CPU if a multi-threaded BLAS
# is not used or installed in the system (i.e.
# MKL is missing)
OPENBLAS_INTERNAL = 0

# If you have nothing better, use the local copy :
# LAPACK_LIBS = /your/path/to/espresso/lapack-3.2/lapack.a
# LAPACK_LIBS_SWITCH = internal
# For IBM machines with essl (-D__ESSL): load essl BEFORE lapack !
# remember that LAPACK_LIBS precedes BLAS_LIBS in loading order

# CBLAS is used in case the C interface for BLAS is missing (i.e. ACML)
CBLAS_ENABLED = 0

LAPACK_LIBS    =
LAPACK_LIBS_SWITCH = external

ELPA_LIBS_SWITCH = disabled
SCALAPACK_LIBS =

# nothing needed here if the internal copy of FFTW is compiled
# (needs -D__FFTW in DFLAGS)

FFT_LIBS       =

# For parallel execution, the correct path to MPI libraries must
# be specified in MPI_LIBS (except for IBM if you use mpxlf)

MPI_LIBS       =

# IBM-specific: MASS libraries, if available and if -D__MASS is defined in FDFLAGS

MASS_LIBS      =

# ar command and flags - for most architectures: AR = ar, ARFLAGS = ruv

AR             = ar
ARFLAGS        = ruv

# ranlib command. If ranlib is not needed (it isn't in most cases) use
# RANLIB = echo

RANLIB         = ranlib

# all internal and external libraries - do not modify

FLIB_TARGETS   = all

# CUDA section
# nvcc is taken from the same toolkit release (5.5.20-1.0402.7700.8.1) as the
# CUDA include directory listed in IFLAGS and the cudatoolkit module recorded
# in the file header; the previous 5.0.35.102 path mixed two toolkit releases.
NVCC             = /opt/nvidia/cudatoolkit/5.5.20-1.0402.7700.8.1/bin/nvcc
NVCCFLAGS        = -O3 -gencode arch=compute_35,code=sm_35

PHIGEMM_INTERNAL = 1
PHIGEMM_SYMBOLS  = 1
MAGMA_INTERNAL   = 0

LIBOBJS        = ../flib/ptools.a ../flib/flib.a ../clib/clib.a ../iotk/src/libiotk.a
# Link order: LAPACK_LIBS comes before BLAS_LIBS, as required by the
# loading-order remark above.
LIBS           = $(SCALAPACK_LIBS) $(LAPACK_LIBS) $(FFT_LIBS) $(BLAS_LIBS) $(MPI_LIBS) $(MASS_LIBS) $(LD_LIBS)

# wget or curl - useful to download from network
WGET = wget -O