MEASURE_R2N macro added for measuring the performance of reciprocal_to_normal_squared_openmp (by JS)

This commit is contained in:
Atsushi Togo 2015-10-03 13:27:16 +09:00
parent 22bb439159
commit fcdcd6aaf4
4 changed files with 117 additions and 57 deletions

View File

@ -47,24 +47,24 @@ static const int index_exchange[6][3] = {{0, 1, 2},
{2, 1, 0}, {2, 1, 0},
{0, 2, 1}, {0, 2, 1},
{1, 0, 2}}; {1, 0, 2}};
static void get_interaction_at_triplet static void get_interaction_at_triplet(Darray *fc3_normal_squared,
(Darray *fc3_normal_squared, const int i,
const int i, const Darray *frequencies,
const Darray *frequencies, const Carray *eigenvectors,
const Carray *eigenvectors, const Iarray *triplets,
const Iarray *triplets, const int *grid_address,
const int *grid_address, const int *mesh,
const int *mesh, const Darray *fc3,
const Darray *fc3, const Darray *shortest_vectors,
const Darray *shortest_vectors, const Iarray *multiplicity,
const Iarray *multiplicity, const double *masses,
const double *masses, const int *p2s_map,
const int *p2s_map, const int *s2p_map,
const int *s2p_map, const int *band_indices,
const int *band_indices, const int symmetrize_fc3_q,
const int symmetrize_fc3_q, const double cutoff_frequency,
const double cutoff_frequency, const int num_triplets,
const int openmp_at_bands); const int openmp_at_bands);
static void real_to_normal(double *fc3_normal_squared, static void real_to_normal(double *fc3_normal_squared,
const double *freqs0, const double *freqs0,
const double *freqs1, const double *freqs1,
@ -83,6 +83,8 @@ static void real_to_normal(double *fc3_normal_squared,
const int num_band0, const int num_band0,
const int num_band, const int num_band,
const double cutoff_frequency, const double cutoff_frequency,
const int triplet_index,
const int num_triplets,
const int openmp_at_bands); const int openmp_at_bands);
static void real_to_normal_sym_q(double *fc3_normal_squared, static void real_to_normal_sym_q(double *fc3_normal_squared,
double *freqs[3], double *freqs[3],
@ -98,6 +100,8 @@ static void real_to_normal_sym_q(double *fc3_normal_squared,
const int num_band0, const int num_band0,
const int num_band, const int num_band,
const double cutoff_frequency, const double cutoff_frequency,
const int triplet_index,
const int num_triplets,
const int openmp_at_bands); const int openmp_at_bands);
/* fc3_normal_squared[num_triplets, num_band0, num_band, num_band] */ /* fc3_normal_squared[num_triplets, num_band0, num_band, num_band] */
@ -140,6 +144,7 @@ void get_interaction(Darray *fc3_normal_squared,
band_indices, band_indices,
symmetrize_fc3_q, symmetrize_fc3_q,
cutoff_frequency, cutoff_frequency,
triplets->dims[0],
0); 0);
} }
} else { } else {
@ -160,29 +165,30 @@ void get_interaction(Darray *fc3_normal_squared,
band_indices, band_indices,
symmetrize_fc3_q, symmetrize_fc3_q,
cutoff_frequency, cutoff_frequency,
triplets->dims[0],
1); 1);
} }
} }
} }
static void get_interaction_at_triplet static void get_interaction_at_triplet(Darray *fc3_normal_squared,
(Darray *fc3_normal_squared, const int i,
const int i, const Darray *frequencies,
const Darray *frequencies, const Carray *eigenvectors,
const Carray *eigenvectors, const Iarray *triplets,
const Iarray *triplets, const int *grid_address,
const int *grid_address, const int *mesh,
const int *mesh, const Darray *fc3,
const Darray *fc3, const Darray *shortest_vectors,
const Darray *shortest_vectors, const Iarray *multiplicity,
const Iarray *multiplicity, const double *masses,
const double *masses, const int *p2s_map,
const int *p2s_map, const int *s2p_map,
const int *s2p_map, const int *band_indices,
const int *band_indices, const int symmetrize_fc3_q,
const int symmetrize_fc3_q, const double cutoff_frequency,
const double cutoff_frequency, const int num_triplets,
const int openmp_at_bands) const int openmp_at_bands)
{ {
int j, k, gp, num_band, num_band0; int j, k, gp, num_band, num_band0;
double *freqs[3]; double *freqs[3];
@ -217,6 +223,8 @@ static void get_interaction_at_triplet
num_band0, num_band0,
num_band, num_band,
cutoff_frequency, cutoff_frequency,
i,
num_triplets,
openmp_at_bands); openmp_at_bands);
} else { } else {
real_to_normal((fc3_normal_squared->data + real_to_normal((fc3_normal_squared->data +
@ -238,6 +246,8 @@ static void get_interaction_at_triplet
num_band0, num_band0,
num_band, num_band,
cutoff_frequency, cutoff_frequency,
i,
num_triplets,
openmp_at_bands); openmp_at_bands);
} }
} }
@ -260,6 +270,8 @@ static void real_to_normal(double *fc3_normal_squared,
const int num_band0, const int num_band0,
const int num_band, const int num_band,
const double cutoff_frequency, const double cutoff_frequency,
const int triplet_index,
const int num_triplets,
const int openmp_at_bands) const int openmp_at_bands)
{ {
int num_patom; int num_patom;
@ -280,6 +292,10 @@ static void real_to_normal(double *fc3_normal_squared,
s2p_map); s2p_map);
if (openmp_at_bands) { if (openmp_at_bands) {
#ifdef MEASURE_R2N
printf("At triplet %d/%d (# of bands=%d):\n",
triplet_index, num_triplets, num_band0);
#endif
reciprocal_to_normal_squared_openmp(fc3_normal_squared, reciprocal_to_normal_squared_openmp(fc3_normal_squared,
fc3_reciprocal, fc3_reciprocal,
freqs0, freqs0,
@ -326,6 +342,8 @@ static void real_to_normal_sym_q(double *fc3_normal_squared,
const int num_band0, const int num_band0,
const int num_band, const int num_band,
const double cutoff_frequency, const double cutoff_frequency,
const int triplet_index,
const int num_triplets,
const int openmp_at_bands) const int openmp_at_bands)
{ {
int i, j, k, l; int i, j, k, l;
@ -364,6 +382,8 @@ static void real_to_normal_sym_q(double *fc3_normal_squared,
num_band, num_band,
num_band, num_band,
cutoff_frequency, cutoff_frequency,
triplet_index,
num_triplets,
openmp_at_bands); openmp_at_bands);
for (j = 0; j < num_band0; j++) { for (j = 0; j < num_band0; j++) {
for (k = 0; k < num_band; k++) { for (k = 0; k < num_band; k++) {

View File

@ -38,6 +38,9 @@
#include <phonoc_array.h> #include <phonoc_array.h>
#include <phonon3_h/reciprocal_to_normal.h> #include <phonon3_h/reciprocal_to_normal.h>
#ifdef MEASURE_R2N
#include <time.h>
#endif
static lapack_complex_double fc3_sum_in_reciprocal_to_normal static lapack_complex_double fc3_sum_in_reciprocal_to_normal
(const int bi0, (const int bi0,
@ -128,11 +131,23 @@ void reciprocal_to_normal_squared_openmp
{ {
int i, j, k, jk, bi, num_atom; int i, j, k, jk, bi, num_atom;
#ifdef MEASURE_R2N
double loopTotalCPUTime, loopTotalWallTime;
time_t loopStartWallTime;
clock_t loopStartCPUTime;
#endif
num_atom = num_band / 3; num_atom = num_band / 3;
for (i = 0; i < num_band0; i++) { for (i = 0; i < num_band0; i++) {
bi = band_indices[i]; bi = band_indices[i];
if (freqs0[bi] > cutoff_frequency) { if (freqs0[bi] > cutoff_frequency) {
#ifdef MEASURE_R2N
loopStartWallTime = time(NULL);
loopStartCPUTime = clock();
#endif
#pragma omp parallel for private(j, k) #pragma omp parallel for private(j, k)
for (jk = 0; jk < num_band * num_band; jk++) { for (jk = 0; jk < num_band * num_band; jk++) {
j = jk / num_band; j = jk / num_band;
@ -147,10 +162,32 @@ void reciprocal_to_normal_squared_openmp
num_band, num_band,
cutoff_frequency); cutoff_frequency);
} }
#ifdef MEASURE_R2N
loopTotalCPUTime = (double)(clock() - loopStartCPUTime) / CLOCKS_PER_SEC;
loopTotalWallTime = difftime(time(NULL), loopStartWallTime);
printf(" Band index %d/%d %1.3fs (%1.3fs CPU)\n",
i + 1, num_band0, loopTotalWallTime, loopTotalCPUTime);
/* #else */
/* printf("*"); */
/* if (i == (num_band0 - 1)) { */
/* printf("\n"); */
/* } */
/* if ((i % 20) == 0 && i != 0) { */
/* printf("\n"); */
/* } */
#endif
} else { } else {
for (j = 0; j < num_band * num_band; j++) { for (j = 0; j < num_band * num_band; j++) {
fc3_normal_squared[i * num_band * num_band + j] = 0; fc3_normal_squared[i * num_band * num_band + j] = 0;
} }
#ifdef MEASURE_R2N
printf(" Band index %d/%d skipped due to frequency cutoff...\n",
i + 1, num_band0);
#endif
} }
} }
} }
@ -173,30 +210,23 @@ static void set_fc3_sum
const int num_band, const int num_band,
const double cutoff_frequency) const double cutoff_frequency)
{ {
int l;
double fff, sum_real, sum_imag; double fff, sum_real, sum_imag;
lapack_complex_double fc3_sum; lapack_complex_double fc3_sum;
if (freqs1[j] > cutoff_frequency) { if (freqs1[j] > cutoff_frequency && freqs2[k] > cutoff_frequency) {
if (freqs2[k] > cutoff_frequency) { fff = freqs0[bi] * freqs1[j] * freqs2[k];
fff = freqs0[bi] * freqs1[j] * freqs2[k]; fc3_sum = fc3_sum_in_reciprocal_to_normal
fc3_sum = fc3_sum_in_reciprocal_to_normal (bi, j, k,
(bi, j, k, eigvecs0, eigvecs1, eigvecs2,
eigvecs0, eigvecs1, eigvecs2, fc3_reciprocal,
fc3_reciprocal, masses,
masses, num_atom);
num_atom); sum_real = lapack_complex_double_real(fc3_sum);
sum_real = lapack_complex_double_real(fc3_sum); sum_imag = lapack_complex_double_imag(fc3_sum);
sum_imag = lapack_complex_double_imag(fc3_sum); fc3_normal_squared[i * num_band * num_band + j * num_band + k] =
fc3_normal_squared[i * num_band * num_band + j * num_band + k] = (sum_real * sum_real + sum_imag * sum_imag) / fff;
(sum_real * sum_real + sum_imag * sum_imag) / fff;
} else {
fc3_normal_squared[i * num_band * num_band + j * num_band + k] = 0;
}
} else { } else {
for (l = 0; l < num_band; l++) { fc3_normal_squared[i * num_band * num_band + j * num_band + k] = 0;
fc3_normal_squared[i * num_band * num_band + j * num_band + l] = 0;
}
} }
} }

View File

@ -34,12 +34,17 @@ include_dirs = (['c/harmonic_h',
'c/spglib_h', 'c/spglib_h',
'c/kspclib_h'] + 'c/kspclib_h'] +
include_dirs_numpy) include_dirs_numpy)
define_macros = []
## ##
## Uncomment and modify below if lapacke is prepared in a special location ## Uncomment and modify below if lapacke is prepared in a special location
# #
include_dirs += ['../lapack-3.5.0/lapacke/include'] include_dirs += ['../lapack-3.5.0/lapacke/include']
extra_link_args = ['-lgomp', '../lapack-3.5.0/liblapacke.a'] extra_link_args = ['-lgomp', '../lapack-3.5.0/liblapacke.a']
## Uncomment below to measure reciprocal_to_normal_squared_openmp performance
# define_macros = [('MEASURE_R2N', None)]
## ##
## This is for the test of libflame ## This is for the test of libflame
## ##
@ -49,12 +54,13 @@ extra_link_args = ['-lgomp', '../lapack-3.5.0/liblapacke.a']
# extra_link_args.append('../libflame-bin/lib/libflame.a') # extra_link_args.append('../libflame-bin/lib/libflame.a')
# include_dirs_libflame = ['../libflame-bin/include'] # include_dirs_libflame = ['../libflame-bin/include']
# include_dirs += include_dirs_libflame # include_dirs += include_dirs_libflame
extension_phono3py = Extension( extension_phono3py = Extension(
'anharmonic._phono3py', 'anharmonic._phono3py',
include_dirs=include_dirs, include_dirs=include_dirs,
extra_compile_args=extra_compile_args, extra_compile_args=extra_compile_args,
extra_link_args=extra_link_args, extra_link_args=extra_link_args,
define_macros=define_macros,
sources=sources) sources=sources)
packages_phono3py = ['anharmonic', packages_phono3py = ['anharmonic',

View File

@ -40,6 +40,9 @@ include_dirs = (['c/harmonic_h',
# include_dirs += ['../lapack-3.5.0/lapacke/include'] # include_dirs += ['../lapack-3.5.0/lapacke/include']
# extra_link_args = ['-lgomp', '../lapack-3.5.0/liblapacke.a'] # extra_link_args = ['-lgomp', '../lapack-3.5.0/liblapacke.a']
## Uncomment below to measure reciprocal_to_normal_squared_openmp performance
# define_macros = [('MEASURE_R2N', None)]
## ##
## This is for the test of libflame ## This is for the test of libflame
## ##
@ -55,6 +58,7 @@ extension_phono3py = Extension(
include_dirs=include_dirs, include_dirs=include_dirs,
extra_compile_args=extra_compile_args, extra_compile_args=extra_compile_args,
extra_link_args=extra_link_args, extra_link_args=extra_link_args,
define_macros=define_macros,
sources=sources) sources=sources)
packages_phono3py = ['anharmonic', packages_phono3py = ['anharmonic',