mirror of https://github.com/phonopy/phonopy.git
MEASURE_R2N macro is made for measuring performance of reciprocal_to_normal_squared_openmp by JS
This commit is contained in:
parent
22bb439159
commit
fcdcd6aaf4
|
@ -47,24 +47,24 @@ static const int index_exchange[6][3] = {{0, 1, 2},
|
|||
{2, 1, 0},
|
||||
{0, 2, 1},
|
||||
{1, 0, 2}};
|
||||
static void get_interaction_at_triplet
|
||||
(Darray *fc3_normal_squared,
|
||||
const int i,
|
||||
const Darray *frequencies,
|
||||
const Carray *eigenvectors,
|
||||
const Iarray *triplets,
|
||||
const int *grid_address,
|
||||
const int *mesh,
|
||||
const Darray *fc3,
|
||||
const Darray *shortest_vectors,
|
||||
const Iarray *multiplicity,
|
||||
const double *masses,
|
||||
const int *p2s_map,
|
||||
const int *s2p_map,
|
||||
const int *band_indices,
|
||||
const int symmetrize_fc3_q,
|
||||
const double cutoff_frequency,
|
||||
const int openmp_at_bands);
|
||||
static void get_interaction_at_triplet(Darray *fc3_normal_squared,
|
||||
const int i,
|
||||
const Darray *frequencies,
|
||||
const Carray *eigenvectors,
|
||||
const Iarray *triplets,
|
||||
const int *grid_address,
|
||||
const int *mesh,
|
||||
const Darray *fc3,
|
||||
const Darray *shortest_vectors,
|
||||
const Iarray *multiplicity,
|
||||
const double *masses,
|
||||
const int *p2s_map,
|
||||
const int *s2p_map,
|
||||
const int *band_indices,
|
||||
const int symmetrize_fc3_q,
|
||||
const double cutoff_frequency,
|
||||
const int num_triplets,
|
||||
const int openmp_at_bands);
|
||||
static void real_to_normal(double *fc3_normal_squared,
|
||||
const double *freqs0,
|
||||
const double *freqs1,
|
||||
|
@ -83,6 +83,8 @@ static void real_to_normal(double *fc3_normal_squared,
|
|||
const int num_band0,
|
||||
const int num_band,
|
||||
const double cutoff_frequency,
|
||||
const int triplet_index,
|
||||
const int num_triplets,
|
||||
const int openmp_at_bands);
|
||||
static void real_to_normal_sym_q(double *fc3_normal_squared,
|
||||
double *freqs[3],
|
||||
|
@ -98,6 +100,8 @@ static void real_to_normal_sym_q(double *fc3_normal_squared,
|
|||
const int num_band0,
|
||||
const int num_band,
|
||||
const double cutoff_frequency,
|
||||
const int triplet_index,
|
||||
const int num_triplets,
|
||||
const int openmp_at_bands);
|
||||
|
||||
/* fc3_normal_squared[num_triplets, num_band0, num_band, num_band] */
|
||||
|
@ -140,6 +144,7 @@ void get_interaction(Darray *fc3_normal_squared,
|
|||
band_indices,
|
||||
symmetrize_fc3_q,
|
||||
cutoff_frequency,
|
||||
triplets->dims[0],
|
||||
0);
|
||||
}
|
||||
} else {
|
||||
|
@ -160,29 +165,30 @@ void get_interaction(Darray *fc3_normal_squared,
|
|||
band_indices,
|
||||
symmetrize_fc3_q,
|
||||
cutoff_frequency,
|
||||
triplets->dims[0],
|
||||
1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void get_interaction_at_triplet
|
||||
(Darray *fc3_normal_squared,
|
||||
const int i,
|
||||
const Darray *frequencies,
|
||||
const Carray *eigenvectors,
|
||||
const Iarray *triplets,
|
||||
const int *grid_address,
|
||||
const int *mesh,
|
||||
const Darray *fc3,
|
||||
const Darray *shortest_vectors,
|
||||
const Iarray *multiplicity,
|
||||
const double *masses,
|
||||
const int *p2s_map,
|
||||
const int *s2p_map,
|
||||
const int *band_indices,
|
||||
const int symmetrize_fc3_q,
|
||||
const double cutoff_frequency,
|
||||
const int openmp_at_bands)
|
||||
static void get_interaction_at_triplet(Darray *fc3_normal_squared,
|
||||
const int i,
|
||||
const Darray *frequencies,
|
||||
const Carray *eigenvectors,
|
||||
const Iarray *triplets,
|
||||
const int *grid_address,
|
||||
const int *mesh,
|
||||
const Darray *fc3,
|
||||
const Darray *shortest_vectors,
|
||||
const Iarray *multiplicity,
|
||||
const double *masses,
|
||||
const int *p2s_map,
|
||||
const int *s2p_map,
|
||||
const int *band_indices,
|
||||
const int symmetrize_fc3_q,
|
||||
const double cutoff_frequency,
|
||||
const int num_triplets,
|
||||
const int openmp_at_bands)
|
||||
{
|
||||
int j, k, gp, num_band, num_band0;
|
||||
double *freqs[3];
|
||||
|
@ -217,6 +223,8 @@ static void get_interaction_at_triplet
|
|||
num_band0,
|
||||
num_band,
|
||||
cutoff_frequency,
|
||||
i,
|
||||
num_triplets,
|
||||
openmp_at_bands);
|
||||
} else {
|
||||
real_to_normal((fc3_normal_squared->data +
|
||||
|
@ -238,6 +246,8 @@ static void get_interaction_at_triplet
|
|||
num_band0,
|
||||
num_band,
|
||||
cutoff_frequency,
|
||||
i,
|
||||
num_triplets,
|
||||
openmp_at_bands);
|
||||
}
|
||||
}
|
||||
|
@ -260,6 +270,8 @@ static void real_to_normal(double *fc3_normal_squared,
|
|||
const int num_band0,
|
||||
const int num_band,
|
||||
const double cutoff_frequency,
|
||||
const int triplet_index,
|
||||
const int num_triplets,
|
||||
const int openmp_at_bands)
|
||||
{
|
||||
int num_patom;
|
||||
|
@ -280,6 +292,10 @@ static void real_to_normal(double *fc3_normal_squared,
|
|||
s2p_map);
|
||||
|
||||
if (openmp_at_bands) {
|
||||
#ifdef MEASURE_R2N
|
||||
printf("At triplet %d/%d (# of bands=%d):\n",
|
||||
triplet_index, num_triplets, num_band0);
|
||||
#endif
|
||||
reciprocal_to_normal_squared_openmp(fc3_normal_squared,
|
||||
fc3_reciprocal,
|
||||
freqs0,
|
||||
|
@ -326,6 +342,8 @@ static void real_to_normal_sym_q(double *fc3_normal_squared,
|
|||
const int num_band0,
|
||||
const int num_band,
|
||||
const double cutoff_frequency,
|
||||
const int triplet_index,
|
||||
const int num_triplets,
|
||||
const int openmp_at_bands)
|
||||
{
|
||||
int i, j, k, l;
|
||||
|
@ -364,6 +382,8 @@ static void real_to_normal_sym_q(double *fc3_normal_squared,
|
|||
num_band,
|
||||
num_band,
|
||||
cutoff_frequency,
|
||||
triplet_index,
|
||||
num_triplets,
|
||||
openmp_at_bands);
|
||||
for (j = 0; j < num_band0; j++) {
|
||||
for (k = 0; k < num_band; k++) {
|
||||
|
|
|
@ -38,6 +38,9 @@
|
|||
#include <phonoc_array.h>
|
||||
#include <phonon3_h/reciprocal_to_normal.h>
|
||||
|
||||
#ifdef MEASURE_R2N
|
||||
#include <time.h>
|
||||
#endif
|
||||
|
||||
static lapack_complex_double fc3_sum_in_reciprocal_to_normal
|
||||
(const int bi0,
|
||||
|
@ -128,11 +131,23 @@ void reciprocal_to_normal_squared_openmp
|
|||
{
|
||||
int i, j, k, jk, bi, num_atom;
|
||||
|
||||
#ifdef MEASURE_R2N
|
||||
double loopTotalCPUTime, loopTotalWallTime;
|
||||
time_t loopStartWallTime;
|
||||
clock_t loopStartCPUTime;
|
||||
#endif
|
||||
|
||||
num_atom = num_band / 3;
|
||||
|
||||
for (i = 0; i < num_band0; i++) {
|
||||
bi = band_indices[i];
|
||||
if (freqs0[bi] > cutoff_frequency) {
|
||||
|
||||
#ifdef MEASURE_R2N
|
||||
loopStartWallTime = time(NULL);
|
||||
loopStartCPUTime = clock();
|
||||
#endif
|
||||
|
||||
#pragma omp parallel for private(j, k)
|
||||
for (jk = 0; jk < num_band * num_band; jk++) {
|
||||
j = jk / num_band;
|
||||
|
@ -147,10 +162,32 @@ void reciprocal_to_normal_squared_openmp
|
|||
num_band,
|
||||
cutoff_frequency);
|
||||
}
|
||||
|
||||
#ifdef MEASURE_R2N
|
||||
loopTotalCPUTime = (double)(clock() - loopStartCPUTime) / CLOCKS_PER_SEC;
|
||||
loopTotalWallTime = difftime(time(NULL), loopStartWallTime);
|
||||
printf(" Band index %d/%d %1.3fs (%1.3fs CPU)\n",
|
||||
i + 1, num_band0, loopTotalWallTime, loopTotalCPUTime);
|
||||
/* #else */
|
||||
/* printf("*"); */
|
||||
/* if (i == (num_band0 - 1)) { */
|
||||
/* printf("\n"); */
|
||||
/* } */
|
||||
/* if ((i % 20) == 0 && i != 0) { */
|
||||
/* printf("\n"); */
|
||||
/* } */
|
||||
#endif
|
||||
|
||||
} else {
|
||||
for (j = 0; j < num_band * num_band; j++) {
|
||||
fc3_normal_squared[i * num_band * num_band + j] = 0;
|
||||
}
|
||||
|
||||
#ifdef MEASURE_R2N
|
||||
printf(" Band index %d/%d skipped due to frequency cutoff...\n",
|
||||
i + 1, num_band0);
|
||||
#endif
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -173,30 +210,23 @@ static void set_fc3_sum
|
|||
const int num_band,
|
||||
const double cutoff_frequency)
|
||||
{
|
||||
int l;
|
||||
double fff, sum_real, sum_imag;
|
||||
lapack_complex_double fc3_sum;
|
||||
|
||||
if (freqs1[j] > cutoff_frequency) {
|
||||
if (freqs2[k] > cutoff_frequency) {
|
||||
fff = freqs0[bi] * freqs1[j] * freqs2[k];
|
||||
fc3_sum = fc3_sum_in_reciprocal_to_normal
|
||||
(bi, j, k,
|
||||
eigvecs0, eigvecs1, eigvecs2,
|
||||
fc3_reciprocal,
|
||||
masses,
|
||||
num_atom);
|
||||
sum_real = lapack_complex_double_real(fc3_sum);
|
||||
sum_imag = lapack_complex_double_imag(fc3_sum);
|
||||
fc3_normal_squared[i * num_band * num_band + j * num_band + k] =
|
||||
(sum_real * sum_real + sum_imag * sum_imag) / fff;
|
||||
} else {
|
||||
fc3_normal_squared[i * num_band * num_band + j * num_band + k] = 0;
|
||||
}
|
||||
if (freqs1[j] > cutoff_frequency && freqs2[k] > cutoff_frequency) {
|
||||
fff = freqs0[bi] * freqs1[j] * freqs2[k];
|
||||
fc3_sum = fc3_sum_in_reciprocal_to_normal
|
||||
(bi, j, k,
|
||||
eigvecs0, eigvecs1, eigvecs2,
|
||||
fc3_reciprocal,
|
||||
masses,
|
||||
num_atom);
|
||||
sum_real = lapack_complex_double_real(fc3_sum);
|
||||
sum_imag = lapack_complex_double_imag(fc3_sum);
|
||||
fc3_normal_squared[i * num_band * num_band + j * num_band + k] =
|
||||
(sum_real * sum_real + sum_imag * sum_imag) / fff;
|
||||
} else {
|
||||
for (l = 0; l < num_band; l++) {
|
||||
fc3_normal_squared[i * num_band * num_band + j * num_band + l] = 0;
|
||||
}
|
||||
fc3_normal_squared[i * num_band * num_band + j * num_band + k] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -34,12 +34,17 @@ include_dirs = (['c/harmonic_h',
|
|||
'c/spglib_h',
|
||||
'c/kspclib_h'] +
|
||||
include_dirs_numpy)
|
||||
define_macros = []
|
||||
|
||||
##
|
||||
## Uncomment and modify below if lapacke is prepared in a special location
|
||||
#
|
||||
include_dirs += ['../lapack-3.5.0/lapacke/include']
|
||||
extra_link_args = ['-lgomp', '../lapack-3.5.0/liblapacke.a']
|
||||
|
||||
## Uncomment below to measure reciprocal_to_normal_squared_openmp performance
|
||||
# define_macros = [('MEASURE_R2N', None)]
|
||||
|
||||
##
|
||||
## This is for the test of libflame
|
||||
##
|
||||
|
@ -49,12 +54,13 @@ extra_link_args = ['-lgomp', '../lapack-3.5.0/liblapacke.a']
|
|||
# extra_link_args.append('../libflame-bin/lib/libflame.a')
|
||||
# include_dirs_libflame = ['../libflame-bin/include']
|
||||
# include_dirs += include_dirs_libflame
|
||||
|
||||
|
||||
extension_phono3py = Extension(
|
||||
'anharmonic._phono3py',
|
||||
include_dirs=include_dirs,
|
||||
extra_compile_args=extra_compile_args,
|
||||
extra_link_args=extra_link_args,
|
||||
define_macros=define_macros,
|
||||
sources=sources)
|
||||
|
||||
packages_phono3py = ['anharmonic',
|
||||
|
|
|
@ -40,6 +40,9 @@ include_dirs = (['c/harmonic_h',
|
|||
# include_dirs += ['../lapack-3.5.0/lapacke/include']
|
||||
# extra_link_args = ['-lgomp', '../lapack-3.5.0/liblapacke.a']
|
||||
|
||||
## Uncomment below to measure reciprocal_to_normal_squared_openmp performance
|
||||
# define_macros = [('MEASURE_R2N', None)]
|
||||
|
||||
##
|
||||
## This is for the test of libflame
|
||||
##
|
||||
|
@ -55,6 +58,7 @@ extension_phono3py = Extension(
|
|||
include_dirs=include_dirs,
|
||||
extra_compile_args=extra_compile_args,
|
||||
extra_link_args=extra_link_args,
|
||||
define_macros=define_macros,
|
||||
sources=sources)
|
||||
|
||||
packages_phono3py = ['anharmonic',
|
||||
|
|
Loading…
Reference in New Issue