mirror of https://github.com/phonopy/phono3py.git
Memory layout of fc3_reciprocal was reorderd.
This commit is contained in:
parent
c3b7724b98
commit
7fe6007f90
|
@ -312,14 +312,11 @@ static void real_to_normal(double *fc3_normal_squared,
|
|||
const long num_triplets,
|
||||
const long openmp_at_bands)
|
||||
{
|
||||
long num_patom;
|
||||
lapack_complex_double *fc3_reciprocal;
|
||||
|
||||
num_patom = num_band / 3;
|
||||
|
||||
fc3_reciprocal =
|
||||
(lapack_complex_double *)malloc(sizeof(lapack_complex_double) *
|
||||
num_patom * num_patom * num_patom * 27);
|
||||
num_band * num_band * num_band);
|
||||
r2r_real_to_reciprocal(fc3_reciprocal,
|
||||
q_vecs,
|
||||
fc3,
|
||||
|
|
224
c/pp_collision.c
224
c/pp_collision.c
|
@ -114,16 +114,19 @@ void ppc_get_pp_collision(double *imag_self_energy,
|
|||
num_band = multi_dims[1] * 3;
|
||||
num_band_prod = num_band0 * num_band * num_band;
|
||||
num_temps = temperatures->dims[0];
|
||||
ise = (double*)malloc(sizeof(double) * num_triplets * num_temps * num_band0);
|
||||
freqs_at_gp = (double*)malloc(sizeof(double) * num_band0);
|
||||
for (i = 0; i < num_band0; i++) {
|
||||
freqs_at_gp[i] = frequencies[triplets[0][0] * num_band
|
||||
+ band_indices->data[i]];
|
||||
ise = (double *)malloc(sizeof(double) * num_triplets * num_temps * num_band0);
|
||||
freqs_at_gp = (double *)malloc(sizeof(double) * num_band0);
|
||||
for (i = 0; i < num_band0; i++)
|
||||
{
|
||||
freqs_at_gp[i] = frequencies[triplets[0][0] * num_band + band_indices->data[i]];
|
||||
}
|
||||
|
||||
if (num_triplets > num_band) {
|
||||
if (num_triplets > num_band)
|
||||
{
|
||||
openmp_per_triplets = 1;
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
openmp_per_triplets = 0;
|
||||
}
|
||||
|
||||
|
@ -134,20 +137,21 @@ void ppc_get_pp_collision(double *imag_self_energy,
|
|||
#ifdef PHPYOPENMP
|
||||
#pragma omp parallel for schedule(guided) private(g, g_zero) if (openmp_per_triplets)
|
||||
#endif
|
||||
for (i = 0; i < num_triplets; i++) {
|
||||
g = (double*)malloc(sizeof(double) * 2 * num_band_prod);
|
||||
g_zero = (char*)malloc(sizeof(char) * num_band_prod);
|
||||
for (i = 0; i < num_triplets; i++)
|
||||
{
|
||||
g = (double *)malloc(sizeof(double) * 2 * num_band_prod);
|
||||
g_zero = (char *)malloc(sizeof(char) * num_band_prod);
|
||||
tpi_get_integration_weight(g,
|
||||
g_zero,
|
||||
freqs_at_gp, /* used as f0 */
|
||||
freqs_at_gp, /* used as f0 */
|
||||
num_band0,
|
||||
tp_relative_grid_address,
|
||||
triplets[i],
|
||||
1,
|
||||
bzgrid,
|
||||
frequencies, /* used as f1 */
|
||||
frequencies, /* used as f1 */
|
||||
num_band,
|
||||
frequencies, /* used as f2 */
|
||||
frequencies, /* used as f2 */
|
||||
num_band,
|
||||
2,
|
||||
1 - openmp_per_triplets);
|
||||
|
@ -199,28 +203,28 @@ void ppc_get_pp_collision(double *imag_self_energy,
|
|||
}
|
||||
|
||||
void ppc_get_pp_collision_with_sigma(
|
||||
double *imag_self_energy,
|
||||
const double sigma,
|
||||
const double sigma_cutoff,
|
||||
const double *frequencies,
|
||||
const lapack_complex_double *eigenvectors,
|
||||
const long (*triplets)[3],
|
||||
const long num_triplets,
|
||||
const long *triplet_weights,
|
||||
const ConstBZGrid *bzgrid,
|
||||
const double *fc3,
|
||||
const long is_compact_fc3,
|
||||
const double (*svecs)[3],
|
||||
const long multi_dims[2],
|
||||
const long (*multiplicity)[2],
|
||||
const double *masses,
|
||||
const long *p2s_map,
|
||||
const long *s2p_map,
|
||||
const Larray *band_indices,
|
||||
const Darray *temperatures,
|
||||
const long is_NU,
|
||||
const long symmetrize_fc3_q,
|
||||
const double cutoff_frequency)
|
||||
double *imag_self_energy,
|
||||
const double sigma,
|
||||
const double sigma_cutoff,
|
||||
const double *frequencies,
|
||||
const lapack_complex_double *eigenvectors,
|
||||
const long (*triplets)[3],
|
||||
const long num_triplets,
|
||||
const long *triplet_weights,
|
||||
const ConstBZGrid *bzgrid,
|
||||
const double *fc3,
|
||||
const long is_compact_fc3,
|
||||
const double (*svecs)[3],
|
||||
const long multi_dims[2],
|
||||
const long (*multiplicity)[2],
|
||||
const double *masses,
|
||||
const long *p2s_map,
|
||||
const long *s2p_map,
|
||||
const Larray *band_indices,
|
||||
const Darray *temperatures,
|
||||
const long is_NU,
|
||||
const long symmetrize_fc3_q,
|
||||
const double cutoff_frequency)
|
||||
{
|
||||
long i;
|
||||
long num_band, num_band0, num_band_prod, num_temps;
|
||||
|
@ -240,16 +244,20 @@ void ppc_get_pp_collision_with_sigma(
|
|||
num_temps = temperatures->dims[0];
|
||||
const_adrs_shift = num_band_prod;
|
||||
|
||||
ise = (double*)malloc(sizeof(double) * num_triplets * num_temps * num_band0);
|
||||
freqs_at_gp = (double*)malloc(sizeof(double) * num_band0);
|
||||
for (i = 0; i < num_band0; i++) {
|
||||
ise = (double *)malloc(sizeof(double) * num_triplets * num_temps * num_band0);
|
||||
freqs_at_gp = (double *)malloc(sizeof(double) * num_band0);
|
||||
for (i = 0; i < num_band0; i++)
|
||||
{
|
||||
freqs_at_gp[i] = frequencies[triplets[0][0] * num_band +
|
||||
band_indices->data[i]];
|
||||
}
|
||||
|
||||
if (num_triplets > num_band) {
|
||||
if (num_triplets > num_band)
|
||||
{
|
||||
openmp_per_triplets = 1;
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
openmp_per_triplets = 0;
|
||||
}
|
||||
|
||||
|
@ -258,9 +266,10 @@ void ppc_get_pp_collision_with_sigma(
|
|||
#ifdef PHPYOPENMP
|
||||
#pragma omp parallel for schedule(guided) private(g, g_zero) if (openmp_per_triplets)
|
||||
#endif
|
||||
for (i = 0; i < num_triplets; i++) {
|
||||
g = (double*)malloc(sizeof(double) * 2 * num_band_prod);
|
||||
g_zero = (char*)malloc(sizeof(char) * num_band_prod);
|
||||
for (i = 0; i < num_triplets; i++)
|
||||
{
|
||||
g = (double *)malloc(sizeof(double) * 2 * num_band_prod);
|
||||
g_zero = (char *)malloc(sizeof(char) * num_band_prod);
|
||||
tpi_get_integration_weight_with_sigma(g,
|
||||
g_zero,
|
||||
sigma,
|
||||
|
@ -348,16 +357,17 @@ static void get_collision(double *ise,
|
|||
long i;
|
||||
long num_band_prod, num_g_pos;
|
||||
double *fc3_normal_squared;
|
||||
long (*g_pos)[4];
|
||||
long(*g_pos)[4];
|
||||
|
||||
fc3_normal_squared = NULL;
|
||||
g_pos = NULL;
|
||||
|
||||
num_band_prod = num_band0 * num_band * num_band;
|
||||
fc3_normal_squared = (double*)malloc(sizeof(double) * num_band_prod);
|
||||
fc3_normal_squared = (double *)malloc(sizeof(double) * num_band_prod);
|
||||
g_pos = (long(*)[4])malloc(sizeof(long[4]) * num_band_prod);
|
||||
|
||||
for (i = 0; i < num_band_prod; i++) {
|
||||
for (i = 0; i < num_band_prod; i++)
|
||||
{
|
||||
fc3_normal_squared[i] = 0;
|
||||
}
|
||||
|
||||
|
@ -367,47 +377,47 @@ static void get_collision(double *ise,
|
|||
g_zero);
|
||||
|
||||
itr_get_interaction_at_triplet(
|
||||
fc3_normal_squared,
|
||||
num_band0,
|
||||
num_band,
|
||||
g_pos,
|
||||
num_g_pos,
|
||||
frequencies,
|
||||
eigenvectors,
|
||||
triplet,
|
||||
bzgrid,
|
||||
fc3,
|
||||
is_compact_fc3,
|
||||
svecs,
|
||||
multi_dims,
|
||||
multiplicity,
|
||||
masses,
|
||||
p2s_map,
|
||||
s2p_map,
|
||||
band_indices,
|
||||
symmetrize_fc3_q,
|
||||
cutoff_frequency,
|
||||
0,
|
||||
0,
|
||||
1 - openmp_per_triplets);
|
||||
fc3_normal_squared,
|
||||
num_band0,
|
||||
num_band,
|
||||
g_pos,
|
||||
num_g_pos,
|
||||
frequencies,
|
||||
eigenvectors,
|
||||
triplet,
|
||||
bzgrid,
|
||||
fc3,
|
||||
is_compact_fc3,
|
||||
svecs,
|
||||
multi_dims,
|
||||
multiplicity,
|
||||
masses,
|
||||
p2s_map,
|
||||
s2p_map,
|
||||
band_indices,
|
||||
symmetrize_fc3_q,
|
||||
cutoff_frequency,
|
||||
0,
|
||||
0,
|
||||
1 - openmp_per_triplets);
|
||||
|
||||
ise_imag_self_energy_at_triplet(
|
||||
ise,
|
||||
num_band0,
|
||||
num_band,
|
||||
fc3_normal_squared,
|
||||
frequencies,
|
||||
triplet,
|
||||
triplet_weight,
|
||||
g,
|
||||
g + num_band_prod,
|
||||
g_pos,
|
||||
num_g_pos,
|
||||
temperatures,
|
||||
num_temps,
|
||||
cutoff_frequency,
|
||||
1 - openmp_per_triplets,
|
||||
0);
|
||||
ise,
|
||||
num_band0,
|
||||
num_band,
|
||||
fc3_normal_squared,
|
||||
frequencies,
|
||||
triplet,
|
||||
triplet_weight,
|
||||
g,
|
||||
g + num_band_prod,
|
||||
g_pos,
|
||||
num_g_pos,
|
||||
temperatures,
|
||||
num_temps,
|
||||
cutoff_frequency,
|
||||
1 - openmp_per_triplets,
|
||||
0);
|
||||
|
||||
free(fc3_normal_squared);
|
||||
fc3_normal_squared = NULL;
|
||||
|
@ -427,33 +437,47 @@ static void finalize_ise(double *imag_self_energy,
|
|||
long i, j, k;
|
||||
long is_N;
|
||||
|
||||
if (is_NU) {
|
||||
for (i = 0; i < 2 * num_temps * num_band0; i++) {
|
||||
if (is_NU)
|
||||
{
|
||||
for (i = 0; i < 2 * num_temps * num_band0; i++)
|
||||
{
|
||||
imag_self_energy[i] = 0;
|
||||
}
|
||||
for (i = 0; i < num_triplets; i++) {
|
||||
for (i = 0; i < num_triplets; i++)
|
||||
{
|
||||
is_N = tpl_is_N(triplets[i], bz_grid_addresses);
|
||||
for (j = 0; j < num_temps; j++) {
|
||||
for (k = 0; k < num_band0; k++) {
|
||||
if (is_N) {
|
||||
for (j = 0; j < num_temps; j++)
|
||||
{
|
||||
for (k = 0; k < num_band0; k++)
|
||||
{
|
||||
if (is_N)
|
||||
{
|
||||
imag_self_energy[j * num_band0 + k] +=
|
||||
ise[i * num_temps * num_band0 + j * num_band0 + k];
|
||||
} else {
|
||||
ise[i * num_temps * num_band0 + j * num_band0 + k];
|
||||
}
|
||||
else
|
||||
{
|
||||
imag_self_energy[num_temps * num_band0 + j * num_band0 + k] +=
|
||||
ise[i * num_temps * num_band0 + j * num_band0 + k];
|
||||
ise[i * num_temps * num_band0 + j * num_band0 + k];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (i = 0; i < num_temps * num_band0; i++) {
|
||||
}
|
||||
else
|
||||
{
|
||||
for (i = 0; i < num_temps * num_band0; i++)
|
||||
{
|
||||
imag_self_energy[i] = 0;
|
||||
}
|
||||
for (i = 0; i < num_triplets; i++) {
|
||||
for (j = 0; j < num_temps; j++) {
|
||||
for (k = 0; k < num_band0; k++) {
|
||||
for (i = 0; i < num_triplets; i++)
|
||||
{
|
||||
for (j = 0; j < num_temps; j++)
|
||||
{
|
||||
for (k = 0; k < num_band0; k++)
|
||||
{
|
||||
imag_self_energy[j * num_band0 + k] +=
|
||||
ise[i * num_temps * num_band0 + j * num_band0 + k];
|
||||
ise[i * num_temps * num_band0 + j * num_band0 + k];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -97,7 +97,8 @@ void r2r_real_to_reciprocal(lapack_complex_double *fc3_reciprocal,
|
|||
const long *s2p_map,
|
||||
const long openmp_at_bands)
|
||||
{
|
||||
if (openmp_at_bands) {
|
||||
if (openmp_at_bands)
|
||||
{
|
||||
real_to_reciprocal_openmp(fc3_reciprocal,
|
||||
q_vecs,
|
||||
fc3,
|
||||
|
@ -107,7 +108,9 @@ void r2r_real_to_reciprocal(lapack_complex_double *fc3_reciprocal,
|
|||
multiplicity,
|
||||
p2s_map,
|
||||
s2p_map);
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
real_to_reciprocal_single_thread(fc3_reciprocal,
|
||||
q_vecs,
|
||||
fc3,
|
||||
|
@ -120,7 +123,6 @@ void r2r_real_to_reciprocal(lapack_complex_double *fc3_reciprocal,
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
real_to_reciprocal_single_thread(lapack_complex_double *fc3_reciprocal,
|
||||
const double q_vecs[3][3],
|
||||
|
@ -132,19 +134,22 @@ real_to_reciprocal_single_thread(lapack_complex_double *fc3_reciprocal,
|
|||
const long *p2s_map,
|
||||
const long *s2p_map)
|
||||
{
|
||||
long i, j, k;
|
||||
long num_patom, adrs_shift;
|
||||
lapack_complex_double pre_phase_factor;
|
||||
long i, j, k, l, m, n;
|
||||
long num_patom, num_band;
|
||||
lapack_complex_double pre_phase_factor, fc3_rec_elem[27];
|
||||
|
||||
num_patom = multi_dims[1];
|
||||
num_band = num_patom * 3;
|
||||
|
||||
for (i = 0; i < num_patom; i++) {
|
||||
for (j = 0; j < num_patom; j++) {
|
||||
for (k = 0; k < num_patom; k++) {
|
||||
real_to_reciprocal_elements(fc3_reciprocal +
|
||||
i * 27 * num_patom * num_patom +
|
||||
j * 27 * num_patom +
|
||||
k * 27,
|
||||
for (i = 0; i < num_patom; i++)
|
||||
{
|
||||
pre_phase_factor = get_pre_phase_factor(
|
||||
i, q_vecs, svecs, multi_dims, multiplicity, p2s_map);
|
||||
for (j = 0; j < num_patom; j++)
|
||||
{
|
||||
for (k = 0; k < num_patom; k++)
|
||||
{
|
||||
real_to_reciprocal_elements(fc3_rec_elem,
|
||||
q_vecs,
|
||||
fc3,
|
||||
is_compact_fc3,
|
||||
|
@ -154,16 +159,19 @@ real_to_reciprocal_single_thread(lapack_complex_double *fc3_reciprocal,
|
|||
p2s_map,
|
||||
s2p_map,
|
||||
i, j, k);
|
||||
|
||||
for (l = 0; l < 3; l++)
|
||||
{
|
||||
for (m = 0; m < 3; m++)
|
||||
{
|
||||
for (n = 0; n < 3; n++)
|
||||
{
|
||||
fc3_reciprocal[(i * 3 + l) * num_band * num_band + (j * 3 + m) * num_band + k * 3 + n] =
|
||||
phonoc_complex_prod(fc3_rec_elem[l * 9 + m * 3 + n], pre_phase_factor);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
pre_phase_factor = get_pre_phase_factor(
|
||||
i, q_vecs, svecs, multi_dims, multiplicity, p2s_map);
|
||||
adrs_shift = i * num_patom * num_patom * 27;
|
||||
for (j = 0; j < num_patom * num_patom * 27; j++) {
|
||||
fc3_reciprocal[adrs_shift + j] =
|
||||
phonoc_complex_prod(fc3_reciprocal[adrs_shift + j], pre_phase_factor);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -178,23 +186,25 @@ real_to_reciprocal_openmp(lapack_complex_double *fc3_reciprocal,
|
|||
const long *p2s_map,
|
||||
const long *s2p_map)
|
||||
{
|
||||
long i, j, k, jk;
|
||||
long num_patom, adrs_shift;
|
||||
lapack_complex_double pre_phase_factor;
|
||||
long i, j, k, l, m, n, jk;
|
||||
long num_patom, num_band;
|
||||
lapack_complex_double pre_phase_factor, fc3_rec_elem[27];
|
||||
|
||||
num_patom = multi_dims[1];
|
||||
num_band = num_patom * 3;
|
||||
|
||||
for (i = 0; i < num_patom; i++) {
|
||||
for (i = 0; i < num_patom; i++)
|
||||
{
|
||||
pre_phase_factor = get_pre_phase_factor(
|
||||
i, q_vecs, svecs, multi_dims, multiplicity, p2s_map);
|
||||
#ifdef PHPYOPENMP
|
||||
#pragma omp parallel for private(j, k)
|
||||
#pragma omp parallel for private(j, k, l, m, n, fc3_rec_elem)
|
||||
#endif
|
||||
for (jk = 0; jk < num_patom * num_patom; jk++) {
|
||||
for (jk = 0; jk < num_patom * num_patom; jk++)
|
||||
{
|
||||
j = jk / num_patom;
|
||||
k = jk % num_patom;
|
||||
real_to_reciprocal_elements(fc3_reciprocal +
|
||||
i * 27 * num_patom * num_patom +
|
||||
j * 27 * num_patom +
|
||||
k * 27,
|
||||
real_to_reciprocal_elements(fc3_rec_elem,
|
||||
q_vecs,
|
||||
fc3,
|
||||
is_compact_fc3,
|
||||
|
@ -204,17 +214,17 @@ real_to_reciprocal_openmp(lapack_complex_double *fc3_reciprocal,
|
|||
p2s_map,
|
||||
s2p_map,
|
||||
i, j, k);
|
||||
|
||||
}
|
||||
pre_phase_factor = get_pre_phase_factor(
|
||||
i, q_vecs, svecs, multi_dims, multiplicity, p2s_map);
|
||||
adrs_shift = i * num_patom * num_patom * 27;
|
||||
#ifdef PHPYOPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for (j = 0; j < num_patom * num_patom * 27; j++) {
|
||||
fc3_reciprocal[adrs_shift + j] =
|
||||
phonoc_complex_prod(fc3_reciprocal[adrs_shift + j], pre_phase_factor);
|
||||
for (l = 0; l < 3; l++)
|
||||
{
|
||||
for (m = 0; m < 3; m++)
|
||||
{
|
||||
for (n = 0; n < 3; n++)
|
||||
{
|
||||
fc3_reciprocal[(i * 3 + l) * num_band * num_band + (j * 3 + m) * num_band + k * 3 + n] =
|
||||
phonoc_complex_prod(fc3_rec_elem[l * 9 + m * 3 + n], pre_phase_factor);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -233,25 +243,31 @@ static void real_to_reciprocal_elements(lapack_complex_double *fc3_rec_elem,
|
|||
const long pi2)
|
||||
{
|
||||
long i, j, k, l;
|
||||
long num_satom, adrs_shift, adrs_vec1, adrs_vec2 ;
|
||||
long num_satom, adrs_shift, adrs_vec1, adrs_vec2;
|
||||
lapack_complex_double phase_factor, phase_factor1, phase_factor2;
|
||||
double fc3_rec_real[27], fc3_rec_imag[27];
|
||||
|
||||
for (i = 0; i < 27; i++) {
|
||||
for (i = 0; i < 27; i++)
|
||||
{
|
||||
fc3_rec_real[i] = 0;
|
||||
fc3_rec_imag[i] = 0;
|
||||
}
|
||||
|
||||
num_satom = multi_dims[0];
|
||||
|
||||
if (is_compact_fc3) {
|
||||
if (is_compact_fc3)
|
||||
{
|
||||
i = pi0;
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
i = p2s[pi0];
|
||||
}
|
||||
|
||||
for (j = 0; j < num_satom; j++) {
|
||||
if (s2p[j] != p2s[pi1]) {
|
||||
for (j = 0; j < num_satom; j++)
|
||||
{
|
||||
if (s2p[j] != p2s[pi1])
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -260,8 +276,10 @@ static void real_to_reciprocal_elements(lapack_complex_double *fc3_rec_elem,
|
|||
1,
|
||||
svecs,
|
||||
multiplicity[adrs_vec1]);
|
||||
for (k = 0; k < num_satom; k++) {
|
||||
if (s2p[k] != p2s[pi2]) {
|
||||
for (k = 0; k < num_satom; k++)
|
||||
{
|
||||
if (s2p[k] != p2s[pi2])
|
||||
{
|
||||
continue;
|
||||
}
|
||||
adrs_vec2 = k * multi_dims[1] + pi0;
|
||||
|
@ -271,18 +289,20 @@ static void real_to_reciprocal_elements(lapack_complex_double *fc3_rec_elem,
|
|||
multiplicity[adrs_vec2]);
|
||||
adrs_shift = i * 27 * num_satom * num_satom + j * 27 * num_satom + k * 27;
|
||||
phase_factor = phonoc_complex_prod(phase_factor1, phase_factor2);
|
||||
for (l = 0; l < 27; l++) {
|
||||
for (l = 0; l < 27; l++)
|
||||
{
|
||||
fc3_rec_real[l] +=
|
||||
lapack_complex_double_real(phase_factor) * fc3[adrs_shift + l];
|
||||
lapack_complex_double_real(phase_factor) * fc3[adrs_shift + l];
|
||||
fc3_rec_imag[l] +=
|
||||
lapack_complex_double_imag(phase_factor) * fc3[adrs_shift + l];
|
||||
lapack_complex_double_imag(phase_factor) * fc3[adrs_shift + l];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 27; i++) {
|
||||
for (i = 0; i < 27; i++)
|
||||
{
|
||||
fc3_rec_elem[i] =
|
||||
lapack_make_complex_double(fc3_rec_real[i], fc3_rec_imag[i]);
|
||||
lapack_make_complex_double(fc3_rec_real[i], fc3_rec_imag[i]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -301,11 +321,13 @@ get_pre_phase_factor(const long i_patom,
|
|||
svecs_adrs = p2s_map[i_patom] * multi_dims[1];
|
||||
sum_real = 0;
|
||||
sum_imag = 0;
|
||||
for (i = 0; i < multiplicity[svecs_adrs][0]; i++) {
|
||||
for (i = 0; i < multiplicity[svecs_adrs][0]; i++)
|
||||
{
|
||||
pre_phase = 0;
|
||||
for (j = 0; j < 3; j++) {
|
||||
for (j = 0; j < 3; j++)
|
||||
{
|
||||
pre_phase += svecs[multiplicity[svecs_adrs][1] + i][j] *
|
||||
(q_vecs[0][j] + q_vecs[1][j] + q_vecs[2][j]);
|
||||
(q_vecs[0][j] + q_vecs[1][j] + q_vecs[2][j]);
|
||||
}
|
||||
pre_phase *= M_2PI;
|
||||
sum_real += cos(pre_phase);
|
||||
|
@ -328,9 +350,11 @@ static lapack_complex_double get_phase_factor(const double q[3][3],
|
|||
|
||||
sum_real = 0;
|
||||
sum_imag = 0;
|
||||
for (i = 0; i < multi[0]; i++) {
|
||||
for (i = 0; i < multi[0]; i++)
|
||||
{
|
||||
phase = 0;
|
||||
for (j = 0; j < 3; j++) {
|
||||
for (j = 0; j < 3; j++)
|
||||
{
|
||||
phase += q[qi][j] * svecs[multi[1] + i][j];
|
||||
}
|
||||
phase *= M_2PI;
|
||||
|
|
|
@ -42,25 +42,11 @@
|
|||
#include <time.h>
|
||||
#endif
|
||||
|
||||
static double get_fc3_sum(const long bi0,
|
||||
const long bi1,
|
||||
const long bi2,
|
||||
const double *freqs0,
|
||||
const double *freqs1,
|
||||
const double *freqs2,
|
||||
const lapack_complex_double *e0,
|
||||
static double get_fc3_sum(const lapack_complex_double *e0,
|
||||
const lapack_complex_double *e1,
|
||||
const lapack_complex_double *e2,
|
||||
const lapack_complex_double *fc3_reciprocal,
|
||||
const long num_atom,
|
||||
const long num_band,
|
||||
const double cutoff_frequency);
|
||||
|
||||
static double fc3_sum_in_reciprocal_to_normal(const lapack_complex_double *e0,
|
||||
const lapack_complex_double *e1,
|
||||
const lapack_complex_double *e2,
|
||||
const lapack_complex_double *fc3_reciprocal,
|
||||
const long num_atom);
|
||||
const long num_band);
|
||||
|
||||
void reciprocal_to_normal_squared(double *fc3_normal_squared,
|
||||
const long (*g_pos)[4],
|
||||
|
@ -83,21 +69,13 @@ void reciprocal_to_normal_squared(double *fc3_normal_squared,
|
|||
double *inv_sqrt_masses;
|
||||
lapack_complex_double *e0, *e1, *e2;
|
||||
|
||||
num_atom = num_band / 3;
|
||||
|
||||
inv_sqrt_masses = (double *)malloc(sizeof(double) * num_atom);
|
||||
for (i = 0; i < num_atom; i++)
|
||||
{
|
||||
inv_sqrt_masses[i] = 1.0 / sqrt(masses[i]);
|
||||
}
|
||||
|
||||
/* Transpose eigenvectors for the better data alignment in memory. */
|
||||
/* Memory space for three eigenvector matrices is allocated at once */
|
||||
/* to make it contiguous. */
|
||||
e0 = (lapack_complex_double *)
|
||||
malloc(sizeof(lapack_complex_double) * num_band * num_band);
|
||||
e1 = (lapack_complex_double *)
|
||||
malloc(sizeof(lapack_complex_double) * num_band * num_band);
|
||||
e2 = (lapack_complex_double *)
|
||||
malloc(sizeof(lapack_complex_double) * num_band * num_band);
|
||||
malloc(sizeof(lapack_complex_double) * 3 * num_band * num_band);
|
||||
e1 = e0 + num_band * num_band;
|
||||
e2 = e1 + num_band * num_band;
|
||||
|
||||
for (i = 0; i < num_band; i++)
|
||||
{
|
||||
|
@ -109,7 +87,17 @@ void reciprocal_to_normal_squared(double *fc3_normal_squared,
|
|||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < num_band; i++)
|
||||
/* Inverse sqrt mass is multipled with eigenvectors to reduce number of */
|
||||
/* operations in get_fc3_sum. Three eigenvector matrices are looped by */
|
||||
/* first loop leveraging contiguous memory layout of [e0, e1, e2]. */
|
||||
num_atom = num_band / 3;
|
||||
inv_sqrt_masses = (double *)malloc(sizeof(double) * num_atom);
|
||||
for (i = 0; i < num_atom; i++)
|
||||
{
|
||||
inv_sqrt_masses[i] = 1.0 / sqrt(masses[i]);
|
||||
}
|
||||
|
||||
for (i = 0; i < 3 * num_band; i++)
|
||||
{
|
||||
for (j = 0; j < num_atom; j++)
|
||||
{
|
||||
|
@ -118,12 +106,6 @@ void reciprocal_to_normal_squared(double *fc3_normal_squared,
|
|||
real = lapack_complex_double_real(e0[i * num_band + j * 3 + k]);
|
||||
imag = lapack_complex_double_imag(e0[i * num_band + j * 3 + k]);
|
||||
e0[i * num_band + j * 3 + k] = lapack_make_complex_double(real * inv_sqrt_masses[j], imag * inv_sqrt_masses[j]);
|
||||
real = lapack_complex_double_real(e1[i * num_band + j * 3 + k]);
|
||||
imag = lapack_complex_double_imag(e1[i * num_band + j * 3 + k]);
|
||||
e1[i * num_band + j * 3 + k] = lapack_make_complex_double(real * inv_sqrt_masses[j], imag * inv_sqrt_masses[j]);
|
||||
real = lapack_complex_double_real(e2[i * num_band + j * 3 + k]);
|
||||
imag = lapack_complex_double_imag(e2[i * num_band + j * 3 + k]);
|
||||
e2[i * num_band + j * 3 + k] = lapack_make_complex_double(real * inv_sqrt_masses[j], imag * inv_sqrt_masses[j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -148,21 +130,21 @@ void reciprocal_to_normal_squared(double *fc3_normal_squared,
|
|||
#endif
|
||||
for (i = 0; i < num_g_pos; i++)
|
||||
{
|
||||
if (freqs0[band_indices[g_pos[i][0]]] > cutoff_frequency)
|
||||
if (freqs0[band_indices[g_pos[i][0]]] > cutoff_frequency &&
|
||||
freqs1[g_pos[i][1]] > cutoff_frequency &&
|
||||
freqs2[g_pos[i][2]] > cutoff_frequency)
|
||||
{
|
||||
fc3_normal_squared[g_pos[i][3]] = get_fc3_sum(band_indices[g_pos[i][0]],
|
||||
g_pos[i][1],
|
||||
g_pos[i][2],
|
||||
freqs0,
|
||||
freqs1,
|
||||
freqs2,
|
||||
e0,
|
||||
e1,
|
||||
e2,
|
||||
fc3_reciprocal,
|
||||
num_atom,
|
||||
num_band,
|
||||
cutoff_frequency);
|
||||
fc3_normal_squared[g_pos[i][3]] =
|
||||
get_fc3_sum(e0 + band_indices[g_pos[i][0]] * num_band,
|
||||
e1 + g_pos[i][1] * num_band,
|
||||
e2 + g_pos[i][2] * num_band,
|
||||
fc3_reciprocal,
|
||||
num_band) /
|
||||
(freqs0[band_indices[g_pos[i][0]]] * freqs1[g_pos[i][1]] * freqs2[g_pos[i][2]]);
|
||||
}
|
||||
else
|
||||
{
|
||||
fc3_normal_squared[g_pos[i][3]] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -174,80 +156,34 @@ void reciprocal_to_normal_squared(double *fc3_normal_squared,
|
|||
|
||||
free(e0);
|
||||
e0 = NULL;
|
||||
free(e1);
|
||||
e1 = NULL;
|
||||
free(e2);
|
||||
e2 = NULL;
|
||||
}
|
||||
|
||||
static double get_fc3_sum(const long bi0,
|
||||
const long bi1,
|
||||
const long bi2,
|
||||
const double *freqs0,
|
||||
const double *freqs1,
|
||||
const double *freqs2,
|
||||
const lapack_complex_double *e0,
|
||||
static double get_fc3_sum(const lapack_complex_double *e0,
|
||||
const lapack_complex_double *e1,
|
||||
const lapack_complex_double *e2,
|
||||
const lapack_complex_double *fc3_reciprocal,
|
||||
const long num_atom,
|
||||
const long num_band,
|
||||
const double cutoff_frequency)
|
||||
const long num_band)
|
||||
{
|
||||
double fff;
|
||||
|
||||
if (freqs1[bi1] > cutoff_frequency && freqs2[bi2] > cutoff_frequency)
|
||||
{
|
||||
fff = freqs0[bi0] * freqs1[bi1] * freqs2[bi2];
|
||||
return fc3_sum_in_reciprocal_to_normal(e0 + bi0 * num_band,
|
||||
e1 + bi1 * num_band,
|
||||
e2 + bi2 * num_band,
|
||||
fc3_reciprocal,
|
||||
num_atom) /
|
||||
fff;
|
||||
}
|
||||
else
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static double fc3_sum_in_reciprocal_to_normal(const lapack_complex_double *e0,
|
||||
const lapack_complex_double *e1,
|
||||
const lapack_complex_double *e2,
|
||||
const lapack_complex_double *fc3_reciprocal,
|
||||
const long num_atom)
|
||||
{
|
||||
long index_l, index_lm, index_lmn, i, j, k, l, m, n;
|
||||
long i, j, k;
|
||||
double sum_real, sum_imag;
|
||||
lapack_complex_double e_01, e_012, e_012_fc3;
|
||||
|
||||
sum_real = 0;
|
||||
sum_imag = 0;
|
||||
|
||||
for (l = 0; l < num_atom; l++)
|
||||
for (i = 0; i < num_band; i++)
|
||||
{
|
||||
index_l = l * num_atom * num_atom * 27;
|
||||
for (i = 0; i < 3; i++)
|
||||
for (j = 0; j < num_band; j++)
|
||||
{
|
||||
for (m = 0; m < num_atom; m++)
|
||||
e_01 = phonoc_complex_prod(e0[i], e1[j]);
|
||||
for (k = 0; k < num_band; k++)
|
||||
{
|
||||
index_lm = index_l + m * num_atom * 27;
|
||||
for (j = 0; j < 3; j++)
|
||||
{
|
||||
e_01 = phonoc_complex_prod(e0[l * 3 + i], e1[m * 3 + j]);
|
||||
for (n = 0; n < num_atom; n++)
|
||||
{
|
||||
index_lmn = index_lm + n * 27 + i * 9 + j * 3;
|
||||
for (k = 0; k < 3; k++)
|
||||
{
|
||||
e_012 = phonoc_complex_prod(e_01, e2[n * 3 + k]);
|
||||
e_012_fc3 = phonoc_complex_prod(e_012, fc3_reciprocal[index_lmn + k]);
|
||||
sum_real += lapack_complex_double_real(e_012_fc3);
|
||||
sum_imag += lapack_complex_double_imag(e_012_fc3);
|
||||
}
|
||||
}
|
||||
}
|
||||
e_012 = phonoc_complex_prod(e_01, e2[k]);
|
||||
e_012_fc3 = phonoc_complex_prod(e_012, fc3_reciprocal[i * num_band * num_band + j * num_band + k]);
|
||||
sum_real += lapack_complex_double_real(e_012_fc3);
|
||||
sum_imag += lapack_complex_double_imag(e_012_fc3);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue