quantum-espresso/FFTXlib/scatter_mod.f90

1363 lines
48 KiB
Fortran
Raw Normal View History

!
! Copyright (C) Quantum ESPRESSO group
!
! This file is distributed under the terms of the
! GNU General Public License. See the file `License'
! in the root directory of the present distribution,
! or http://www.gnu.org/copyleft/gpl.txt .
!
!----------------------------------------------------------------------
! FFT base Module.
! Written by Carlo Cavazzoni, modified by Paolo Giannozzi
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
! Rewritten by Stefano de Gironcoli
!----------------------------------------------------------------------
!
!=----------------------------------------------------------------------=!
MODULE scatter_mod
!=----------------------------------------------------------------------=!
USE fft_types, ONLY: fft_type_descriptor
USE fft_param
IMPLICIT NONE
INTERFACE gather_grid
MODULE PROCEDURE gather_real_grid, gather_complex_grid
END INTERFACE
INTERFACE scatter_grid
MODULE PROCEDURE scatter_real_grid, scatter_complex_grid
END INTERFACE
SAVE
PRIVATE
PUBLIC :: fft_type_descriptor
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
PUBLIC :: gather_grid, scatter_grid
PUBLIC :: fft_scatter_xy, fft_scatter_yz, fft_scatter_tg, fft_scatter_tg_opt
PUBLIC :: cgather_sym, cgather_sym_many, cscatter_sym_many
!=----------------------------------------------------------------------=!
CONTAINS
!=----------------------------------------------------------------------=!
!
!
!-----------------------------------------------------------------------
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
SUBROUTINE fft_scatter_xy ( desc, f_in, f_aux, nxx_, isgn )
!-----------------------------------------------------------------------
!
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
! transpose of the fft xy planes across the desc%comm2 communicator
!
! a) From Y-oriented columns to X-oriented partial slices (isgn > 0)
! Active columns along the Y direction corresponding to a subset of the
! active X values and a range of Z values (in this order) are stored
! consecutively for each processor and are such that the subgroup owns
! all data for a range of Z values.
!
! The Y pencil -> X-oriented partial slices transposition is performed
! in the subgroup of processors (desc%comm2) owning this range of Z values.
!
! The transpose takes place in two steps:
! 1) on each processor the columns are sliced into sections along Y
! that are stored one after the other. On each processor, slices for
! processor "iproc2" are desc%nr2p(iproc2)*desc%nr1p(me2)*desc%my_nr3p big.
! 2) all processors communicate to exchange slices (all sectin of columns with
! Y in the slice belonging to "me" must be received, all the others
! must be sent to "iproc2")
!
! Finally one gets the "partial slice" representation: each processor has
! all the X values of desc%my_nr2p Y and desc%my_nr3p Z values.
! Data are organized with the X index running fastest, then Y, then Z.
!
! f_in contains the input Y columns, is destroyed on output
! f_aux contains the output X-oriented partial slices.
!
! b) From planes to columns (isgn < 0)
!
! Quite the same in the opposite direction
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
! f_aux contains the input X-oriented partial slices, is destroyed on output
! f_in contains the output Y columns.
!
IMPLICIT NONE
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
TYPE (fft_type_descriptor), INTENT(in) :: desc
INTEGER, INTENT(in) :: nxx_, isgn
COMPLEX (DP), INTENT(inout) :: f_in (nxx_), f_aux (nxx_)
#if defined(__MPI)
!
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
INTEGER :: ierr, me2, nproc2, iproc2, ncpx, my_nr2p, nr2px, ip, ip0
INTEGER :: i, it, j, k, kfrom, kdest, offset, ioff, mc, m1, m3, i1, icompact, sendsize
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
INTEGER, ALLOCATABLE :: ncp_(:), nr1p_(:), indx(:,:)
!
#if defined(__NON_BLOCKING_SCATTER)
INTEGER :: sh(desc%nproc2), rh(desc%nproc2)
#endif
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
me2 = desc%mype2 + 1
nproc2 = desc%nproc2 ; if ( abs(isgn) == 3 ) nproc2 = 1
! allocate auxiliary array for columns distribution
ALLOCATE ( ncp_(nproc2), nr1p_(nproc2), indx(desc%nr1x,nproc2) )
if ( abs (isgn) == 1 ) then ! It's a potential FFT
ncp_ = desc%nr1p * desc%my_nr3p
nr1p_= desc%nr1p
indx = desc%indp
my_nr2p=desc%my_nr2p
else if ( abs (isgn) == 2 ) then ! It's a wavefunction FFT
ncp_ = desc%nr1w * desc%my_nr3p
nr1p_= desc%nr1w
indx = desc%indw
my_nr2p=desc%my_nr2p
else if ( abs (isgn) == 3 ) then ! It's a wavefunction FFT with task group
ncp_ = desc%nr1w_tg * desc%my_nr3p!
nr1p_= desc%nr1w_tg !
indx(:,1) = desc%indw_tg !
my_nr2p=desc%nr2x ! in task group FFTs whole Y colums are distributed
end if
!
CALL start_clock ('fft_scatt_xy')
!
! calculate the message size
!
if ( abs (isgn) == 3 ) then
nr2px = desc%nr2x ! if it's a task group FFT whole planes are distributed
else
nr2px = MAXVAL ( desc%nr2p ) ! maximum number of Y values to be disributed
end if
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
ncpx = MAXVAL ( ncp_ ) ! maximum number of Y columns to be disributed
sendsize = ncpx * nr2px ! dimension of the scattered chunks (safe value)
ierr = 0
IF (isgn.gt.0) THEN
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
IF (nproc2==1) GO TO 10
!
! "forward" scatter from columns to planes
!
! step one: store contiguously the slices
!
offset = 0
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
DO iproc2 = 1, nproc2
kdest = ( iproc2 - 1 ) * sendsize
kfrom = offset
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
DO k = 1, ncp_(me2)
DO i = 1, desc%nr2p( iproc2 )
f_aux ( kdest + i ) = f_in ( kfrom + i )
ENDDO
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
kdest = kdest + nr2px
kfrom = kfrom + desc%nr2x
ENDDO
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
offset = offset + desc%nr2p( iproc2 )
#if defined(__NON_BLOCKING_SCATTER)
CALL mpi_isend( f_aux( (iproc2-1)*sendsize + 1 ), sendsize, &
MPI_DOUBLE_COMPLEX, iproc2-1, me2, desc%comm2, &
sh( iproc2 ), ierr )
#endif
ENDDO
!
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
! step two: communication across the nproc3 group
!
#if defined(__NON_BLOCKING_SCATTER)
DO iproc2 = 1, nproc2
!
! now post the receive
!
CALL mpi_irecv( f_in( (iproc2-1)*sendsize + 1 ), sendsize, &
MPI_DOUBLE_COMPLEX, iproc2-1, MPI_ANY_TAG, &
desc%comm2, rh( iproc2 ), ierr )
!IF( abs(ierr) /= 0 ) CALL fftx_error__ ('fft_scatter', ' forward receive info<>0', abs(ierr) )
ENDDO
call mpi_waitall( nproc2, sh, MPI_STATUSES_IGNORE, ierr )
!
#else
CALL mpi_alltoall (f_aux(1), sendsize, MPI_DOUBLE_COMPLEX, f_in(1), &
sendsize, MPI_DOUBLE_COMPLEX, desc%comm2, ierr)
!
IF( abs(ierr) /= 0 ) CALL fftx_error__ ('fft_scatter', 'info<>0', abs(ierr) )
#endif
!
10 CONTINUE
!
f_aux = (0.0_DP, 0.0_DP)
!
#if defined(__NON_BLOCKING_SCATTER)
if (nproc2 > 1) call mpi_waitall( nproc2, rh, MPI_STATUSES_IGNORE, ierr )
#endif
!
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
DO iproc2 = 1, nproc2
it = ( iproc2 - 1 ) * sendsize
DO i = 1, ncp_( iproc2 )
m3 = (i-1)/nr1p_(iproc2)+1 ; i1 = mod(i-1,nr1p_(iproc2))+1 ; m1 = indx(i1,iproc2)
icompact = m1 + (m3-1)*desc%nr1x*my_nr2p
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
DO j = 1, my_nr2p
!f_aux( m1 + (j-1)*desc%nr1x + (m3-1)*desc%nr1x*my_nr2p ) = f_in( j + it )
f_aux( icompact ) = f_in( j + it )
icompact = icompact + desc%nr1x
ENDDO
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
it = it + nr2px
ENDDO
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
ENDDO
ELSE
!
! "backward" scatter from planes to columns
!
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
DO iproc2 = 1, nproc2
it = ( iproc2 - 1 ) * sendsize
DO i = 1, ncp_( iproc2 )
m3 = (i-1)/nr1p_(iproc2)+1 ; i1 = mod(i-1,nr1p_(iproc2))+1 ; m1 = indx(i1,iproc2)
icompact = m1 + (m3-1)*desc%nr1x*my_nr2p
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
DO j = 1, my_nr2p
!f_in( j + it ) = f_aux( m1 + (j-1)*desc%nr1x + (m3-1)*desc%nr1x*my_nr2p )
f_in( j + it ) = f_aux( icompact )
icompact = icompact + desc%nr1x
ENDDO
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
it = it + nr2px
ENDDO
#if defined(__NON_BLOCKING_SCATTER)
IF( nproc2 > 1 ) CALL mpi_isend( f_in( ( iproc2 - 1 ) * sendsize + 1 ), &
sendsize, MPI_DOUBLE_COMPLEX, iproc2-1, me2, &
desc%comm2, sh( iproc2 ), ierr )
#endif
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
ENDDO
IF (nproc2==1) GO TO 20
!
! step two: communication
!
#if ! defined(__NON_BLOCKING_SCATTER)
CALL mpi_alltoall (f_in(1), sendsize, MPI_DOUBLE_COMPLEX, f_aux(1), &
sendsize, MPI_DOUBLE_COMPLEX, desc%comm2, ierr)
IF( abs(ierr) /= 0 ) CALL fftx_error__ ('fft_scatter', 'info<>0', abs(ierr) )
#else
DO iproc2 = 1, nproc2
CALL mpi_irecv( f_aux( (iproc2-1)*sendsize + 1 ), sendsize, &
MPI_DOUBLE_COMPLEX, iproc2-1, MPI_ANY_TAG, &
desc%comm2, rh(iproc2), ierr )
! IF( abs(ierr) /= 0 ) CALL fftx_error__ ('fft_scatter', ' backward receive info<>0', abs(ierr) )
ENDDO
call mpi_waitall( nproc2, sh, MPI_STATUSES_IGNORE, ierr )
call mpi_waitall( nproc2, rh, MPI_STATUSES_IGNORE, ierr )
#endif
!
! step one: store contiguously the columns
!
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
! ensures that no garbage is present in the output
! not useless ... clean the array to be returned from the garbage of previous A2A step
f_in = (0.0_DP, 0.0_DP) !
!
offset = 0
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
DO iproc2 = 1, nproc2
kdest = ( iproc2 - 1 ) * sendsize
kfrom = offset
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
DO k = 1, ncp_(me2)
DO i = 1, desc%nr2p( iproc2 )
f_in ( kfrom + i ) = f_aux ( kdest + i )
ENDDO
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
kdest = kdest + nr2px
kfrom = kfrom + desc%nr2x
ENDDO
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
offset = offset + desc%nr2p( iproc2 )
ENDDO
20 CONTINUE
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
ENDIF
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
DEALLOCATE ( ncp_ , nr1p_, indx )
CALL stop_clock ('fft_scatt_xy')
#endif
RETURN
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
99 format ( 20 ('(',2f12.9,')') )
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
END SUBROUTINE fft_scatter_xy
!
!-----------------------------------------------------------------------
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
SUBROUTINE fft_scatter_yz ( desc, f_in, f_aux, nxx_, isgn )
!-----------------------------------------------------------------------
!
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
! transpose of the fft yz planes across the desc%comm3 communicator
!
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
! a) From Z-oriented columns to Y-oriented colums (isgn > 0)
! Active columns (or sticks or pencils) along the Z direction for each
! processor are stored consecutively and are such that they correspond
! to a subset of the active X values.
!
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
! The pencil -> slices transposition is performed in the subgroup
! of processors (desc%comm3) owning these X values.
!
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
! The transpose takes place in two steps:
! 1) on each processor the columns are sliced into sections along Z
! that are stored one after the other. On each processor, slices for
! processor "iproc3" are desc%nr3p(iproc3)*desc%nsw/nsp(me) big.
! 2) all processors communicate to exchange slices (all columns with
! Z in the slice belonging to "me" must be received, all the others
! must be sent to "iproc3")
!
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
! Finally one gets the "slice" representation: each processor has
! desc%nr3p(mype3) Z values of all the active pencils along Y for the
! X values of the current group. Data are organized with the Y index
! running fastest, then the reordered X values, then Z.
!
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
! f_in contains the input Z columns, is destroyed on output
! f_aux contains the output Y colums.
!
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
! b) From planes to columns (isgn < 0)
!
! Quite the same in the opposite direction
! f_aux contains the input Y columns, is destroyed on output
! f_in contains the output Z columns.
!
IMPLICIT NONE
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
TYPE (fft_type_descriptor), INTENT(in) :: desc
INTEGER, INTENT(in) :: nxx_, isgn
COMPLEX (DP), INTENT(inout) :: f_in (nxx_), f_aux (nxx_)
#if defined(__MPI)
!
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
INTEGER :: ierr, me, me2, me2_start, me2_end, me3, nproc3, iproc3, ncpx, nr3px, ip, ip0
INTEGER :: i, it, k, kfrom, kdest, offset, ioff, mc, m1, m2, i1, sendsize
INTEGER, ALLOCATABLE :: ncp_(:), ir1p_(:)
INTEGER :: my_nr1p_
!
#if defined(__NON_BLOCKING_SCATTER)
INTEGER :: sh(desc%nproc3), rh(desc%nproc3)
#endif
!
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
me = desc%mype + 1
me2 = desc%mype2 + 1
me3 = desc%mype3 + 1
nproc3 = desc%nproc3
! allocate auxiliary array for columns distribution
ALLOCATE ( ncp_( desc%nproc), ir1p_(desc%nr1x) )
me2_start = me2 ; me2_end = me2
if ( abs (isgn) == 1 ) then ! It's a potential FFT
ncp_ = desc%nsp
ir1p_= desc%ir1p
else if ( abs (isgn) == 2 ) then ! It's a wavefunction FFT
ncp_ = desc%nsw
ir1p_= desc%ir1w
else if ( abs (isgn) == 3 ) then ! It's a wavefunction FFT with task group
ncp_ = desc%nsw
ir1p_= desc%ir1w_tg
me2_start = 1 ; me2_end = desc%nproc2
end if
my_nr1p_ = count (ir1p_ > 0)
!
CALL start_clock ('fft_scatt_yz')
!
! calculate the message size
!
nr3px = MAXVAL ( desc%nr3p ) ! maximum number of Z values to be exchanged
ncpx = MAXVAL ( ncp_ ) ! maximum number of Z columns to be exchanged
if (abs(isgn)==3) then
ncpx = ncpx * desc%nproc2 ! if it's a task group FFT groups of columns are exchanged
end if
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
sendsize = ncpx * nr3px ! dimension of the scattered chunks
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
ierr = 0
IF (isgn.gt.0) THEN
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
IF (nproc3==1) GO TO 10
!
! "forward" scatter from columns to planes
!
! step one: store contiguously the slices
!
offset = 0
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
DO iproc3 = 1, nproc3
kdest = ( iproc3 - 1 ) * sendsize
kfrom = offset
DO me2 = me2_start, me2_end
ip = desc%iproc(me2,me3)
DO k = 1, ncp_ (ip) ! was ncp_(me3)
DO i = 1, desc%nr3p( iproc3 )
f_aux ( kdest + i ) = f_in ( kfrom + i )
ENDDO
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
kdest = kdest + nr3px
kfrom = kfrom + desc%nr3x
ENDDO
ENDDO
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
offset = offset + desc%nr3p( iproc3 )
#if defined(__NON_BLOCKING_SCATTER)
CALL mpi_isend( f_aux( ( iproc3 - 1 ) * sendsize + 1 ), sendsize, &
MPI_DOUBLE_COMPLEX, iproc3-1, me3, desc%comm3, &
sh( iproc3 ), ierr )
#endif
ENDDO
!
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
! ensures that no garbage is present in the output
! useless; the later accessed elements are overwritten by the A2A step
!
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
! step two: communication across the nproc3 group
!
#if defined(__NON_BLOCKING_SCATTER)
DO iproc3 = 1, nproc3
!
! now post the receive
!
CALL mpi_irecv( f_in( (iproc3-1)*sendsize + 1 ), sendsize, &
MPI_DOUBLE_COMPLEX, iproc3-1, MPI_ANY_TAG, &
desc%comm3, rh( iproc3 ), ierr )
!IF( abs(ierr) /= 0 ) CALL fftx_error__ ('fft_scatter', ' forward receive info<>0', abs(ierr) )
!
!
ENDDO
call mpi_waitall( nproc3, sh, MPI_STATUSES_IGNORE, ierr )
!
#else
CALL mpi_alltoall (f_aux(1), sendsize, MPI_DOUBLE_COMPLEX, f_in(1), &
sendsize, MPI_DOUBLE_COMPLEX, desc%comm3, ierr)
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
IF( abs(ierr) /= 0 ) CALL fftx_error__ ('fft_scatter', 'info<>0', abs(ierr) )
#endif
!
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
10 CONTINUE
!
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
f_aux = (0.0_DP, 0.0_DP) !
#if defined(__NON_BLOCKING_SCATTER)
if (nproc3 > 1) call mpi_waitall( nproc3, rh, MPI_STATUSES_IGNORE, ierr )
#endif
!
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
DO iproc3 = 1, desc%nproc3
it = ( iproc3 - 1 ) * sendsize
DO me2 = me2_start, me2_end
ip = desc%iproc( me2, iproc3)
ioff = desc%iss(ip)
DO i = 1, ncp_( ip ) ! was ncp_(iproc3)
mc = desc%ismap( i + ioff ) ! this is m1+(m2-1)*nr1x of the current pencil
m1 = mod (mc-1,desc%nr1x) + 1 ; m2 = (mc-1)/desc%nr1x + 1
i1 = m2 + ( ir1p_(m1) - 1 ) * desc%nr2x
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
DO k = 1, desc%my_nr3p
f_aux( i1 ) = f_in( k + it )
i1 = i1 + desc%nr2x*my_nr1p_
ENDDO
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
it = it + nr3px
ENDDO
ENDDO
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
ENDDO
ELSE
!
! "backward" scatter from planes to columns
!
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
DO iproc3 = 1, desc%nproc3
it = ( iproc3 - 1 ) * sendsize
DO me2 = me2_start, me2_end
ip = desc%iproc(me2, iproc3)
ioff = desc%iss(ip)
DO i = 1, ncp_( ip )
mc = desc%ismap( i + ioff ) ! this is m1+(m2-1)*nr1x of the current pencil
m1 = mod (mc-1,desc%nr1x) + 1 ; m2 = (mc-1)/desc%nr1x + 1
i1 = m2 + ( ir1p_(m1) - 1 ) * desc%nr2x
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
DO k = 1, desc%my_nr3p
f_in( k + it ) = f_aux( i1 )
i1 = i1 + desc%nr2x * my_nr1p_
ENDDO
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
it = it + nr3px
ENDDO
ENDDO
#if defined(__NON_BLOCKING_SCATTER)
IF( nproc3 > 1 ) CALL mpi_isend( f_in( ( iproc3 - 1 ) * sendsize + 1 ), &
sendsize, MPI_DOUBLE_COMPLEX, iproc3-1, &
me3, desc%comm3, sh( iproc3 ), ierr )
#endif
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
ENDDO
IF( nproc3 == 1 ) GO TO 20
!
! step two: communication
!
#if ! defined(__NON_BLOCKING_SCATTER)
CALL mpi_alltoall (f_in(1), sendsize, MPI_DOUBLE_COMPLEX, f_aux(1), &
sendsize, MPI_DOUBLE_COMPLEX, desc%comm3, ierr)
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
IF( abs(ierr) /= 0 ) CALL fftx_error__ ('fft_scatter', 'info<>0', abs(ierr) )
#else
DO iproc3 = 1, desc%nproc3
CALL mpi_irecv( f_aux( (iproc3-1)*sendsize + 1 ), sendsize, &
MPI_DOUBLE_COMPLEX, iproc3-1, MPI_ANY_TAG, &
desc%comm3, rh(iproc3), ierr )
! IF( abs(ierr) /= 0 ) CALL fftx_error__ ('fft_scatter', ' backward receive info<>0', abs(ierr) )
ENDDO
call mpi_waitall( desc%nproc3, sh, MPI_STATUSES_IGNORE, ierr )
call mpi_waitall( desc%nproc3, rh, MPI_STATUSES_IGNORE, ierr )
#endif
!
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
! step one: store contiguously the columns
!
offset = 0
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
DO iproc3 = 1, nproc3
kdest = ( iproc3 - 1 ) * sendsize
kfrom = offset
DO me2 = me2_start, me2_end
ip = desc%iproc(me2,me3)
DO k = 1, ncp_ (ip)
DO i = 1, desc%nr3p( iproc3 )
f_in ( kfrom + i ) = f_aux ( kdest + i )
ENDDO
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
kdest = kdest + nr3px
kfrom = kfrom + desc%nr3x
ENDDO
ENDDO
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
offset = offset + desc%nr3p( iproc3 )
ENDDO
! clean extra array elements in each stick
IF( desc%nr3x /= desc%nr3 ) THEN
DO k = 1, ncp_ ( desc%mype+1 )
DO i = desc%nr3, desc%nr3x
f_in( (k-1)*desc%nr3x + i ) = 0.0d0
END DO
END DO
END IF
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
20 CONTINUE
ENDIF
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
DEALLOCATE ( ncp_ , ir1p_ )
CALL stop_clock ('fft_scatt_yz')
#endif
RETURN
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
98 format ( 10 ('(',2f12.9,')') )
99 format ( 20 ('(',2f12.9,')') )
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
END SUBROUTINE fft_scatter_yz
!
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
!-----------------------------------------------------------------------
SUBROUTINE fft_scatter_tg ( desc, f_in, f_aux, nxx_, isgn )
!-----------------------------------------------------------------------
!
! task group wavefunction redistribution
!
! a) (isgn >0 ) From many-wfc partial-plane arrangement to single-wfc whole-plane one
!
! b) (isgn <0 ) From single-wfc whole-plane arrangement to many-wfc partial-plane one
!
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
! in both cases:
! f_in contains the input data, is overwritten with the desired output
! f_aux is used as working array, may contain garbage in output
!
IMPLICIT NONE
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
TYPE (fft_type_descriptor), INTENT(in) :: desc
INTEGER, INTENT(in) :: nxx_, isgn
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
COMPLEX (DP), INTENT(inout) :: f_in (nxx_), f_aux (nxx_)
INTEGER :: ierr
CALL start_clock ('fft_scatt_tg')
if ( abs (isgn) /= 3 ) call fftx_error__ ('fft_scatter_tg', 'wrong call', 1 )
#if defined(__MPI)
!
f_aux = f_in
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
if ( isgn > 0 ) then
CALL MPI_ALLTOALLV( f_aux, desc%tg_snd, desc%tg_sdsp, MPI_DOUBLE_COMPLEX, &
f_in, desc%tg_rcv, desc%tg_rdsp, MPI_DOUBLE_COMPLEX, desc%comm2, ierr)
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
IF( ierr /= 0 ) CALL fftx_error__( 'fft_scatter_tg', ' alltoall error 1 ', abs(ierr) )
else
CALL MPI_ALLTOALLV( f_aux, desc%tg_rcv, desc%tg_rdsp, MPI_DOUBLE_COMPLEX, &
f_in, desc%tg_snd, desc%tg_sdsp, MPI_DOUBLE_COMPLEX, desc%comm2, ierr)
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
IF( ierr /= 0 ) CALL fftx_error__( 'fft_scatter_tg', ' alltoall error 2 ', abs(ierr) )
end if
#endif
CALL stop_clock ('fft_scatt_tg')
RETURN
99 format ( 20 ('(',2f12.9,')') )
END SUBROUTINE fft_scatter_tg
!
!-----------------------------------------------------------------------
SUBROUTINE fft_scatter_tg_opt ( desc, f_in, f_out, nxx_, isgn )
!-----------------------------------------------------------------------
!
! task group wavefunction redistribution
!
! a) (isgn >0 ) From many-wfc partial-plane arrangement to single-wfc whole-plane one
!
! b) (isgn <0 ) From single-wfc whole-plane arrangement to many-wfc partial-plane one
!
! in both cases:
! f_in contains the input data
! f_out contains the output data
!
IMPLICIT NONE
TYPE (fft_type_descriptor), INTENT(in) :: desc
INTEGER, INTENT(in) :: nxx_, isgn
COMPLEX (DP), INTENT(inout) :: f_in (nxx_), f_out (nxx_)
INTEGER :: ierr
CALL start_clock ('fft_scatt_tg')
if ( abs (isgn) /= 3 ) call fftx_error__ ('fft_scatter_tg', 'wrong call', 1 )
#if defined(__MPI)
!
if ( isgn > 0 ) then
CALL MPI_ALLTOALLV( f_in, desc%tg_snd, desc%tg_sdsp, MPI_DOUBLE_COMPLEX, &
f_out, desc%tg_rcv, desc%tg_rdsp, MPI_DOUBLE_COMPLEX, desc%comm2, ierr)
IF( ierr /= 0 ) CALL fftx_error__( 'fft_scatter_tg', ' alltoall error 1 ', abs(ierr) )
else
CALL MPI_ALLTOALLV( f_in, desc%tg_rcv, desc%tg_rdsp, MPI_DOUBLE_COMPLEX, &
f_out, desc%tg_snd, desc%tg_sdsp, MPI_DOUBLE_COMPLEX, desc%comm2, ierr)
IF( ierr /= 0 ) CALL fftx_error__( 'fft_scatter_tg', ' alltoall error 2 ', abs(ierr) )
end if
#endif
CALL stop_clock ('fft_scatt_tg')
RETURN
99 format ( 20 ('(',2f12.9,')') )
END SUBROUTINE fft_scatter_tg_opt
!
!----------------------------------------------------------------------------
SUBROUTINE gather_real_grid ( dfft, f_in, f_out )
!----------------------------------------------------------------------------
!
! ... gathers a distributed real-space FFT grid to dfft%root, that is,
! ... the first processor of input descriptor dfft - version for real arrays
!
! ... REAL*8 f_in = distributed variable (dfft%nnr)
! ... REAL*8 f_out = gathered variable (dfft%nr1x*dfft%nr2x*dfft%nr3x)
!
IMPLICIT NONE
!
REAL(DP), INTENT(in) :: f_in (:)
REAL(DP), INTENT(inout):: f_out(:)
TYPE ( fft_type_descriptor ), INTENT(IN) :: dfft
!
#if defined(__MPI)
!
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
INTEGER :: proc, info, offset_in, offset_aux, ir3
! ... the following are automatic arrays
INTEGER :: displs(0:dfft%nproc-1), recvcount(0:dfft%nproc-1)
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
REAL(DP), ALLOCATABLE :: f_aux(:)
!
IF( size( f_in ) < dfft%nnr ) &
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
CALL fftx_error__( ' gather_real_grid ', ' f_in too small ', dfft%nnr-size( f_in ) )
!
CALL start_clock( 'rgather_grid' )
!write (6,*) 'gather grid ok 0 ', dfft%nproc, dfft%nproc2, dfft%nproc3
ALLOCATE ( f_aux(dfft%nr1x * dfft%nr2x * dfft%my_nr3p) )
! 1) gather within the comm2 communicator
displs = 0
DO proc =0, ( dfft%nproc2 -1 )
recvcount(proc) = dfft%nr1x * dfft%nr2p(proc+1)
if (proc > 0) displs(proc) = displs(proc-1) + recvcount(proc-1)
end do
offset_in = 1; offset_aux = 1
do ir3 = 1, dfft%my_nr3p
!write (6,*) 'gather grid ok 1 ir3=', ir3
info = 0
CALL MPI_GATHERV( f_in(offset_in) , recvcount(dfft%mype2), MPI_DOUBLE_PRECISION, &
f_aux(offset_aux), recvcount, displs, MPI_DOUBLE_PRECISION, dfft%root, &
dfft%comm2, info )
CALL fftx_error__( 'gather_real_grid', 'info<>0', info )
!write (6,*) 'gather grid ok 2 ir3=', ir3
offset_in = offset_in + dfft%nr1x * dfft%my_nr2p
offset_aux = offset_aux + dfft%nr1x * dfft%nr2
end do
! 2) gather within the comm3 communicator
displs = 0
DO proc = 0, ( dfft%nproc3 - 1 )
recvcount(proc) = dfft%nr1x * dfft%nr2x * dfft%nr3p(proc+1)
if (proc > 0) displs(proc) = displs(proc-1) + recvcount(proc-1)
ENDDO
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
info = 0
!write (6,*) 'gather grid ok 3'
CALL MPI_GATHERV( f_aux, recvcount(dfft%mype3), MPI_DOUBLE_PRECISION, &
f_out, recvcount, displs, MPI_DOUBLE_PRECISION, dfft%root, &
dfft%comm3, info )
!write (6,*) 'gather grid ok 4'
CALL fftx_error__( ' gather_real_grid', 'info<>0', info )
!
! ... the following check should be performed only on processor dfft%root
! ... otherwise f_out must be allocated on all processors even if not used
!
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
info = size( f_out ) - displs( dfft%nproc3-1 ) - recvcount( dfft%nproc3-1 )
IF( info < 0 ) &
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
CALL fftx_error__( ' gather_real_grid ', ' f_out too small ', -info )
!
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
DEALLOCATE ( f_aux )
!
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
CALL stop_clock( 'rgather_grid' )
!
#else
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
CALL fftx_error__(' gather_real_grid', 'do not use in serial execution', 1)
#endif
!
RETURN
!
END SUBROUTINE gather_real_grid
!----------------------------------------------------------------------------
SUBROUTINE gather_complex_grid ( dfft, f_in, f_out )
!----------------------------------------------------------------------------
!
! ... gathers a distributed real-space FFT grid to dfft%root, that is,
! ... the first processor of input descriptor dfft - complex arrays
!
! ... COMPLEX*16 f_in = distributed variable (dfft%nnr)
! ... COMPLEX*16 f_out = gathered variable (dfft%nr1x*dfft%nr2x*dfft%nr3x)
!
IMPLICIT NONE
!
COMPLEX(DP), INTENT(in) :: f_in (:)
COMPLEX(DP), INTENT(inout):: f_out(:)
TYPE ( fft_type_descriptor ), INTENT(IN) :: dfft
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
COMPLEX(DP), ALLOCATABLE :: f_aux(:)
!
#if defined(__MPI)
!
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
INTEGER :: proc, info, offset_in, offset_aux, ir3
! ... the following are automatic arrays
INTEGER :: displs(0:dfft%nproc-1), recvcount(0:dfft%nproc-1)
!
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
CALL start_clock( 'cgather_grid' )
!write (*,*) 'gcgather_grid size(f_in),dfft%nnr',size(f_in), dfft%nnr ; FLUSH(6)
IF( 2*size( f_in ) < dfft%nnr ) &
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
CALL fftx_error__( ' gather_complex_grid ', ' f_in too small ', dfft%nnr-size( f_in ) )
!
!write (6,*) 'gather grid ok 0 ', dfft%nproc, dfft%nproc2, dfft%nproc3
ALLOCATE ( f_aux(dfft%nr1x * dfft%nr2x * dfft%my_nr3p ) )
! 1) gather within the comm2 communicator
displs = 0
DO proc =0, ( dfft%nproc2 -1 )
recvcount(proc) = 2 * dfft%nr1x * dfft%nr2p(proc+1)
if (proc > 0) displs(proc) = displs(proc-1) + recvcount(proc-1)
end do
offset_in = 1; offset_aux = 1
do ir3 = 1, dfft%my_nr3p
info = 0
!write (6,*) 'gather grid ok 1 ir3=', ir3
CALL MPI_GATHERV( f_in(offset_in) , recvcount(dfft%mype2), MPI_DOUBLE_PRECISION, &
f_aux(offset_aux), recvcount, displs, MPI_DOUBLE_PRECISION, dfft%root, &
dfft%comm2, info )
CALL fftx_error__( 'gather_complex_grid', 'info<>0', info )
!write (6,*) 'gather grid ok 2 ir3=', ir3
offset_in = offset_in + dfft%nr1x * dfft%my_nr2p
offset_aux = offset_aux + dfft%nr1x * dfft%nr2
end do
! 2) gather within the comm3 communicator
displs = 0
DO proc = 0, ( dfft%nproc3 - 1 )
recvcount(proc) = 2 * dfft%nr1x * dfft%nr2x * dfft%nr3p(proc+1)
if (proc > 0) displs(proc) = displs(proc-1) + recvcount(proc-1)
ENDDO
!
! ... the following check should be performed only on processor dfft%root
! ... otherwise f_out must be allocated on all processors even if not used
!
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
!write (*,*) 'gcgather_grid 2*size(f_out)',2*size(f_out) ; FLUSH(6)
!write (*,*) 'gcgather_grid displ+recv',dfft%nproc3, displs(dfft%nproc3-1) + recvcount(dfft%nproc3-1); FLUSH(6)
info = 2*size( f_out ) - displs( dfft%nproc3 - 1 ) - recvcount( dfft%nproc3-1 ) ; FLUSH(6)
IF( info < 0 ) CALL fftx_error__( ' gather_complex_grid ', ' f_out too small ', -info )
info = 0
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
!write (6,*) 'gather grid ok 3'
CALL MPI_GATHERV( f_aux, recvcount(dfft%mype3), MPI_DOUBLE_PRECISION, &
f_out, recvcount, displs, MPI_DOUBLE_PRECISION, dfft%root, &
dfft%comm3, info )
!write (6,*) 'gather grid ok 4'
CALL fftx_error__( 'gather_complex_grid', 'info<>0', info )
!
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
DEALLOCATE ( f_aux )
!
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
CALL stop_clock( 'cgather_grid' )
!
#else
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
CALL fftx_error__('gather_complex_grid', 'do not use in serial execution', 1)
#endif
!
RETURN
!
END SUBROUTINE gather_complex_grid
!----------------------------------------------------------------------------
SUBROUTINE scatter_real_grid ( dfft, f_in, f_out )
!----------------------------------------------------------------------------
!
! ... scatters a real-space FFT grid from dfft%root, first processor of
! ... input descriptor dfft, to all others - opposite of "gather_grid"
!
! ... REAL*8 f_in = gathered variable (dfft%nr1x*dfft%nr2x*dfft%nr3x)
! ... REAL*8 f_out = distributed variable (dfft%nnr)
!
IMPLICIT NONE
!
REAL(DP), INTENT(in) :: f_in (:)
REAL(DP), INTENT(inout):: f_out(:)
TYPE ( fft_type_descriptor ), INTENT(IN) :: dfft
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
REAL(DP), ALLOCATABLE :: f_aux(:)
!
#if defined(__MPI)
!
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
INTEGER :: proc, info, offset_in, offset_aux, ir3
! ... the following are automatic arrays
INTEGER :: displs(0:dfft%nproc-1), sendcount(0:dfft%nproc-1)
!
!
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
CALL start_clock( 'rscatter_grid' )
!
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
!write (6,*) 'scatter grid ok 0'
ALLOCATE ( f_aux(dfft%nr1x * dfft%nr2x * dfft%my_nr3p) )
! 1) scatter within the comm3 communicator
displs = 0
DO proc = 0, ( dfft%nproc3 - 1 )
sendcount(proc) = dfft%nr1x * dfft%nr2x * dfft%nr3p(proc+1)
if (proc > 0) displs(proc) = displs(proc-1) + sendcount(proc-1)
ENDDO
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
info = size( f_in ) - displs( dfft%nproc3 - 1 ) - sendcount( dfft%nproc3 - 1 )
IF( info < 0 ) CALL fftx_error__( ' scatter_real_grid ', ' f_in too small ', -info )
info = 0
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
!write (6,*) 'scatter grid ok 1'
CALL MPI_SCATTERV( f_in, sendcount, displs, MPI_DOUBLE_PRECISION, &
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
f_aux, sendcount(dfft%mype3), MPI_DOUBLE_PRECISION, &
dfft%root, dfft%comm3, info )
!write (6,*) 'scatter grid ok 2'
CALL fftx_error__( 'scatter_real_grid', 'info<>0', info )
! 2) scatter within the comm2 communicator
IF( size( f_out ) < dfft%nnr ) &
CALL fftx_error__( ' scatter_real_grid ', ' f_out too small ', dfft%nnr-size( f_out ) )
!
displs = 0 ; f_out = 0.0D0
DO proc =0, ( dfft%nproc2 -1 )
sendcount(proc) = dfft%nr1x * dfft%nr2p(proc+1)
if (proc > 0) displs(proc) = displs(proc-1) + sendcount(proc-1)
end do
offset_in = 1 ; offset_aux = 1
do ir3 = 1, dfft%my_nr3p
info = 0
!write (6,*) 'scatter grid ok 3, ir3=', ir3
CALL MPI_SCATTERV( f_aux(offset_aux), sendcount, displs, MPI_DOUBLE_PRECISION, &
f_out(offset_in), sendcount(dfft%mype2), MPI_DOUBLE_PRECISION, &
dfft%root, dfft%comm2, info )
!write (6,*) 'scatter grid ok 4, ir3=', ir3
CALL fftx_error__( 'scatter_real_grid', 'info<>0', info )
offset_in = offset_in + dfft%nr1x * dfft%my_nr2p
offset_aux = offset_aux + dfft%nr1x * dfft%nr2
end do
!
DEALLOCATE ( f_aux )
!
CALL stop_clock( 'rscatter_grid' )
!
#else
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
CALL fftx_error__('scatter_real_grid', 'do not use in serial execution', 1)
#endif
!
RETURN
!
END SUBROUTINE scatter_real_grid
!----------------------------------------------------------------------------
SUBROUTINE scatter_complex_grid ( dfft, f_in, f_out )
!----------------------------------------------------------------------------
!
! ... scatters a real-space FFT grid from dfft%root, first processor of
! ... input descriptor dfft, to all others - opposite of "gather_grid"
!
! ... COMPLEX*16 f_in = gathered variable (dfft%nr1x*dfft%nr2x*dfft%nr3x)
! ... COMPLEX*16 f_out = distributed variable (dfft%nnr)
!
IMPLICIT NONE
!
COMPLEX(DP), INTENT(in) :: f_in (:)
COMPLEX(DP), INTENT(inout):: f_out(:)
TYPE ( fft_type_descriptor ), INTENT(IN) :: dfft
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
COMPLEX(DP), ALLOCATABLE :: f_aux(:)
!
#if defined(__MPI)
!
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
INTEGER :: proc, info, offset_in, offset_aux, ir3
! ... the following are automatic arrays
INTEGER :: displs(0:dfft%nproc-1), sendcount(0:dfft%nproc-1)
!
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
CALL start_clock( 'cscatter_grid' )
!
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
!write (6,*) 'scatter grid ok 0'
ALLOCATE ( f_aux(dfft%nr1x * dfft%nr2x * dfft%my_nr3p ) )
! 1) scatter within the comm3 communicator
displs = 0
DO proc = 0, ( dfft%nproc3 - 1 )
sendcount(proc) = 2 * dfft%nr1x * dfft%nr2x * dfft%nr3p(proc+1)
if (proc > 0) displs(proc) = displs(proc-1) + sendcount(proc-1)
ENDDO
!
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
!write(*,*) 'cscatter_grid 2*size(f_in) ', 2*size(f_in); FLUSH(6)
!write(*,*) 'cscatter_grid displ+send ', dfft%nproc3, displs(dfft%nproc3-1) + sendcount(dfft%nproc3-1); FLUSH(6)
info = 2*size( f_in ) - displs( dfft%nproc3 - 1 ) - sendcount( dfft%nproc3 - 1 )
IF( info < 0 ) &
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
CALL fftx_error__( ' scatter_complex_grid ', ' f_in too small ', -info )
!
info = 0
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
!write (6,*) 'scatter grid ok 1'
CALL MPI_SCATTERV( f_in, sendcount, displs, MPI_DOUBLE_PRECISION, &
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
f_aux, sendcount(dfft%mype3), MPI_DOUBLE_PRECISION, &
dfft%root, dfft%comm3, info )
!write (6,*) 'scatter grid ok 2'
CALL fftx_error__( ' scatter_complex_grid', 'info<>0', info )
! 2) scatter within the comm2 communicator
!write(*,*) 'cscatter_grid size(f_out), dfft%nnr ', size(f_out),dfft%nnr; FLUSH(6)
IF( size( f_out ) < dfft%nnr ) &
CALL fftx_error__( ' scatter_complex_grid ', ' f_out too small ', dfft%nnr-size( f_out ) )
!
displs = 0 ; f_out = 0.0D0
DO proc =0, ( dfft%nproc2 -1 )
sendcount(proc) = 2 * dfft%nr1x * dfft%nr2p(proc+1)
if (proc > 0) displs(proc) = displs(proc-1) + sendcount(proc-1)
end do
offset_in = 1 ; offset_aux = 1
do ir3 = 1, dfft%my_nr3p
info = 0
!write (6,*) 'scatter grid ok 3, ir3=', ir3
CALL MPI_SCATTERV( f_aux(offset_aux), sendcount, displs, MPI_DOUBLE_PRECISION, &
f_out(offset_in), sendcount(dfft%mype2), MPI_DOUBLE_PRECISION, &
dfft%root, dfft%comm2, info )
!write (6,*) 'scatter grid ok 4, ir3=', ir3
CALL fftx_error__( 'scatter_complex_grid', 'info<>0', info )
offset_in = offset_in + dfft%nr1x * dfft%my_nr2p
offset_aux = offset_aux + dfft%nr1x * dfft%nr2x
end do
!
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
! ... the following check should be performed only on processor dfft%root
! ... otherwise f_in must be allocated on all processors even if not used
!
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
DEALLOCATE ( f_aux )
!
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
CALL stop_clock( 'cscatter_grid' )
!
#else
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
CALL fftx_error__('scatter_complex_grid', 'do not use in serial execution', 1)
#endif
!
RETURN
!
END SUBROUTINE scatter_complex_grid
!
! ... "gather"-like subroutines
!
!-----------------------------------------------------------------------
SUBROUTINE cgather_sym( dfftp, f_in, f_out )
!-----------------------------------------------------------------------
!
! ... gather complex data for symmetrization (used in phonon code)
! ... Differs from gather_grid because mpi_allgatherv is used instead
! ... of mpi_gatherv - all data is gathered on ALL processors
! ... COMPLEX*16 f_in = distributed variable (nrxx)
! ... COMPLEX*16 f_out = gathered variable (nr1x*nr2x*nr3x)
!
IMPLICIT NONE
!
TYPE (fft_type_descriptor), INTENT(in) :: dfftp
COMPLEX(DP) :: f_in( : ), f_out(:)
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
COMPLEX(DP), ALLOCATABLE :: f_aux(:)
!
#if defined(__MPI)
!
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
INTEGER :: proc, info, offset_in, offset_aux, ir3
! ... the following are automatic arrays
INTEGER :: displs(0:dfftp%nproc-1), recvcount(0:dfftp%nproc-1)
!
CALL start_clock( 'cgather' )
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
ALLOCATE ( f_aux(dfftp%nr1x * dfftp%nr2x * dfftp%my_nr3p ) )
!
CALL MPI_BARRIER( dfftp%comm, info )
!
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
! 1) gather within the comm2 communicator
displs = 0
DO proc =0, ( dfftp%nproc2 -1 )
recvcount(proc) = 2 * dfftp%nr1x * dfftp%nr2p(proc+1)
if (proc > 0) displs(proc) = displs(proc-1) + recvcount(proc-1)
end do
offset_in = 1; offset_aux = 1
do ir3 = 1, dfftp%my_nr3p
info = 0
CALL MPI_ALLGATHERV( f_in(offset_in), recvcount(dfftp%mype2), MPI_DOUBLE_PRECISION, &
f_aux(offset_aux),recvcount, displs, MPI_DOUBLE_PRECISION, &
dfftp%comm2, info )
CALL fftx_error__( 'cgather_sym', 'info<>0', info )
offset_in = offset_in + dfftp%nr1x * dfftp%my_nr2p
offset_aux = offset_aux + dfftp%nr1x * dfftp%nr2x
end do
! 2) gather within the comm3 communicator
displs = 0
DO proc = 0, ( dfftp%nproc3 - 1 )
recvcount(proc) = 2 * dfftp%nr1x * dfftp%nr2x * dfftp%nr3p(proc+1)
if (proc > 0) displs(proc) = displs(proc-1) + recvcount(proc-1)
ENDDO
info = 0
CALL MPI_ALLGATHERV( f_aux, recvcount(dfftp%mype3), MPI_DOUBLE_PRECISION, &
f_out, recvcount, displs, MPI_DOUBLE_PRECISION, &
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
dfftp%comm3, info )
CALL fftx_error__( 'cgather_sym', 'info<>0', info )
!
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
! ... the following check should be performed only on processor dfft%root
! ... otherwise f_out must be allocated on all processors even if not used
!
DEALLOCATE ( f_aux )
!
CALL stop_clock( 'cgather' )
!
#else
CALL fftx_error__('cgather_sym', 'do not use in serial execution', 1)
#endif
!
RETURN
!
END SUBROUTINE cgather_sym
!
!
!-----------------------------------------------------------------------
SUBROUTINE cgather_sym_many( dfftp, f_in, f_out, nbnd, nbnd_proc, start_nbnd_proc )
!-----------------------------------------------------------------------
!
! ... Written by A. Dal Corso
!
! ... This routine generalizes cgather_sym, receiveng nbnd complex
! ... distributed functions and collecting nbnd_proc(dfftp%mype+1)
! ... functions in each processor.
! ... start_nbnd_proc(dfftp%mype+1), says where the data for each processor
! ... start in the distributed variable
! ... COMPLEX*16 f_in = distributed variable (nrxx,nbnd)
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
! ... COMPLEX*16 f_out = gathered variable (nr1x*nr2x*nr3x,nbnd_proc(dfftp%mype+1))
!
IMPLICIT NONE
!
TYPE (fft_type_descriptor), INTENT(in) :: dfftp
INTEGER :: nbnd, nbnd_proc(dfftp%nproc), start_nbnd_proc(dfftp%nproc)
COMPLEX(DP) :: f_in(dfftp%nnr,nbnd)
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
COMPLEX(DP) :: f_out(dfftp%nr1x*dfftp%nr2x*dfftp%nr3x,nbnd_proc(dfftp%mype+1))
COMPLEX(DP), ALLOCATABLE :: f_aux(:)
!
#if defined(__MPI)
!
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
INTEGER :: proc_, proc2_, proc3_, proc, info, offset_in, offset_aux, ir3, nr2px
INTEGER :: ibnd, jbnd, iii
INTEGER :: displs(0:dfftp%nproc-1), recvcount(0:dfftp%nproc-1)
!
!
CALL start_clock( 'cgather' )
!
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
ALLOCATE ( f_aux(dfftp%nr1x * dfftp%nr2x * dfftp%my_nr3p ) )
!
CALL MPI_BARRIER( dfftp%comm, info )
!
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
f_out = (0.d0,0.d0)
!write (*,*) 'enter cgather ', nbnd
!write (*,*) nbnd_proc
!write (*,*) start_nbnd_proc
!write (*,*) 'dfftp%nproc',dfftp%nproc
!do ibnd =1,3
!write (*,*) 'evc = ',ibnd
!write (*,*) f_in(1:3,ibnd)
!end do
nr2px = MAXVAL ( dfftp%nr2p ) ! maximum number of Y values to be disributed
DO proc_ = 0, dfftp%nproc - 1
! define the processor index of the two sub-communicators
proc2_ = dfftp%iproc2(proc_ + 1) -1 ; proc3_ = dfftp%iproc3(proc_ + 1) -1
!write (*,*) ' proc_, proc2_, proc3_, dfftp%mype2', proc_, proc2_, proc3_, dfftp%mype2
DO ibnd = 1, nbnd_proc(proc_ +1)
jbnd = start_nbnd_proc(proc_ +1) + ibnd - 1
!write (*,*) ' proc_, ibnd, jbnd ', proc_, ibnd, jbnd
f_aux(:) = (0.d0,0.d0)
! 1) gather within the comm2 communicator
displs = 0
DO proc =0, ( dfftp%nproc2 -1 )
recvcount(proc) = 2 * dfftp%nr1x * dfftp%nr2p(proc+1)
if (proc > 0) displs(proc) = displs(proc-1) + recvcount(proc-1)
end do
!write (*,*) 'dfftp%nr1x ', dfftp%nr1x
!write (*,*) 'dfftp%nr2x ', dfftp%nr2x
!write (*,*) 'dfftp%nr3x ', dfftp%nr3x
!write (*,*) 'dfftp%nr2p ', dfftp%nr2p, ' nnr3px ', nr2px
!write (*,*) 'recvcount ', recvcount(0:dfftp%nproc2 -1)
!write (*,*) 'displs ', displs(0:dfftp%nproc2 -1)
offset_in = 1; offset_aux = 1
do ir3 = 1, dfftp%my_nr3p
info = 0
CALL MPI_GATHERV( f_in(offset_in,jbnd), recvcount(dfftp%mype2), MPI_DOUBLE_PRECISION, &
f_aux(offset_aux), recvcount, displs, MPI_DOUBLE_PRECISION, &
proc2_, dfftp%comm2, info )
CALL fftx_error__( 'cgather_sym_many', 'info<>0', info )
offset_in = offset_in + dfftp%nr1x * dfftp%my_nr2p
offset_aux = offset_aux + dfftp%nr1x * dfftp%nr2x
end do
!write(*,*) ' -> f_in(...+1:...+3,jbnd) ',jbnd
!offset_in = 0
!do iii =1,2*dfftp%nr2x
! write(*,'(i4,3("(",2f10.7,") "))') iii, f_in(offset_in+1:offset_in+3, jbnd)
! offset_in = offset_in + dfftp%nr1x
!enddo
IF (dfftp%mype2==proc2_) THEN
! write(*,*) ' -> f_aux(...+1:...+3)'
! offset_aux = 0
! do iii =1,4*dfftp%nr2x
! write(*,'(i4,3("(",2f10.7,") "))') iii, f_aux(offset_aux+1:offset_aux+3)
! offset_aux = offset_aux + dfftp%nr1x
! enddo
! write(*,*) ' -> F_AUX(...+1:...+3)'
! offset_aux = 0
! do iii =1,dfftp%my_nr3p
! write(*,'(i4,3("(",2f10.7,") "))') iii, f_aux(offset_aux+1:offset_aux+3)
! offset_aux = offset_aux + dfftp%nr1x*dfftp%nr2x
! enddo
! 2) gather within the comm3 communicator
displs = 0
DO proc = 0, ( dfftp%nproc3 - 1 )
recvcount(proc) = 2 * dfftp%nr1x * dfftp%nr2x * dfftp%nr3p(proc+1)
if (proc > 0) displs(proc) = displs(proc-1) + recvcount(proc-1)
ENDDO
info = 0
CALL MPI_GATHERV( f_aux, recvcount(dfftp%mype3), MPI_DOUBLE_PRECISION, &
f_out(1,ibnd), recvcount, displs, MPI_DOUBLE_PRECISION, &
proc3_, dfftp%comm3, info )
CALL fftx_error__( 'cgather_sym_many', 'info<>0', info )
! IF (dfftp%mype3==proc3_) THEN
! write(*,*) ' -> f_out(...+1:...+3,ibnd) ',ibnd
! offset_in = 0
! do iii =1,dfftp%nr3x
! write(*,'(i4,3("(",2f10.7,") "))') iii, f_out(offset_in+1:offset_in+3, ibnd)
! offset_in = offset_in + dfftp%nr1x*dfftp%nr2x
! enddo
! END IF
END IF
END DO
END DO
!
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
DEALLOCATE ( f_aux )
!
CALL stop_clock( 'cgather' )
!
#else
CALL fftx_error__('cgather_sym_many', 'do not use in serial execution', 1)
#endif
!
RETURN
!
END SUBROUTINE cgather_sym_many
!
!----------------------------------------------------------------------------
SUBROUTINE cscatter_sym_many( dfftp, f_in, f_out, target_ibnd, nbnd, nbnd_proc, &
start_nbnd_proc )
!----------------------------------------------------------------------------
!
! ... Written by A. Dal Corso
!
! ... generalizes cscatter_sym. It assumes that each processor has
! ... a certain number of bands (nbnd_proc(dfftp%mype+1)). The processor
! ... that has target_ibnd scatters it to all the other processors
! ... that receive a distributed part of the target function.
! ... start_nbnd_proc(dfftp%mype+1) is used to identify the processor
! ... that has the required band
!
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
! ... COMPLEX*16 f_in = gathered variable (nr1x*nr2x*nr3x, nbnd_proc(dfftp%mype+1) )
! ... COMPLEX*16 f_out = distributed variable (nrxx)
!
IMPLICIT NONE
!
TYPE (fft_type_descriptor), INTENT(in) :: dfftp
INTEGER :: nbnd, nbnd_proc(dfftp%nproc), start_nbnd_proc(dfftp%nproc)
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
COMPLEX(DP) :: f_in(dfftp%nr1x*dfftp%nr2x*dfftp%nr3x,nbnd_proc(dfftp%mype+1))
COMPLEX(DP) :: f_out(dfftp%nnr)
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
COMPLEX(DP), ALLOCATABLE :: f_aux(:)
INTEGER :: target_ibnd
!
#if defined(__MPI)
!
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
INTEGER :: proc_, proc2_, proc3_, proc, info, offset_out, offset_aux, ir3
INTEGER :: displs(0:dfftp%nproc-1), sendcount(0:dfftp%nproc-1)
INTEGER :: ibnd, jbnd
!
CALL start_clock( 'cscatter_sym' )
!
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
ALLOCATE ( f_aux(dfftp%nr1x * dfftp%nr2x * dfftp%my_nr3p ) )
!
f_out = (0.0_DP, 0.0_DP)
!
CALL MPI_BARRIER( dfftp%comm, info )
!
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
DO proc_ = 0, dfftp%nproc - 1
! define the processor index of the two sub-communicators
proc2_ = dfftp%iproc2(proc_ + 1) -1 ; proc3_ = dfftp%iproc3(proc_ + 1) -1
DO ibnd = 1, nbnd_proc(proc_+1)
jbnd = start_nbnd_proc(proc_+1) + ibnd - 1
IF (jbnd/=target_ibnd) CYCLE
IF (dfftp%mype2==proc2_) THEN
! 1) scatter within the comm3 communicator
displs = 0
DO proc = 0, ( dfftp%nproc3 - 1 )
sendcount(proc) = 2 * dfftp%nr1x * dfftp%nr2x * dfftp%nr3p(proc+1)
if (proc > 0) displs(proc) = displs(proc-1) + sendcount(proc-1)
ENDDO
info = 0
CALL MPI_SCATTERV( f_in(1,ibnd), sendcount, displs, MPI_DOUBLE_PRECISION, &
f_aux, sendcount(dfftp%mype3), MPI_DOUBLE_PRECISION, &
proc3_, dfftp%comm3, info )
ELSE
f_aux=(0.d0,0.d0)
END IF
! 2) scatter within the comm2 communicator
displs = 0 ; f_out = 0.0D0
DO proc =0, ( dfftp%nproc2 -1 )
sendcount(proc) = 2 * dfftp%nr1x * dfftp%nr2p(proc+1)
if (proc > 0) displs(proc) = displs(proc-1) + sendcount(proc-1)
end do
offset_out = 1 ; offset_aux = 1
do ir3 = 1, dfftp%my_nr3p
info = 0
CALL MPI_SCATTERV( f_aux(offset_aux), sendcount, displs, MPI_DOUBLE_PRECISION, &
f_out(offset_out), sendcount(dfftp%mype2), MPI_DOUBLE_PRECISION, &
proc2_, dfftp%comm2, info )
CALL fftx_error__( 'gather_grid', 'info<>0', info )
offset_out = offset_out + dfftp%nr1x * dfftp%my_nr2p
offset_aux = offset_aux + dfftp%nr1x * dfftp%nr2x
end do
ENDDO
ENDDO
!
MAJOR restructuring of the FFTXlib library In real space processors are organized in a 2D pattern. Each processor owns data from a sub-set of Z-planes and a sub-set of Y-planes. In reciprocal space each processor owns Z-columns that belong to a sub set of X-values. This allows to split the processors in two sets for communication in the YZ and XY planes. In alternative, if the situation allows for it, a task group paralelization is used (with ntg=nyfft) where complete XY planes of ntg wavefunctions are collected and Fourier trasnformed in G space by different task-groups. This is preferable to the Z-proc + Y-proc paralleization if task group can be used because a smaller number of larger ammounts of data are transferred. Hence three types of fft are implemented: ! !! ... isgn = +-1 : parallel 3d fft for rho and for the potential ! !! ... isgn = +-2 : parallel 3d fft for wavefunctions ! !! ... isgn = +-3 : parallel 3d fft for wavefunctions with task group ! !! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R) !! ... fft along z using pencils (cft_1z) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along x using pencils (cft_1x) ! !! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega !! ... fft along x using pencils (cft_1x) !! ... transpose across nodes (fft_scatter_xy) !! ... fft along y using pencils (cft_1y) !! ... transpose across nodes (fft_scatter_yz) !! ... fft along z using pencils (cft_1z) ! ! If task_group_fft_is_active the FFT acts on a number of wfcs equal to ! dfft%nproc2, the number of Y-sections in which a plane is divided. ! Data are reshuffled by the fft_scatter_tg routine so that each of the ! dfft%nproc2 subgroups (made by dfft%nproc3 procs) deals with whole planes ! of a single wavefunciton. ! fft_type module heavily modified, a number of variables renamed with more intuitive names (at least to me), a number of more variables introduced for the Y-proc parallelization. Task_group module made void. task_group management is now reduced to the logical component fft_desc%have_task_groups of fft_type_descriptor type variable fft_desc. In term of interfaces, the 'easy' calling sequences are SUBROUTINE invfft/fwfft( grid_type, f, dfft, howmany ) !! where: !! !! **grid_type = 'Dense'** : !! inverse/direct fourier transform of potentials and charge density f !! on the dense grid (dfftp). On output, f is overwritten !! !! **grid_type = 'Smooth'** : !! inverse/direct fourier transform of potentials and charge density f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Wave'** : !! inverse/direct fourier transform of wave functions f !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'tgWave'** : !! inverse/direct fourier transform of wave functions f with task group !! on the smooth grid (dffts). On output, f is overwritten !! !! **grid_type = 'Custom'** : !! inverse/direct fourier transform of potentials and charge density f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **grid_type = 'CustomWave'** : !! inverse/direct fourier transform of wave functions f !! on a custom grid (dfft_exx). On output, f is overwritten !! !! **dfft = FFT descriptor**, IMPORTANT NOTICE: grid is specified only by dfft. !! No check is performed on the correspondence between dfft and grid_type. !! grid_type is now used only to distinguish cases 'Wave' / 'CustomWave' !! from all other cases Many more files modified. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13676 c92efa57-630b-4861-b058-cf58834340f0
2017-08-02 04:31:02 +08:00
DEALLOCATE ( f_aux )
!
CALL stop_clock( 'cscatter_sym' )
!
#else
CALL fftx_error__('cscatter_sym_many', 'do not use in serial execution', 1)
#endif
!
RETURN
!
END SUBROUTINE cscatter_sym_many
!=----------------------------------------------------------------------=!
END MODULE scatter_mod
!=----------------------------------------------------------------------=!
!
!
!---------------------------------------------------------------------
subroutine fftsort (n, ia)
!---------------------------------------------------------------------
! sort an integer array ia(1:n) into ascending order using heapsort algorithm.
! n is input, ia is replaced on output by its sorted rearrangement.
! create an index table (ind) by making an exchange in the index array
! whenever an exchange is made on the sorted data array (ia).
! in case of equal values in the data array (ia) the values in the
! index array (ind) are used to order the entries.
! if on input ind(1) = 0 then indices are initialized in the routine,
! if on input ind(1) != 0 then indices are assumed to have been
! initialized before entering the routine and these
! indices are carried around during the sorting process
!
! no work space needed !
! free us from machine-dependent sorting-routines !
!
! adapted from Numerical Recipes pg. 329 (new edition)
!
implicit none
!-input/output variables
integer :: n
integer :: ia (2,n)
!-local variables
integer :: i, ir, j, l
integer :: iia(2)
! nothing to order
if (n.lt.2) return
! initialize indices for hiring and retirement-promotion phase
l = n / 2 + 1
ir = n
10 continue
! still in hiring phase
if (l.gt.1) then
l = l - 1
iia(:) = ia (:,l)
! in retirement-promotion phase.
else
! clear a space at the end of the array
iia(:) = ia (:,ir)
!
! retire the top of the heap into it
ia (:,ir) = ia (:,1)
!
! decrease the size of the corporation
ir = ir - 1
! done with the last promotion
if (ir.eq.1) then
! the least competent worker at all !
ia (:,1) = iia(:)
!
return
endif
endif
! wheter in hiring or promotion phase, we
i = l
! set up to place iia in its proper level
j = l + l
!
do while (j.le.ir)
if (j.lt.ir) then
if (ia (1,j) .lt. ia (1,j + 1) ) then
j = j + 1
endif
endif
! demote iia
if (iia(1).lt.ia (1,j) ) then
ia (:,i) = ia (:,j)
i = j
j = j + j
else
! set j to terminate do-while loop
j = ir + 1
endif
enddo
ia (:,i) = iia(:)
goto 10
!
end subroutine fftsort