mirror of https://gitlab.com/QEF/q-e.git
Added workaround for a bug in MKL causing the code to crash in BFGS. Fixes #15
This commit is contained in:
parent
a175a3fd53
commit
2221882c19
|
@ -25,6 +25,9 @@ MODULE matrix_inversion
|
|||
! if "da" is specified and if the matrix is dimensioned 3x3,
|
||||
! it also returns the determinant in "da"
|
||||
!
|
||||
#if defined(_OPENMP)
|
||||
USE omp_lib
|
||||
#endif
|
||||
USE kinds, ONLY : DP
|
||||
IMPLICIT NONE
|
||||
INTEGER, INTENT(in) :: n
|
||||
|
@ -41,6 +44,27 @@ MODULE matrix_inversion
|
|||
! more work space
|
||||
INTEGER, SAVE :: lworkfact = 64
|
||||
!
|
||||
! WORKAROUND STARTS ==================================================
|
||||
!
|
||||
! Comment taken from v6.1 (https://github.com/fspiga/qe-gpu)
|
||||
!
|
||||
! There is a bug in several versions of MKL that will cause an hang in the multithreaded DGEMM for AVX2.
|
||||
! To avoid the bug, we have two options, set the number of MKL threads to one or force to use AVX instead of AVX2.
|
||||
! To force the single threads, we need to read the current number of threads with numt=mkl_get_max_threads(), set it
|
||||
! temporarely to one with "call mkl_set_num_threads(1)" and then resetting it to the original numt at the end of the function.
|
||||
! To force AVX, we can call mkl_cbwr_set(MKL_CBWR_AVX).
|
||||
!
|
||||
! There is currently no way to check if MKL is used for LA.
|
||||
! Since the size of the matrix to be inverted in PW is generally small,
|
||||
! we disable OpenMP in the calls below for the time being.
|
||||
!
|
||||
#ifdef _OPENMP
|
||||
INTEGER :: num_threads
|
||||
num_threads=omp_get_max_threads()
|
||||
CALL omp_set_num_threads(1)
|
||||
#endif
|
||||
! WORKAROUND ENDS ====================================================
|
||||
!
|
||||
IF ( PRESENT(da) ) THEN
|
||||
IF ( n == 3 ) THEN
|
||||
da = a(1,1)*(a(2,2)*a(3,3)-a(2,3)*a(3,2)) + &
|
||||
|
@ -72,13 +96,23 @@ MODULE matrix_inversion
|
|||
!
|
||||
lworkfact = INT (work(1)/n)
|
||||
DEALLOCATE ( work, ipiv )
|
||||
|
||||
! WORKAROUND STARTS ==================================================
|
||||
!
|
||||
! ... and now restrore the previous value.
|
||||
!
|
||||
#ifdef _OPENMP
|
||||
CALL omp_set_num_threads(num_threads)
|
||||
#endif
|
||||
! WORKAROUND ENDS ====================================================
|
||||
END SUBROUTINE invmat_r
|
||||
|
||||
SUBROUTINE invmat_c (n, a, a_inv, da)
|
||||
!-----------------------------------------------------------------------
|
||||
! as invmat_r, for a complex matrix
|
||||
!
|
||||
#if defined(_OPENMP)
|
||||
USE omp_lib
|
||||
#endif
|
||||
USE kinds, ONLY : DP
|
||||
IMPLICIT NONE
|
||||
INTEGER, INTENT(IN) :: n
|
||||
|
@ -95,6 +129,27 @@ MODULE matrix_inversion
|
|||
! more work space
|
||||
INTEGER, SAVE :: lworkfact = 64
|
||||
!
|
||||
! WORKAROUND STARTS ==================================================
|
||||
!
|
||||
! Comment taken from v6.1 (https://github.com/fspiga/qe-gpu)
|
||||
!
|
||||
! There is a bug in several versions of MKL that will cause an hang in the multithreaded DGEMM for AVX2.
|
||||
! To avoid the bug, we have two options, set the number of MKL threads to one or force to use AVX instead of AVX2.
|
||||
! To force the single threads, we need to read the current number of threads with numt=mkl_get_max_threads(), set it
|
||||
! temporarely to one with "call mkl_set_num_threads(1)" and then resetting it to the original numt at the end of the function.
|
||||
! To force AVX, we can call mkl_cbwr_set(MKL_CBWR_AVX).
|
||||
!
|
||||
! There is currently no way to check if MKL is used for LA.
|
||||
! Since the size of the matrix to be inverted in PW is generally small,
|
||||
! we disable OpenMP in the calls below for the time being.
|
||||
!
|
||||
#ifdef _OPENMP
|
||||
INTEGER :: num_threads
|
||||
num_threads=omp_get_max_threads()
|
||||
CALL omp_set_num_threads(1)
|
||||
#endif
|
||||
! WORKAROUND ENDS ====================================================
|
||||
!
|
||||
IF ( PRESENT(da) ) THEN
|
||||
IF (n == 3) THEN
|
||||
da = a(1,1)*(a(2,2)*a(3,3)-a(2,3)*a(3,2)) + &
|
||||
|
@ -127,6 +182,15 @@ MODULE matrix_inversion
|
|||
lworkfact = INT (work(1)/n)
|
||||
DEALLOCATE ( work, ipiv )
|
||||
!
|
||||
! WORKAROUND STARTS ==================================================
|
||||
!
|
||||
! ... and now restrore the previous value.
|
||||
!
|
||||
#ifdef _OPENMP
|
||||
CALL omp_set_num_threads(num_threads)
|
||||
#endif
|
||||
! WORKAROUND ENDS ====================================================
|
||||
!
|
||||
END SUBROUTINE invmat_c
|
||||
|
||||
END MODULE matrix_inversion
|
||||
|
|
Loading…
Reference in New Issue