mirror of https://github.com/QMCPACK/qmcpack.git
remove multi subtree
This commit is contained in:
parent
a3336246fb
commit
fc018300d2
|
@ -1,3 +0,0 @@
|
|||
build*
|
||||
.build*
|
||||
|
|
@ -1,657 +0,0 @@
|
|||
# -*-indent-tabs-mode:nil;c-basic-offset:2;tab-width:4;-*-
|
||||
# © Alfredo A. Correa 2020-2021
|
||||
|
||||
image: debian:testing
|
||||
|
||||
g++-latest:
|
||||
stage: test
|
||||
script:
|
||||
- export CXX="g++"
|
||||
- apt-get -qq update && apt-get -qq install --no-install-recommends -y --quiet $CXX cmake make libboost-test-dev libboost-timer-dev libblas-dev libfftw3-dev
|
||||
- ln --symbolic --force . ../multi
|
||||
- $CXX --version
|
||||
- mkdir build && cd build
|
||||
- cmake ..
|
||||
- cmake --build .
|
||||
- ctest --output-on-failure
|
||||
- make install
|
||||
|
||||
g++-latest-std20:
|
||||
stage: test
|
||||
script:
|
||||
- export CXX="g++"
|
||||
- apt-get update && apt-get install --no-install-recommends -y --quiet $CXX cmake make libboost-test-dev libboost-timer-dev libblas-dev libfftw3-dev
|
||||
- ln --symbolic --force . ../multi
|
||||
- $CXX --version
|
||||
- mkdir build && cd build
|
||||
- cmake .. -DCMAKE_CXX_STANDARD=20
|
||||
- cmake --build . --verbose
|
||||
- ctest --output-on-failure
|
||||
|
||||
g++-current-debug:
|
||||
stage: test
|
||||
script:
|
||||
- export CXX="g++"
|
||||
- apt-get update && apt-get install --no-install-recommends -y --quiet $CXX cmake make libboost-test-dev libboost-timer-dev libblas-dev libfftw3-dev
|
||||
- ln --symbolic --force . ../multi
|
||||
- $CXX --version
|
||||
- mkdir build && cd build
|
||||
- cmake .. -DCMAKE_BUILD_TYPE=Debug
|
||||
- cmake --build . --verbose
|
||||
- ctest --output-on-failure
|
||||
|
||||
g++-current-asan:
|
||||
stage: test
|
||||
script:
|
||||
- export CXX="g++"
|
||||
- apt-get update && apt-get install --no-install-recommends -y --quiet $CXX cmake make libboost-test-dev libboost-timer-dev libblas-dev libfftw3-dev
|
||||
- ln --symbolic --force . ../multi
|
||||
- $CXX --version
|
||||
- mkdir build && cd build
|
||||
- CXXFLAGS="-fsanitize=address" cmake .. -DCMAKE_BUILD_TYPE=Debug
|
||||
- cmake --build . --verbose
|
||||
- ctest --output-on-failure
|
||||
|
||||
g++-current-codecov:
|
||||
stage: test
|
||||
script:
|
||||
- export CXX="g++"
|
||||
- apt-get update && apt-get install --no-install-recommends -y --quiet $CXX cmake make libboost-test-dev libboost-timer-dev libblas-dev libfftw3-dev curl gcovr lcov
|
||||
- ln --symbolic --force . ../multi
|
||||
- $CXX --version
|
||||
- mkdir build && cd build
|
||||
- CXXFLAGS="-ftest-coverage -fprofile-arcs --coverage -fno-inline -fno-inline-small-functions -fno-default-inline" cmake .. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_EXE_LINKER_FLAGS="-lgcov --coverage" -DCMAKE_BUILD_TYPE=Debug
|
||||
- cmake --build . --verbose
|
||||
- ctest --output-on-failure -T Test -T Coverage
|
||||
- gcovr --xml-pretty --exclude-unreachable-branches --print-summary -o coverage.xml --root ${CI_PROJECT_DIR}
|
||||
- lcov --directory . --capture --output-file coverage.info
|
||||
- lcov --remove coverage.info '/usr/*' --output-file coverage.info
|
||||
- lcov --list coverage.info
|
||||
coverage: /^\s*lines:\s*\d+.\d+\%/
|
||||
artifacts:
|
||||
name: ${CI_JOB_NAME}-${CI_COMMIT_REF_NAME}-${CI_COMMIT_SHA}
|
||||
expire_in: 2 days
|
||||
reports:
|
||||
cobertura: build/coverage.xml
|
||||
|
||||
g++-current-release:
|
||||
stage: test
|
||||
script:
|
||||
- export CXX="g++"
|
||||
- apt-get update && apt-get install --no-install-recommends -y --quiet $CXX cmake make libboost-test-dev libboost-timer-dev libblas-dev libfftw3-dev
|
||||
- ln --symbolic --force . ../multi
|
||||
- $CXX --version
|
||||
- mkdir build && cd build
|
||||
- cmake .. -DCMAKE_BUILD_TYPE=Release
|
||||
- cmake --build . --verbose
|
||||
- ctest --output-on-failure
|
||||
|
||||
g++-7-std17:
|
||||
stage: test
|
||||
image: debian:stable-backports
|
||||
script:
|
||||
- export CXX="g++-7"
|
||||
- apt-get update && apt-get install --no-install-recommends -y --quiet $CXX cmake make libboost-test-dev libboost-timer-dev libblas-dev libfftw3-dev
|
||||
- ln --symbolic --force . ../multi
|
||||
- $CXX --version
|
||||
- mkdir build && cd build
|
||||
- cmake .. -DCMAKE_CXX_STANDARD=17
|
||||
- cmake --build .
|
||||
- ctest --output-on-failure
|
||||
|
||||
g++-8:
|
||||
stage: test
|
||||
image: debian:stable-backports
|
||||
script:
|
||||
- export CXX="g++-8"
|
||||
- apt-get update && apt-get install --no-install-recommends -y --quiet $CXX cmake make libboost-test-dev libboost-timer-dev libblas-dev libfftw3-dev
|
||||
- ln --symbolic --force . ../multi
|
||||
- $CXX --version
|
||||
- mkdir build && cd build
|
||||
- cmake ..
|
||||
- cmake --build .
|
||||
- ctest --output-on-failure
|
||||
|
||||
g++-9:
|
||||
stage: test
|
||||
script:
|
||||
- export CXX="g++-9"
|
||||
- apt-get update && apt-get install --no-install-recommends -y --quiet $CXX cmake make libboost-test-dev libboost-timer-dev libblas-dev libfftw3-dev
|
||||
- ln --symbolic --force . ../multi
|
||||
- $CXX --version
|
||||
- mkdir build && cd build
|
||||
- cmake ..
|
||||
- cmake --build . --verbose
|
||||
- ctest --output-on-failure
|
||||
|
||||
g++-9-std17:
|
||||
stage: test
|
||||
script:
|
||||
- export CXX="g++-9"
|
||||
- apt-get update && apt-get install --no-install-recommends -y --quiet $CXX cmake make libboost-test-dev libboost-timer-dev libblas-dev libfftw3-dev
|
||||
- ln --symbolic --force . ../multi
|
||||
- $CXX --version
|
||||
- mkdir build && cd build
|
||||
- cmake .. -DCMAKE_CXX_STANDARD=17
|
||||
- cmake --build . --verbose
|
||||
- ctest --output-on-failure
|
||||
|
||||
g++-9-openblas:
|
||||
stage: test
|
||||
script:
|
||||
- export CXX="g++-9"
|
||||
- apt-get update && apt-get install --no-install-recommends -y --quiet $CXX cmake make libboost-test-dev libboost-timer-dev libopenblas-dev libfftw3-dev
|
||||
- ln --symbolic --force . ../multi
|
||||
- $CXX --version
|
||||
- mkdir build && cd build
|
||||
- cmake ..
|
||||
- cmake --build . --verbose
|
||||
- ctest --output-on-failure
|
||||
|
||||
memcheck:
|
||||
stage: test
|
||||
script:
|
||||
- export CXX="g++-9"
|
||||
- apt-get update && apt-get install --no-install-recommends -y --quiet $CXX valgrind cmake make libboost-test-dev libboost-timer-dev libblas-dev libfftw3-dev
|
||||
- ln --symbolic --force . ../multi
|
||||
- $CXX --version
|
||||
- mkdir build && cd build
|
||||
- cmake ..
|
||||
- cmake --build . --verbose
|
||||
- ctest -T memcheck --output-on-failure
|
||||
|
||||
clang++-9:
|
||||
stage: test
|
||||
script:
|
||||
- export CXX="clang++-9"
|
||||
- apt-get update && apt-get install --no-install-recommends -y --quiet clang-9 cmake make libboost-test-dev libboost-timer-dev libblas-dev libfftw3-dev
|
||||
- ln --symbolic --force . ../multi
|
||||
- $CXX --version
|
||||
- mkdir build && cd build
|
||||
- cmake ..
|
||||
- cmake --build . --verbose
|
||||
- ctest --output-on-failure
|
||||
|
||||
clang++-9-std17:
|
||||
stage: test
|
||||
script:
|
||||
- export CXX="clang++-9"
|
||||
- apt-get update && apt-get install --no-install-recommends -y --quiet clang-9 cmake make libboost-test-dev libboost-timer-dev libblas-dev libfftw3-dev
|
||||
- ln --symbolic --force . ../multi
|
||||
- $CXX --version
|
||||
- mkdir build && cd build
|
||||
- cmake .. -DCMAKE_CXX_STANDARD=17
|
||||
- cmake --build . --verbose
|
||||
- ctest --output-on-failure
|
||||
|
||||
clang++-9-asan:
|
||||
stage: test
|
||||
script:
|
||||
- export CXX="clang++-9"
|
||||
- export ASAN_SYMBOLIZER_PATH=/usr/lib/llvm-9/bin/llvm-symbolizer
|
||||
- export CXXFLAGS="-fsanitize=undefined -fsanitize=address -fno-omit-frame-pointer"
|
||||
- apt-get update && apt-get install --no-install-recommends -y --quiet clang-9 llvm-9 cmake make libboost-test-dev libboost-timer-dev libblas-dev libfftw3-dev llvm
|
||||
- ln --symbolic --force . ../multi
|
||||
- $CXX --version
|
||||
- mkdir build && cd build
|
||||
- cmake ..
|
||||
- cmake --build . --verbose
|
||||
- export ASAN_OPTIONS="halt_on_error=1 detect_leaks=1"
|
||||
- ctest --output-on-failure
|
||||
|
||||
# clang 10 and clang 11 have a bug when compiling in c++17 mode
|
||||
clang++:
|
||||
stage: test
|
||||
script:
|
||||
- export CXX="clang++"
|
||||
- apt-get update && apt-get install --no-install-recommends -y --quiet clang cmake make libboost-test-dev libblas-dev
|
||||
- ln --symbolic --force . ../multi
|
||||
- $CXX --version
|
||||
- mkdir build && cd build
|
||||
- cmake ..
|
||||
- cmake --build . --verbose -- --quiet --no-print-directory
|
||||
- ctest --output-on-failure
|
||||
|
||||
clang++-tidy:
|
||||
stage: test
|
||||
script:
|
||||
- export CXX="clang++"
|
||||
- apt-get -qq update && apt-get -qq install --no-install-recommends -y --quiet clang clang-tidy cmake make libboost-test-dev libblas-dev libblas-dev liblapack-dev libfftw3-dev
|
||||
- ln --symbolic --force . ../multi
|
||||
- $CXX --version
|
||||
- clang-tidy --version
|
||||
- mkdir build && cd build
|
||||
- clang-tidy -checks=*,-fuchsia-default-arguments-calls,-fuchsia-statically-constructed-objects,-cppcoreguidelines-pro-type-vararg,-hicpp-vararg,-cppcoreguidelines-avoid-magic-numbers,-readability-magic-numbers,-cppcoreguidelines-macro-usage,-cppcoreguidelines-avoid-non-const-global-variables,-llvmlibc-implementation-in-namespace,-llvmlibc-callee-namespace,-llvmlibc-restrict-system-libc-headers,-cert-err58-cpp --warnings-as-errors=* --list-checks
|
||||
- cmake .. -DCMAKE_CXX_CLANG_TIDY="clang-tidy;-checks=*,-fuchsia-default-arguments-calls,-fuchsia-statically-constructed-objects,-fuchsia-overloaded-operator,-cppcoreguidelines-pro-type-vararg,-hicpp-vararg,-cppcoreguidelines-avoid-magic-numbers,-readability-magic-numbers,-cppcoreguidelines-macro-usage,-cppcoreguidelines-avoid-non-const-global-variables,-llvmlibc-implementation-in-namespace,-llvmlibc-callee-namespace,-llvmlibc-restrict-system-libc-headers,-cert-err58-cpp;--warnings-as-errors=*"
|
||||
- cmake --build . --verbose -- --quiet --no-print-directory
|
||||
- ctest --output-on-failure
|
||||
|
||||
clang++-std17:
|
||||
stage: test
|
||||
script:
|
||||
- export CXX="clang++"
|
||||
- apt-get update && apt-get install --no-install-recommends -y --quiet clang cmake make libboost-test-dev libblas-dev
|
||||
- ln --symbolic --force . ../multi
|
||||
- $CXX --version
|
||||
- mkdir build && cd build
|
||||
- cmake .. -DCMAKE_CXX_STANDARD=17
|
||||
- cmake --build . --verbose -- --quiet --no-print-directory
|
||||
- ctest --output-on-failure
|
||||
|
||||
clang++-11:
|
||||
stage: test
|
||||
script:
|
||||
- export CXX="clang++-11"
|
||||
- apt-get update && apt-get install --no-install-recommends -y --quiet clang-11 cmake make libboost-test-dev libblas-dev
|
||||
- ln --symbolic --force . ../multi
|
||||
- $CXX --version
|
||||
- mkdir build && cd build
|
||||
- cmake ..
|
||||
- cmake --build . --verbose -- --quiet --no-print-directory
|
||||
- ctest --output-on-failure
|
||||
|
||||
clang++-11-gcc9:
|
||||
stage: test
|
||||
image: vistart/cuda:10.2-ubuntu20.04
|
||||
script:
|
||||
- export CXX="clang++-11"
|
||||
- apt-get update
|
||||
- DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y --quiet gcc-9 clang-11 cmake make libboost-test-dev libblas-dev
|
||||
- ln --symbolic --force . ../multi
|
||||
- $CXX -v
|
||||
- mkdir build && cd build
|
||||
- cmake ..
|
||||
- cmake --build . --verbose -- --quiet --no-print-directory
|
||||
- ctest --output-on-failure
|
||||
|
||||
clang++-11-std17:
|
||||
stage: test
|
||||
script:
|
||||
- export CXX="clang++-11"
|
||||
- apt-get update && apt-get install --no-install-recommends -y --quiet clang-11 cmake make libboost-test-dev libblas-dev
|
||||
- ln --symbolic --force . ../multi
|
||||
- $CXX --version
|
||||
- mkdir build && cd build
|
||||
- cmake .. -DCMAKE_CXX_STANDARD=17
|
||||
- cmake --build . --verbose -- --quiet --no-print-directory
|
||||
- ctest --output-on-failure
|
||||
|
||||
#icc:
|
||||
# image: meteocima/dkr-intel
|
||||
# stage: test
|
||||
# script:
|
||||
# - . /opt/intel/bin/compilervars.sh intel64
|
||||
# - export CXX="icpc"
|
||||
# - apt-get update && apt-get install --no-install-recommends -y --quiet cmake make libboost-test-dev libblas-dev liblapack-dev libfftw3-dev
|
||||
# - ln --symbolic --force . ../multi
|
||||
# - $CXX -v
|
||||
# - mkdir build && cd build
|
||||
# - cmake .. -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON
|
||||
# - cmake --build . -- --quiet --no-print-directory
|
||||
# - export MKL_VERBOSE=1
|
||||
# - ctest --output-on-failure
|
||||
|
||||
#icc-nomkl:
|
||||
# image: meteocima/dkr-intel
|
||||
# stage: test
|
||||
# script:
|
||||
# - export CXX="/opt/intel/bin/icpc"
|
||||
# - apt-get update && apt-get install --no-install-recommends -y --quiet cmake make libboost-test-dev libblas-dev liblapack-dev libfftw3-dev
|
||||
# - ln --symbolic --force . ../multi
|
||||
# - $CXX -v
|
||||
# - mkdir build && cd build
|
||||
# - cmake .. -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON
|
||||
# - cmake --build . -- --quiet --no-print-directory
|
||||
# - export MKL_VERBOSE=1
|
||||
# - ctest --output-on-failure
|
||||
#
|
||||
#icc-memcheck:
|
||||
# image: meteocima/dkr-intel
|
||||
# stage: test
|
||||
# script:
|
||||
# - . /opt/intel/bin/compilervars.sh intel64
|
||||
# - export CXX="icpc"
|
||||
# - apt-get update && apt-get install --no-install-recommends -y --quiet cmake make valgrind libboost-test-dev libblas-dev liblapack-dev libfftw3-dev
|
||||
# - ln --symbolic --force . ../multi
|
||||
# - $CXX -v
|
||||
# - mkdir build && cd build
|
||||
# - cmake .. -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON -DMEMORYCHECK_COMMAND_OPTIONS="--leak-check=full --show-reachable=yes --track-origins=yes --malloc-fill=0xEE --free-fill=0xFF --leak-check-heuristics=none -v --track-fds=yes --error-limit=no --show-below-main=yes --read-var-info=yes --gen-suppressions=all"
|
||||
# - cmake --build . -- --quiet --no-print-directory
|
||||
# - export MKL_VERBOSE=1
|
||||
# - ctest -T memcheck --output-on-failure || (cat Testing/Temporary/MemoryChecker.*.log && exit 0)
|
||||
|
||||
#icc-std17:
|
||||
# image: meteocima/dkr-intel
|
||||
# stage: test
|
||||
# script:
|
||||
# - . /opt/intel/bin/compilervars.sh intel64
|
||||
# - export CXX="icpc"
|
||||
# - apt-get update && apt-get install --no-install-recommends -y --quiet cmake make libboost-test-dev libblas-dev liblapack-dev libfftw3-dev
|
||||
# - ln --symbolic --force . ../multi
|
||||
# - $CXX -v
|
||||
# - mkdir build && cd build
|
||||
# - cmake .. -DCMAKE_CXX_STANDARD=17 -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON
|
||||
# - cmake --build . -- --quiet --no-print-directory
|
||||
# - export MKL_VERBOSE=1
|
||||
# - ctest --output-on-failure
|
||||
|
||||
cuda-10.0:
|
||||
image: vistart/cuda:10.2-ubuntu20.04
|
||||
stage: test
|
||||
script:
|
||||
- export DEBIAN_FRONTEND=noninteractive
|
||||
- apt-get update && apt-get install --no-install-recommends -y --quiet gcc-8 g++-8 cmake make libboost-test-dev libboost-timer-dev libblas-dev libfftw3-dev
|
||||
- ln --symbolic --force . ../multi
|
||||
- nvcc --version
|
||||
- g++-8 --version
|
||||
- cd test
|
||||
- mkdir build && cd build
|
||||
- cmake -DENABLE_CUDA=1 -DCMAKE_CUDA_FLAGS="-ccbin=g++-8" ..
|
||||
- cmake --build . --verbose
|
||||
- ctest --output-on-failure
|
||||
|
||||
cuda-11.0:
|
||||
image: nvidia/cuda:11.0-devel
|
||||
stage: test
|
||||
script:
|
||||
- export DEBIAN_FRONTEND=noninteractive
|
||||
- apt-get update && apt-get install --no-install-recommends -y --quiet cmake make libboost-test-dev libboost-timer-dev libblas-dev libfftw3-dev
|
||||
- ln --symbolic --force . ../multi
|
||||
- nvcc --version
|
||||
- mkdir build && cd build
|
||||
- cmake .. -DENABLE_CUDA=1
|
||||
- cmake --build . --verbose
|
||||
- ctest --output-on-failure
|
||||
|
||||
cuda-11.1:
|
||||
image: nvidia/cuda:11.1-devel
|
||||
stage: test
|
||||
script:
|
||||
- export DEBIAN_FRONTEND=noninteractive
|
||||
- apt-get update && apt-get install --no-install-recommends -y --quiet cmake make libboost-test-dev libboost-timer-dev libblas-dev libfftw3-dev
|
||||
- ln --symbolic --force . ../multi
|
||||
- nvcc --version
|
||||
- mkdir build && cd build
|
||||
- cmake .. -DENABLE_CUDA=1
|
||||
- cmake --build . --verbose
|
||||
- ctest --output-on-failure
|
||||
|
||||
cuda-11.0-std17:
|
||||
image: nvidia/cuda:11.0-devel
|
||||
stage: test
|
||||
script:
|
||||
- export DEBIAN_FRONTEND=noninteractive
|
||||
- apt-get update && apt-get install --no-install-recommends -y --quiet cmake make libboost-test-dev libboost-timer-dev libblas-dev libfftw3-dev
|
||||
- ln --symbolic --force . ../multi
|
||||
- nvcc --version
|
||||
- mkdir build && cd build
|
||||
- cmake .. -DCMAKE_CXX_STANDARD=17 -DENABLE_CUDA=1
|
||||
- cmake --build . --verbose
|
||||
- ctest --output-on-failure
|
||||
|
||||
cuda-11.1-std17:
|
||||
image: nvidia/cuda:11.1-devel
|
||||
stage: test
|
||||
script:
|
||||
- export DEBIAN_FRONTEND=noninteractive
|
||||
- apt-get update && apt-get install --no-install-recommends -y --quiet cmake make libboost-test-dev libboost-timer-dev libblas-dev libfftw3-dev
|
||||
- ln --symbolic --force . ../multi
|
||||
- nvcc --version
|
||||
- mkdir build && cd build
|
||||
- cmake .. -DCMAKE_CXX_STANDARD=17 -DENABLE_CUDA=1
|
||||
- cmake --build . --verbose
|
||||
- ctest --output-on-failure
|
||||
|
||||
cuda-11.2-std17:
|
||||
image: nvidia/cuda:11.2.0-devel
|
||||
stage: test
|
||||
script:
|
||||
- export DEBIAN_FRONTEND=noninteractive
|
||||
- apt-get update && apt-get install --no-install-recommends -y --quiet cmake make libboost-test-dev libboost-timer-dev libblas-dev libfftw3-dev
|
||||
- ln --symbolic --force . ../multi
|
||||
- nvcc --version
|
||||
- mkdir build && cd build
|
||||
- cmake .. -DCMAKE_CXX_STANDARD=17 -DENABLE_CUDA=1
|
||||
- cmake --build . --verbose
|
||||
- ctest --output-on-failure
|
||||
|
||||
g++-cppcheck:
|
||||
stage: test
|
||||
script:
|
||||
- export CXX="g++"
|
||||
- apt-get -qq update && apt-get -qq install --no-install-recommends -y --quiet $CXX cmake make libboost-test-dev libboost-timer-dev libblas-dev liblapack-dev libfftw3-dev cppcheck
|
||||
- ln --symbolic --force . ../multi
|
||||
- $CXX --version
|
||||
- cppcheck --version
|
||||
- mkdir build && cd build
|
||||
- cmake -DCMAKE_CXX_CPPCHECK="cppcheck;--enable=all;--suppress=missingIncludeSystem;--suppress=unmatchedSuppression;--suppress=missingInclude;--inline-suppr;-D__align__;-DCUDARTAPI;--language=c++;--std=c++17;--error-exitcode=666" ..
|
||||
- cmake --build .
|
||||
- ctest --output-on-failure
|
||||
|
||||
qmcpack-g++:
|
||||
stage: test
|
||||
script:
|
||||
- apt-get -qq update && apt-get -qq install --no-install-recommends -y libblas-dev liblapack-dev libfftw3-dev libboost-serialization-dev libopenmpi-dev gfortran g++ cmake make git ca-certificates numdiff python3 python3-numpy python3-h5py python3-mpi4py python3-scipy libxml2-dev libhdf5-dev
|
||||
- git clone https://github.com/QMCPACK/qmcpack.git
|
||||
- cd qmcpack
|
||||
- git config --global user.email "alfredo.correa@gmail.com" && git config --global user.name "Alfredo Correa"
|
||||
- git rm -r external_codes/boost_multi/multi && git commit -m "remove multi subtree"
|
||||
- git subtree add --squash -P external_codes/boost_multi/multi $CI_REPOSITORY_URL $CI_COMMIT_BRANCH # e.g. https://gitlab.com/correaa/boost-multi.git
|
||||
- cd build
|
||||
- cmake -DCMAKE_C_COMPILER=mpicc -DCMAKE_CXX_COMPILER=mpicxx -DBUILD_AFQMC=1 -DBUILD_PPCONVERT=1 -DQMC_MIXED_PRECISION=1 -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_FLAGS="-Werror" -DMPIEXEC_PREFLAGS="--allow-run-as-root;--bind-to;none" ..
|
||||
- make ppconvert afqmc test_afqmc_matrix test_afqmc_numerics test_afqmc_slaterdeterminantoperations test_afqmc_walkers test_afqmc_hamiltonians test_afqmc_hamiltonian_operations test_afqmc_phmsd test_afqmc_wfn_factory test_afqmc_prop_factory test_afqmc_estimators qmc-afqmc-performance
|
||||
- ctest -R ppconvert --output-on-failure
|
||||
- ctest -R afqmc --output-on-failure
|
||||
|
||||
qmcpack-cuda-11.2-compileonly:
|
||||
image: nvidia/cuda:11.2.0-devel
|
||||
stage: test
|
||||
script:
|
||||
- export DEBIAN_FRONTEND=noninteractive
|
||||
- apt-get -qq update && apt-get -qq install --no-install-recommends -y libblas-dev liblapack-dev libfftw3-dev libboost-serialization-dev libopenmpi-dev gfortran g++ cmake make git ca-certificates numdiff python3 python3-numpy python3-h5py python3-mpi4py python3-scipy libxml2-dev libhdf5-dev
|
||||
- git clone https://github.com/QMCPACK/qmcpack.git
|
||||
- cd qmcpack
|
||||
- git config --global user.email "alfredo.correa@gmail.com" && git config --global user.name "Alfredo Correa"
|
||||
- git rm -r external_codes/boost_multi/multi && git commit -m "remove multi subtree"
|
||||
- git subtree add --squash -P external_codes/boost_multi/multi $CI_REPOSITORY_URL $CI_COMMIT_BRANCH # e.g. https://gitlab.com/correaa/boost-multi.git
|
||||
- cd build
|
||||
- cmake -DCMAKE_C_COMPILER=mpicc -DCMAKE_CXX_COMPILER=mpicxx -DBUILD_AFQMC=1 -DBUILD_PPCONVERT=1 -DQMC_CXX_STANDARD=17 -DENABLE_CUDA=1 ..
|
||||
- make ppconvert afqmc test_afqmc_matrix test_afqmc_numerics test_afqmc_slaterdeterminantoperations test_afqmc_walkers test_afqmc_hamiltonians test_afqmc_hamiltonian_operations test_afqmc_phmsd test_afqmc_wfn_factory test_afqmc_prop_factory test_afqmc_estimators qmc-afqmc-performance
|
||||
- ctest -R ppconvert --output-on-failure
|
||||
|
||||
inq-g++-latest:
|
||||
stage: test
|
||||
script:
|
||||
- apt-get update && apt-get install --no-install-recommends -y --quiet libblas-dev liblapack-dev libfftw3-dev libboost-serialization-dev libopenmpi-dev gfortran g++ pkg-config cmake make git ca-certificates
|
||||
- git clone --recurse-submodules --remote-submodules https://gitlab.com/npneq/inq.git
|
||||
- cd inq
|
||||
- cd external_libs/multi
|
||||
- git checkout $CI_COMMIT_BRANCH
|
||||
- cd ../..
|
||||
- mkdir build && cd build
|
||||
- CXX=mpic++ ../configure --prefix=$HOME
|
||||
- make
|
||||
- make install
|
||||
- ctest --output-on-failure
|
||||
|
||||
inq-cuda-11.2-compileonly:
|
||||
image: nvidia/cuda:11.2.0-devel
|
||||
stage: test
|
||||
script:
|
||||
- export DEBIAN_FRONTEND=noninteractive
|
||||
- apt-get update && apt-get install --no-install-recommends -y --quiet libblas-dev liblapack-dev libfftw3-dev libboost-serialization-dev libopenmpi-dev gfortran g++ pkg-config cmake make git ca-certificates
|
||||
- git clone --recurse-submodules https://gitlab.com/npneq/inq.git
|
||||
- cd inq
|
||||
- cd external_libs/multi
|
||||
- git checkout $CI_COMMIT_BRANCH
|
||||
- cd ../..
|
||||
- mkdir build && cd build
|
||||
- export CUDACXX=/usr/local/cuda/bin/nvcc
|
||||
- export CUDAFLAGS="$(for x in `mpic++ --showme:incdirs`; do echo -n -I$x" " ; done) -std=c++17 -DFMT_USE_UDL_TEMPLATE=0 -D_DISABLE_CUDA_SLOW -O0 --gpu-architecture sm_70 --expt-relaxed-constexpr --expt-extended-lambda --Werror=cross-execution-space-call --compiler-options -std=c++17,-O0,-Wall,-Wfatal-errors"
|
||||
- export LDFLAGS=$(for x in `mpic++ --showme:libdirs`; do echo -n -L$x" " ; done)
|
||||
- export LIBS=$(for x in `mpic++ --showme:libs`; do echo -n -l$x" " ; done)
|
||||
- $CUDACXX -V
|
||||
- ../configure --prefix=$PREFIX --enable-cuda --with-cuda-prefix=/usr/local/cuda
|
||||
- make silicon
|
||||
|
||||
g++-codecov-runner:
|
||||
stage: test
|
||||
tags:
|
||||
- intel_compiler
|
||||
script:
|
||||
- export CXX="g++"
|
||||
- $CXX --version
|
||||
- mkdir build && cd build
|
||||
- cmake --version
|
||||
- CXXFLAGS="-ftest-coverage -fprofile-arcs --coverage" cmake .. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON -DCMAKE_EXE_LINKER_FLAGS="-lgcov --coverage"
|
||||
- cmake --build . -j 12
|
||||
- ctest --output-on-failure -T Test -T Coverage # - gcovr --xml-pretty --exclude-unreachable-branches --print-summary -o coverage.xml --root ${CI_PROJECT_DIR}
|
||||
- lcov --directory . --capture --output-file coverage.info
|
||||
- lcov --remove coverage.info '/usr/*' --output-file coverage.info
|
||||
- lcov --list coverage.info
|
||||
- bash <(curl -s https://codecov.io/bash) -t 999feb5b-a599-4d02-b9c5-46d977247f3a || echo "Codecov did not collect coverage reports"
|
||||
|
||||
icpc-nomkl-runner:
|
||||
stage: test
|
||||
tags:
|
||||
- intel_compiler
|
||||
script:
|
||||
- export CXX="/opt/intel/system_studio_2020/bin/icpc"
|
||||
- $CXX --version
|
||||
- mkdir build && cd build
|
||||
- cmake .. -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON
|
||||
- cmake --build . -j 12
|
||||
- export MKL_VERBOSE=1
|
||||
- ctest --output-on-failure
|
||||
|
||||
icpc-runner:
|
||||
stage: test
|
||||
tags:
|
||||
- intel_compiler
|
||||
script:
|
||||
- . /opt/intel/system_studio_2020/bin/compilervars.sh intel64
|
||||
- export CXX="icpc"
|
||||
- $CXX --version
|
||||
- mkdir build && cd build
|
||||
- cmake .. -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON
|
||||
- cmake --build . -j 12
|
||||
- export MKL_VERBOSE=1
|
||||
- ctest --output-on-failure
|
||||
|
||||
#icpc-memcheck-runner:
|
||||
# stage: test
|
||||
# tags:
|
||||
# - intel_compiler
|
||||
# script:
|
||||
# - . /opt/intel/system_studio_2020/bin/compilervars.sh intel64
|
||||
# - export CXX="icpc"
|
||||
# - $CXX --version
|
||||
# - mkdir build && cd build
|
||||
# - cmake .. -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON
|
||||
# - cmake --build . -j 12
|
||||
# - ctest -T memcheck --output-on-failure || (cat Testing/Temporary/MemoryChecker.*.log && exit 0)
|
||||
|
||||
#icpc-std17-runner:
|
||||
# stage: test
|
||||
# tags:
|
||||
# - intel_compiler
|
||||
# script:
|
||||
# - . /opt/intel/system_studio_2020/bin/compilervars.sh intel64
|
||||
# - export CXX="icpc"
|
||||
# - $CXX --version
|
||||
# - mkdir build && cd build
|
||||
# - cmake .. -DCMAKE_CXX_STANDARD=17 -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON
|
||||
# - cmake --build . -j 12
|
||||
# - export MKL_VERBOSE=1
|
||||
# - ctest --output-on-failure
|
||||
|
||||
inq-nvcc-ompi:
|
||||
stage: test
|
||||
tags:
|
||||
- intel_compiler
|
||||
script:
|
||||
- export PREFIX=`mktemp -d`
|
||||
- git clone --recurse-submodules https://gitlab.com/npneq/inq.git
|
||||
- cd inq
|
||||
- cd external_libs/multi
|
||||
- git checkout $CI_COMMIT_BRANCH
|
||||
- cd ../..
|
||||
- mkdir build && cd build
|
||||
- export CUDACXX=/usr/local/cuda/bin/nvcc
|
||||
- export CUDAFLAGS="$(for x in `mpic++ --showme:incdirs`; do echo -n -I$x" " ; done) -std=c++17 -DFMT_USE_UDL_TEMPLATE=0 -D_DISABLE_CUDA_SLOW -O3 --gpu-architecture sm_70 --expt-relaxed-constexpr --expt-extended-lambda --Werror=cross-execution-space-call --compiler-options -Ofast,-std=c++17,-Wall,-Wfatal-errors"
|
||||
- export LDFLAGS=$(for x in `mpic++ --showme:libdirs`; do echo -n -L$x" " ; done)
|
||||
- export LIBS=$(for x in `mpic++ --showme:libs`; do echo -n -l$x" " ; done)
|
||||
- $CUDACXX -V
|
||||
- ../configure --prefix=$PREFIX --enable-cuda --with-cuda-prefix=/usr/local/cuda
|
||||
- make -j8
|
||||
- make -j8 install
|
||||
- ctest --output-on-failure --timeout 600
|
||||
- cd src; INQ_EXEC_ENV="mpirun --oversubscribe -np 4" ctest --output-on-failure --timeout 600; cd ..
|
||||
- rm -rf $PREFIX
|
||||
|
||||
#blas&fft:
|
||||
# stage: test
|
||||
# script:
|
||||
# - perl -pi -e 's/main/main\ contrib\ non-free/g' /etc/apt/sources.list
|
||||
# - apt update --quiet
|
||||
# - DEBIAN_FRONTEND=noninteractive apt install --no-install-recommends --assume-yes --quiet libboost-test-dev libboost-timer-dev libtbb-dev libboost-serialization-dev libboost-iostreams-dev librange-v3-dev valgrind
|
||||
# - DEBIAN_FRONTEND=noninteractive apt install --no-install-recommends --assume-yes --quiet clang pkg-config libblas-dev libblas64-dev libfftw3-dev nvidia-cuda-toolkit
|
||||
# - ln --symbolic --force . ../multi
|
||||
# - export CXXX="clang++ -x c++"
|
||||
# - export CXXFLAGS="-Wall -Wextra -Wpedantic -O3 -lcudart -lfftw3 -lcublas -lcufft -lboost_timer -lboost_unit_test_framework `pkg-config --libs blas`"
|
||||
# - export CXX="${CXXX} ${CXXFLAGS}"
|
||||
# - $CXX --version
|
||||
# - cd adaptors/blas
|
||||
# - for a in ./*.hpp; do echo $a; $CXX $a || exit; done;
|
||||
# - cd tests
|
||||
# - for a in ./*.cpp; do echo $a; $CXX $a || exit; done;
|
||||
# - cd ..
|
||||
# - cd ../..
|
||||
# - cd adaptors
|
||||
# - sh ./fftw.hpp
|
||||
# - $CXX fft.hpp
|
||||
|
||||
#blas&fftGPU-11:
|
||||
# stage: build
|
||||
# tags:
|
||||
# - cuda_gpu
|
||||
# stage: test
|
||||
# script:
|
||||
# - export PATH=/usr/local/cuda-11.0/bin:$PATH #export PATH=/usr/local/cuda/bin:$PATH
|
||||
# - export LD_LIBRARY_PATH=/usr/local/cuda-11.0/lib64:$LD_LIBRARY_PATH
|
||||
# - export CXXX="clang++ -x c++"
|
||||
# - export CXXFLAGS="`#-Wall -Wextra -Wpedantic` -Ofast -Wl,-rpath=/usr/local/cuda/lib64 -L/usr/local/cuda-11.0/lib64 -I/usr/local/cuda-11.0/include -lcudart -lfftw3 -lcublas -lcufft -lboost_timer -lboost_unit_test_framework `pkg-config --libs blas` "
|
||||
# - export CXX="${CXXX} ${CXXFLAGS}"
|
||||
# - $CXX --version
|
||||
# - cd adaptors/blas
|
||||
# - for a in ./*.hpp; do echo $a; sh $a || exit; echo "\n"; done;
|
||||
# - cd tests
|
||||
# - for a in ./*.cpp; do echo $a; sh $a || exit; echo "\n"; done;
|
||||
# - cd ..
|
||||
# - cd ../..
|
||||
# - cd adaptors
|
||||
# - sh ./fftw.hpp
|
||||
# - sh ./fft.hpp
|
||||
|
||||
#blas&fftGPU:
|
||||
# stage: build
|
||||
# tags:
|
||||
# - cuda_gpu
|
||||
# stage: test
|
||||
# script:
|
||||
# - export PATH=/usr/local/cuda/bin:$PATH #export PATH=/usr/local/cuda/bin:$PATH
|
||||
# - export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
|
||||
# - export CXXX="clang++ -x c++"
|
||||
# - export CXXFLAGS="`#-Wall -Wextra -Wpedantic` -Ofast -Wl,-rpath=/usr/local/cuda/lib64 -L/usr/local/cuda/lib64 -I/usr/local/cuda/include -lcudart -lfftw3 -lcublas -lcufft -lboost_timer -lboost_unit_test_framework `pkg-config --libs blas` "
|
||||
# - export CXX="${CXXX} ${CXXFLAGS}"
|
||||
# - $CXX --version
|
||||
# - cd adaptors/blas
|
||||
# - for a in ./*.hpp; do echo $a; sh $a || exit; echo "\n"; done;
|
||||
# - cd tests
|
||||
# - for a in ./*.cpp; do echo $a; sh $a || exit; echo "\n"; done;
|
||||
# - cd ..
|
||||
# - cd ../..
|
||||
# - cd adaptors
|
||||
# - sh ./fftw.hpp
|
||||
# - sh ./fft.hpp
|
||||
|
|
@ -1,72 +0,0 @@
|
|||
cmake_minimum_required(VERSION 3.11)
|
||||
project(multi VERSION 0.76.0
|
||||
DESCRIPTION "A header only C++ library that provides multidimensional array access to contiguous or regularly contiguous memory (or ranges)."
|
||||
HOMEPAGE_URL "https://gitlab.com/correaa/boost-multi"
|
||||
LANGUAGES CXX)
|
||||
|
||||
#set(CMAKE_CXX_STANDARD 14)
|
||||
#set(CMAKE_CXX_STANDARD_REQUIRED True)
|
||||
#set(CMAKE_CXX_EXTENSIONS OFF)
|
||||
|
||||
include(GNUInstallDirs)
|
||||
|
||||
add_library(${PROJECT_NAME} INTERFACE)
|
||||
|
||||
target_include_directories(
|
||||
${PROJECT_NAME}
|
||||
INTERFACE $<BUILD_INTERFACE:${${PROJECT_NAME}_SOURCE_DIR}/include>
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>)
|
||||
|
||||
target_compile_features(${PROJECT_NAME} INTERFACE cxx_std_14)
|
||||
|
||||
enable_testing()
|
||||
find_program(MEMORYCHECK_COMMAND valgrind)
|
||||
set(MEMORYCHECK_COMMAND_OPTIONS "--leak-check=full --error-exitcode=1")
|
||||
include(CTest)
|
||||
add_subdirectory(test)
|
||||
|
||||
install(TARGETS ${PROJECT_NAME}
|
||||
EXPORT ${PROJECT_NAME}_Targets
|
||||
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
|
||||
|
||||
include(CMakePackageConfigHelpers)
|
||||
write_basic_package_version_file("${PROJECT_NAME}ConfigVersion.cmake"
|
||||
VERSION ${PROJECT_VERSION}
|
||||
COMPATIBILITY SameMajorVersion)
|
||||
|
||||
configure_package_config_file(
|
||||
"${PROJECT_SOURCE_DIR}/cmake/multiConfig.cmake.in"
|
||||
"${PROJECT_BINARY_DIR}/${PROJECT_NAME}Config.cmake"
|
||||
INSTALL_DESTINATION
|
||||
${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/cmake)
|
||||
|
||||
install(EXPORT ${PROJECT_NAME}_Targets
|
||||
FILE ${PROJECT_NAME}Targets.cmake
|
||||
NAMESPACE boost::multi::
|
||||
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/cmake)
|
||||
|
||||
install(FILES "${PROJECT_BINARY_DIR}/${PROJECT_NAME}Config.cmake"
|
||||
"${PROJECT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake"
|
||||
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/cmake)
|
||||
|
||||
#install(DIRECTORY ${PROJECT_SOURCE_DIR}/include/SI DESTINATION include)
|
||||
install(FILES ${PROJECT_SOURCE_DIR}/array_ref.hpp ${PROJECT_SOURCE_DIR}/array.hpp DESTINATION include/multi)
|
||||
install(DIRECTORY ${PROJECT_SOURCE_DIR}/detail DESTINATION include/multi)
|
||||
install(DIRECTORY ${PROJECT_SOURCE_DIR}/adaptors DESTINATION include/multi)
|
||||
|
||||
add_subdirectory(adaptors/blas)
|
||||
|
||||
#to install this project:
|
||||
#cmake .. -DCMAKE_INSTALL_PREFIX:PATH=$HOME
|
||||
#cmake --build . --config Release --target test --target install -- -j $(nproc)
|
||||
|
||||
# to use this project do
|
||||
#project("Your project")
|
||||
#find_package(boost-multi CONFIG REQUIRED)
|
||||
#add_executable(${PROJECT_NAME} src/your_main.cpp)
|
||||
#target_link_libraries(${PROJECT_NAME} boost-multi::boost-multi)
|
||||
|
||||
|
||||
|
|
@ -1,671 +0,0 @@
|
|||
<!--
|
||||
(pandoc `#--from gfm` --to html --standalone --metadata title=" " $0 > $0.html) && firefox --new-window $0.html; sleep 5; rm $0.html; exit
|
||||
-->
|
||||
# [Boost.]Multi
|
||||
|
||||
(not an official Boost library)
|
||||
|
||||
_© Alfredo A. Correa, 2018-2021_
|
||||
|
||||
`Multi` provides multidimensional array access to contiguous or regularly contiguous memory (or ranges).
|
||||
It shares the goals of [Boost.MultiArray](https://www.boost.org/doc/libs/1_69_0/libs/multi_array/doc/index.html),
|
||||
although the code is completely independent and the syntax has slight differences or has been extended.
|
||||
`Multi` and `Boost.MultiArray` types can be used interchangeably for the most part, they differ in the semantics of reference and value types.
|
||||
|
||||
Multi aims to simplify the semantics of Boost.MultiArray and make it more compatible with the Standard (STL) Algorithms and special memory.
|
||||
It requires C++14.
|
||||
|
||||
Some features:
|
||||
|
||||
* Arbitrary pointer types (minimal requirements)
|
||||
* Simplified implementation (~1200 lines)
|
||||
* Fast access of subarrays (view) types
|
||||
* Value semantics of multi-dimensional array container
|
||||
* Better semantics of subarray (view) types
|
||||
* Interoperability with other libraries, STL, ranges, etc.
|
||||
|
||||
(Do not confuse this library with Boost.MultiArray or Boost.MultiIndex.)
|
||||
|
||||
|
||||
## Contents
|
||||
[[_TOC_]]
|
||||
|
||||
## Installation and Tests
|
||||
|
||||
`Multi` doesn't require installation; a single `#include<multi/array.hpp>` is enough to use the full core library.
|
||||
`Multi`'s _only_ dependency is the standard C++ library.
|
||||
|
||||
It is important to compile programs that use the library with a decent level of optimization (e.g. `-O2`) to avoid slowdown if individual element access is intensively used.
|
||||
For example, when testing speed, please make sure that you are compiling in release mode (`-DNDEBUG`) and with optimizations (`-O3`),
|
||||
if your test involves mathematical operations add arithmetic optimizations (`-Ofast`) to compare with Fortran code.
|
||||
|
||||
A CMake build system is provided to automatically run basic tests.
|
||||
Tests do depend on Boost.Test.
|
||||
|
||||
```bash
|
||||
git clone https://gitlab.com/correaa/boost-multi.git multi
|
||||
cd multi
|
||||
```
|
||||
```bash
|
||||
#export CXX="nvcc -DBOOST_PP_VARIADICS=1 -x cu -O3" #optional spec. compiler
|
||||
mkdir -p test/build
|
||||
cd test/build
|
||||
cmake ..
|
||||
make -j
|
||||
make test -j
|
||||
```
|
||||
|
||||
The code is developed on `clang` (10.0), `gcc` (9.3) and `nvcc` 11 compilers, and [tested regularly](https://gitlab.com/correaa/boost-multi/pipelines) with clang 9.0, NVCC 10.1, Intel (19.1), and PGI(nvc++) 20.7 compilers.
|
||||
For detailed compilation instructions of test see the Continuous Integration (CI) definition file https://gitlab.com/correaa/boost-multi/-/blob/master/.gitlab-ci.yml
|
||||
|
||||
## Types
|
||||
|
||||
* `multi::array<T, D, A>`: Array of dimension `D`, it has value semantics if `T` has value semantics. Memory is requested by allocator of type `A`, should support stateful allocators.
|
||||
* `multi::array_ref<T, D, P = T*>`: Array interpretation of a random access range, usually a memory block. It has reference semantics. Thanks to (non-virtual) inheritance an `array<T, D, A>` is-a `array_ref<T, D, A::pointer>`.
|
||||
* Other derived "unspecified types" fulfil a (still loosely defined) `MultiArrayView` concept; they are obtained, for example, by taking partial indices or rotations (transpositions). These reference types cannot be stored except through lifetime extension with `auto&&`. Due to language limitations, `auto` will not deduce a corresponding value-semantics type; for this reason it is necessary to use a "decay" idiom to obtain a value object.
|
||||
* `MultiArrayView<T,D,P>::(const_)iterator`: Iterator to subarrays of dimension `D - 1`. For `D == 1` this is an iterator to an element. These types are generated by the `begin` and `end` functions.
|
||||
* `MultiArrayView<T, D, P>::(const_)reference`: Reference to subarrays of dimension `D - 1`. For `D > 1` these are not true C++ references but types that emulate them (with reference semantics), therefore `auto` is not well behaved. For `D==1` this is a true C++ reference to an element. These types are generated by dereferencing iterators, e.g. `*begin(MA)`.
|
||||
|
||||
## Basic Usage
|
||||
|
||||
Declare an array specifying the element type and the dimension.
|
||||
Elements can be input with nested braced notation.
|
||||
```cpp
|
||||
multi::array<double, 2> A = {
|
||||
{1, 2, 3},
|
||||
{4, 5, 6}
|
||||
};
|
||||
```
|
||||
|
||||
The size is automatically deduced; the first dimension are the (two) "rows" above.
|
||||
|
||||
```cpp
|
||||
assert( A.size()==2 );
|
||||
assert( std::get<1>(A.sizes()) == 3 );
|
||||
```
|
||||
|
||||
The value of an array can be copied, moved, and compared.
|
||||
Copies are equal but independent.
|
||||
```cpp
|
||||
multi::array<double, 2> B = A;
|
||||
assert( extensions(B) == extensions(A) );
|
||||
assert( B[0][1] == A[0][1] );
|
||||
assert( &B[0][1] != &A[0][1] );
|
||||
assert( B == A );
|
||||
```
|
||||
|
||||
Arrays can be initialized by the size alone, in which case the element values are default constructed:
|
||||
|
||||
```cpp
|
||||
multi::array<double, 3> C({3, 4, 5}); // 3*4*5 = 60 elements
|
||||
```
|
||||
|
||||
Arrays can be passed by value or by reference, most of the time they should be passed through generic parameters.
|
||||
Most useful functions work on the concept of an array rather than on a concrete type.
|
||||
|
||||
```cpp
|
||||
template<class ArrayDouble2D> // instead of the overly specific argument multi::array<double, 2>
|
||||
double const& element_1_1(ArrayDouble2D const& m){return m[1][1];}
|
||||
...
|
||||
assert( element_1_1(A) == A[1][1] );
|
||||
```
|
||||
|
||||
Generic function arguments that are not intended to be modified are passed by `const&`; otherwise they are passed by forwarding reference `&&`.
|
||||
In this way the functions can be called on subblocks of larger matrices.
|
||||
|
||||
```cpp
|
||||
assert( &element_1_1(C[0]) == &C[0][1][1] );
|
||||
```
|
||||
|
||||
## Advanced Usage
|
||||
|
||||
We create a static C-array of `double`s, and refer to it via a bidimensional array `multi::array_ref<double, 2>`.
|
||||
|
||||
```cpp
|
||||
#include "../array_ref.hpp"
|
||||
#include "../array.hpp"
|
||||
|
||||
#include<algorithm> // for sort
|
||||
#include<iostream> // for print
|
||||
|
||||
namespace multi = boost::multi;
|
||||
using std::cout; using std::cerr;
|
||||
|
||||
int main(){
|
||||
double d2D[4][5] = {
|
||||
{150, 16, 17, 18, 19},
|
||||
{ 30, 1, 2, 3, 4},
|
||||
{100, 11, 12, 13, 14},
|
||||
{ 50, 6, 7, 8, 9}
|
||||
};
|
||||
multi::array_ref<double, 2> d2D_ref{&d2D[0][0], {4, 5}};
|
||||
...
|
||||
```
|
||||
|
||||
Note that the syntax of creating a reference array involves passing the pointer to a memory block (20 elements here) and the logical dimensions of that memory block (4 by 5 here).
|
||||
|
||||
Next we print the elements in a way that corresponds to the logical arrangement:
|
||||
|
||||
```cpp
|
||||
...
|
||||
for(auto i : d2D_ref.extension(0)){
|
||||
for(auto j : d2D_ref.extension(1))
|
||||
cout << d2D_ref[i][j] <<' ';
|
||||
cout <<'\n';
|
||||
}
|
||||
...
|
||||
```
|
||||
|
||||
This will output:
|
||||
|
||||
> ```cpp
|
||||
> 150 16 17 18 19
|
||||
> 30 1 2 3 4
|
||||
> 100 11 12 13 14
|
||||
> 50 6 7 8 9
|
||||
> ```
|
||||
|
||||
It is sometimes said (by Sean Parent) that the whole of STL algorithms can be seen as intermediate pieces to implement `std::stable_sort`.
|
||||
Presumably, if one can sort over a range, one can perform any other standard algorithm.
|
||||
|
||||
```cpp
|
||||
...
|
||||
std::stable_sort( begin(d2D_ref), end(d2D_ref) );
|
||||
...
|
||||
```
|
||||
|
||||
If we print this we will get
|
||||
|
||||
> ```cpp
|
||||
> 30 1 2 3 4
|
||||
> 50 6 7 8 9
|
||||
> 100 11 12 13 14
|
||||
> 150 16 17 18 19
|
||||
> ```
|
||||
|
||||
|
||||
The array has been changed to be in row-based lexicographical order.
|
||||
Since the sorted array is a reference to the original data, the original array has changed.
|
||||
|
||||
```cpp
|
||||
...
|
||||
assert( d2D[1][1] == 6 );
|
||||
...
|
||||
```
|
||||
|
||||
(Note that `std::*sort` cannot be applied directly to a multidimensional C-array or to Boost.MultiArray types.)
|
||||
|
||||
If we want to order the matrix on a per-column basis, we need to "view" the matrix as a range of columns. In the bidimensional case this is done by passing the dimension index `1` to `begin` and `end`:
|
||||
|
||||
```cpp
|
||||
...
|
||||
std::stable_sort( d2D_ref.begin(1), d2D_ref.end(1) );
|
||||
}
|
||||
```
|
||||
|
||||
This will transform the matrix into:
|
||||
|
||||
> ```cpp
|
||||
> 1 2 3 4 30
|
||||
> 6 7 8 9 50
|
||||
> 11 12 13 14 100
|
||||
> 16 17 18 19 150
|
||||
> ```
|
||||
|
||||
In other words, a matrix of dimension `D` can be viewed simultaneously as `D` different ranges of different "transpositions" by passing an integer value to `begin` and `end` indicating the preferred dimension.
|
||||
`begin(0)` is equivalent to `begin()`.
|
||||
|
||||
## Initialization
|
||||
|
||||
`array_ref` is initialized from a preexisting contiguous range; the index extensions should be compatible with the total number of elements.
|
||||
|
||||
```cpp
|
||||
double* dp = new double[12];
|
||||
multi::array_ref<double, 2> A({3,4}, dp);
|
||||
multi::array_ref<double, 2> B({2,6}, dp);
|
||||
...
|
||||
delete[] dp;
|
||||
```
|
||||
`array` is initialized by specifying the index extensions (and optionally a default value) or alternatively from a rectangular list.
|
||||
|
||||
```cpp
|
||||
/*In C++17 the element-type and the dimensionality can be omitted*/
|
||||
multi::array/*<double, 1>*/ A1 = {1.,2.,3.};
|
||||
assert(A1.dimensionality==1 and A1.num_elements()==3);
|
||||
multi::array/*<double, 2>*/ A2 {
|
||||
{1.,2.,3.},
|
||||
{4.,5.,6.}
|
||||
}; assert(A2.dimensionality==2 and A2.num_elements()==2*3);
|
||||
multi::array/*<double, 3>*/ const A3 = {
|
||||
{{ 1.2, 0.}, { 2.4, 1.}},
|
||||
{{11.2, 3.}, {34.4, 4.}},
|
||||
{{15.2, 99.}, {32.4, 2.}}
|
||||
}; assert(A3.dimensionality==3 and A3.num_elements()==3*2*2);
|
||||
```
|
||||
|
||||
## Iteration
|
||||
|
||||
Accessing arrays by iterators (`begin`/`end`) enables the use of many iterator based algorithms (see the sort example above).
|
||||
`begin/end(A)` (or equivalently `A.begin/end()`) gives iterators that are linear and random access in the leading dimension.
|
||||
|
||||
`A.begin/end(n)` gives access in non-leading nested dimension number `n`.
|
||||
|
||||
`cbegin/cend(A)` (or equivalently `A.cbegin/cend()`) gives read-only iterators.
|
||||
|
||||
For example, in a three-dimensional array:
|
||||
|
||||
(cbegin(A)+1)->operator[](1).begin()[0] = 342.4; //error, read-only
|
||||
(begin(A)+1)->operator[](1).begin()[0] = 342.4; // assigns to A[1][1][0]
|
||||
assert( (begin(A)+1)->operator[](1).begin()[0] == 342.4 );
|
||||
|
||||
As an example, this function allows printing arrays of arbitrary dimension into a linear comma-separated form.
|
||||
|
||||
```cpp
|
||||
void print(double const& d){cout<<d;};
|
||||
template<class MultiArray>
|
||||
void print(MultiArray const& ma){
|
||||
cout<<"{";
|
||||
if(not ma.empty()){
|
||||
print(*cbegin(ma));
|
||||
std::for_each(cbegin(ma)+1, cend(ma), [](auto&& e){cout<<","; print(e);});
|
||||
}
|
||||
cout<<"}";
|
||||
}
|
||||
...
|
||||
print(A);
|
||||
```
|
||||
> {{{1.2,1.1},{2.4,1}},{{11.2,3},{34.4,4}},{{15.2,99},{32.4,2}}}
|
||||
|
||||
|
||||
Except for those corresponding to the one-dimensional case, dereferencing iterators generally produces proxy-reference objects.
|
||||
Therefore this is not allowed:
|
||||
|
||||
auto row = *begin(A); // compile error
|
||||
|
||||
This is because `row` would not have the expected value semantics and would not produce any data copy.
|
||||
However, this expresses the intention better:
|
||||
|
||||
decltype(A)::value_type row = *begin(A); // there is a real copy.
|
||||
|
||||
In my experience, however, this produces a more consistent idiom to hold references without copying elements.
|
||||
|
||||
auto const& crow = *cbegin(A); // same as decltype(A)::const_reference crow = *cbegin(A);
|
||||
auto&& row = * begin(A); // same as decltype(A):: reference row = * begin(A);
|
||||
|
||||
## Indexing
|
||||
|
||||
Arrays provide random access to elements or subviews.
|
||||
Many algorithms on arrays are oriented to linear algebra, which are ubiquitously implemented in terms of multidimensional index access.
|
||||
|
||||
### Element access and partial access
|
||||
|
||||
Index access mimics that of fixed-size C arrays; for example, a 3-dimensional array gives access to an element by `m[1][2][3]`,
|
||||
which can be used for write and read operations.
|
||||
|
||||
Partial index arguments `m[1][2]` generate a 1-dimensional view object.
|
||||
Transpositions are also multidimensional array views in which the indices are *logically* rearranged, for example `m.rotated(1)[2][3][1] == rotated(m)[2][3][1] == m[1][2][3]`.
|
||||
(rotate refers to the fact that the logical indices are rotated.)
|
||||
|
||||
As an illustration of an algorithm based on index access (as opposed to iterators),
|
||||
this example code implements Gauss Jordan Elimination without pivoting:
|
||||
|
||||
```cpp
|
||||
template<class Matrix, class Vector>
|
||||
auto gj_solve(Matrix&& A, Vector&& y)->decltype(y[0]/=A[0][0], y){
|
||||
std::ptrdiff_t Asize = size(A);
|
||||
for(std::ptrdiff_t r = 0; r != Asize; ++r){
|
||||
auto&& Ar = A[r];
|
||||
auto&& Arr = Ar[r];
|
||||
for(std::ptrdiff_t c = r + 1; c != Asize; ++c) Ar[c] /= Arr;
|
||||
auto const yr = (y[r] /= Arr);
|
||||
for(std::ptrdiff_t r2 = r + 1; r2 != Asize; ++r2){
|
||||
auto&& Ar2 = A[r2];
|
||||
auto const& Ar2r = Ar2[r]; // auto&& Ar = A[r];
|
||||
for(std::ptrdiff_t c = r + 1; c != Asize; ++c) Ar2[c] -= Ar2r*Ar[c];
|
||||
y[r2] -= Ar2r*yr;
|
||||
}
|
||||
}
|
||||
for(std::ptrdiff_t r = Asize - 1; r > 0; --r){
|
||||
auto const& yr = y[r];
|
||||
for(std::ptrdiff_t r2 = r-1; r2 >=0; --r2) y[r2] -= yr*A[r2][r];
|
||||
}
|
||||
return y;
|
||||
}
|
||||
```
|
||||
|
||||
This function can be applied to a `multi::array` container:
|
||||
|
||||
```cpp
|
||||
multi::array<double, 2> A = {{-3., 2., -4.},{0., 1., 2.},{2., 4., 5.}};
|
||||
multi::array<double, 1> y = {12.,5.,2.}; //(M); assert(y.size() == M); iota(y.begin(), y.end(), 3.1);
|
||||
gj_solve(A, y);
|
||||
```
|
||||
|
||||
and also to a combination of `MultiArrayView`-type objects:
|
||||
|
||||
```cpp
|
||||
multi::array<double, 2> A({6000, 7000}); std::iota(A.data(), A.data() + A.num_elements(), 0.1);
|
||||
std::vector<double> y(3000); std::iota(y.begin(), y.end(), 0.2);
|
||||
gj_solve(A({1000, 4000}, {0, 3000}), y);
|
||||
```
|
||||
|
||||
### Slices and strides
|
||||
|
||||
Given an array, a slice in the first dimension can be taken with the `sliced` function. `sliced` takes two arguments, the first index of the slice and the last index (not included) of the slice. For example,
|
||||
|
||||
```cpp
|
||||
multi::array<double, 2> d2D({4, 5});
|
||||
assert( d2D.size(0) == 4 and d2D.size(1) == 5 );
|
||||
|
||||
auto&& d2D_sliced = d2D.sliced(1, 3); // {{d2D[1], d2D[2]}}
|
||||
assert( d2D_sliced.size(0) == 2 and d2D_sliced.size(1) == 5 );
|
||||
```
|
||||
|
||||
The number of rows in the sliced matrix is 2 because we took only two rows, row 1 and row 2 (row 3 is excluded).
|
||||
|
||||
In the same way a strided view of the original array can be taken with the `strided` function.
|
||||
|
||||
```cpp
|
||||
auto&& d2D_strided = d2D.strided(2); // {{ d2D[0], d2D[2] }};
|
||||
assert( d2D_strided.size(0) == 2 and d2D_strided.size(1) == 5 );
|
||||
```
|
||||
|
||||
In this case the number of rows is 2 because, out of the 4 original rows we took one every two.
|
||||
|
||||
Operations can be combined in a single line:
|
||||
|
||||
```cpp
|
||||
auto&& d2D_slicedstrided = d2D.sliced(1, 3).strided(2); // {{ d2D[1] }};
|
||||
assert( d2D_slicedstrided.size(0) == 1 and d2D_slicedstrided.size(1) == 5 );
|
||||
```
|
||||
|
||||
For convenience, `A.sliced(a, b, c)` is the same as `A.sliced(a, b).strided(c)`.
|
||||
|
||||
By combining `rotated`, `sliced` and `strided` one can take sub arrays at any dimension.
|
||||
For example in a two dimensional array one can take a subset of columns by defining.
|
||||
|
||||
```cpp
|
||||
auto&& subA = A.rotated(1).sliced(1, 3).strided(2).rotated(-1);
|
||||
```
|
||||
|
||||
Other notations are available, but when in doubt, the `rotated/strided/sliced/rotated` idiom and its combinations provide the most control over the subview operations.
|
||||
(At the moment the `strided` argument has to divide the total size of the slice (or matrix), otherwise the behavior is undefined.)
|
||||
|
||||
Blocks (slices) in multiple dimensions can be obtained by pure index notation using `.operator()`:
|
||||
|
||||
```cpp
|
||||
multi::array<double, 2> A({6, 7}); // 6x7 array
|
||||
A({1, 4}, {2, 4}) // 3x2 array, containing indices 1 to 3 in the first dimension and 2 to 3 in the second dimension (upper bounds are excluded).
|
||||
```
|
||||
|
||||
## Concept Requirements
|
||||
|
||||
The design tries to impose the minimum possible requirements on the referenced types.
|
||||
Pointer-like random access types can be used as substitutes of built-in pointers.
|
||||
|
||||
```cpp
|
||||
namespace minimal{
|
||||
template<class T> class ptr{ // minimalistic pointer
|
||||
T* impl_;
|
||||
T& operator*() const{return *impl_;}
|
||||
auto operator+(std::ptrdiff_t n) const{return ptr{impl_ + n};}
|
||||
// operator[], operator+=, etc are optional but not necessary
|
||||
};
|
||||
}
|
||||
|
||||
int main(){
|
||||
double* buffer = new double[100];
|
||||
multi::array_ref<double, 2, minimal::ptr<double> > CC(minimal::ptr<double>{buffer}, {10, 10});
|
||||
CC[2]; // requires operator+
|
||||
CC[1][1]; // requires operator*
|
||||
CC[1][1] = 9;
|
||||
assert(CC[1][1] == 9);
|
||||
delete[] buffer;
|
||||
}
|
||||
```
|
||||
|
||||
### Linear Sequences: Pointers
|
||||
|
||||
An `array_ref` can refer to an arbitrary random access iterator sequence.
|
||||
This way, any linear (random access) sequence (e.g. `raw memory`, `std::vector`, `std::deque`) can be efficiently arranged as a multidimensional array.
|
||||
|
||||
```cpp
|
||||
std::vector<double> buffer(100);
|
||||
multi::array_ref<double, 2, std::vector<double>::iterator> A({10, 10}, buffer.begin());
|
||||
A[1][1] = 9;
|
||||
assert(A[1][1] == 9);
|
||||
assert(buffer[11]==9);
|
||||
```
|
||||
Since `array_ref` does not manage the memory associated with it, the reference can simply dangle if the `buffer` memory is reallocated (e.g. by `resize`).
|
||||
|
||||
### Special Memory: Allocators and Fancy Pointers
|
||||
|
||||
`array` manages its memory through allocators.
|
||||
It can handle special memory, as long as the underlying types behave coherently, these include fancy pointers and fancy references.
|
||||
Associated fancy pointers and fancy reference (if any) are deduced from the allocator types.
|
||||
|
||||
The behavior regarding memory management of the [fancy pointers](https://en.cppreference.com/w/cpp/named_req/Allocator#Fancy_pointers) can be customized (if necessary) by specializations of some or all of these functions:
|
||||
|
||||
```cpp
|
||||
destroy(a, first, last)
|
||||
destroy_n(a, first, n) -> last
|
||||
uninitialized_copy_n(a, first, n, dest) -> last;
|
||||
uninitialized_fill_n(a, first, n, value) -> last
|
||||
uninitialized_default_construct_n(a, first, n) -> last
|
||||
uninitialized_value_construct_n(a, first, n) -> last
|
||||
```
|
||||
|
||||
where `a` is the special allocator, `n` is a size (usually the number of elements), `first`, `last` and `dest` are fancy pointers.
|
||||
|
||||
Copying underlying memory can be customized by specializing
|
||||
|
||||
```cpp
|
||||
copy_n(first, n, dest)
|
||||
fill_n(first, n, value)
|
||||
```
|
||||
|
||||
Specific cases of fancy memory are file-mapped memory or interprocess shared memory.
|
||||
This example illustrates memory persistency by combining with Boost.Interprocess library.
|
||||
The arrays support their allocators and fancy pointers (`boost::interprocess::offset_ptr`).
|
||||
|
||||
```cpp
|
||||
#include <boost/interprocess/managed_mapped_file.hpp>
|
||||
using namespace boost::interprocess;
|
||||
using manager = managed_mapped_file;
|
||||
template<class T> using mallocator = allocator<T, manager::segment_manager>;
|
||||
decltype(auto) get_allocator(manager& m){return m.get_segment_manager();}
|
||||
|
||||
template<class T, auto D> using marray = multi::array<T, D, mallocator<T>>;
|
||||
|
||||
int main(){
|
||||
{
|
||||
manager m{create_only, "mapped_file.bin", 1 << 25};
|
||||
auto&& arr2d = *m.construct<marray<double, 2>>("arr2d")(std::tuple{1000, 1000}, 0.0, get_allocator(m));
|
||||
arr2d[4][5] = 45.001;
|
||||
}
|
||||
// imagine execution restarts here
|
||||
{
|
||||
manager m{open_only, "mapped_file.bin"};
|
||||
auto&& arr2d = *m.find<marray<double, 2>>("arr2d").first;
|
||||
assert( arr2d[7][8] == 0. );
|
||||
assert( arr2d[4][5] == 45.001 );
|
||||
m.destroy<marray<double, 2>>("arr2d");
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
# Interoperability with other software
|
||||
|
||||
## STL (Standard Template Library)
|
||||
|
||||
The fundamental goal of the library is that the arrays and iterators can be used with STL algorithms out-of-the-box with a reasonable efficiency.
|
||||
The most dramatic example of this is that `std::sort` works with array as it is shown in a previous example.
|
||||
|
||||
Along with STL itself, the library tries to interact with other existing C++ libraries.
|
||||
|
||||
## Range v3
|
||||
|
||||
```cpp
|
||||
#include <range/v3/all.hpp>
|
||||
int main(){
|
||||
|
||||
multi::array const d2D = {
|
||||
{ 0, 1, 2, 3},
|
||||
{ 5, 6, 7, 8},
|
||||
{10, 11, 12, 13},
|
||||
{15, 16, 17, 18}
|
||||
};
|
||||
assert( ranges::inner_product(d2D[0], d2D[1], 0.) == 6+2*7+3*8 );
|
||||
assert( ranges::inner_product(d2D[0], rotated(d2D)[0], 0.) == 1*5+2*10+15*3 );
|
||||
|
||||
static_assert(ranges::RandomAccessIterator<multi::array<double, 1>::iterator>{});
|
||||
static_assert(ranges::RandomAccessIterator<multi::array<double, 2>::iterator>{});
|
||||
}
|
||||
```
|
||||
|
||||
## Boost.Interprocess
|
||||
|
||||
Using Interprocess allows for shared memory and for persistent mapped memory.
|
||||
|
||||
```cpp
|
||||
#include <boost/interprocess/managed_mapped_file.hpp>
|
||||
#include "multi/array.hpp"
|
||||
#include<cassert>
|
||||
|
||||
namespace bip = boost::interprocess;
|
||||
using manager = bip::managed_mapped_file;
|
||||
template<class T> using mallocator = bip::allocator<T, manager::segment_manager>;
|
||||
auto get_allocator(manager& m){return m.get_segment_manager();}
|
||||
|
||||
namespace multi = boost::multi;
|
||||
template<class T, int D> using marray = multi::array<T, D, mallocator<T>>;
|
||||
|
||||
int main(){
|
||||
{
|
||||
manager m{bip::create_only, "bip_mapped_file.bin", 1 << 25};
|
||||
auto&& arr2d = *m.construct<marray<double, 2>>("arr2d")(std::tuple{1000, 1000}, 0., get_allocator(m));
|
||||
arr2d[4][5] = 45.001;
|
||||
m.flush();
|
||||
}
|
||||
{
|
||||
manager m{bip::open_only, "bip_mapped_file.bin"};
|
||||
auto&& arr2d = *m.find<marray<double, 2>>("arr2d").first;
|
||||
assert( arr2d[4][5] == 45.001 );
|
||||
m.destroy<marray<double, 2>>("arr2d");// eliminate<marray<double, 2>>(m, "arr2d");}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
(Similarly works with [LLNL's Meta Allocator](https://github.com/llnl/metall))
|
||||
|
||||
## Cuda thrust
|
||||
|
||||
```cpp
|
||||
#include "multi/adaptors/thrust/allocator_traits.hpp"
|
||||
#include "multi/adaptors/thrust/algorithms.hpp"
|
||||
#include "multi/array.hpp"
|
||||
|
||||
namespace multi = boost::multi;
|
||||
int main(){
|
||||
multi::array<double, 2, thrust::device_allocator<double>> A2({10,10});
|
||||
multi::array<double, 2, thrust::device_allocator<double>> B2({10,10});
|
||||
A2[5][0] = 50.;
|
||||
thrust::copy(begin(rotated(A2)[0]), end(rotated(A2)[0]), begin(rotated(B2)[0]));
|
||||
assert( B2[5][0] == 50. );
|
||||
}
|
||||
```
|
||||
|
||||
## TotalView
|
||||
|
||||
TotalView visual debugger (commercial) can display arrays in human-readable form (for simple types, like `double` or `std::complex`).
|
||||
To use it, simply `#include "multi/adaptors/totalview.hpp"` and link to the TotalView libraries, compile and run the code with the debugger.
|
||||
|
||||
## Memory Resources
|
||||
|
||||
The library is compatible with C++17's polymorphic memory resources which allows using preallocated buffers.
|
||||
This enables the use of stack memory or a reduction in the number of allocations.
|
||||
For example, this code ends up with `buffer` containing the string `"aaaabbbbbb__"`.
|
||||
|
||||
```cpp
|
||||
#include<memory_resource>
|
||||
int main(){
|
||||
char buffer[13] = "____________"; // a small buffer on the stack
|
||||
std::pmr::monotonic_buffer_resource pool{std::data(buffer), std::size(buffer)}; // or multi::memory::monotonic<char*>
|
||||
|
||||
multi::array<char, 2, std::pmr::polymorphic_allocator<char>> A({2, 2}, 'a', &pool); // or multi::memory::monotonic_allocator<double>
|
||||
multi::array<char, 2, std::pmr::polymorphic_allocator<char>> B({3, 2}, 'b', &pool);
|
||||
}
|
||||
```
|
||||
|
||||
The library comes with its own customized (non-polymorphic) memory resources if, for any reason, the standard PMRs are not sufficiently general.
|
||||
The headers to include are:
|
||||
|
||||
```cpp
|
||||
#include<multi/memory/monotonic.hpp> // multi::memory::monotonic<char*> : no memory reclaim
|
||||
#include<multi/memory/stack.hpp> // multi::memory::stack<char*> : LIFO memory reclaim
|
||||
```
|
||||
|
||||
# Technical points
|
||||
|
||||
### What's up with the multiple bracket notation?
|
||||
|
||||
The chained bracket notation (`A[i][j][k]`) allows referring to elements and lower-dimensional subarrays in a consistent and _generic_ manner, and it is the recommended way to access the array objects.
|
||||
It is a frequently raised question whether the chained bracket notation is good for performance, since it appears that each utilization of the bracket leads to the creation of a temporary which in turn generates a partial copy of the layout.
|
||||
Moreover, this goes against [historical recommendations](https://isocpp.org/wiki/faq/operator-overloading#matrix-subscript-op).
|
||||
|
||||
It turns out that [modern compilers with a fair level of optimization (`-O2`)](https://godbolt.org/z/3fYd5c) can elide these temporary objects, so that `A[i][j][k]` generates identical assembly code as `A.base() + i*stride1 + j*stride2 + k*stride3` (+offsets not shown).
|
||||
|
||||
In a subsequent optimization, constant indices can have their "partial stride" computation removed from loops.
|
||||
As a result, these two loops lead to the [same machine code](https://godbolt.org/z/z1se74):
|
||||
|
||||
```cpp
|
||||
for(int j = 0; j != nj; ++j)
|
||||
++A[i][j][k];
|
||||
```
|
||||
```cpp
|
||||
double* Ai_k = A.base() + i*A_stride1 + k*A_stride3;
|
||||
for(int j = 0; j != nj; ++j)
|
||||
++(*(Ai_k + j*A_stride2));
|
||||
```
|
||||
|
||||
Incidentally, the library also supports parenthesis notation with multiple indices `A(i, j, k)` for element or partial access, but it does so for accidental reasons as part of a more general syntax to generate sub-blocks.
|
||||
In any case `A(i, j, k)` is expanded to `A[i][j][k]` internally in the library when `i, j, k` are normal integer indices.
|
||||
Additionally, array coordinates can be directly stored in tuple-like data structures, allowing this functional syntax:
|
||||
|
||||
```cpp
|
||||
std::array p = {2,3,4};
|
||||
std::apply(A, p) = 234; // A[2][3][4] = 234;
|
||||
```
|
||||
|
||||
### Customizing recursive operations: SCARY iterators
|
||||
|
||||
A further level of customization can be achieved by intercepting internal recursive algorithms.
|
||||
Multi iterators are [SCARY](http://www.open-std.org/jtc1/sc22/WG21/docs/papers/2009/n2980.pdf).
|
||||
SCARY means that they are independent of any container and can be accessed generically through their dimension and underlying pointer types:
|
||||
|
||||
For example, `boost::multi::array_iterator<double, 2, double*> it` is a row (or column) iterator of an array of dimension 2 or higher, whose underlying pointer type is `double*`.
|
||||
This row (or column) and subsequent ones can be accessed by the normal iterator(pointer) notation `*it` and `it[n]` respectively.
|
||||
Indirection `it->...` is supported (even for iterators of high dimension).
|
||||
The base pointer, the stride and the size of the referenced subarray can be accessed by `base(it)`, `stride(it)`, `it->size()`.
|
||||
|
||||
The template arguments of the iterator can be used to customize operations that are recursive (and possibly inefficient in certain context) in the library:
|
||||
|
||||
```cpp
|
||||
namespace boost{namespace multi{
|
||||
template<class It, class T> // custom copy 1D (aka strided copy)
|
||||
void copy(It first, It last, multi::array_iterator<T, 1, fancy::ptr<T> > dest){
|
||||
assert( stride(first) == stride(last) );
|
||||
std::cerr<<"1D copy(it1D, it1D, it1D) with strides "<< stride(first) <<" "<< stride(dest) <<std::endl;
|
||||
}
|
||||
|
||||
template<class It, class T> // custom copy 2D (aka double strided copy)
|
||||
void copy(It first, It last, multi::array_iterator<T, 2, fancy::ptr<T> > dest){
|
||||
assert( stride(first) == stride(last) );
|
||||
std::cerr<<"2D copy(It, It, it2D) with strides "<< stride(first) <<" "<< stride(dest) <<std::endl;
|
||||
}
|
||||
}}
|
||||
```
|
||||
|
||||
For example, if your custom pointers refer to a memory type in which 2D memory copying (strided copy) is faster than sequential copying, that kind of instruction can be executed when the library internally calls `copy`.
|
||||
This customization must be performed (unfortunately) in the `boost::multi` namespace (this is where the Multi iterators are defined) and the customization happens through matching the dimension and the pointer type.
|
||||
|
|
@ -1,80 +0,0 @@
|
|||
#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*-
|
||||
$CXXX $CXXFLAGS $0 -o $0x `pkg-config --libs blas` -lboost_unit_test_framework&&$0x&&rm $0x;exit
|
||||
#endif
|
||||
// © Alfredo A. Correa 2018-2020
|
||||
|
||||
#ifndef MULTI_ADAPTORS_BLAS_HPP
|
||||
#define MULTI_ADAPTORS_BLAS_HPP
|
||||
|
||||
#include "../adaptors/blas/iamax.hpp"
|
||||
#include "../adaptors/blas/asum.hpp"
|
||||
#include "../adaptors/blas/axpy.hpp"
|
||||
#include "../adaptors/blas/copy.hpp"
|
||||
#include "../adaptors/blas/dot.hpp"
|
||||
#include "../adaptors/blas/gemm.hpp"
|
||||
#include "../adaptors/blas/syrk.hpp"
|
||||
#include "../adaptors/blas/herk.hpp"
|
||||
#include "../adaptors/blas/gemv.hpp"
|
||||
#include "../adaptors/blas/ger.hpp"
|
||||
#include "../adaptors/blas/nrm2.hpp"
|
||||
#include "../adaptors/blas/trsm.hpp"
|
||||
#include "../adaptors/blas/scal.hpp"
|
||||
#include "../adaptors/blas/swap.hpp"
|
||||
|
||||
#if not __INCLUDE_LEVEL__
|
||||
|
||||
#define BOOST_TEST_MODULE "C++ Unit Tests for Multi BLAS"
|
||||
#define BOOST_TEST_DYN_LINK
|
||||
#include<boost/test/unit_test.hpp>
|
||||
|
||||
#include "../array.hpp"
|
||||
#include "../utility.hpp"
|
||||
|
||||
#include<iostream>
|
||||
#include<complex>
|
||||
#include<numeric> // iota
|
||||
#include<algorithm> // transform
|
||||
|
||||
namespace multi = boost::multi;
|
||||
|
||||
// herk on a 3x2 complex matrix: checks one off-diagonal entry of the result
// and that the mirrored entry is its complex conjugate.
BOOST_AUTO_TEST_CASE(multi_blas_herk_complex){
	using complex = std::complex<double>;
	complex const I{0, 1};
	using multi::blas::herk;

	multi::array<complex, 2> const a = {
		{1. + 3.*I, 9. + 1.*I},
		{3. - 2.*I, 7. - 8.*I},
		{4. + 1.*I, 1. - 3.*I}
	};
	multi::array<complex, 2> c({3, 3}, 9999.);

	herk(1., a, c); // c† = c = a·a† = (a†·a)†

	BOOST_REQUIRE( c[1][2] == complex(41., 2.) );
	BOOST_REQUIRE( c[2][1] == conj(c[1][2]) );
}
|
||||
|
||||
// asum of a 1D complex array filled with zeros must compare equal to zero.
BOOST_AUTO_TEST_CASE(multi_blas_asum_complex){
	using complex = std::complex<double>;
	using multi::blas::asum;

	multi::array<complex, 1> zeros(1000, 0.);

	BOOST_REQUIRE( asum(zeros) == 0 );
}
|
||||
|
||||
// nrm2 of a 1D complex array filled with zeros must compare equal to zero.
BOOST_AUTO_TEST_CASE(multi_blas_nrm2_complex){
	// The sibling test cases declare `complex` locally; this one used the name
	// without declaring it, so the alias is introduced here as well.
	using complex = std::complex<double>;
	using multi::blas::nrm2;

	multi::array<complex, 1> arr(1000, 0.);

	BOOST_REQUIRE( nrm2(arr) == 0. );
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
|
@ -1,32 +0,0 @@
|
|||
# Build script for the Multi BLAS adaptor and its tests.
cmake_minimum_required(VERSION 3.11)

set(CMAKE_VERBOSE_MAKEFILE ON)

project(boost-multi-adaptors-blas VERSION 0.1 LANGUAGES CXX)

# First attempt: ask FindBLAS for the Intel10_64lp vendor (MKL).
# If that fails, drop the vendor hint and require any BLAS.
set(BLA_VENDOR Intel10_64lp)
find_package(BLAS)
if(NOT BLAS_FOUND)
	message("Multi/BLAS: MKL environment not detected, looking for other BLAS")
	unset(BLA_VENDOR)
	find_package(BLAS REQUIRED)
else() # in some systems with MKL, regular BLAS headers need to be found for it to work
	message("Multi/BLAS: MKL environment detected")
	add_definitions(-DRETURN_BY_STACK)
endif()

#find_path(BLAS_INCLUDE_DIRS cblas.h
#	/usr/include
#	/usr/local/include
#	$ENV{BLAS_HOME}/include)

# Every target defined below links against the BLAS that was found.
link_libraries(${BLAS_LIBRARIES})

# Strict C++17, no compiler extensions.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

include_directories(${CMAKE_BINARY_DIR})

add_subdirectory(test)
|
||||
|
|
@ -1,71 +0,0 @@
|
|||
<!--
|
||||
(pandoc `#--from gfm` --to html --standalone --metadata title=" " $0 > $0.html) && firefox --new-window $0.html; sleep 5; rm $0.html; exit
|
||||
-->
|
||||
# [Boost.]Multi BLAS Adaptor
|
||||
|
||||
(not an official Boost library)
|
||||
|
||||
_© Alfredo A. Correa, 2018-2021_
|
||||
|
||||
The BLAS Adaptor provides an interface for BLAS-like libraries.
|
||||
|
||||
## Contents
|
||||
[[_TOC_]]
|
||||
|
||||
## Numeric Arrays, Conjugation Real and Imaginary parts
|
||||
|
||||
These functions produce views (not copies) related to conjugation, real and imaginary parts.
|
||||
|
||||
```cpp
|
||||
using complex = std::complex<double>;
|
||||
complex const I{0, 1};
|
||||
multi::array<complex, 2> B = {
|
||||
{1. - 3.*I, 6. + 2.*I},
|
||||
{8. + 2.*I, 2. + 4.*I},
|
||||
{2. - 1.*I, 1. + 1.*I}
|
||||
};
|
||||
|
||||
namespace blas = multi::blas;
|
||||
multi::array<complex, 2> conjB = blas::conj(B);
|
||||
|
||||
assert( blas::conj(B)[2][1] == std::conj(B[2][1]) );
|
||||
|
||||
assert( blas::transposed(B)[1][2] == B[2][1] );
|
||||
assert( blas::transposed(B) == ~B );
|
||||
|
||||
assert( blas::hermitized(B)[2][1] == blas::conj(B)[1][2] );
|
||||
assert( blas::hermitized(B) == blas::conj(blas::transposed(B)) );
|
||||
|
||||
assert( blas::real(B)[2][1] == std::real(B[2][1]) );
|
||||
assert( blas::imag(B)[2][1] == std::imag(B[2][1]) );
|
||||
|
||||
multi::array<double, 2> B_real_doubled = {
|
||||
{ 1., -3., 6., 2.},
|
||||
{ 8., 2., 2., 4.},
|
||||
{ 2., -1., 1., 1.}
|
||||
};
|
||||
assert( blas::real_doubled(B) == B_real_doubled );
|
||||
```
|
||||
|
||||
Usage:
|
||||
```cpp
|
||||
multi::array<double, 2> const a_real = {
|
||||
{ 1., 3., 1.},
|
||||
{ 9., 7., 1.},
|
||||
};
|
||||
|
||||
multi::array<complex, 2> const b = {
|
||||
{ 11.+1.*I, 12.+1.*I, 4.+1.*I, 8.-2.*I},
|
||||
{ 7.+8.*I, 19.-2.*I, 2.+1.*I, 7.+1.*I},
|
||||
{ 5.+1.*I, 3.-1.*I, 3.+8.*I, 1.+1.*I}
|
||||
};
|
||||
|
||||
multi::array<complex, 2> c({2, 4});
|
||||
|
||||
blas::real_doubled(c) = blas::gemm(1., a_real, blas::real_doubled(b)); // c = a_real*b
|
||||
```
|
||||
|
||||
## Installation and Tests
|
||||
|
||||
...
|
||||
|
|
@ -1,85 +0,0 @@
|
|||
#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*-
|
||||
$CXXX $CXXFLAGS $0 -o $0x `pkg-config --cflags --libs blas` -lboost_unit_test_framework&&$0x&&rm $0x;exit
|
||||
#endif
|
||||
// © Alfredo A. Correa 2019-2020
|
||||
// TODO make it work with thrust complex
|
||||
|
||||
#ifndef MULTI_ADAPTORS_BLAS_ASUM_HPP
|
||||
#define MULTI_ADAPTORS_BLAS_ASUM_HPP
|
||||
|
||||
#include "../blas/core.hpp"
|
||||
|
||||
namespace boost{
|
||||
namespace multi{namespace blas{
|
||||
|
||||
/// asum over the `n` elements starting at iterator `first`.
/// Forwards to the pointer-level `asum(n, base, stride)`; the overload is
/// removed from consideration when that call is not well-formed.
template<class Iterator, typename Count>
auto asum_n(Iterator first, Count n)
	-> decltype(asum(n, base(first), stride(first)))
{
	return asum(n, base(first), stride(first));
}
|
||||
|
||||
using std::distance;
|
||||
|
||||
/// asum over the iterator range [f, last).
/// Both iterators must have the same stride (checked with assert).
template<class Iterator>
auto asum(Iterator f, Iterator last)
	-> decltype(asum_n(f, distance(f, last)))
{
	assert( stride(f) == stride(last) );
	return asum_n(f, distance(f, last));
}
|
||||
|
||||
using std::begin; using std::end;
|
||||
|
||||
/// asum over a whole 1D array-like object `x`.
/// Asserts that `offset(x)` is zero, then delegates to the iterator-pair overload.
template<class Array1D>
auto asum(Array1D const& x)
	-> decltype(asum(begin(x), end(x)))
{
	assert( !offset(x) );
	return asum(begin(x), end(x));
}
|
||||
|
||||
}}
|
||||
}
|
||||
|
||||
#if not __INCLUDE_LEVEL__ // self-test section: compiled only when this header is the top-level file (asum tests)
|
||||
|
||||
#define BOOST_TEST_MODULE "C++ Unit Tests for Multi.BLAS asum"
|
||||
#define BOOST_TEST_DYN_LINK
|
||||
#include<boost/test/unit_test.hpp>
|
||||
//#include<boost/test/tools/floating_point_comparison.hpp>
|
||||
|
||||
#include "../../array.hpp"
|
||||
//#include "../../utility.hpp"
|
||||
|
||||
#include<numeric> // accumulate
|
||||
|
||||
namespace multi = boost::multi;
|
||||
using multi::blas::asum;
|
||||
|
||||
// asum of one row of a real matrix must equal the accumulated sum of |element|.
BOOST_AUTO_TEST_CASE(multi_blas_asum_double){
	multi::array<double, 2> const A = {
		{1., 2., 3., 4.},
		{-5., 6., -7., 8.},
		{9., 10., 11., 12.}
	};

	auto const add_abs = [](auto&& a, auto&& b){return a+std::abs(b);};

	BOOST_REQUIRE( asum(A[1]) == std::accumulate(begin(A[1]), end(A[1]), 0., add_abs) );
}
|
||||
|
||||
// asum of the first column of a complex matrix: expected value is the sum of
// |real| + |imag| over the three entries of that column.
BOOST_AUTO_TEST_CASE(multi_blas_asum_complex){
	using complex = std::complex<double>;
	complex const I{0, 1};

	multi::array<complex, 2> const A = {
		{ 1. + 1.*I,  2.,  3.,  4.},
		{-5. + 3.*I,  6., -7.,  8.},
		{ 9. - 2.*I, 10., 11., 12.}
	};

	BOOST_REQUIRE( asum(rotated(A)[0]) == 1.+1. + 5.+3. + 9.+2. );
}
|
||||
|
||||
// Disabled: the C-array variant of the asum test is kept only as commented-out code,
// so this test case currently has an empty body.
BOOST_AUTO_TEST_CASE(multi_blas_asum_double_carray){
	/*
	double A[3][4] = {
		{1., 2., 3., 4.},
		{-5., 6., -7., 8.},
		{9., 10., 11., 12.}
	}; (void)A;
	using std::begin; using std::end;
	BOOST_REQUIRE(asum(A[1]) == std::accumulate(begin(A[1]), end(A[1]), 0., [](auto&& a, auto&& b){return a+abs(b);}));
	*/
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
|
@ -1,91 +0,0 @@
|
|||
// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*-
|
||||
// © Alfredo Correa 2019-2020
|
||||
|
||||
#ifndef MULTI_ADAPTORS_BLAS_AXPY_HPP
|
||||
#define MULTI_ADAPTORS_BLAS_AXPY_HPP
|
||||
|
||||
#include "../../adaptors/blas/core.hpp"
|
||||
#include "../../config/NODISCARD.hpp"
|
||||
#include "../../array_ref.hpp"
|
||||
|
||||
namespace boost{
|
||||
namespace multi{namespace blas{
|
||||
|
||||
using core::axpy;
|
||||
|
||||
/// Runs the pointer-level `axpy` over `n` elements, reading from `first` and
/// updating the elements at `d_first`; returns `d_first + n`.
/// The return type uses member calls (`.base()`, `.stride()`) while the body uses
/// the free-function spellings, as in the rest of this header.
template<class It1, class Size, class OutIt>
auto axpy_n(typename It1::value_type alpha, It1 first, Size n, OutIt d_first)
	-> decltype(axpy(n, &alpha, first.base(), first.stride(), d_first.base(), d_first.stride()), d_first + n)
{
	axpy(n, &alpha, base(first), stride(first), base(d_first), stride(d_first));
	return d_first + n;
}
|
||||
|
||||
/// Context-aware variant of `axpy_n`: the operation is dispatched to `ctxt.axpy(...)`.
/// Participates in overload resolution only when `is_context<Context>` holds.
template<class Context, class It1, class Size, class OutIt, class=std::enable_if_t<is_context<Context>{}>>
auto axpy_n(Context&& ctxt, typename It1::value_type alpha, It1 first, Size n, OutIt d_first)
	-> decltype(std::forward<Context>(ctxt).axpy(n, &alpha, first.base(), first.stride(), d_first.base(), d_first.stride()), d_first + n)
{
	std::forward<Context>(ctxt).axpy(n, &alpha, base(first), stride(first), base(d_first), stride(d_first));
	return d_first + n;
}
|
||||
|
||||
/// Whole-array axpy: applies `axpy_n` with scale `alpha` from `x` onto `y` and returns `y`
/// (forwarded). Requires `size(x) == size(y)` (assert) and that `y[0]` is assignable from a double.
template<class X1D, class Y1D, typename = decltype( std::declval<Y1D&&>()[0] = 0. )>
auto axpy(typename X1D::element alpha, X1D const& x, Y1D&& y)
	-> decltype(axpy_n(alpha, x.begin(), x.size(), y.begin()), std::forward<Y1D>(y)) // intel doesn't like ADL in deduced/sfinaed return types
{
	assert( size(x) == size(y) );
	axpy_n(alpha, begin(x), size(x), begin(y));
	return std::forward<Y1D>(y);
}
|
||||
|
||||
/// Context-aware whole-array axpy: same contract as the context-free overload, with the
/// operation routed through `ctxt`.
template<class Context, class X1D, class Y1D, typename = decltype( std::declval<Y1D&&>()[0] = 0. )>
auto axpy(Context&& ctxt, typename X1D::element alpha, X1D const& x, Y1D&& y)
	-> decltype(axpy_n(std::forward<Context>(ctxt), alpha, x.begin( ), x.size( ), y.begin( )), std::forward<Y1D>(y)) // intel doesn't like ADL in deduced/sfinaed return types
{
	assert( size(x) == size(y) );
	axpy_n(std::forward<Context>(ctxt), alpha, begin(x), size(x), begin(y));
	return std::forward<Y1D>(y);
}
|
||||
|
||||
/// Two-argument shorthand: axpy with a scale factor of +1.
template<class X1D, class Y1D>
Y1D&& axpy(X1D const& x, Y1D&& y){
	return axpy(+1., x, std::forward<Y1D>(y));
}
|
||||
|
||||
template<class Context, class X1D, class Y1D, std::enable_if_t<is_context<Context>{}> >
|
||||
Y1D&& axpy(Context&& ctxt, X1D const& x, Y1D&& y){return axpy(std::forward<Context>(ctxt), +1., x, std::forward<Y1D>(y));}
|
||||
|
||||
template<class Context, class Scale, class ItX>
|
||||
class axpy_range{
|
||||
Context ctxt_;
|
||||
Scale alpha_;
|
||||
ItX x_begin_;
|
||||
size_type count_;
|
||||
public:
|
||||
axpy_range(axpy_range const&) = delete;
|
||||
axpy_range(Context ctxt, Scale alpha, ItX x_first, ItX x_last)
|
||||
: ctxt_{ctxt}, alpha_{alpha}, x_begin_{x_first}, count_{x_last - x_first}{}
|
||||
template<class Other>
|
||||
friend Other&& operator+=(Other&& other, axpy_range const& self){
|
||||
assert(other.size() == self.count_);
|
||||
blas::axpy_n(std::forward<Context>(self.ctxt_), +self.alpha_, self.x_begin_, self.count_, other.begin());
|
||||
return std::forward<Other>(other);
|
||||
}
|
||||
template<class Other>
|
||||
friend Other&& operator-=(Other&& other, axpy_range const& self){
|
||||
assert(other.size() == self.count_);
|
||||
blas::axpy_n(std::forward<Context>(self.ctxt_), -self.alpha_, self.x_begin_, self.count_, other.begin());
|
||||
return std::forward<Other>(other);
|
||||
}
|
||||
axpy_range& operator*=(Scale s)&{alpha_ *= s;}
|
||||
};
|
||||
|
||||
/// Builds a lazy `axpy_range` over all of `x`, scaled by `a`, bound to the given context.
/// Participates in overload resolution only when `is_context<Context>` holds.
template<class Context, class Scale, class X, class=std::enable_if_t<is_context<Context>{}>>
axpy_range<Context, Scale, typename X::const_iterator>
axpy(Context&& ctxt, Scale a, X const& x){
	return {
		std::forward<Context>(ctxt),
		a,
		begin(x),
		end(x)
	};
}
|
||||
|
||||
template<class Scale, class X>
|
||||
axpy_range<blas::context const&, Scale, typename X::const_iterator> axpy(Scale a, X const& x){return {blas::context{}, a, begin(x), end(x)};}
|
||||
|
||||
namespace operators{
|
||||
|
||||
template<class X1D, class Y1D> auto operator+=(X1D&& x, Y1D const& other) DECLRETURN(axpy(+1., other, std::forward<X1D>(x)))
|
||||
template<class X1D, class Y1D> auto operator-=(X1D&& x, Y1D const& other) DECLRETURN(axpy(-1., other, std::forward<X1D>(x)))
|
||||
|
||||
template<class X1D, class Y1D> auto operator+(X1D const& x, Y1D const& y)->std::decay_t<decltype(x.decay())>{auto X=x.decay(); X+=y; return X;}
|
||||
template<class X1D, class Y1D> auto operator-(X1D const& x, Y1D const& y)->std::decay_t<decltype(x.decay())>{auto X=x.decay(); X-=y; return X;}
|
||||
|
||||
}
|
||||
|
||||
|
||||
}}
|
||||
|
||||
}
|
||||
#endif
|
||||
|
|
@ -1,126 +0,0 @@
|
|||
// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*-
|
||||
// © Alfredo A. Correa 2020
|
||||
|
||||
#ifndef MULTI_ADAPTORS_BLAS_COPY_HPP
|
||||
#define MULTI_ADAPTORS_BLAS_COPY_HPP
|
||||
|
||||
#include "../blas/core.hpp"
|
||||
#include "../blas/operations.hpp"
|
||||
|
||||
#include "../../config/NODISCARD.hpp"
|
||||
|
||||
#include<type_traits>
|
||||
|
||||
namespace boost{
|
||||
namespace multi::blas{
|
||||
|
||||
using core::copy;
|
||||
|
||||
/// Copies `n` elements from `first` to `d_first` through the pointer-level `copy`
/// (base pointer + stride of each iterator); returns `d_first + n`.
template<class It, typename Size, class OutIt>
auto copy_n(It first, Size n, OutIt d_first)
	-> decltype(copy(n, first.base(), first.stride(), d_first.base(), d_first.stride()), d_first + n)
{
	copy(n, first.base(), first.stride(), d_first.base(), d_first.stride());
	return d_first + n;
}
|
||||
|
||||
/// Context-aware `copy_n`: same as the context-free overload, with `ctxt` passed as the
/// first argument of the pointer-level `copy`. Enabled only when `blas::is_context<Context>` holds.
template<class Context, class It, typename Size, class OutIt, class=std::enable_if_t<blas::is_context<Context>{}> >
auto copy_n(Context&& ctxt, It first, Size n, OutIt d_first)
	-> decltype(copy(std::forward<Context>(ctxt), n, first.base(), first.stride(), d_first.base(), d_first.stride()), d_first + n)
{
	copy(std::forward<Context>(ctxt), n, first.base(), first.stride(), d_first.base(), d_first.stride());
	return d_first + n;
}
|
||||
|
||||
/// Iterator-range copy: copies [first, last) to `d_first` by delegating to `copy_n`
/// with `last - first` as the element count.
template<class It, class OutIt>
auto copy(It first, It last, OutIt d_first)
	-> decltype(copy_n(first, last - first, d_first))
{
	return copy_n(first, last - first, d_first);
}
|
||||
|
||||
/// Context-aware iterator-range copy; delegates to the context-aware `copy_n`.
/// Enabled only when `blas::is_context<Context>` holds.
template<class Context, class It, class OutIt, class=std::enable_if_t<blas::is_context<Context>{}>>
auto copy(Context&& ctxt, It first, It last, OutIt d_first)
	-> decltype(copy_n(std::forward<Context>(ctxt), first, last - first, d_first))
{
	return copy_n(std::forward<Context>(ctxt), first, last - first, d_first);
}
|
||||
|
||||
template<class X1D, class Y1D>
|
||||
auto copy(X1D const& x, Y1D&& y)
|
||||
->decltype(blas::copy_n(x.begin(), x.size(), y.begin()), std::forward<Y1D>(y)){assert(x.size()==y.size());
|
||||
return blas::copy_n(x.begin(), x.size(), y.begin()), std::forward<Y1D>(y);}
|
||||
|
||||
template<class Context, class X1D, class Y1D>
|
||||
auto copy(Context&& ctxt, X1D const& x, Y1D&& y)
|
||||
->decltype(blas::copy_n(std::forward<Context>(ctxt), x.begin(), x.size(), y.begin()), std::forward<Y1D>(y)){assert(x.size()==y.size());
|
||||
return blas::copy_n(std::forward<Context>(ctxt), x.begin(), x.size(), y.begin()), std::forward<Y1D>(y);}
|
||||
|
||||
template<class ContextPtr, class It1D>
|
||||
class copy_iterator{
|
||||
ContextPtr ctxt = {};
|
||||
It1D it_;
|
||||
public:
|
||||
using difference_type = typename std::iterator_traits<It1D>::difference_type;
|
||||
using value_type = typename std::iterator_traits<It1D>::value_type;
|
||||
using pointer = void;
|
||||
using reference = void;
|
||||
using iterator_category = std::output_iterator_tag;
|
||||
using iterator_type = It1D;
|
||||
using context_type = ContextPtr;
|
||||
constexpr explicit copy_iterator(It1D it) : it_{it}{}
|
||||
constexpr copy_iterator(ContextPtr cp, It1D it) : ctxt{cp}, it_{it}{}
|
||||
constexpr iterator_type base() const{return it_;}
|
||||
template<class It1DOut>
|
||||
friend constexpr It1DOut copy_n(copy_iterator first, difference_type count, It1DOut result){
|
||||
return blas::copy_n(first.ctxt, first.base(), count, result);
|
||||
}
|
||||
template<class It1DOut>
|
||||
friend constexpr It1DOut copy(copy_iterator first, copy_iterator last, It1DOut d_first){
|
||||
return copy_n(first, distance(first, last), d_first);
|
||||
}
|
||||
template<class It1DOut>
|
||||
friend constexpr It1DOut uninitialized_copy(copy_iterator first, copy_iterator last, It1DOut d_first){
|
||||
return copy_n(first, distance(first, last), d_first);
|
||||
}
|
||||
friend constexpr difference_type distance(copy_iterator const& a, copy_iterator const& b){assert(stride(b.it_) == stride(a.it_));
|
||||
return b.it_-a.it_;
|
||||
}
|
||||
constexpr value_type operator*() const{return *it_;}
|
||||
};
|
||||
|
||||
template<class ContextPtr, class It1D, class DecayType = void, class DiffType = typename std::iterator_traits<It1D>::difference_type>
|
||||
class copy_range{
|
||||
ContextPtr ctxp_ = {};
|
||||
It1D begin_, end_;
|
||||
public:
|
||||
using difference_type = DiffType;
|
||||
using iterator = copy_iterator<ContextPtr, It1D>;
|
||||
using decay_type = DecayType;
|
||||
copy_range(copy_range&&) = default;
|
||||
constexpr copy_range(It1D first, It1D last) : begin_{first}, end_{last}{}
|
||||
constexpr copy_range(ContextPtr ctxp, It1D first, It1D last) : ctxp_{ctxp}, begin_{first}, end_{last}{}
|
||||
constexpr difference_type size() const{return end_ - begin_;}
|
||||
constexpr auto begin() const{return iterator{ctxp_, begin_};}
|
||||
constexpr auto end() const{return iterator{ctxp_, end_ };}
|
||||
constexpr typename decay_type::extensions_type extensions() const{return {size()};}
|
||||
template<class Other, class=decltype(Other(std::declval<iterator>(), std::declval<iterator>()))>
|
||||
operator Other() const{return Other(begin(), end());}
|
||||
friend auto operator+(copy_range const& s){return s.operator decay_type();}
|
||||
};
|
||||
|
||||
template<class DecayType, class It> NODISCARD()
|
||||
auto copy(It const& first, It const& last)
|
||||
->decltype(copy_range<void*, It, DecayType>{first, last}){
|
||||
return copy_range<void*, It, DecayType>{first, last};}
|
||||
|
||||
template<class DecayType, class Context, class It> NODISCARD()
|
||||
auto copy(Context&& ctxt, It const& first, It const& last)
|
||||
->decltype(copy_range<Context, It, DecayType>{ctxt, first, last}){
|
||||
return copy_range<Context, It, DecayType>{ctxt, first, last};}
|
||||
|
||||
template<class A> NODISCARD()
|
||||
auto copy(A const& a) // need to specify templates (instead of deduced for intel)
|
||||
->decltype(copy<typename A::decay_type, typename A::const_iterator>(a.begin(), a.end())){
|
||||
return copy<typename A::decay_type, typename A::const_iterator>(a.begin(), a.end());}
|
||||
|
||||
/// Context-aware lazy copy of a whole array `a`.
/// Enabled only when `blas::is_context<Context>` holds.
template<class Context, class A, class=std::enable_if_t<blas::is_context<Context>{}>> NODISCARD()
auto copy(Context&& ctxt, A const& a)
	-> decltype(copy<typename A::decay_type, Context, typename A::const_iterator>(std::forward<Context>(ctxt), a.begin(), a.end()))
{
	return copy<typename A::decay_type, Context, typename A::const_iterator>(std::forward<Context>(ctxt), a.begin(), a.end());
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
@ -1,598 +0,0 @@
|
|||
#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*-
|
||||
$CXXX $CXXFLAGS $0 -o $0.$X `pkg-config --libs blas`&&$0.$X&&rm $0.$X;exit
|
||||
#endif
|
||||
//(for a in `find tests/ -name '*.cpp'`; do sh $a || break; done); exit
|
||||
|
||||
// https://software.intel.com/en-us/articles/intel-mkl-link-line-advisor
|
||||
// © Alfredo A. Correa 2019-2020
|
||||
|
||||
#ifndef MULTI_ADAPTORS_BLAS_CORE_HPP
|
||||
#define MULTI_ADAPTORS_BLAS_CORE_HPP
|
||||
|
||||
//#include <cblas/cblas.h> // consider being replaceable by cblas.h
|
||||
|
||||
#include<iostream> // debug
|
||||
#include<cassert>
|
||||
#include<complex>
|
||||
#include<stdint.h> // int64_t
|
||||
#include<limits> // numeric_limits
|
||||
#include<type_traits> // is_convertible
|
||||
#include<cstring> // std::memcpy
|
||||
|
||||
#include "../blas/traits.hpp"
|
||||
#include "../../config/MARK.hpp"
|
||||
|
||||
#if 0
|
||||
#define MULTI_ASSERT1(ExpR) assert (ExpR)
|
||||
#define MULTI_ASSERT2(ExpR, DescriptioN) MULTI_ASSERT1(ExpR && ##DescriptioN)
|
||||
#else
|
||||
#if not defined(NDEBUG)
|
||||
#include<stdexcept>
|
||||
#include<string>
|
||||
#define MULTI_ASSERT1(ExpR) (void)((ExpR)?0:throw std::logic_error("\n" __FILE__ ":"+std::to_string(__LINE__)+"::\n"+std::string(__PRETTY_FUNCTION__)+"\nLogic assertion `" #ExpR "' failed."))
|
||||
#define MULTI_ASSERT2(ExpR, DescriptioN) (void)((ExpR)?0:throw std::DescriptioN("\n" __FILE__ ":"+std::to_string(__LINE__)+"::\n"+std::string(__PRETTY_FUNCTION__)+"\nLogic assertion `" #ExpR "' failed."))
|
||||
#else
|
||||
#define MULTI_ASSERT1(ExpR) assert(ExpR)
|
||||
// Two-argument assertion for this branch: the description is ignored and the condition is
// handed to assert(). (The macro parameter is `ExpR`; it was misspelled `EXpR` here.)
#define MULTI_ASSERT2(ExpR, DescriptioN) assert(ExpR)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef CBLAS_H
|
||||
#define BLAS(NamE) cblas_##NamE
|
||||
#else
|
||||
#define BLAS(NamE) NamE##_
|
||||
extern "C"{
|
||||
|
||||
#ifndef _BLAS_INT
|
||||
#if defined(__INTPTR_WIDTH__)
|
||||
#define _BLAS_INT __INTPTR_WIDTH__
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define s float
|
||||
#define d double
|
||||
#define c std::complex<s>
|
||||
#define z std::complex<d>
|
||||
#define v void
|
||||
|
||||
typedef struct { float real, imag; } Complex_float ;
|
||||
typedef struct { double real, imag; } Complex_double;
|
||||
|
||||
#define C Complex_float // _Complex s
|
||||
#define Z Complex_double // _Complex d
|
||||
|
||||
#if defined(_BLAS_INT)
|
||||
#if _BLAS_INT==32
|
||||
#define INT int32_t
|
||||
#elif _BLAS_INT==64
|
||||
#define INT int64_t
|
||||
#else
|
||||
#define INT int32_t // 32bit safe? pesimistic?
|
||||
#endif
|
||||
#else
|
||||
#define INT int32_t // 32bit safe? pesimistic?
|
||||
#endif
|
||||
|
||||
namespace core{
|
||||
using size_t = INT;
|
||||
using ssize_t = std::make_signed_t<size_t>;
|
||||
}
|
||||
|
||||
#define INTEGER INT const&
|
||||
#define N INTEGER n
|
||||
#define INCX INTEGER incx
|
||||
#define INCY INTEGER incy
|
||||
|
||||
static_assert(sizeof(INT)==32/8 or sizeof(INT)==64/8, "please set _BLAS_INT to int32_t or int64_t");
|
||||
|
||||
// TODO indent declarations like here https://www.netlib.org/lapack/lug/node145.html
|
||||
|
||||
#define xROTG(T1, T2) v BLAS( T1##rotg)( T1 const*, T1 const*, T2*, T1*)
|
||||
// Declares the T-prefixed BLAS rotmg routine. The last parameter is a reference to a
// 5-element array; its `&param` had been mangled into a paragraph sign by HTML-entity decoding.
#define xROTMG(T) v BLAS( T##rotmg)( T*, T*, T*, T const&, T(&param)[5])
|
||||
#define xROT(TT, T, S) v BLAS( TT##rot )(N, T *x, INCX, T *y, INCY, S const&, S const&)
|
||||
#define xROTM(T) v BLAS( T##rotm )(N, T* x, INCX, T* y, INCY, T const(&p)[5])
|
||||
#define xSWAP(T) v T ##swap##_ (N, T *x, INCX, T *y, INCY)
|
||||
#define xSCAL(TT, TA, TX) v TT##scal##_ (N, TA const& a, TX *x, INCX )
|
||||
#define xCOPY(T) v T ##copy##_ (N, T const *x, INCX, T *y, INCY)
|
||||
#define xAXPY(T) v T ##axpy##_ (N, T const* a, T const *x, INCX, T *y, INCY)
|
||||
#define xDOT(R, TT, T) R BLAS( TT##dot )(N, T const *x, INCX, T const *y, INCY)
|
||||
#if defined(RETURN_BY_STACK) || (defined(FORTRAN_COMPLEX_FUNCTIONS_RETURN_VOID) && FORTRAN_COMPLEX_FUNCTIONS_RETURN_VOID)
|
||||
#define xDOTU(R, T) v BLAS( T##dotu )(R*, N, T const *x, INCX, T const *y, INCY)
|
||||
#define xDOTC(R, T) v T##dotc ##_ (R*, N, T const *x, INCX, T const *y, INCY)
|
||||
#else
|
||||
#define xDOTU(R, T) R T ##dotu##_ ( N, T const *x, INCX, T const *y, INCY)
|
||||
#define xDOTC(R, T) R T ##dotc##_ ( N, T const *x, INCX, T const *y, INCY)
|
||||
#endif
|
||||
#define xxDOT(TT, T) T TT##dot ##_ ( N, T const& a, T const *x, INCX, T const *y, INCY)
|
||||
#define xNRM2(R, TT, T) R TT##nrm2##_ ( N, T const *x, INCX )
|
||||
#define xASUM(R, TT, T) R TT##asum##_ ( N, T const *x, INCX )
|
||||
#define IxAMAX(T) INT i##T ##amax##_ ( N, T const* x, INCX )
|
||||
|
||||
xROTG(s, s) ; xROTG(d,d) ;// MKL extension xROTG(c, s); xROTG(z, d);
|
||||
xROTMG(s) ; xROTMG(d) ;
|
||||
xROT(s, s, s) ; xROT(d, d, d) ; xROT(cs, c, s); xROT(zd, z, d);
|
||||
xROTM(s) ; xROTM(d) ;
|
||||
xSWAP(s) ; xSWAP(d) ; xSWAP(c) ; xSWAP(z);
|
||||
xSCAL(s, s, s); xSCAL(d, d, d); xSCAL(c, c, c); xSCAL(z, z, z); xSCAL(zd, d, z); xSCAL(cs, s, c);
|
||||
xCOPY(s) ; xCOPY(d) ; xCOPY(c) ; xCOPY(z) ;
|
||||
xAXPY(s) ; xAXPY(d) ; xAXPY(c) ; xAXPY(z) ;
|
||||
xDOT(s, s, s); xDOT(d, d, d); xDOT(d, ds, s);
|
||||
|
||||
xDOTU(C, c); xDOTU(Z, z);
|
||||
//xDOTU(c, c); xDOTU(z, z);
|
||||
|
||||
xDOTC(C, c); xDOTC(Z, z);
|
||||
xxDOT(sds, s);
|
||||
xNRM2(s, s, s); xNRM2(d, d, d); xNRM2(s, sc, c); xNRM2(d, dz, z);
|
||||
xASUM(s, s, s); xASUM(d, d, d); xASUM(s, sc, c); xASUM(d, dz, z);
|
||||
IxAMAX(s); IxAMAX(d); IxAMAX(c); IxAMAX(z);
|
||||
|
||||
#define TRANS const char& trans
|
||||
#define NR INTEGER nr
|
||||
#define NC INTEGER nc
|
||||
#define LDA INTEGER lda
|
||||
#define UPLO const char& uplo
|
||||
#define DIAG const char& diag
|
||||
|
||||
#define xGEMV(T) void T## gemv ##_ ( TRANS, NR, NC, T const& a, T const* A, LDA, T const* X, INCX, T const& beta, T* Y, INCY )
|
||||
#define xGER(T) void T## ger ##_ ( NR, NC, T const& a, T const* X, INCX, T const* Y, INCY, T* A, LDA)
|
||||
#define xGERU(T) void T## geru ##_ ( NR, NC, T const& a, T const* X, INCX, T const* Y, INCY, T* A, LDA)
|
||||
#define xGERC(T) void T## gerc ##_ ( NR, NC, T const& a, T const* X, INCX, T const* Y, INCY, T* A, LDA)
|
||||
#define xTRSV(T) void T## trsv ##_ (UPLO, TRANS, DIAG, N, T const* A, LDA, T* X , INCX )
|
||||
|
||||
xGEMV(s); xGEMV(d); xGEMV(c); xGEMV(z);
|
||||
xGER(s); xGER(d);
|
||||
xGERU(c); xGERU(z);
|
||||
xGERC(c); xGERC(z);
|
||||
xTRSV(s); xTRSV(d); xTRSV(c); xTRSV(z);
|
||||
|
||||
#define TRANSA const char& transa
|
||||
#define TRANSB const char& transb
|
||||
#define NK INTEGER nk
|
||||
#define LDB INTEGER ldb
|
||||
#define LDC INTEGER ldc
|
||||
|
||||
#define SIDE const char& side
|
||||
|
||||
// Declares the T-prefixed BLAS gemm routine.
// NOTE(review): `CC` is declared `T const*`, although the reference GEMM writes its result into C — confirm whether `T*` was intended.
#define xGEMM(T) void T ##gemm ##_ ( TRANSA, TRANSB, NR, NC, NK, T const& a, T const* A, LDA, T const* B, LDB, T const& b , T const* CC, LDC)
|
||||
#define xSYRK(T) void T ##syrk ##_ ( UPLO, TRANSA, NR, NK, T const& a, T const* A, LDA, T const& b , T* CC, LDC)
|
||||
#define xHERK(TT, T) void T ##herk ##_ ( UPLO, TRANSA, NR, NK, TT const& a, T const* A, LDA, TT const& b , T* CC, LDC)
|
||||
// Declares the T-prefixed BLAS trsm routine.
// NOTE(review): `B` is declared `T const*`, although the reference TRSM overwrites B with the solution — confirm whether `T*` was intended.
#define xTRSM(T) void T ##trsm ##_ (SIDE, UPLO, TRANSA, DIAG, NR, NK, T const& a, T const* A, LDA, T const* B, LDB )
|
||||
|
||||
xGEMM(s); xGEMM(d); xGEMM(c) ; xGEMM(z) ;
|
||||
xSYRK(s); xSYRK(d); xSYRK(c) ; xSYRK(z) ;
|
||||
xHERK(s, c); xHERK(d, z);
|
||||
xTRSM(s); xTRSM(d); xTRSM(c) ; xTRSM(z) ;
|
||||
|
||||
#undef TRANS
|
||||
#undef UPLO
|
||||
#undef SIDE
|
||||
#undef DIAG
|
||||
#undef xROTG
|
||||
#undef xROTMG
|
||||
#undef xROT
|
||||
#undef xROTM
|
||||
#undef xSCAL
|
||||
#undef xSWAP
|
||||
#undef xCOPY
|
||||
#undef xAXPY
|
||||
#undef xDOT
|
||||
#undef xDOTU
|
||||
#undef xDOTC
|
||||
#undef xxDOT
|
||||
#undef xNRM2
|
||||
#undef xASUM
|
||||
#undef IxAMAX
|
||||
#undef xGEMV
|
||||
#undef xGER
|
||||
#undef xGERU
|
||||
#undef xGERC
|
||||
#undef xGEMM
|
||||
#undef xHERK
|
||||
#undef xTRSM
|
||||
|
||||
#undef s
|
||||
#undef d
|
||||
#undef c
|
||||
#undef z
|
||||
#undef C
|
||||
#undef Z
|
||||
#undef v
|
||||
#undef INTEGER
|
||||
#undef N
|
||||
#undef INCX
|
||||
#undef INCY
|
||||
#undef TRANSA
|
||||
#undef TRANSB
|
||||
#undef LDA
|
||||
#undef LDB
|
||||
#undef LDC
|
||||
}
|
||||
#endif
|
||||
|
||||
namespace boost{
|
||||
namespace multi{
|
||||
namespace blas{
|
||||
|
||||
/// Wrapper around a `std::complex<T>*` that can be built (explicitly) from any pointer-like
/// `Ptr` whose pointee has the size of `std::complex<T>` and whose size equals the sizes of
/// its `real()` and `imag()` results added together. Not copyable.
template<class T> struct complex_ptr{
	std::complex<T>* impl_;

	template<
		class Ptr,
		class = std::enable_if_t<
			sizeof(*Ptr{}) == sizeof(std::complex<T>) &&
			sizeof(*Ptr{}) == sizeof(Ptr{}->real()) + sizeof(Ptr{}->imag())
		>
	>
	explicit complex_ptr(Ptr tt) : impl_{reinterpret_cast<std::complex<T>*>(tt)}{}

	complex_ptr(complex_ptr const&) = delete;

	/// Implicit access to the underlying raw pointer.
	operator std::complex<T>*() const{return impl_;}

	/// Dereferences the underlying raw pointer.
	std::complex<T>& operator*() const{return *impl_;}
};
|
||||
|
||||
/// Read-only counterpart of `complex_ptr`: wraps a `std::complex<T> const*` built (explicitly)
/// from any pointer-like `Ptr` that passes the same size checks. Not copyable.
template<class T> struct complex_const_ptr{
	std::complex<T> const* impl_;

	template<
		class Ptr,
		class = std::enable_if_t<
			sizeof(*Ptr{}) == sizeof(std::complex<T>) &&
			sizeof(*Ptr{}) == sizeof(Ptr{}->real()) + sizeof(Ptr{}->imag())
		>
	>
	explicit complex_const_ptr(Ptr tt) : impl_{reinterpret_cast<std::complex<T> const*>(tt)}{}

	complex_const_ptr(complex_const_ptr const&) = delete;

	/// Implicit access to the underlying raw pointer.
	operator std::complex<T> const*() const{return impl_;}

	/// Dereferences the underlying raw pointer.
	std::complex<T> const& operator*() const{return *impl_;}
};
|
||||
|
||||
template<class T> struct add_ptr{using type = T*;};
|
||||
template<class T> struct add_const_ptr{using type = T const*;};
|
||||
|
||||
template<class T> struct add_ptr<std::complex<T>>{using type = complex_ptr<T>;};
|
||||
template<class T> struct add_const_ptr<std::complex<T>>{using type = complex_const_ptr<T>;};
|
||||
|
||||
template<class T> using add_ptr_t = typename add_ptr<T>::type;
|
||||
template<class T> using add_const_ptr_t = typename add_const_ptr<T>::type;
|
||||
|
||||
namespace{
	// Single-letter aliases for the element types used by the wrapper macros below.
	using s = float;
	using d = double;
	using c = std::complex<s>;
	using z = std::complex<d>;
	// Plain-struct complex types from the extern "C" declarations.
	using C = Complex_float ;
	using Z = Complex_double;
	using v = void;
}
|
||||
|
||||
// Bounds check: asserts that the argument lies in [INT min, INT max) and yields it unchanged.
#define BC(x) [](auto value){assert(value >= std::numeric_limits<INT>::min() && value < std::numeric_limits<INT>::max()); return value;}(x)
|
||||
|
||||
// Generates a `rotg` wrapper for the T1-prefixed BLAS routine. The generated function is not a
// template and lives in a header, so it is marked `inline` to avoid multiple-definition errors
// when the header is included from several translation units.
#define xrotg(T1, T2) inline v rotg (T1 const& a, T1 const& b, T2& cc, T1& ss ){ BLAS(T1##rotg )(const_cast<T1*>(&a), const_cast<T1*>(&b), &cc, &ss); }
|
||||
// Generates a `rotmg` wrapper for the T-prefixed BLAS routine. The generated function is not a
// template and lives in a header, so it is marked `inline` to avoid multiple-definition errors
// when the header is included from several translation units.
#define xrotmg(T) inline v rotmg(T& d1, T& d2, T& A, T const& B, T(&p)[5] ){ BLAS( T##rotmg)(&d1, &d2, &A, B, p); }
|
||||
// Generates a `rot` function template (size/stride type S) for element type T that forwards to the
// TT-prefixed BLAS rot routine; n, incx and incy go through the BC range check.
#define xrot(T, TT, CS) template<class S> v rot (S n, T *x, S incx, T *y, S incy, CS const& cos, CS const& sin){ BLAS(TT##rot )(BC(n), x, BC(incx), y, BC(incy), cos, sin); }
|
||||
// Generates a `rotm` function template (size/stride type S) that forwards to the T-prefixed BLAS
// rotm routine with a 5-element parameter array; n, incx and incy go through the BC range check.
#define xrotm(T) template<class S> v rotm (S n, T *x, S incx, T *y, S incy, T const(&p)[5] ){ BLAS( T##rotm )(BC(n), x, BC(incx), y, BC(incy), p); }
|
||||
// Generates a `swap` function template (size/stride type S) that forwards to the T-prefixed BLAS
// swap routine; n, incx and incy go through the BC range check.
#define xswap(T) template<class S> v swap (S n, T *x, S incx, T *y, S incy ){ BLAS( T##swap )(BC(n), x, BC(incx), y, BC(incy)); }
|
||||
// Generates a `scal` wrapper for the XX-prefixed BLAS routine; returns `x + n*incx`. The generated
// function is not a template and lives in a header, so it is marked `inline` to avoid
// multiple-definition errors when the header is included from several translation units.
#define xscal(XX, TA, TX) inline TX* scal (INT n, TA const* a, TX *x, INT incx ){ BLAS(XX##scal )(BC(n), *a, x, BC(incx) ); return x+n*incx;}
|
||||
//#define xcopy(T) v copy (INT n, T const *x, INT incx, T *y, INT incy ){ BLAS( T##copy )(BC(n), x, BC(incx), y, BC(incy)); }
|
||||
//#define xaxpy(T) template<class S> T* axpy (S n, T a, T const *x, S incx, T *y, S incy ){ BLAS( T##axpy )(BC(n), a, x, BC(incx), y, BC(incy)); return y+n*incy; }
|
||||
// Generates a `dot` function template (size/stride type S): calls the TT-prefixed BLAS dot routine
// on x and y and stores the returned value in `*r`. n, incx and incy go through the BC range check.
#define xdot(R, TT, T) \
	template<class S> \
	v dot(S n, T const* x, S incx, T const* y, S incy, R* r){ \
		MULTI_MARK_SCOPE("cpu_dot"); \
		*r = BLAS(TT##dot)(BC(n), x, BC(incx), y, BC(incy)); \
	}
|
||||
|
||||
xrotg(s, s) xrotg(d, d) //MKL extension xrotg(c, s); xrotg(z, d);
|
||||
xrotmg(s) xrotmg(d)
|
||||
xrot(s, s, s) xrot(d, d, d) xrot(c, cs, s) xrot(z, zd, d)
|
||||
xrotm(s) xrotm(d)
|
||||
xswap(s) xswap(d) xswap(c) xswap(z)
|
||||
|
||||
namespace core{
|
||||
|
||||
xscal(s, s, s) xscal(d, d, d) xscal(c, c, c) xscal(z, z, z) xscal(zd, d, z) xscal(cs, s, c)
|
||||
|
||||
using std::enable_if_t;
|
||||
using std::is_assignable;
|
||||
template<class SX, class SY, enable_if_t<is_s<SX>{} and is_s<SY>{} and is_assignable<SY&, SX&>{},int> =0> void copy(size_t n, SX* x, size_t incx, SY* y, size_t incy){BLAS(scopy)(n, ( float const*)(x), incx, ( float *)(y), incy);}
|
||||
/// Forwards to BLAS dcopy: n elements from x (increment incx) into y (increment incy).
/// Enabled only when DX and DY satisfy is_d and a DY& can be assigned from a DX&.
template<
	class DX, class DY,
	enable_if_t<is_d<DX>{} and is_d<DY>{} and is_assignable<DY&, DX&>{}, int> = 0
>
void copy(size_t n, DX* x, size_t incx, DY* y, size_t incy){
	auto const* source      = (double const*)(x);
	auto*       destination = (double      *)(y);
	BLAS(dcopy)(n, source, incx, destination, incy);
}
|
||||
/// Forwards to BLAS ccopy: n elements from x (increment incx) into y (increment incy).
/// Enabled only when CX and CY satisfy is_c and a CY& can be assigned from a CX&.
template<
	class CX, class CY,
	enable_if_t<is_c<CX>{} and is_c<CY>{} and is_assignable<CY&, CX&>{}, int> = 0
>
void copy(size_t n, CX* x, size_t incx, CY* y, size_t incy){
	auto const* source      = (std::complex<float> const*)(x);
	auto*       destination = (std::complex<float>      *)(y);
	BLAS(ccopy)(n, source, incx, destination, incy);
}
|
||||
/// Forwards to BLAS zcopy: n elements from x (increment incx) into y (increment incy).
/// Enabled only when ZX and ZY satisfy is_z and a ZY& can be assigned from a ZX&.
template<
	class ZX, class ZY,
	enable_if_t<is_z<ZX>{} and is_z<ZY>{} and is_assignable<ZY&, ZX&>{}, int> = 0
>
void copy(size_t n, ZX* x, size_t incx, ZY* y, size_t incy){
	auto const* source      = (std::complex<double> const*)(x);
	auto*       destination = (std::complex<double>      *)(y);
	BLAS(zcopy)(n, source, incx, destination, incy);
}
|
||||
|
||||
xdot(s, s, s) xdot(d, d, d) xdot(d, ds, s)
|
||||
|
||||
using std::pointer_traits;
|
||||
using std::enable_if_t;
|
||||
using std::is_convertible_v;
|
||||
|
||||
#define xaxpy(T) \
|
||||
template<class ALPHA, class SXP, class SX = typename pointer_traits<SXP>::element_type, class SYP, class SY = typename pointer_traits<SYP>::element_type, enable_if_t< \
|
||||
is_##T<ALPHA>{} and is_##T<SX>{} and is_##T<SY>{} and is_assignable<SY&, decltype(ALPHA{}*SX{})>{} \
|
||||
and is_convertible_v<SXP, SX*> and is_convertible_v<SYP, SY*> \
|
||||
, int> =0> \
|
||||
void axpy(size_t n, ALPHA const* a, SXP x, size_t incx, SYP y, size_t incy){BLAS(T##axpy)(n, (T const *)a, (T const*)static_cast<SX*>(x), incx, (T*)static_cast<SY*>(y), incy);}
|
||||
|
||||
xaxpy(s) xaxpy(d) xaxpy(c) xaxpy(z)
|
||||
#undef xaxpy
|
||||
//template<class A, class SX, class SY, enable_if_t<is_s<SX>{} and is_s<SY>{} and is_assignable<SY&, decltype(A{}*SX{})>{}, int> =0> void axpy(size_t n, A a, SX* x, size_t incx, SY* y, size_t incy){BLAS(saxpy)(n, a, (s const*)(x), incx, (s*)(y), incy);}
|
||||
//template<class A, class DX, class DY, enable_if_t<is_d<DX>{} and is_d<DY>{} and is_assignable<DY&, decltype(A{}*DX{})>{}, int> =0> void axpy(size_t n, A a, DX* x, size_t incx, DY* y, size_t incy){BLAS(daxpy)(n, a, (d const*)(x), incx, (d*)(y), incy);}
|
||||
//template<class A, class CX, class CY, enable_if_t<is_c<CX>{} and is_c<CY>{} and is_assignable<CY&, decltype(A{}*CX{})>{}, int> =0> void axpy(size_t n, A a, CX* x, size_t incx, CY* y, size_t incy){BLAS(caxpy)(n, a, (c const*)(x), incx, (c*)(y), incy);}
|
||||
//template<class A, class ZX, class ZY, enable_if_t<is_z<ZX>{} and is_z<ZY>{} and is_assignable<ZY&, decltype(A{}*ZX{})>{}, int> =0> void axpy(size_t n, A a, ZX* x, size_t incx, ZY* y, size_t incy){BLAS(zaxpy)(n, a, (z const*)(x), incx, (z*)(y), incy);}
|
||||
|
||||
}
|
||||
|
||||
/// Value-returning convenience overload: lets the six-argument dot(..., R*) overload
/// write into a local of type R and hands that local back to the caller.
template<class R, class S, class T>
R dot(S n, T const* x, S incx, T const* y, S incy){
	R result;
	dot(n, x, incx, y, incy, &result);
	return result;
}
|
||||
|
||||
/// Shorthand for dot<T, S, T>: the result has the same type as the vector elements.
template<class S, class T>
T dot(S n, T const* x, S incx, T const* y, S incy){
	return dot<T, S, T>(
		n,
		x, incx,
		y, incy
	);
}
|
||||
|
||||
// Retire the level-1 generator macros so that they do not leak into files that include
// this header. xrotmg and xrotm are defined above alongside the others and are
// therefore undefined here as well.
#undef xrotg
#undef xrotmg
#undef xrot
#undef xrotm
#undef xswap
#undef xscal
#undef xcopy
#undef xaxpy
#undef xdot
|
||||
|
||||
#ifndef CBLAS_H
|
||||
|
||||
namespace core{
|
||||
|
||||
using std::enable_if_t;
|
||||
using std::is_assignable;
|
||||
|
||||
#if defined(RETURN_BY_STACK) || (defined(FORTRAN_COMPLEX_FUNCTIONS_RETURN_VOID) && FORTRAN_COMPLEX_FUNCTIONS_RETURN_VOID)
|
||||
template<class X, class Y, class R, enable_if_t<is_c<X>{} and is_c<Y>{} and is_assignable<R&, decltype(0.+X{}*Y{}+X{}*Y{})>{}, int> =0> void dotu(size_t n, X* x, size_t incx, Y* y, size_t incy, R* r){BLAS(cdotu)((Complex_float *)r, n, (c const*)x, incx, (c const*)y, incy);}
|
||||
template<class X, class Y, class R, enable_if_t<is_z<X>{} and is_z<Y>{} and is_assignable<R&, decltype(0.+X{}*Y{}+X{}*Y{})>{}, int> =0> void dotu(size_t n, X* x, size_t incx, Y* y, size_t incy, R* r){BLAS(zdotu)((Complex_double*)r, n, (z const*)x, incx, (z const*)y, incy);}
|
||||
|
||||
template<class X, class Y, class R, enable_if_t<is_c<X>{} and is_c<Y>{} and is_assignable<R&, decltype(0.+X{}*Y{}+X{}*Y{})>{}, int> =0> void dotc(size_t n, X* x, size_t incx, Y* y, size_t incy, R* r){BLAS(cdotc)((Complex_float *)r, n, (c const*)x, incx, (c const*)y, incy);}
|
||||
template<class X, class Y, class R, enable_if_t<is_z<X>{} and is_z<Y>{} and is_assignable<R&, decltype(0.+X{}*Y{}+X{}*Y{})>{}, int> =0> void dotc(size_t n, X* x, size_t incx, Y* y, size_t incy, R* r){BLAS(zdotc)((Complex_double*)r, n, (z const*)x, incx, (z const*)y, incy);}
|
||||
#else
|
||||
template<class XP, class X = typename std::pointer_traits<XP>::element_type, class YP, class Y = typename std::pointer_traits<YP>::element_type, class RP, class R = typename std::pointer_traits<RP>::element_type, enable_if_t<is_c<X>{} and is_c<Y>{} and is_assignable<R&, decltype(0.+X{}*Y{}+X{}*Y{})>{}, int> =0> void dotu(size_t n, XP x, size_t incx, YP y, size_t incy, RP r){auto rr = BLAS(cdotu)(n, (c const*)static_cast<X*>(x), incx, (c const*)static_cast<Y*>(y), incy); std::memcpy(reinterpret_cast<float (*)[2]>(static_cast<R*>(r)), &rr, sizeof(rr)); static_assert(sizeof(rr)==sizeof(*r));}
|
||||
template<class XP, class X = typename std::pointer_traits<XP>::element_type, class YP, class Y = typename std::pointer_traits<YP>::element_type, class RP, class R = typename std::pointer_traits<RP>::element_type, enable_if_t<is_z<X>{} and is_z<Y>{} and is_assignable<R&, decltype(0.+X{}*Y{}+X{}*Y{})>{}, int> =0> void dotu(size_t n, XP x, size_t incx, YP y, size_t incy, RP r){auto rr = BLAS(zdotu)(n, (z const*)static_cast<X*>(x), incx, (z const*)static_cast<Y*>(y), incy); std::memcpy(reinterpret_cast<double(*)[2]>(static_cast<R*>(r)), &rr, sizeof(rr)); static_assert(sizeof(rr)==sizeof(*r));}
|
||||
|
||||
template<class XP, class X = typename std::pointer_traits<XP>::element_type, class YP, class Y = typename std::pointer_traits<YP>::element_type, class RP, class R = typename std::pointer_traits<RP>::element_type, enable_if_t<is_c<X>{} and is_c<Y>{} and is_assignable<R&, decltype(0.+X{}*Y{}+X{}*Y{})>{}, int> =0> void dotc(size_t n, XP x, size_t incx, YP y, size_t incy, RP r){auto rr = BLAS(cdotc)(n, (c const*)static_cast<X*>(x), incx, (c const*)static_cast<Y*>(y), incy); std::memcpy(reinterpret_cast<float (*)[2]>(static_cast<R*>(r)), &rr, sizeof(rr)); static_assert(sizeof(rr)==sizeof(*r));}
|
||||
template<class XP, class X = typename std::pointer_traits<XP>::element_type, class YP, class Y = typename std::pointer_traits<YP>::element_type, class RP, class R = typename std::pointer_traits<RP>::element_type, enable_if_t<is_z<X>{} and is_z<Y>{} and is_assignable<R&, decltype(0.+X{}*Y{}+X{}*Y{})>{}, int> =0> void dotc(size_t n, XP x, size_t incx, YP y, size_t incy, RP r){auto rr = BLAS(zdotc)(n, (z const*)static_cast<X*>(x), incx, (z const*)static_cast<Y*>(y), incy); std::memcpy(reinterpret_cast<double(*)[2]>(static_cast<R*>(r)), &rr, sizeof(rr)); static_assert(sizeof(rr)==sizeof(*r));}
|
||||
#endif
|
||||
|
||||
}
|
||||
#else
|
||||
// TODO: make cblas version
|
||||
#define xdotu(T) template<class S> v dotu(S n, add_const_ptr_t<T> x, S incx, add_const_ptr_t<T> y, S incy, add_ptr_t<T> r){BLAS(T##dotu_sub)(BC(n), x, BC(incx), y, BC(incy), r);}
|
||||
#define xdotc(T) template<class S> v dotc(S n, add_const_ptr_t<T> x, S incx, add_const_ptr_t<T> y, S incy, add_ptr_t<T> r){BLAS(T##dotc_sub)(BC(n), x, BC(incx), y, BC(incy), r);}
|
||||
|
||||
namespace core{
|
||||
xdotu(c) xdotu(z)
|
||||
xdotc(c) xdotc(z)
|
||||
}
|
||||
|
||||
#undef xdotu
|
||||
#undef xdotc
|
||||
#endif
|
||||
|
||||
namespace core{
|
||||
/// Returns whatever BLAS sdsdot yields for the scalar b and the vectors x and y.
template<class S>
s dot(S n, s const& b, s const* x, S incx, s const* y, S incy){
	return BLAS(sdsdot)(
		BC(n), b,
		x, BC(incx),
		y, BC(incy)
	);
}
|
||||
|
||||
//template<class S> void dot(S n, s const& b, s const* x, S incx, s const* y, S incy, s* result){*result = BLAS(sdsdot)(BC(n), b, x, BC(incx), y, BC(incy));}
|
||||
|
||||
}
|
||||
|
||||
//#define xnrm2(R, T, TT) template<class S> v nrm2 (S n, add_const_ptr_t<T> x, S incx, R* r){*r = BLAS(TT##nrm2 )(BC(n), x, BC(incx));}
|
||||
|
||||
#define xasum(T, TT) template<class S> auto asum (S n, T const* x, S incx){return BLAS(TT##asum )(BC(n), x, BC(incx));}
|
||||
#define ixamax(T) template<class S> auto iamax(S n, T const* x, S incx){return BLAS(i##T##amax)(BC(n), x, BC(incx)) - 1;}
|
||||
xasum(s, s) xasum(d, d) xasum (c, sc) xasum(z, dz)
|
||||
namespace core{
|
||||
// xnrm2(s, s, s) xnrm2(d, d, d) xnrm2(s, c, sc) xnrm2(d, z, dz)
|
||||
|
||||
/// Calls BLAS snrm2 on x and byte-copies the returned value into *r.
/// Enabled only when the pointees of XP and RP both satisfy is_s.
template<
	class XP, class X = typename std::pointer_traits<XP>::element_type,
	class RP, class R = typename std::pointer_traits<RP>::element_type,
	enable_if_t<is_s<X>{} and is_s<R>{} and std::is_assignable<R&, decltype(X{})>{}, int> = 0
>
void nrm2(size_t n, XP x, size_t incx, RP r){
	auto norm = BLAS(snrm2)(n, (s const*)static_cast<X*>(x), incx);
	auto* const destination = (s*)static_cast<R*>(r);
	std::memcpy(destination, &norm, sizeof(s));
}
|
||||
/// Calls BLAS dnrm2 on x and byte-copies the returned value into *r.
/// Enabled only when the pointees of XP and RP both satisfy is_d.
template<
	class XP, class X = typename std::pointer_traits<XP>::element_type,
	class RP, class R = typename std::pointer_traits<RP>::element_type,
	enable_if_t<is_d<X>{} and is_d<R>{} and std::is_assignable<R&, decltype(X{})>{}, int> = 0
>
void nrm2(size_t n, XP x, size_t incx, RP r){
	auto norm = BLAS(dnrm2)(n, (d const*)static_cast<X*>(x), incx);
	// The destination holds a double: cast to d* (not s*) so that the pointer type agrees
	// with the sizeof(d) bytes being copied.
	auto* const destination = (d*)static_cast<R*>(r);
	std::memcpy(destination, &norm, sizeof(d));
}
|
||||
|
||||
/// Calls BLAS scnrm2 on x and byte-copies the returned value into *r.
/// Enabled only when the pointee of XP satisfies is_c and the pointee of RP satisfies is_s.
template<
	class XP, class X = typename std::pointer_traits<XP>::element_type,
	class RP, class R = typename std::pointer_traits<RP>::element_type,
	enable_if_t<is_c<X>{} and is_s<R>{} and std::is_assignable<R&, decltype(std::norm(X{}))>{}, int> = 0
>
void nrm2(size_t n, XP x, size_t incx, RP r){
	auto norm = BLAS(scnrm2)(n, (c const*)static_cast<X*>(x), incx);
	auto* const destination = (s*)static_cast<R*>(r);
	std::memcpy(destination, &norm, sizeof(s));
}
|
||||
/// Calls BLAS dznrm2 on x and byte-copies the returned value into *r.
/// Enabled only when the pointee of XP satisfies is_z and the pointee of RP satisfies is_d.
template<
	class XP, class X = typename std::pointer_traits<XP>::element_type,
	class RP, class R = typename std::pointer_traits<RP>::element_type,
	enable_if_t<is_z<X>{} and is_d<R>{} and std::is_assignable<R&, decltype(std::norm(X{}))>{}, int> = 0
>
void nrm2(size_t n, XP x, size_t incx, RP r){
	auto norm = BLAS(dznrm2)(n, (z const*)static_cast<X*>(x), incx);
	// The destination holds a double: cast to d* (not s*) so that the pointer type agrees
	// with the sizeof(d) bytes being copied.
	auto* const destination = (d*)static_cast<R*>(r);
	std::memcpy(destination, &norm, sizeof(d));
}
|
||||
|
||||
|
||||
// template<class S> v nrm2 (S n, typename add_const_ptr<std::complex<double>>::type x, S incx, d* r){*r = BLAS(dznrm2 )(BC(n), x, BC(incx));}
|
||||
|
||||
ixamax(s) ixamax(d) ixamax(c) ixamax(z)
|
||||
}
|
||||
#undef xnrm2
|
||||
#undef xasum
|
||||
#undef ixamax
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// LEVEL2
|
||||
#define xgemv(T) template<class C, class S> v gemv(C trans, S m, S n, T const& a, T const* A, S lda, T const* X, S incx, T beta, T* Y, S incy ){BLAS(T##gemv)(trans, BC(m), BC(n), a, A, BC(lda), X, BC(incx), beta, Y, BC(incy) );}
|
||||
#define xger(T) template< class S> v ger ( S m, S n, T const& a, T const* X, S incx, T const* Y, S incy, T* A, S lda){BLAS(T##ger )( BC(m), BC(n), a, X, BC(incx), Y, BC(incy), A, BC(lda));}
|
||||
/// ger for element type c: forwards to the BLAS routine cgeru.
template<class S>
v ger(S m, S n, c const& a, c const* X, S incx, c const* Y, S incy, c* A, S lda){
	BLAS(cgeru)(
		BC(m), BC(n), a,
		X, BC(incx),
		Y, BC(incy),
		A, BC(lda)
	);
}
|
||||
/// ger for element type z: forwards to the BLAS routine zgeru.
template<class S>
v ger(S m, S n, z const& a, z const* X, S incx, z const* Y, S incy, z* A, S lda){
	BLAS(zgeru)(
		BC(m), BC(n), a,
		X, BC(incx),
		Y, BC(incy),
		A, BC(lda)
	);
}
|
||||
#define xgeru(T) template< class S> v geru( S m, S n, T const& a, T const* X, S incx, T const* Y, S incy, T* A, S lda){BLAS(T##geru)( BC(m), BC(n), a, X, BC(incx), Y, BC(incy), A, BC(lda));}
|
||||
#define xgerc(T) template< class S> v gerc( S m, S n, T const& a, T const* X, S incx, T const* Y, S incy, T* A, S lda){BLAS(T##gerc)( BC(m), BC(n), a, X, BC(incx), Y, BC(incy), A, BC(lda));}
|
||||
|
||||
namespace core{
|
||||
|
||||
//xgemv(s) xgemv(d) xgemv(c) xgemv(z)
|
||||
xger(s) xger(d)
|
||||
xgeru(c) xgeru(z)
|
||||
xgerc(c) xgerc(z)
|
||||
|
||||
|
||||
using std::enable_if_t;
|
||||
using std::is_assignable;
|
||||
|
||||
/// Forwards to BLAS sgemv after casting the matrix and vector pointers to s pointers.
/// Enabled only when M, X and Y satisfy is_s and Y& accepts the type of A*M*X + B*Y.
template<
	class A, class M, class X, class B, class Y,
	enable_if_t<is_s<M>{} and is_s<X>{} and is_s<Y>{} and is_assignable<Y&, decltype(A{}*M{}*X{}+B{}*Y{})>{}, int> = 0
>
void gemv(char trans, size_t m, size_t n, A const& a, M* ma, size_t lda, X* x, size_t incx, B b, Y* y, size_t incy){
	auto const* matrix = (s const*)ma;
	auto const* input  = (s const*)x;
	auto*       output = (s*)y;
	BLAS(sgemv)(trans, m, n, a, matrix, lda, input, incx, b, output, incy);
}
|
||||
/// Forwards to BLAS dgemv after casting the matrix and vector pointers to d pointers.
/// Enabled only when M, X and Y satisfy is_d and Y& accepts the type of A*M*X + B*Y.
template<
	class A, class M, class X, class B, class Y,
	enable_if_t<is_d<M>{} and is_d<X>{} and is_d<Y>{} and is_assignable<Y&, decltype(A{}*M{}*X{}+B{}*Y{})>{}, int> = 0
>
void gemv(char trans, size_t m, size_t n, A const& a, M* ma, size_t lda, X* x, size_t incx, B b, Y* y, size_t incy){
	auto const* matrix = (d const*)ma;
	auto const* input  = (d const*)x;
	auto*       output = (d*)y;
	BLAS(dgemv)(trans, m, n, a, matrix, lda, input, incx, b, output, incy);
}
|
||||
/// Forwards to BLAS cgemv after casting the matrix and vector pointers to c pointers.
/// Enabled only when M, X and Y satisfy is_c and Y& accepts the type of A*M*X + B*Y.
template<
	class A, class M, class X, class B, class Y,
	enable_if_t<is_c<M>{} and is_c<X>{} and is_c<Y>{} and is_assignable<Y&, decltype(A{}*M{}*X{}+B{}*Y{})>{}, int> = 0
>
void gemv(char trans, size_t m, size_t n, A const& a, M* ma, size_t lda, X* x, size_t incx, B b, Y* y, size_t incy){
	auto const* matrix = (c const*)ma;
	auto const* input  = (c const*)x;
	auto*       output = (c*)y;
	BLAS(cgemv)(trans, m, n, a, matrix, lda, input, incx, b, output, incy);
}
|
||||
/// Forwards to BLAS zgemv after casting the matrix and vector pointers to z pointers.
/// Enabled only when M, X and Y satisfy is_z and Y& accepts the type of A*M*X + B*Y.
template<
	class A, class M, class X, class B, class Y,
	enable_if_t<is_z<M>{} and is_z<X>{} and is_z<Y>{} and is_assignable<Y&, decltype(A{}*M{}*X{}+B{}*Y{})>{}, int> = 0
>
void gemv(char trans, size_t m, size_t n, A const& a, M* ma, size_t lda, X* x, size_t incx, B b, Y* y, size_t incy){
	auto const* matrix = (z const*)ma;
	auto const* input  = (z const*)x;
	auto*       output = (z*)y;
	BLAS(zgemv)(trans, m, n, a, matrix, lda, input, incx, b, output, incy);
}
|
||||
|
||||
//template<class SX, class SY, enable_if_t<is_s<SX>{} and is_s<SY>{} and is_assignable<SY&, SX&>{},int> =0> void copy(size_t n, SX* x, size_t incx, SY* y, size_t incy){BLAS(scopy)(n, ( float const*)(x), incx, ( float *)(y), incy);}
|
||||
//template<class DX, class DY, enable_if_t<is_d<DX>{} and is_d<DY>{} and is_assignable<DY&, DX&>{},int> =0> void copy(size_t n, DX* x, size_t incx, DY* y, size_t incy){BLAS(dcopy)(n, ( double const*)(x), incx, ( double *)(y), incy);}
|
||||
//template<class CX, class CY, enable_if_t<is_c<CX>{} and is_c<CY>{} and is_assignable<CY&, CX&>{},int> =0> void copy(size_t n, CX* x, size_t incx, CY* y, size_t incy){BLAS(ccopy)(n, (std::complex<float > const*)(x), incx, (std::complex<float >*)(y), incy);}
|
||||
//template<class ZX, class ZY, enable_if_t<is_z<ZX>{} and is_z<ZY>{} and is_assignable<ZY&, ZX&>{},int> =0> void copy(size_t n, ZX* x, size_t incx, ZY* y, size_t incy){BLAS(zcopy)(n, (std::complex<double> const*)(x), incx, (std::complex<double>*)(y), incy);}
|
||||
|
||||
|
||||
}
|
||||
|
||||
template<class T>
|
||||
struct blas2{
|
||||
// template<class S>
|
||||
// static v trsv(char ulA, char transA, char di, S m, T const* A, S lda, T* X, S incx) = delete;
|
||||
};
|
||||
|
||||
template<> struct blas2<s>{template<class... As> static v trsv(As... as) {BLAS(strsv)(as...);}};
|
||||
template<> struct blas2<d>{template<class... As> static v trsv(As... as) {BLAS(dtrsv)(as...);}};
|
||||
template<> struct blas2<c>{template<class... As> static v trsv(As... as) {BLAS(ctrsv)(as...);}};
|
||||
template<> struct blas2<z>{template<class... As> static auto trsv(As... as)->decltype(BLAS(ztrsv)(as...)){BLAS(ztrsv)(as...);}};
|
||||
|
||||
namespace core{
|
||||
template<typename TconstP, typename TP, typename S=std::size_t, typename C=char> v trsv(C ulA, C transA, C diA, S n, TconstP A, S lda, TP X, S incx){blas2<std::decay_t<typename std::pointer_traits<TP>::element_type>>::trsv(ulA, transA, diA, n, A, lda, X, incx);}
|
||||
}
|
||||
|
||||
#undef xgemv
|
||||
#undef xger
|
||||
#undef xgeru
|
||||
#undef xgerc
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// LEVEL 3
|
||||
|
||||
#define xsyrk(T) template<class UL, class C, class S> v syrk( UL ul, C transA, S n, S k, T alpha, T const* A, S lda, T beta, T* CC, S ldc){\
|
||||
MULTI_MARK_SCOPE("cpu_syrk"); BLAS(T##syrk)( ul, transA, BC(n), BC(k), alpha, A, BC(lda), beta, CC, BC(ldc));}
|
||||
|
||||
namespace core{
|
||||
|
||||
using std::is_convertible_v;
|
||||
using std::pointer_traits;
|
||||
using std::enable_if_t;
|
||||
using std::max;
|
||||
|
||||
// Generates core::herk for the complex element type T: checks the leading dimensions and
// forwards to the BLAS routine <T>herk. alpha and beta are received by pointer and read as
// `Real` (T::value_type) for the call.
#define xherk(T) \
template<class UL, class C, class S, class ALPHA, class AAP, class AA = typename pointer_traits<AAP>::element_type, class BETA, class CCP, class CC = typename pointer_traits<CCP>::element_type, class Real = typename T::value_type, \
	enable_if_t< \
		is_##T<AA>{} and is_##T<CC>{} and is_assignable<CC&, decltype(ALPHA{}*AA{}*AA{})>{} and \
		is_convertible_v<AAP, AA*> and is_convertible_v<CCP, CC*> \
	, int> =0> \
v herk(UL ul, C transA, S n, S k, ALPHA const* alpha, AAP aa, S lda, BETA const* beta, CCP cc, S ldc) \
{ \
	/* S{1} instead of 1l: std::max needs both arguments to share one type, and S is not necessarily long */ \
	if(transA == 'N' or transA == 'n') MULTI_ASSERT1( lda >= max(S{1}, n) ); else MULTI_ASSERT1( lda >= max(S{1}, k) ); \
	MULTI_ASSERT1( ldc >= max(S{1}, n) ); \
	MULTI_MARK_SCOPE("cpu_herk"); BLAS(T##herk)( ul, transA, BC(n), BC(k), *(Real const*)alpha, aa, BC(lda), *(Real const*)beta, cc, BC(ldc)); \
}
|
||||
|
||||
#define xgemm(T) \
|
||||
template<class ALPHA, class AAP, class AA = typename pointer_traits<AAP>::element_type, class BBP, class BB = typename pointer_traits<BBP>::element_type, class BETA, class CCP, class CC = typename pointer_traits<CCP>::element_type, \
|
||||
enable_if_t< \
|
||||
is_##T<AA>{} and is_##T<BB>{} and is_##T<CC>{} and is_assignable<CC&, decltype(ALPHA{}*AA{}*BB{})>{} and \
|
||||
is_convertible_v<AAP, AA*> and is_convertible_v<BBP, BB*> and is_convertible_v<CCP, CC*> \
|
||||
, int> =0 > \
|
||||
v gemm(char transA, char transB, ssize_t m, ssize_t n, ssize_t k, ALPHA const* alpha, AAP aa, ssize_t lda, BBP bb, ssize_t ldb, BETA const* beta, CCP cc, ssize_t ldc) \
|
||||
{ \
|
||||
MULTI_MARK_SCOPE("cpu_gemm"); \
|
||||
using std::max; \
|
||||
if(transA =='N') MULTI_ASSERT1(lda >= max(1l, m)); else MULTI_ASSERT1(lda >= max(1l, k)); \
|
||||
if(transB =='N') MULTI_ASSERT1(ldb >= max(1l, k)); else MULTI_ASSERT1(ldb >= max(1l, n)); \
|
||||
MULTI_ASSERT1( aa != cc ); \
|
||||
MULTI_ASSERT1( bb != cc ); \
|
||||
MULTI_ASSERT1(ldc >= max(ssize_t{1}, m)); \
|
||||
if(*beta != 0.) MULTI_ASSERT1((is_assignable<CC&, decltype(ALPHA{}*AA{}*BB{} + BETA{}*CC{})>{})); \
|
||||
BLAS(T##gemm)(transA, transB, BC(m), BC(n), BC(k), *(T const*)alpha, (T const*)static_cast<AA*>(aa), BC(lda), (T const*)static_cast<BB*>(bb), BC(ldb), *(T const*)beta, (T*)static_cast<CC*>(cc), BC(ldc)); \
|
||||
}
|
||||
|
||||
xgemm(s) xgemm(d) xgemm(c) xgemm(z)
|
||||
#undef xgemm
|
||||
|
||||
#define xtrsm(T) \
|
||||
template<class ALPHA, class AAP, class AA = typename pointer_traits<AAP>::element_type, class BBP, class BB = typename pointer_traits<BBP>::element_type, \
|
||||
enable_if_t< \
|
||||
is_##T<AA>{} and is_##T<BB>{} and is_assignable<BB&, decltype(AA{}*BB{}/ALPHA{})>{} and is_assignable<BB&, decltype(ALPHA{}*BB{}/AA{})>{} and \
|
||||
is_convertible_v<AAP, AA*> and is_convertible_v<BBP, BB*> \
|
||||
,int> =0> \
|
||||
v trsm(char side, char ul, char transA, char diag, ssize_t m, ssize_t n, ALPHA alpha, AAP aa, ssize_t lda, BBP bb, ssize_t ldb){ \
|
||||
MULTI_MARK_SCOPE("cpu_trsm"); \
|
||||
assert( side == 'L' or side == 'R' ); \
|
||||
assert( ul == 'U' or ul == 'L' ); \
|
||||
assert( transA == 'N' or transA == 'T' or transA == 'C' ); \
|
||||
assert( diag == 'U' or diag == 'N' ); \
|
||||
MULTI_ASSERT1( m >= 0 and n >= 0 ); \
|
||||
using std::max; \
|
||||
if(side == 'L') MULTI_ASSERT1(lda >= max(ssize_t{1}, m)); else if(side == 'R') assert( lda >= max(ssize_t{1}, n) ); \
|
||||
MULTI_ASSERT1( ldb >= max(ssize_t{1}, m) ); \
|
||||
BLAS(T##trsm)(side, ul, transA, diag, BC(m), BC(n), alpha, (T const*)static_cast<AA*>(aa), BC(lda), (T*)static_cast<BB*>(bb), BC(ldb)); \
|
||||
}
|
||||
xtrsm(s) xtrsm(d) xtrsm(c) xtrsm(z)
|
||||
#undef xtrsm
|
||||
|
||||
xsyrk(s) xsyrk(d) xsyrk(c) xsyrk(z)
|
||||
xherk(c) xherk(z)
|
||||
|
||||
}
|
||||
|
||||
#undef xsyrk
|
||||
#undef xherk
|
||||
#undef xtrsm
|
||||
|
||||
#undef BC
|
||||
|
||||
struct context{ // stateless (and thread safe)
|
||||
template<class... As>
|
||||
static auto axpy(As... as)
|
||||
->decltype(core::axpy(as...)){
|
||||
return core::axpy(as...);}
|
||||
|
||||
template<class... As>
|
||||
static auto gemv(As... as)
|
||||
->decltype(core::gemv(as...)){
|
||||
return core::gemv(as...);}
|
||||
|
||||
template<class... As>
|
||||
static auto gemm(As&&... as)
|
||||
->decltype(core::gemm(std::forward<As>(as)...)){
|
||||
return core::gemm(std::forward<As>(as)...);}
|
||||
|
||||
template<class... As>
|
||||
static auto dot(As&&... as)
|
||||
->decltype(core::dot(std::forward<As>(as)...)){
|
||||
return core::dot(std::forward<As>(as)...);}
|
||||
|
||||
template<class... As>
|
||||
static auto dotc(As&&... as)
|
||||
->decltype(core::dotc(std::forward<As>(as)...)){
|
||||
return core::dotc(std::forward<As>(as)...);}
|
||||
|
||||
template<class... As>
|
||||
static auto dotu(As&&... as)
|
||||
->decltype(core::dotu(std::forward<As>(as)...)){
|
||||
return core::dotu(std::forward<As>(as)...);}
|
||||
|
||||
template<class... As>
|
||||
static auto trsm(As&&... as)
|
||||
->decltype(core::trsm(std::forward<As>(as)...)){
|
||||
return core::trsm(std::forward<As>(as)...);}
|
||||
|
||||
template<class... As>
|
||||
static auto herk(As&&... as)
|
||||
->decltype(core::herk(std::forward<As>(as)...)){
|
||||
return core::herk(std::forward<As>(as)...);}
|
||||
};
|
||||
|
||||
template<class Context> struct is_context : std::false_type{};
|
||||
template<> struct is_context<context> : std::true_type{};
|
||||
template<> struct is_context<context&&> : std::true_type{};
|
||||
template<> struct is_context<context&> : std::true_type{};
|
||||
template<> struct is_context<context const&> : std::true_type{};
|
||||
|
||||
template<> struct is_context<void*&> : std::true_type{};
|
||||
|
||||
namespace core{
|
||||
template<class Context, class... As>
|
||||
auto copy(Context&&, As... as)
|
||||
->decltype(core::copy(as...)){
|
||||
return core::copy(as...);}
|
||||
}
|
||||
|
||||
template<class TPtr, std::enable_if_t<std::is_convertible<TPtr, typename std::pointer_traits<TPtr>::element_type*>{}, int> =0>
|
||||
blas::context* default_context_of(TPtr const&){return {};}
|
||||
|
||||
}
|
||||
|
||||
}}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#if not __INCLUDE_LEVEL__
|
||||
|
||||
#include "../../array.hpp"
|
||||
#include "../../utility.hpp"
|
||||
|
||||
int main(){}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
|
@ -1,545 +0,0 @@
|
|||
#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*-
|
||||
$CXXX $CXXFLAGS -include"boost/log/trivial.hpp" -D'MULTI_MARK_SCOPE(MsG)=BOOST_LOG_TRIVIAL(trace)<<MsG' -DBOOST_LOG_DYN_LINK $0 -o $0x `pkg-config --cflags --libs cudart-11.0 cublas-11.0 blas` -lboost_unit_test_framework -lboost_log -lboost_thread -lboost_system -lboost_log_setup -lpthread&&$0x&&rm $0x;exit
|
||||
#endif
|
||||
// © Alfredo A. Correa 2019-2020
|
||||
|
||||
#ifndef MULTI_ADAPTORS_BLAS_CUDA_HPP
|
||||
#define MULTI_ADAPTORS_BLAS_CUDA_HPP
|
||||
|
||||
#include "../blas/../../config/MARK.hpp" // MULTI_MARK_SCOPE
|
||||
|
||||
#include "../../adaptors/blas/core.hpp" // is_context
|
||||
|
||||
#include "../../memory/adaptors/cuda/ptr.hpp"
|
||||
#include "../../memory/adaptors/cuda/managed/ptr.hpp"
|
||||
#include "../../memory/adaptors/cuda/managed/allocator.hpp"
|
||||
|
||||
#include<cublas_v2.h>
|
||||
|
||||
#include "../cuda/cublas/error.hpp"
|
||||
|
||||
#include<thrust/complex.h>
|
||||
|
||||
#define DECLRETURN(ExpR) ->decltype(ExpR){return ExpR;}
|
||||
#define JUSTRETURN(ExpR) {return ExpR;}
|
||||
|
||||
#include<complex>
|
||||
|
||||
///////////////////
|
||||
|
||||
#include<system_error>
|
||||
|
||||
// Evaluates the cuBLAS call CodE, calls cudaDeviceSynchronize(), and throws
// std::system_error when the returned status is not `success`.
// The body is wrapped in do{...}while(false) so that the macro behaves as one statement
// (safe under an unbraced `if`) and its local status variable cannot collide with names
// in the calling scope or with a second use of the macro in the same scope.
// Always terminate a use with a semicolon: CUBLAS_CALL(cublasXxx(args...));
#define CUBLAS_CALL(CodE) \
	do{ \
		MULTI_MARK_SCOPE("multi::cublas::"#CodE); \
		auto const multi_cublas_status_ = static_cast<enum boost::multi::cuda::cublas::error>(CodE); \
		cudaDeviceSynchronize(); /*TODO make this more specific to managed ptr and specific handle*/ \
		if(multi_cublas_status_ != boost::multi::cuda::cublas::error::success){ \
			throw std::system_error{boost::multi::cuda::cublas::make_error_code(multi_cublas_status_), "cannot call cublas function "#CodE }; \
		} \
	}while(false)
|
||||
|
||||
// Explicitly deleted declaration: any call that resolves to cublasZdot with these
// parameter types is rejected at compile time.
// NOTE(review): presumably present because cuBLAS only offers cublasZdotu/cublasZdotc
// for complex vectors -- confirm.
cublasStatus_t cublasZdot(
	cublasHandle_t handle, int n,
	double2 const* x, int incx,
	double2 const* y, int incy,
	double2* result
) = delete;
|
||||
|
||||
namespace boost{
|
||||
namespace multi{
|
||||
|
||||
namespace cublas{
|
||||
using Complex = cuComplex;
|
||||
using DoubleComplex = cuDoubleComplex;
|
||||
namespace {
|
||||
template<class T> struct complex_t;
|
||||
template<> struct complex_t<float>{using type = Complex;};
|
||||
template<> struct complex_t<double>{using type = DoubleComplex;};
|
||||
}
|
||||
template<class T> using complex = typename complex_t<T>::type;
|
||||
|
||||
// 2.2.7. cublasPointerMode_t https://docs.nvidia.com/cuda/cublas/index.html#cublaspointermode_t
|
||||
/// Scoped counterpart of cublasPointerMode_t: same underlying type, same enumerator values.
enum class pointer_mode : std::underlying_type_t<cublasPointerMode_t>{
	host   = CUBLAS_POINTER_MODE_HOST,
	device = CUBLAS_POINTER_MODE_DEVICE
};
|
||||
template<class T> enum pointer_mode scalar_kind(memory::cuda::ptr<T>){return pointer_mode::device;}
|
||||
template<class T> enum pointer_mode scalar_kind(T*){return pointer_mode::host;}
|
||||
}
|
||||
|
||||
using v = void;
|
||||
using S = float;
|
||||
using D = double;
|
||||
using C = cublas::complex<float>;
|
||||
using Z = cublas::complex<double>;
|
||||
|
||||
template<class T = void> struct cublas1{};
|
||||
template<class T = void> struct cublas2{};
|
||||
template<class T = void> struct cublas3{};
|
||||
|
||||
#define DEFINE_CUBLAS1(UppeR, LowR) \
|
||||
template<> struct cublas1<UppeR>{ \
|
||||
template<class...As> static auto iamax(As...as){return cublasI##LowR##amax(as...);} \
|
||||
/*amin */ \
|
||||
template<class...As> static auto asum (As...as){return cublas##UppeR##asum (as...);} \
|
||||
/*axpy */ \
|
||||
template<class...As> static auto copy (As...as){return cublas##UppeR##copy (as...);} \
|
||||
template<class...As> static auto dot (As...as){return cublas##UppeR##dot (as...);} \
|
||||
template<class...As> static auto dotu (As...as){return cublas##UppeR##dotu (as...);} \
|
||||
template<class...As> static auto dotc (As...as){return cublas##UppeR##dotc (as...);} \
|
||||
template<class...As> static auto nrm2 (As...as){return cublas##UppeR##nrm2 (as...);} \
|
||||
/*rot */ \
|
||||
/*rotg */ \
|
||||
/*rotmg*/ \
|
||||
template<class...As> static auto scal (As...as){return cublas##UppeR##scal (as...);} \
|
||||
/*swap */ \
|
||||
}
|
||||
|
||||
DEFINE_CUBLAS1(S, s);
|
||||
DEFINE_CUBLAS1(D, d);
|
||||
|
||||
// Generates the cublas1 specialization for a complex element type.
//   UppeR / LowR         : upper/lower-case letter of the complex type (C/c or Z/z)
//   ReaLUppeR / ReaLLowR : upper/lower-case letter of the matching real type (S/s or D/d)
// Every member passes its arguments, by value and unchanged, to the cuBLAS routine whose
// name is pasted together from these letters. `dot` maps to the ...dotu routine.
// asum and nrm2 return a real value for complex input, so their routine names carry the
// real-type prefix followed by the complex letter (cublasScasum, cublasScnrm2,
// cublasDzasum, cublasDznrm2); cuBLAS has no cublasCnrm2 / cublasZnrm2.
#define DEFINE_CUBLAS1_COMPLEX(UppeR, LowR, ReaLUppeR, ReaLLowR) \
template<> struct cublas1<UppeR>{ \
	template<class...As> static auto iamax(As...as){return cublasI##LowR##amax(as...);} \
	/*amin */ \
	template<class...As> static auto asum (As...as){return cublas##ReaLUppeR##LowR##asum (as...);} \
	/*axpy */ \
	template<class...As> static auto copy (As...as){return cublas##UppeR##copy (as...);} \
	template<class...As> static auto dot  (As...as){return cublas##UppeR##dotu (as...);} \
	template<class...As> static auto dotu (As...as){return cublas##UppeR##dotu (as...);} \
	template<class...As> static auto dotc (As...as){return cublas##UppeR##dotc (as...);} \
	template<class...As> static auto nrm2 (As...as){return cublas##ReaLUppeR##LowR##nrm2 (as...);} \
	/*rot  */ \
	/*rotg */ \
	/*rotmg*/ \
	template<class...As> static auto scal (As...as){return cublas##UppeR##scal (as...);} \
	/*swap */ \
}
|
||||
|
||||
DEFINE_CUBLAS1_COMPLEX(C, c, S, s);
|
||||
DEFINE_CUBLAS1_COMPLEX(Z, z, D, d);
|
||||
|
||||
template<class T> struct nrm2_result;//{using type = T;};
|
||||
template<> struct nrm2_result<S>{using type = S;};
|
||||
template<> struct nrm2_result<D>{using type = D;};
|
||||
template<> struct nrm2_result<C>{using type = S;};
|
||||
template<> struct nrm2_result<Z>{using type = D;};
|
||||
|
||||
template<> struct cublas1<void>{
|
||||
// 2.5.1. cublasI<t>amax() https://docs.nvidia.com/cuda/cublas/index.html#cublasi-lt-t-gt-amax
|
||||
template<class T> static cublasStatus_t iamax(cublasHandle_t handle, int n, const T* x, int incx, int *result ){return cublas1<T>::iamax(handle, n, x, incx, result);}
|
||||
// 2.5.3. cublas<t>asum() https://docs.nvidia.com/cuda/cublas/index.html#cublas-lt-t-gt-asum
|
||||
template<class T1, class T2> static cublasStatus_t asum (cublasHandle_t handle, int n, T1 const* x, int incx, T2* result ){return cublas1<T1>::asum(handle, n, x, incx, result);}
|
||||
// 2.5.5. cublas<t>copy() https://docs.nvidia.com/cuda/cublas/index.html#cublas-lt-t-gt-copy
|
||||
template<class T> static cublasStatus_t copy (cublasHandle_t handle, int n, const T* x, int incx, T* y, int incy){return cublas1<T>::copy(handle, n, x, incx, y, incy);}
|
||||
// 2.5.6. cublas<t>dot() https://docs.nvidia.com/cuda/cublas/index.html#cublas-lt-t-gt-dot
|
||||
template<class T> static auto dot(cublasHandle_t handle, int n, const T* x, int incx, const T* y, int incy, T* result)
|
||||
->decltype(cublas1<T>::dot(handle, n, x, incx, y, incy, result)){MULTI_MARK_SCOPE("function dot");
|
||||
return cublas1<T>::dot(handle, n, x, incx, y, incy, result);}
|
||||
template<class T> static auto dotu(cublasHandle_t handle, int n, const T* x, int incx, const T* y, int incy, T* result)
|
||||
->decltype(cublas1<T>::dotu(handle, n, x, incx, y, incy, result)){MULTI_MARK_SCOPE("function dotu");
|
||||
return cublas1<T>::dotu(handle, n, x, incx, y, incy, result);}
|
||||
template<class T> static auto dotc(cublasHandle_t handle, int n, const T* x, int incx, const T* y, int incy, T* result)
|
||||
->decltype(cublas1<T>::dotc(handle, n, x, incx, y, incy, result)){MULTI_MARK_SCOPE("function dotc");
|
||||
return cublas1<T>::dotc(handle, n, x, incx, y, incy, result);}
|
||||
// 2.5.7. cublas<t>nrm2() https://docs.nvidia.com/cuda/cublas/index.html#cublas-lt-t-gt-nrm2
|
||||
template<class T> static auto nrm2(cublasHandle_t handle, int n,
|
||||
const T *x, int incx, typename nrm2_result<T>::type *result){return cublas1<T>::nrm2(handle, n, x, incx, result);}
|
||||
// 2.5.12. cublas<t>scal() https://docs.nvidia.com/cuda/cublas/index.html#cublas-lt-t-gt-scale
|
||||
template<class T> static cublasStatus_t scal(cublasHandle_t handle, int n,
|
||||
const T *alpha,
|
||||
T *x, int incx){return cublas1<T>::scal(handle, n, alpha, x, incx);}
|
||||
};
|
||||
|
||||
template<> struct cublas2<void>{
|
||||
// 2.6.16. cublas<t>trsv() https://docs.nvidia.com/cuda/cublas/index.html#cublas-lt-t-gt-trsv
|
||||
template<class T> static cublasStatus_t trsv(cublasHandle_t handle, cublasFillMode_t uplo,
|
||||
cublasOperation_t trans, cublasDiagType_t diag,
|
||||
int n, const T *A, int lda,
|
||||
T *x, int incx){return cublas2<T>::trsv(handle, uplo, trans, diag, n, A, lda, x, incx);}
|
||||
};
|
||||
|
||||
template<> struct cublas2<S>{template<class...A> static auto trsv(A...a){return cublasStrsv(a...);}};
|
||||
template<> struct cublas2<D>{template<class...A> static auto trsv(A...a){return cublasDtrsv(a...);}};
|
||||
template<> struct cublas2<C>{template<class...A> static auto trsv(A...a){return cublasCtrsv(a...);}};
|
||||
template<> struct cublas2<Z>{template<class...A> static auto trsv(A...a){return cublasZtrsv(a...);}};
|
||||
|
||||
template<> struct cublas3<S>{
|
||||
template<class...As> static auto gemm (As...as){CUBLAS_CALL(cublasSgemm(as...));}
|
||||
template<class...As> static auto syrk (As...as){CUBLAS_CALL(cublasSsyrk(as...));}
|
||||
// template<class...As> static auto herk (As...as){return CUBLAS_CALL(cublasSherk)(as...);}
|
||||
template<class...As> static auto trsm (As...as){CUBLAS_CALL(cublasStrsm(as...));}
|
||||
};
|
||||
// Level-3 bindings for element type D. Every wrapper takes its arguments by value and hands the
// complete cuBLAS call expression to the CUBLAS_CALL macro (defined outside this excerpt).
template<> struct cublas3<D>{
	template<class...As> static auto gemm(As...as){
		CUBLAS_CALL(cublasDgemm(as...));
	}
	template<class...As> static auto syrk(As...as){
		CUBLAS_CALL(cublasDsyrk(as...));
	}
	// herk is left disabled for this element type:
//	template<class...As> static auto herk (As...as){return cublas_call(cublasDherk)(as...);}
	template<class...As> static auto trsm(As...as){
		CUBLAS_CALL(cublasDtrsm(as...));
	}
};
|
||||
// Level-3 bindings for element type C, including herk. Every wrapper takes its arguments by value
// and hands the complete cuBLAS call expression to the CUBLAS_CALL macro (defined outside this excerpt).
template<> struct cublas3<C>{
	template<class...As> static auto gemm(As...as){
		CUBLAS_CALL(cublasCgemm(as...));
	}
	template<class...As> static auto syrk(As...as){
		CUBLAS_CALL(cublasCsyrk(as...));
	}
	template<class...As> static auto herk(As...as){
		CUBLAS_CALL(cublasCherk(as...));
	}
	template<class...As> static auto trsm(As...as){
		CUBLAS_CALL(cublasCtrsm(as...));
	}
};
|
||||
// Level-3 bindings for element type Z, including herk. Every wrapper takes its arguments by value
// and hands the complete cuBLAS call expression to the CUBLAS_CALL macro (defined outside this excerpt).
template<> struct cublas3<Z>{
	template<class...As> static auto gemm(As...as){
		CUBLAS_CALL(cublasZgemm(as...));
	}
	template<class...As> static auto syrk(As...as){
		CUBLAS_CALL(cublasZsyrk(as...));
	}
	template<class...As> static auto herk(As...as){
		CUBLAS_CALL(cublasZherk(as...));
	}
	template<class...As> static auto trsm(As...as){
		CUBLAS_CALL(cublasZtrsm(as...));
	}
};
|
||||
|
||||
template<class T> struct herk_scalar;
|
||||
template<> struct herk_scalar<C>{using type = S;};
|
||||
template<> struct herk_scalar<Z>{using type = D;};
|
||||
|
||||
template<class T> struct asum_scalar;
|
||||
template<> struct asum_scalar<C>{using type = S;};
|
||||
template<> struct asum_scalar<Z>{using type = D;};
|
||||
|
||||
template<class T> using herk_scalar_t = typename herk_scalar<T>::type;
|
||||
|
||||
template<> struct cublas3<void>{
|
||||
// 2.7.1. cublas<t>gemm() https://docs.nvidia.com/cuda/cublas/index.html#cublas-lt-t-gt-gemm
|
||||
template<class T> static auto gemm(cublasHandle_t handle,
|
||||
cublasOperation_t transa, cublasOperation_t transb,
|
||||
int m, int n, int k,
|
||||
const T *alpha,
|
||||
const T *A, int lda,
|
||||
const T *B, int ldb,
|
||||
const T *beta,
|
||||
T *C, int ldc){MULTI_MARK_SCOPE("cublas3 gemm"); return cublas3<T>::gemm(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);}
|
||||
// 2.7.6. cublas<t>syrk() https://docs.nvidia.com/cuda/cublas/index.html#cublas-lt-t-gt-syrk
|
||||
template<class T> static auto syrk(cublasHandle_t handle,
|
||||
cublasFillMode_t uplo, cublasOperation_t trans,
|
||||
int n, int k,
|
||||
const T *alpha,
|
||||
const T *A, int lda,
|
||||
const T *beta,
|
||||
T *C, int ldc){return cublas3<T>::syrk(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc);}
|
||||
// 2.7.13. cublas<t>herk() https://docs.nvidia.com/cuda/cublas/index.html#cublas-lt-t-gt-herk
|
||||
template<class T2, class T3> static auto herk(cublasHandle_t handle,
|
||||
cublasFillMode_t uplo, cublasOperation_t trans,
|
||||
int n, int k,
|
||||
const herk_scalar_t<T2> *alpha,
|
||||
const T2 *A, int lda,
|
||||
const herk_scalar_t<T2> *beta,
|
||||
T3 *C, int ldc){return cublas3<T2>::herk(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc);}
|
||||
// 2.7.10. cublas<t>trsm() https://docs.nvidia.com/cuda/cublas/index.html#cublas-lt-t-gt-trsm
|
||||
template<class T> static auto trsm(cublasHandle_t handle,
|
||||
cublasSideMode_t side, cublasFillMode_t uplo,
|
||||
cublasOperation_t trans, cublasDiagType_t diag,
|
||||
int m, int n,
|
||||
std::add_const_t<T> *alpha,
|
||||
std::add_const_t<T> *A, int lda,
|
||||
T *B, int ldb){return cublas3<T>::trsm(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb);}
|
||||
};
|
||||
|
||||
namespace cublas{
|
||||
|
||||
// Identity translation: any argument whose type is not integral is handed back by value, unchanged.
template<class T, std::enable_if_t<!std::is_integral<T>::value, int> = 0>
decltype(auto) translate(T t){
	return t;
}
|
||||
// Pass-through for objects that cannot be copied: the very same object is returned by reference.
template<class T, std::enable_if_t<!std::is_copy_constructible<std::decay_t<T>>::value, int> = 0>
T& translate(T& t){
	return t;
}
|
||||
|
||||
auto translate(std::complex<float> const * t){return reinterpret_cast<cublas::complex<float> const*>(t);}
|
||||
auto translate(std::complex<float> * t){return reinterpret_cast<cublas::complex<float> *>(t);}
|
||||
auto translate(std::complex<double> const* t){return reinterpret_cast<cublas::complex<double> const*>(t);}
|
||||
auto translate(std::complex<double> * t){return reinterpret_cast<cublas::complex<double> *>(t);}
|
||||
|
||||
// Reinterpret pointers to thrust::complex<double> as pointers to cublas::complex<double>.
// Marked `inline` because these non-template functions are defined in a header; without it,
// including the header from more than one translation unit produces duplicate definitions.
// NOTE(review): presumes the two complex types share the same object representation; confirm.
inline auto translate(thrust::complex<double> const* t){return reinterpret_cast<cublas::complex<double> const*>(t);}
inline auto translate(thrust::complex<double>      * t){return reinterpret_cast<cublas::complex<double>      *>(t);}
|
||||
|
||||
// Device and managed pointer wrappers are first reduced with raw_pointer_cast and the result is
// translated again, so that the overloads for raw pointers apply.
template<class T>
auto translate(memory::cuda::ptr<T> p)
DECLRETURN(translate(raw_pointer_cast(p)))

template<class T>
auto translate(memory::cuda::managed::ptr<T> p)
DECLRETURN(translate(raw_pointer_cast(p)))
|
||||
|
||||
//auto translate(context& c){return c;}
|
||||
|
||||
// Converts an integral size/stride argument to the `int` taken by the cuBLAS entry points.
// In debug builds the value is checked to lie in (-INT_MAX, INT_MAX].
// The bounds are compared in the widest standard integer types rather than in T: casting INT_MAX
// to T wraps for unsigned T (making the lower-bound check fail for every value) and truncates
// when T is narrower than int.
template<class T, std::enable_if_t<std::is_integral<T>{},int> = 0>
auto translate(T n){
	// upper bound: only positive values can exceed INT_MAX
	assert( n <= 0 or static_cast<unsigned long long>(n) <= static_cast<unsigned long long>(std::numeric_limits<int>::max()) );
	// lower bound: only negative values (hence signed T) can fall below -INT_MAX
	assert( n >= 0 or static_cast<long long>(n) > -static_cast<long long>(std::numeric_limits<int>::max()) );
	return static_cast<int>(n);
}
|
||||
|
||||
// Maps a BLAS-style operation character to the cuBLAS enumerator:
// 'N' -> CUBLAS_OP_N, 'T' -> CUBLAS_OP_T, 'C' -> CUBLAS_OP_C.
// Any other character trips the assert in debug builds and yields CUBLAS_OP_N otherwise.
// Marked `inline` because this non-template function is defined in a header; without it,
// including the header from more than one translation unit produces duplicate definitions.
inline auto translate(char O)->cublasOperation_t{
	switch(O){
		case 'N': return CUBLAS_OP_N;
		case 'T': return CUBLAS_OP_T;
		case 'C': return CUBLAS_OP_C;
	}
	assert(0);
	return CUBLAS_OP_N;
}
|
||||
|
||||
//struct context : std::unique_ptr<std::decay_t<decltype(*cublasHandle_t{})>, decltype(&cublasDestroy)>{
|
||||
// context() : std::unique_ptr<std::decay_t<decltype(*cublasHandle_t{})>, decltype(&cublasDestroy)>(
|
||||
// []{MULTI_MARK_SCOPE("multi::cublas::create context"); cublasHandle_t h; cublasCreate(&h); return h;}(), &cublasDestroy
|
||||
// ){}
|
||||
// int version() const{
|
||||
// int ret; cublasGetVersion(get(), &ret); return ret;
|
||||
// }
|
||||
// context(context&& other) noexcept = default;
|
||||
// ~context() noexcept = default;
|
||||
//// 2.4.7. cublasGetPointerMode()
|
||||
// auto get_pointer_mode() const{
|
||||
// cublasPointerMode_t ret; cublasGetPointerMode(get(), &ret);
|
||||
// return static_cast<enum pointer_mode>(ret);
|
||||
// }
|
||||
//// 2.4.8. cublasSetPointerMode() https://docs.nvidia.com/cuda/cublas/index.html#cublassetpointermode
|
||||
// context& set_pointer_mode(enum pointer_mode m){
|
||||
// cublasSetPointerMode(get(), static_cast<cublasPointerMode_t>(m)); return *this;
|
||||
// }
|
||||
// //set_stream https://docs.nvidia.com/cuda/cublas/index.html#cublassetstream
|
||||
// //get_stream https://docs.nvidia.com/cuda/cublas/index.html#cublasgetstream
|
||||
// //get_pointer_mode https://docs.nvidia.com/cuda/cublas/index.html#cublasgetpointermode
|
||||
// //set_pointer_mode https://docs.nvidia.com/cuda/cublas/index.html#cublasgetpointermode
|
||||
// template<class...As> auto iamax(As...as) const DECLRETURN(cublas1<>::iamax(get(), translate(as)...))
|
||||
// template<class...As> auto asum (As...as) const DECLRETURN(cublas1<>::asum (get(), translate(as)...))
|
||||
// template<class...As> auto scal (As...as) const DECLRETURN(cublas1<>::scal (get(), translate(as)...))
|
||||
// template<class...As> auto dot (As...as) const DECLRETURN(cublas1<>::dot (get(), translate(as)...))
|
||||
// template<class...As> auto dotu (As...as) const DECLRETURN(cublas1<>::dotu (get(), translate(as)...))
|
||||
// template<class...As> auto dotc (As...as) const DECLRETURN(cublas1<>::dotc (get(), translate(as)...))
|
||||
// template<class S, class Ptr, class T>
|
||||
// auto nrm2(S n, Ptr p, S incx, memory::cuda::ptr<T> result) // no const because the method is not thread safe
|
||||
// ->decltype(cublas1<>::nrm2 (get(), translate(n), translate(p), translate(incx), translate(result))){set_pointer_mode(pointer_mode::device);
|
||||
// auto r=cublas1<>::nrm2 (get(), translate(n), translate(p), translate(incx), translate(result)); set_pointer_mode(pointer_mode::host);
|
||||
// return r;
|
||||
// }
|
||||
// template<class S, class Ptr, class T>
|
||||
// auto nrm2(S n, Ptr p, S incx, T* result) const{
|
||||
// return cublas1<>::nrm2 (get(), translate(n), translate(p), translate(incx), translate(result));
|
||||
// }
|
||||
// template<class...As> auto copy (As...as) const DECLRETURN(cublas1<>::copy (get(), translate(as)...))
|
||||
// template<class...As> auto trsv (As...as) const{return cublas2<>::trsv(get(), translate(as)...);}
|
||||
|
||||
// template<typename... As> auto gemm(As... as) DECLRETURN(cublas3<>::gemm(get(), translate(as)...))
|
||||
|
||||
// template<class...As> auto syrk (As...as) const{return cublas3<>::syrk(get(), translate(as)...);}
|
||||
// template<class...As> auto herk (As...as) const{return cublas3<>::herk(get(), translate(as)...);}
|
||||
// template<class...As> auto trsm (As...as) const{return cublas3<>::trsm(get(), translate(as)...);}
|
||||
//};
|
||||
|
||||
//context* get_default_context(){
|
||||
// thread_local context instance;
|
||||
// return &instance;
|
||||
//}
|
||||
|
||||
}
|
||||
|
||||
}}
|
||||
|
||||
namespace boost{
|
||||
namespace multi{
|
||||
|
||||
namespace blas{
|
||||
|
||||
template<> struct is_context<boost::multi::cublas::context> : std::true_type{};
|
||||
template<> struct is_context<boost::multi::cublas::context&&> : std::true_type{};
|
||||
template<> struct is_context<boost::multi::cublas::context&> : std::true_type{};
|
||||
|
||||
template<class T> boost::multi::cublas::context* default_context_of(memory::cuda:: ptr<T> const&){return boost::multi::cublas::get_default_context();}
|
||||
template<class T> boost::multi::cublas::context* default_context_of(memory::cuda::managed::ptr<T> const&){return boost::multi::cublas::get_default_context();}
|
||||
|
||||
//template<class T> boost::multi::cublas::context default_context_of(memory::cuda::managed::ptr<T>){return {};}
|
||||
|
||||
//}
|
||||
|
||||
//namespace memory{namespace cuda{
|
||||
// using boost::multi::blas::default_context_of; // to please nvcc 'default_context_of' should be declared prior to the call site or in namespace 'boost::multi::memory::cuda'
|
||||
//}}
|
||||
}
|
||||
|
||||
}}
|
||||
|
||||
namespace boost{
|
||||
namespace multi{
|
||||
|
||||
namespace memory{
|
||||
namespace cuda{
|
||||
|
||||
template<class... As>
|
||||
auto iamax(As... as)
|
||||
->decltype(cublas::context{}.iamax(as..., std::declval<int*>()), int()){
|
||||
int r; cublas::context{}.iamax(as..., &r); return r-1;}
|
||||
|
||||
template<class ComplexTconst, typename S>//, typename T = typename std::decay_t<ComplexTconst>::value_type>
|
||||
auto asum(S n, cuda::ptr<ComplexTconst> x, S incx){
|
||||
decltype(std::abs(ComplexTconst{})) r;
|
||||
cublas::context{}.asum(n, raw_pointer_cast(x), incx, &r);
|
||||
return r;
|
||||
}
|
||||
|
||||
// Thin forwarders: each creates a temporary cublas::context and calls the member of the same name
// with the arguments it received. DECLRETURN (a macro defined elsewhere) supplies the return type and body.
template<class...As>
auto copy(As... as)
DECLRETURN(cublas::context{}.copy(as...))

template<class...As>
auto scal(As... as)
DECLRETURN(cublas::context{}.scal(as...))

//template<class...As> auto dot (As... as) DECLRETURN(cublas::context{}.dot (as...))

template<class...As>
auto dotu(As... as)
DECLRETURN(cublas::context{}.dotu(as...))

template<class...As>
auto dotc(As... as)
DECLRETURN(cublas::context{}.dotc(as...))

template<class...As>
auto nrm2(As... as)
DECLRETURN(cublas::context{}.nrm2(as...))
|
||||
|
||||
template<class S, class Tconst, class T>
|
||||
auto trsv(char ul, char transA, char a_diag, S n, memory::cuda::ptr<Tconst> A, S lda, memory::cuda::ptr<T> X, S ldc){
|
||||
cublasFillMode_t uplo = [ul](){
|
||||
switch(ul){
|
||||
case 'U': return CUBLAS_FILL_MODE_UPPER;
|
||||
case 'L': return CUBLAS_FILL_MODE_LOWER;
|
||||
} assert(0); return CUBLAS_FILL_MODE_UPPER;
|
||||
}();
|
||||
cublasOperation_t cutransA = [transA](){
|
||||
switch(transA){
|
||||
case 'N': return CUBLAS_OP_N;
|
||||
case 'T': return CUBLAS_OP_T;
|
||||
case 'C': return CUBLAS_OP_C;
|
||||
} assert(0); return CUBLAS_OP_N;
|
||||
}();
|
||||
auto cudiag = a_diag=='N'?CUBLAS_DIAG_NON_UNIT:CUBLAS_DIAG_UNIT;
|
||||
return cublas::context{}.trsv(uplo, cutransA, cudiag, n, A, lda, X, ldc);
|
||||
}
|
||||
|
||||
template<class... As>
|
||||
auto gemm(As... as)
|
||||
->decltype(cublas::context{}.gemm(as...)){
|
||||
return cublas::context{}.gemm(as...);}
|
||||
|
||||
template<class Tconst, class T, class UL, class C, class S, class Real>
|
||||
void syrk(UL ul, C transA, S n, S k, Real alpha, multi::memory::cuda::ptr<Tconst> A, S lda, Real beta, multi::memory::cuda::ptr<T> CC, S ldc){
|
||||
cublasFillMode_t uplo = [ul](){
|
||||
switch(ul){
|
||||
case 'U': return CUBLAS_FILL_MODE_UPPER;
|
||||
case 'L': return CUBLAS_FILL_MODE_LOWER;
|
||||
} assert(0); return CUBLAS_FILL_MODE_UPPER;
|
||||
}();
|
||||
cublasOperation_t cutransA = [transA](){
|
||||
switch(transA){
|
||||
case 'N': return CUBLAS_OP_N;
|
||||
case 'T': return CUBLAS_OP_T;
|
||||
case 'C': return CUBLAS_OP_C;
|
||||
} assert(0); return CUBLAS_OP_N;
|
||||
}();
|
||||
return cublas::context{}.syrk(uplo, cutransA, n, k, &alpha, static_cast<T const*>(A), lda, &beta, static_cast<T*>(CC), ldc);
|
||||
}
|
||||
|
||||
template<class Tconst, class T, class UL, class C, class S, class Real>
|
||||
auto herk(UL ul, C transA, S n, S k, Real alpha, memory::cuda::ptr<Tconst> A, S lda, Real beta, memory::cuda::ptr<T> CC, S ldc){
|
||||
cublasFillMode_t uplo = [ul](){
|
||||
switch(ul){
|
||||
case 'U': return CUBLAS_FILL_MODE_UPPER;
|
||||
case 'L': return CUBLAS_FILL_MODE_LOWER;
|
||||
} assert(0); return CUBLAS_FILL_MODE_UPPER;
|
||||
}();
|
||||
cublasOperation_t cutransA = [transA](){
|
||||
switch(transA){
|
||||
case 'N': return CUBLAS_OP_N;
|
||||
case 'T': return CUBLAS_OP_T;
|
||||
case 'C': return CUBLAS_OP_C;
|
||||
} assert(0); return CUBLAS_OP_N;
|
||||
}();
|
||||
return cublas::context{}.herk(uplo, cutransA, n, k, &alpha, raw_pointer_cast(A), lda, &beta, raw_pointer_cast(CC), ldc);
|
||||
}
|
||||
|
||||
template<class Side, class Fill, class Trans, class Diag, typename Size, class Tconst, class T/*, class Alpha*/>
|
||||
auto trsm(Side /*cublasSideMode_t*/ side, /*cublasFillMode_t*/ Fill uplo, /*cublasOperation_t*/ Trans trans, /*cublasDiagType_t*/ Diag diag,
|
||||
Size m, Size n, T alpha, cuda::ptr<Tconst> A, Size lda, cuda::ptr<T> B, Size ldb)
|
||||
->decltype(cublas::context{}.trsm(
|
||||
side=='L'?CUBLAS_SIDE_LEFT:CUBLAS_SIDE_RIGHT, uplo=='L'?CUBLAS_FILL_MODE_LOWER:CUBLAS_FILL_MODE_UPPER, CUBLAS_OP_N, diag=='N'?CUBLAS_DIAG_NON_UNIT:CUBLAS_DIAG_UNIT, m, n, &alpha, raw_pointer_cast(A), lda, raw_pointer_cast(B), ldb))
|
||||
{
|
||||
cublasOperation_t trans_cu = [&]{
|
||||
switch(trans){
|
||||
case 'N': return CUBLAS_OP_N;
|
||||
case 'T': return CUBLAS_OP_T;
|
||||
case 'C': return CUBLAS_OP_C;
|
||||
} __builtin_unreachable();
|
||||
}();
|
||||
// T alpha_{alpha};
|
||||
return cublas::context{}.trsm(
|
||||
side=='L'?CUBLAS_SIDE_LEFT:CUBLAS_SIDE_RIGHT, uplo=='L'?CUBLAS_FILL_MODE_LOWER:CUBLAS_FILL_MODE_UPPER, trans_cu, diag=='N'?CUBLAS_DIAG_NON_UNIT:CUBLAS_DIAG_UNIT, m, n, &alpha, raw_pointer_cast(A), lda, raw_pointer_cast(B), ldb);
|
||||
}
|
||||
|
||||
}}}}
|
||||
|
||||
namespace boost{namespace multi{namespace memory{namespace cuda{namespace managed{
|
||||
|
||||
using cuda::iamax;
|
||||
using cuda::asum;
|
||||
using cuda::copy;
|
||||
using cuda::scal;
|
||||
//using cuda::dot;
|
||||
using cuda::dotu;
|
||||
using cuda::dotc;
|
||||
using cuda::nrm2;
|
||||
|
||||
template<class S, class Tconst, class T>
|
||||
auto trsv(char ul, char transA, char a_diag, S n, multi::memory::cuda::managed::ptr<Tconst> A, S lda, cuda::managed::ptr<T> X, S ldc){
|
||||
cuda::trsv(ul, transA, a_diag, n, cuda::ptr<Tconst>(A), lda, cuda::ptr<T>(X), ldc);
|
||||
}
|
||||
|
||||
using cuda::gemm;
|
||||
using cuda::syrk;
|
||||
using cuda::herk;
|
||||
|
||||
template<class Side, class Fill, class Trans, class Diag, typename Size, class Tconst, class T>
|
||||
auto trsm(Side /*cublasSideMode_t*/ side, /*cublasFillMode_t*/ Fill uplo, /*cublasOperation_t*/ Trans trans, /*cublasDiagType_t*/ Diag diag,
|
||||
Size m, Size n, T alpha, cuda::managed::ptr<Tconst> A, Size lda, cuda::managed::ptr<T> B, Size ldb){
|
||||
return trsm(side, uplo, trans, diag, m, n, alpha, cuda::ptr<Tconst>(A), lda, cuda::ptr<T>(B), ldb);
|
||||
}
|
||||
|
||||
}}}}}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#if not __INCLUDE_LEVEL__ // _TEST_MULTI_ADAPTORS_BLAS_CUDA
|
||||
|
||||
#define BOOST_TEST_MODULE "C++ Unit Tests for Multi cuBLAS"
|
||||
#define BOOST_TEST_DYN_LINK
|
||||
#include<boost/test/unit_test.hpp>
|
||||
|
||||
#include "../../array.hpp"
|
||||
#include "../../utility.hpp"
|
||||
|
||||
#include "../../adaptors/cuda.hpp"
|
||||
#include "../../adaptors/blas.hpp"
|
||||
#include "../../adaptors/blas/cuda.hpp"
|
||||
|
||||
#include<cassert>
|
||||
|
||||
namespace multi = boost::multi;
|
||||
|
||||
#if 0
|
||||
BOOST_AUTO_TEST_CASE(multi_adaptors_blas_cuda_version){
|
||||
multi::cublas::context c;
|
||||
BOOST_REQUIRE( c.version() >= 10100 );
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(multi_adaptors_blas_cuda_iamax){
|
||||
using complex = std::complex<double>;
|
||||
complex const I{0,1};
|
||||
{
|
||||
multi::array<complex, 1> const A = {1. + 2.*I, 2., 3. + 3.*I, 4.};
|
||||
using multi::blas::iamax;
|
||||
BOOST_REQUIRE( iamax(A) == 2 );
|
||||
}
|
||||
{
|
||||
multi::cuda::array<complex, 1> const A = {1. + 2.*I, 2., 3. + 3.*I, 4.};
|
||||
using multi::blas::iamax;
|
||||
BOOST_REQUIRE( iamax(A) == 2 );
|
||||
}
|
||||
{
|
||||
multi::cuda::managed::array<complex, 1> const A = {1. + 2.*I, 2., 3. + 3.*I, 4.};
|
||||
using multi::blas::iamax;
|
||||
BOOST_REQUIRE( iamax(A) == 2 );
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// Deleted for every argument type, so any call to what(expr) is rejected by the compiler.
// Presumably a debugging aid for reading the deduced T off the resulting diagnostic.
template<class T>
void what(T&&) = delete;
|
||||
|
||||
// Checks that the conjugated dot product computed on device and managed arrays agrees with the
// result computed on the equivalent host arrays.
BOOST_AUTO_TEST_CASE(multi_adaptors_blas_cuda_dot){
	using complex = std::complex<double>;
	complex const I{0,1};
	multi::array<complex, 1> const A = {1. + 2.*I, 2., 3. + 3.*I, 4.};
	multi::array<complex, 1> const B = {2. + 3.*I, 4., 5. + 6.*I, 7.};
	namespace blas = multi::blas;
	{
		// device memory
		multi::cuda::array<complex, 1> const A_gpu = A, B_gpu = B;
		using blas::dot;
		BOOST_REQUIRE( dot(blas::C(A_gpu), B_gpu) == dot(blas::C(A), B) );
	}
	{
		// managed memory
		multi::cuda::managed::array<complex, 1> const A_mng = A, B_mng = B;
		using blas::dot;
		BOOST_REQUIRE( dot(blas::C(A_mng), A_mng) == dot(blas::C(A), A) );
		// B_mng used to be constructed but never read; exercise the A.B product as the device case does
		BOOST_REQUIRE( dot(blas::C(A_mng), B_mng) == dot(blas::C(A), B) );
	}
}
|
||||
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
|
@ -1,167 +0,0 @@
|
|||
#ifdef COMPILATION_INSTRUCTIONS
|
||||
/usr/local/cuda-11.1/bin/nvcc -x cu -std=c++17 -use_fast_math -lpthread -D_REENTRANT -DBOOST_PP_VARIADICS -Xcudafe "--diag_suppress=implicit_return_from_non_void_function" --extended-lambda --expt-relaxed-constexpr $0 -o $0x `pkg-config --cflags --libs cudart-11.0 cublas-11.0 blas` -lboost_unit_test_framework -DBOOST_LOG_DYN_LINK -lboost_log -lboost_thread -lboost_system -lboost_log_setup -lpthread -lboost_timer&&$0x&&rm $0x; exit
|
||||
#endif
|
||||
// © Alfredo A. Correa 2020-2021
|
||||
|
||||
#define BOOST_TEST_MODULE "C++ Unit Tests for Multi cuBLAS gemm"
|
||||
#define BOOST_TEST_DYN_LINK
|
||||
#include<boost/test/unit_test.hpp>
|
||||
#include <boost/timer/timer.hpp>
|
||||
|
||||
//#include"boost/log/trivial.hpp"
|
||||
//#define MULTI_MARK_SCOPE(MsG) BOOST_LOG_TRIVIAL(trace)<<MsG
|
||||
|
||||
//#include "../../../../adaptors/cublas/context.hpp"
|
||||
|
||||
#include "../../../cuda/cublas.hpp"
|
||||
#include "../../../../array.hpp"
|
||||
|
||||
#include "../../../../adaptors/cuda.hpp"
|
||||
#include "../../../../adaptors/blas.hpp"
|
||||
|
||||
#include<random>
|
||||
|
||||
namespace multi = boost::multi;
|
||||
|
||||
// Multiplies the conjugate transpose of a 3x2 complex matrix `a` by a 3x2 matrix `b` and checks
// element [1][0] of the 2x2 result, first with host arrays, then device arrays, then managed arrays.
BOOST_AUTO_TEST_CASE(multi_adaptors_blas_cuda_gemm_complex_3x2_3x2){
	namespace blas = multi::blas;
	using complex = std::complex<double>;
	complex const I{0, 1};

	multi::array<complex, 2> const a = {
		{1. + 2.*I, 5. + 2.*I},
		{9. - 1.*I, 9. + 1.*I},
		{1. + 1.*I, 2. + 2.*I}
	};
	multi::array<complex, 2> const b = {
		{ 11. - 2.*I, 5. + 2.*I},
		{  7. - 3.*I, 2. + 1.*I},
		{  8. - 1.*I, 1. + 1.*I}
	};

	// host memory
	{
		{
			multi::array<complex, 2> c({2, 2});
			c = blas::gemm(1., blas::H(a), b); // c = H(a)*b
			BOOST_REQUIRE( c[1][0] == 125.-84.*I );
		}
	}

	// device memory
	{
		multi::cuda::array<complex, 2> const a_gpu = a;
		multi::cuda::array<complex, 2> const b_gpu = b;
		{
			// assign the product to an existing array
			multi::cuda::array<complex, 2> c_gpu({2, 2});
			c_gpu = blas::gemm(1., blas::H(a_gpu), b_gpu); // c = H(a)*b
			BOOST_REQUIRE( c_gpu[1][0] == 125.-84.*I );
		}
		{
			// materialize the product with unary plus
			auto c_gpu =+ blas::gemm(1.0, blas::H(a_gpu), b_gpu);
			BOOST_REQUIRE( c_gpu[1][0] == 125.-84.*I );
		}
	}

	// managed memory
	{
		multi::cuda::managed::array<complex, 2> const a_gpu = a;
		multi::cuda::managed::array<complex, 2> const b_gpu = b;
		{
			// five-argument form writing into c_gpu
			multi::cuda::managed::array<complex, 2> c_gpu({2, 2});
			blas::gemm(1., blas::H(a_gpu), b_gpu, 0., c_gpu); // c = H(a)*b
			BOOST_REQUIRE( c_gpu[1][0] == 125.-84.*I );
		}
		{
			// materialize the product with unary plus
			auto c_gpu =+ blas::gemm(1.0, blas::H(a_gpu), b_gpu);
			BOOST_REQUIRE( c_gpu[1][0] == 125.-84.*I );
		}
	}
}
|
||||
|
||||
//BOOST_AUTO_TEST_CASE(multi_adaptors_blas_cuda_gemm_complex_3x2_3x2_with_context){
|
||||
// using complex = std::complex<double>; complex const I{0, 1};
|
||||
// namespace blas = multi::blas;
|
||||
// multi::array<complex, 2> const a = {
|
||||
// {1. + 2.*I, 5. + 2.*I},
|
||||
// {9. - 1.*I, 9. + 1.*I},
|
||||
// {1. + 1.*I, 2. + 2.*I}
|
||||
// };
|
||||
// multi::array<complex, 2> const b = {
|
||||
// { 11. - 2.*I, 5. + 2.*I},
|
||||
// { 7. - 3.*I, 2. + 1.*I},
|
||||
// { 8. - 1.*I, 1. + 1.*I}
|
||||
// };
|
||||
// {
|
||||
// {
|
||||
// multi::blas::context ctx;
|
||||
// multi::array<complex, 2> c({2, 2});
|
||||
// blas::gemm(ctx, 1., blas::H(a), b, 0., c); // c=ab, c⸆=b⸆a⸆
|
||||
// BOOST_REQUIRE( c[1][0] == 125.-84.*I );
|
||||
// }
|
||||
// }
|
||||
// {
|
||||
// multi::cublas::context ctx;
|
||||
// multi::cuda::array<complex, 2> const a_gpu = a;
|
||||
// multi::cuda::array<complex, 2> const b_gpu = b;
|
||||
// {
|
||||
// multi::cuda::array<complex, 2> c_gpu({2, 2});
|
||||
// blas::gemm(ctx, 1., blas::H(a_gpu), b_gpu, 0., c_gpu); // c=ab, c⸆=b⸆a⸆
|
||||
// BOOST_REQUIRE( c_gpu[1][0] == 125.-84.*I );
|
||||
// }
|
||||
// {
|
||||
// auto c_gpu =+ blas::gemm(&ctx, blas::H(a_gpu), b_gpu);
|
||||
// BOOST_REQUIRE( c_gpu[1][0] == 125.-84.*I );
|
||||
// }
|
||||
// }
|
||||
// {
|
||||
// multi::cublas::context ctx;
|
||||
// multi::cuda::managed::array<complex, 2> const a_gpu = a;
|
||||
// multi::cuda::managed::array<complex, 2> const b_gpu = b;
|
||||
// {
|
||||
// multi::cuda::managed::array<complex, 2> c_gpu({2, 2});
|
||||
// blas::gemm(ctx, 1., blas::H(a_gpu), b_gpu, 0., c_gpu); // c=ab, c⸆=b⸆a⸆
|
||||
// BOOST_REQUIRE( c_gpu[1][0] == 125.-84.*I );
|
||||
// }
|
||||
// {
|
||||
// auto c_gpu =+ blas::gemm(&ctx, blas::H(a_gpu), b_gpu);
|
||||
// BOOST_REQUIRE( c_gpu[1][0] == 125.-84.*I );
|
||||
// }
|
||||
// }
|
||||
//}
|
||||
|
||||
#if 0
|
||||
BOOST_AUTO_TEST_CASE(multi_adaptors_blas_cuda_gemm_context_timing){
|
||||
using complex = std::complex<double>;//complex const I{0, 1};
|
||||
|
||||
multi::array<complex, 2> A({1000, 1000});
|
||||
multi::array<complex, 2> B( {1000, 1000});
|
||||
multi::array<complex, 2> C({size(A), size(~B)});
|
||||
A[99][99] = B[11][22] = C[33][44] = 1.0;
|
||||
std::cerr<< "memory " << (A.num_elements()+ B.num_elements() + C.num_elements())*sizeof(complex)/1e6 <<" MB"<<std::endl;
|
||||
|
||||
{
|
||||
auto rand = [d=std::uniform_real_distribution<>{0., 10.}, g=std::mt19937{}]() mutable{return complex{d(g), d(g)};};
|
||||
std::generate(A.elements().begin(), A.elements().end(), rand);
|
||||
std::generate(B.elements().begin(), B.elements().end(), rand);
|
||||
}
|
||||
namespace blas = multi::blas;
|
||||
{
|
||||
boost::timer::auto_cpu_timer t; // 2.398206s
|
||||
for(auto i = 0; i != 10; ++i){
|
||||
blas::context ctx;
|
||||
blas::gemm(ctx, 1, A, B, 0, C);
|
||||
}
|
||||
}
|
||||
using device_array = multi::cuda::array<complex, 2>;
|
||||
{
|
||||
device_array A_gpu = A, B_gpu = B, C_gpu({size(A), size(~B)});
|
||||
|
||||
boost::timer::auto_cpu_timer t; // 0.707426s
|
||||
for(auto i = 0; i != 10; ++i){
|
||||
multi::cublas::context ctx;
|
||||
blas::gemm(ctx, 1, A_gpu, B_gpu, 0, C_gpu);
|
||||
}
|
||||
}
|
||||
{
|
||||
device_array A_gpu = A, B_gpu = B, C_gpu({size(A), size(~B)});
|
||||
|
||||
boost::timer::auto_cpu_timer t; // 0.613534s
|
||||
multi::cublas::context ctx;
|
||||
for(auto i = 0; i != 10; ++i) blas::gemm(ctx, 1, A_gpu, B_gpu, 0, C_gpu);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
|
@ -1,34 +0,0 @@
|
|||
#ifdef COMPILATION_INSTRUCTIONS
|
||||
$CXXX $CXXFLAGS $0 -o $0x `pkg-config --libs blas` -Wno-deprecated-declarations `pkg-config --cflags --libs cudart-11.0 cublas-11.0 blas` -lboost_unit_test_framework&&$0x&&rm $0x; exit
|
||||
#endif
|
||||
// © Alfredo A. Correa 2019-2020
|
||||
|
||||
#define BOOST_TEST_MODULE "C++ Unit Tests for Multi cuBLAS iamax"
|
||||
#define BOOST_TEST_DYN_LINK
|
||||
#include<boost/test/unit_test.hpp>
|
||||
|
||||
#include "../../../../adaptors/blas.hpp"
|
||||
#include "../../../../adaptors/cuda.hpp"
|
||||
#include "../../../../adaptors/blas/cuda.hpp"
|
||||
|
||||
namespace multi = boost::multi;
|
||||
|
||||
// iamax must report index 2 for the same four-element vector whether it lives in host, device or
// managed memory.
BOOST_AUTO_TEST_CASE(multi_adaptors_blas_cuda_iamax){
	using complex = std::complex<double>;
	complex const I{0, 1};

	{ // host memory
		using multi::blas::iamax;
		multi::array<complex, 1> const A = {1. + 2.*I, 2., 3. + 3.*I, 4.};
		BOOST_REQUIRE( iamax(A) == 2 );
	}
	{ // device memory
		using multi::blas::iamax;
		multi::cuda::array<complex, 1> const A = {1. + 2.*I, 2., 3. + 3.*I, 4.};
		BOOST_REQUIRE( iamax(A) == 2 );
	}
	{ // managed memory
		using multi::blas::iamax;
		multi::cuda::managed::array<complex, 1> const A = {1. + 2.*I, 2., 3. + 3.*I, 4.};
		BOOST_REQUIRE( iamax(A) == 2 );
	}
}
|
||||
|
|
@ -1,136 +0,0 @@
|
|||
// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*-
|
||||
// © Alfredo A. Correa 2019-2021
|
||||
|
||||
#ifndef MULTI_ADAPTORS_BLAS_DOT_HPP
|
||||
#define MULTI_ADAPTORS_BLAS_DOT_HPP
|
||||
|
||||
#include "../blas/core.hpp"
|
||||
#include "../blas/numeric.hpp" // is_complex
|
||||
#include "../blas/operations.hpp" // blas::C
|
||||
|
||||
namespace boost{
|
||||
namespace multi::blas{
|
||||
|
||||
using core::dot ;
|
||||
using core::dotu;
|
||||
using core::dotc;
|
||||
|
||||
template<class Context, class XIt, class Size, class YIt, class RPtr>
|
||||
auto dot_n(Context&& ctxt, XIt x_first, Size count, YIt y_first, RPtr rp){
|
||||
if constexpr(is_complex<typename XIt::value_type>{}){
|
||||
;;;; if constexpr (!is_conjugated<XIt>{} and !is_conjugated<YIt>{}) std::forward<Context>(ctxt)->dotu(count, base(x_first) , stride(x_first), base(y_first), stride(y_first), rp);
|
||||
else if constexpr (!is_conjugated<XIt>{} and is_conjugated<YIt>{}) std::forward<Context>(ctxt)->dotc(count, underlying(base(y_first)), stride(y_first), base(x_first), stride(x_first), rp);
|
||||
else if constexpr ( is_conjugated<XIt>{} and !is_conjugated<YIt>{}) std::forward<Context>(ctxt)->dotc(count, underlying(base(x_first)), stride(x_first), base(y_first), stride(y_first), rp);
|
||||
else if constexpr ( is_conjugated<XIt>{} and is_conjugated<YIt>{}) static_assert(!sizeof(XIt*), "not implemented in blas");
|
||||
}else{
|
||||
std::forward<Context>(ctxt)->dot (count, base(x_first) , stride(x_first), base(y_first), stride(y_first), rp);
|
||||
}
|
||||
struct{XIt x_last; YIt y_last;} ret{x_first + count, y_first + count};
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<class XIt, class Size, class YIt, class RPtr>
|
||||
auto dot_n(XIt x_first, Size count, YIt y_first, RPtr rp){//->decltype(dot_n(blas::context{}, x_first, count, y_first, rp)){
|
||||
if constexpr(is_conjugated<XIt>{}){
|
||||
auto ctxtp = blas::default_context_of(underlying(x_first.base()));
|
||||
return dot_n(ctxtp, x_first, count, y_first, rp);
|
||||
}else{
|
||||
auto ctxtp = blas::default_context_of(x_first.base());
|
||||
return dot_n(ctxtp, x_first, count, y_first, rp);
|
||||
}
|
||||
}
|
||||
|
||||
template<class Context, class X1D, class Y1D, class R>
|
||||
R&& dot(Context&& ctxt, X1D const& x, Y1D const& y, R&& r){
|
||||
assert( size(x) == size(y) );
|
||||
return blas::dot_n(std::forward<Context>(ctxt), begin(x), size(x), begin(y), &r), std::forward<R>(r);
|
||||
}
|
||||
|
||||
template<class X1D, class Y1D, class R>
|
||||
R&& dot(X1D const& x, Y1D const& y, R&& r){
|
||||
assert( size(x) == size(y) );
|
||||
if constexpr(is_conjugated<X1D>{}){
|
||||
auto ctxtp = blas::default_context_of(underlying(x.base()));
|
||||
return blas::dot(ctxtp, x, y, r);
|
||||
}else{
|
||||
auto ctxtp = blas::default_context_of(x.base());
|
||||
return blas::dot(ctxtp, x, y, r);
|
||||
}
|
||||
}
|
||||
|
||||
template<class ContextPtr, class ItX, class Size, class ItY>
|
||||
class dot_ptr{
|
||||
ContextPtr ctxt_;
|
||||
ItX x_first_;
|
||||
Size count_;
|
||||
ItY y_first_;
|
||||
protected:
|
||||
dot_ptr(ContextPtr ctxt, ItX x_first, Size count, ItY y_first) : ctxt_{ctxt}, x_first_{x_first}, count_{count}, y_first_{y_first}{}
|
||||
public:
|
||||
dot_ptr(dot_ptr const&) = default;
|
||||
template<class ItOut, class Size2>
|
||||
friend constexpr auto copy_n(dot_ptr first, [[maybe_unused]] Size2 count, ItOut d_first)
|
||||
->decltype(blas::dot_n(std::declval<ContextPtr>(), std::declval<ItX>(), Size{} , std::declval<ItY>(), d_first), d_first + count){assert(count == 1);
|
||||
return blas::dot_n(first.ctxt_ , first.x_first_ , first.count_, first.y_first_ , d_first), d_first + count;}
|
||||
|
||||
template<class ItOut, class Size2>
|
||||
friend constexpr auto uninitialized_copy_n(dot_ptr first, Size2 count, ItOut d_first)
|
||||
->decltype(blas::dot_n(std::declval<ContextPtr>(), std::declval<ItX>(), Size{} , std::declval<ItY>(), d_first), d_first + count){assert(count == 1);
|
||||
return blas::dot_n(first.ctxt_ , first.x_first_ , first.count_, first.y_first_ , d_first), d_first + count;}
|
||||
// ->decltype(copy_n(first, count, d_first)){ // nvcc is not detecting friend copy_n
|
||||
// return copy_n(first, count, d_first);}
|
||||
};
|
||||
|
||||
// Reference-like, non-copyable object standing for the dot product of x and y. The value is
// computed each time decay() runs, which happens on conversion to decay_type, unary plus, and the
// comparison / multiplication operators below.
template<class ContextPtr, class X, class Y, class Ptr = dot_ptr<ContextPtr, typename X::const_iterator, typename X::size_type, typename Y::const_iterator>>
struct dot_ref : private Ptr{
	dot_ref(dot_ref const&) = delete;

	// value type of the product of one element of X and one element of Y
	using decay_type = decltype(typename X::value_type{}*typename Y::value_type{});

	// x and y must have the same size (asserted)
	dot_ref(ContextPtr ctxt, X const& x, Y const& y)
	: Ptr{ctxt, begin(x), size(x), begin(y)}
	{
		assert(size(x)==size(y));
	}

	// address-of yields the underlying dot_ptr handle
	constexpr Ptr const& operator&() const&{return *this;}

	// evaluates the product now
	decay_type decay() const{
		decay_type value;
		copy_n(operator&(), 1, &value);
		return value;
	}

	operator decay_type() const&{return decay();}

	// NOTE(review): as written this condition is false only when both macros are defined; confirm
	// whether `and` was intended.
#if not defined(__CUDACC__) or not defined(__INTEL_COMPILER)
	friend auto operator*(decay_type const& lhs, dot_ref const& self){return lhs*self.decay();}
#endif

	decay_type operator+() const{return decay();}

	// comparisons against another dot_ref of the same type evaluate both sides
	bool operator==(dot_ref const& other) const{return decay() == other.decay();}
	bool operator!=(dot_ref const& other) const{return decay() != other.decay();}

	// comparisons against anything the evaluated value can be compared with
	template<class Other>
	auto operator==(Other const& other) const
	->decltype(decay()==other)
	{
		return decay()==other;
	}

	template<class Other>
	auto operator!=(Other const& other) const
	->decltype(decay()!=other)
	{
		return decay()!=other;
	}
};
|
||||
|
||||
template<class Context, class X, class Y> [[nodiscard]]
|
||||
dot_ref<Context, X, Y> dot(Context const& ctxt, X const& x, Y const& y){return {ctxt, x, y};}
|
||||
|
||||
//template<class X, class Y> [[nodiscard]]
|
||||
//dot_ref<blas::context, X, Y> dot(X const& x, Y const& y){return {blas::context{}, x, y};}
|
||||
|
||||
template<class X, class Y> [[nodiscard]]
|
||||
auto dot(X const& x, Y const& y){
|
||||
if constexpr(is_conjugated<X>{}){
|
||||
auto ctxtp = blas::default_context_of(underlying(x.base()));
|
||||
return blas::dot(ctxtp, x, y);
|
||||
}else{
|
||||
auto ctxtp = blas::default_context_of(x.base());
|
||||
return blas::dot(ctxtp, x, y);
|
||||
}
|
||||
}
|
||||
|
||||
namespace operators{

	/// Comma operator as shorthand for dot(x, y); participates in overload
	/// resolution only when dot(x, y) is well-formed.
	template<class X1D, class Y1D>
	[[nodiscard]] auto operator,(X1D const& x, Y1D const& y) -> decltype(dot(x, y)){
		return dot(x, y);
	}

}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
@ -1,123 +0,0 @@
|
|||
#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*-
|
||||
$CXXX $CXXFLAGS $0 -o $0x `pkg-config --libs blas` -lboost_unit_test_framework&&$0x&&rm $0x;exit
|
||||
#endif
|
||||
// © Alfredo A. Correa 2019-2020
|
||||
|
||||
#ifndef MULTI_ADAPTORS_BLAS_FILLING_HPP
|
||||
#define MULTI_ADAPTORS_BLAS_FILLING_HPP
|
||||
|
||||
#include "../blas/core.hpp"
|
||||
#include "../blas/operations.hpp"
|
||||
#include "../../array_ref.hpp"
|
||||
|
||||
namespace boost{
|
||||
namespace multi{
|
||||
namespace blas{
|
||||
|
||||
// Which triangle of a matrix holds the data.
// NOTE(review): the underlying characters are swapped with respect to the enumerator names
// (lower is 'U', upper is 'L'); presumably deliberate to account for row-major arrays being
// handed to column-major BLAS routines — confirm before changing.
enum class filling : char{
|
||||
lower = 'U',
|
||||
upper = 'L'
|
||||
};
|
||||
|
||||
// short aliases for the two enumerators
MAYBE_UNUSED static constexpr filling U = filling::upper;
|
||||
MAYBE_UNUSED static constexpr filling L = filling::lower;
|
||||
|
||||
/// Returns the opposite triangle: lower becomes upper and upper becomes lower.
/// Marked `inline` because this non-template function is defined in a header;
/// without it, including the header from two translation units violates the
/// one-definition rule and fails at link time.
inline filling flip(filling side){
	switch(side){
		case filling::lower: return filling::upper;
		case filling::upper: return filling::lower;
	}
	__builtin_unreachable(); // both enumerators are handled above
}
|
||||
|
||||
/// Unary minus flips the triangle (same as flip).
/// `inline`: defined in a header, so it must be safe to include from several translation units.
inline filling operator-(filling side){return flip(side);}

/// Unary plus returns the triangle unchanged.
inline filling operator+(filling side){return side;}
|
||||
|
||||
// Fallback used when `if constexpr` is unavailable: walks the rows from the last
// one to the first and decides which triangle holds data.
// NOTE(review): this overload is constrained on is_conjugated<A2D> although it is
// tagged with std::false_type, and the visible caller passes a single argument —
// confirm the intended constraint.
template<class A2D, std::enable_if_t<is_conjugated<A2D>{}, int> =0>
filling detect_triangular_aux(A2D const& A, std::false_type){
	for(auto n = size(A); n != 0; --n){
		// asum of row n-1 from position n to the end of the row
		auto const above = blas::asum(begin(A[n-1])+n, end(A[n-1]));
		if(above != above){return filling::lower;} // self-inequality: NaN found
		if(above != 0.){return filling::upper;}

		// same range taken on the rotated array
		auto const below = blas::asum(begin(rotated(A)[n-1])+n, end(rotated(A)[n-1]));
		if(below != below){return filling::upper;}
		if(below != 0.){return filling::lower;}
	}
	return filling::lower; // nothing found on either side
}
|
||||
|
||||
template<class A2D>
|
||||
filling detect_triangular(A2D const& A);
|
||||
|
||||
template<class A2D, std::enable_if_t<is_conjugated<A2D>{}, int> =0>
|
||||
filling detect_triangular_aux(A2D const& A){
|
||||
return flip(detect_triangular(hermitized(A)));
|
||||
}
|
||||
|
||||
template<class A2D>
|
||||
filling detect_triangular(A2D const& A){
|
||||
#if defined(__cpp_if_constexpr)
|
||||
if constexpr(not is_conjugated<A2D>{}){
|
||||
using blas::asum;
|
||||
for(auto i = size(A); i != 0; --i){
|
||||
auto const asum_up = asum(A[i-1]({i, A[i-1].size()}));
|
||||
if(asum_up!=asum_up) return filling::lower;
|
||||
else if(asum_up!=0.) return filling::upper;
|
||||
|
||||
auto const asum_lo = asum(rotated(A)[i-1]({i, rotated(A)[i-1].size()}));
|
||||
if(asum_lo!=asum_lo) return filling::upper;
|
||||
else if(asum_lo!=0.) return filling::lower;
|
||||
}
|
||||
}else{
|
||||
return flip(detect_triangular(hermitized(A)));
|
||||
}
|
||||
return filling::lower;
|
||||
#else
|
||||
return detect_triangular_aux(A);//, is_conjugated<A2D>{});//std::integral_constant<bool, not is_hermitized<A2D>()>{});
|
||||
#endif
|
||||
}
|
||||
|
||||
}}
|
||||
|
||||
}
|
||||
|
||||
#if not __INCLUDE_LEVEL__ // _TEST_MULTI_ADAPTORS_BLAS_FILLING
|
||||
|
||||
#define BOOST_TEST_MODULE "C++ Unit Tests for Multi adaptors side"
|
||||
#define BOOST_TEST_DYN_LINK
|
||||
#include<boost/test/unit_test.hpp>
|
||||
|
||||
#include "../../array.hpp"
|
||||
#include "../../utility.hpp"
|
||||
#include "../blas/nrm2.hpp"
|
||||
|
||||
#include<complex>
|
||||
#include<cassert>
|
||||
#include<iostream>
|
||||
#include<numeric>
|
||||
#include<algorithm>
|
||||
|
||||
using std::cout;
|
||||
|
||||
template<class M>
|
||||
decltype(auto) print(M const& C){
|
||||
using boost::multi::size;
|
||||
for(int i = 0; i != size(C); ++i){
|
||||
for(int j = 0; j != size(C[i]); ++j) cout<< C[i][j] <<' ';
|
||||
cout<<std::endl;
|
||||
}
|
||||
return cout<<"---"<<std::endl;
|
||||
}
|
||||
|
||||
namespace multi = boost::multi;
|
||||
using complex = std::complex<double>;
|
||||
|
||||
// Placeholder test case: returns immediately and checks nothing.
BOOST_AUTO_TEST_CASE(multi_adaptors_blas_side){
|
||||
return;
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
|
@ -1,259 +0,0 @@
|
|||
// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*-
|
||||
// © Alfredo A. Correa 2019-2021
|
||||
|
||||
#ifndef MULTI_ADAPTORS_BLAS_GEMM_HPP
|
||||
#define MULTI_ADAPTORS_BLAS_GEMM_HPP
|
||||
|
||||
#include "../blas/core.hpp"
|
||||
#include "../blas/gemv.hpp"
|
||||
#include "../blas/numeric.hpp"
|
||||
#include "../blas/operations.hpp"
|
||||
|
||||
namespace boost{
|
||||
namespace multi{
|
||||
namespace blas{
|
||||
|
||||
using core::gemm;
|
||||
|
||||
// Conjugated iterator (std::true_type tag): returns the underlying of its base.
template<class It>
auto xbase_aux(It const& it, std::true_type const&) -> decltype(underlying(base(it))){
	return underlying(base(it));
}
|
||||
|
||||
// Non-conjugated iterator (std::false_type tag): returns the base as is.
template<class It>
auto xbase_aux(It const& it, std::false_type const&) -> decltype(base(it)){
	return base(it);
}
|
||||
|
||||
template<class It>
|
||||
auto xbase(It const& it)
|
||||
->decltype(xbase_aux(it, std::integral_constant<bool, is_conjugated<It>{}>{})){
|
||||
return xbase_aux(it, std::integral_constant<bool, is_conjugated<It>{}>{});}
|
||||
|
||||
// Iterator-based matrix-matrix product: calls ctxt.gemm(...) for the a_count rows starting at a_first,
// the matrix starting at b_first and the output starting at c_first, with scalars alpha and beta.
// The transposition flags, argument order and leading dimensions passed to gemm are chosen from
// (1) whether It2DA / It2DB are conjugated iterators (compile time) and
// (2) which of the two strides of each operand equals 1 (run time).
// Layout combinations without a branch hit assert(0). Nothing is called when a_count == 0.
// Returns c_first + a_count. A std::logic_error raised inside is rethrown with the layout appended.
template<class Context, class It2DA, class Size, class It2DB, class It2DC>
|
||||
auto gemm_n(Context&& ctxt, typename It2DA::element alpha, It2DA a_first, Size a_count, It2DB b_first, typename It2DA::element beta, It2DC c_first)
|
||||
//->decltype(std::forward<Context>(ctxt).gemm('N', 'N', b_first->size(), a_count, a_first->size(), &alpha, xbase(b_first), b_first->size() , xbase(a_first), a_first->size(), &beta, c_first.base(), c_first->size() ), It2DC{})
|
||||
try{
|
||||
// preconditions (debug builds only): matching row sizes and at least one unit stride per operand
assert( b_first->size() == c_first->size() );
|
||||
assert( a_first.stride()==1 or a_first->stride()==1 );
|
||||
assert( b_first.stride()==1 or b_first->stride()==1 );
|
||||
assert( c_first.stride()==1 or c_first->stride()==1 );
|
||||
|
||||
if(a_count != 0){
|
||||
// the context is forwarded at the single gemm call that ends up being executed
#define CTXT std::forward<Context>(ctxt)
|
||||
// case: neither A nor B conjugated
;;;;; if constexpr(!is_conjugated<It2DA>{} and !is_conjugated<It2DB>{}){
|
||||
;;;;; if(a_first->stride()==1 and b_first->stride()==1 and c_first->stride()==1){
|
||||
;;;; if( a_count==1 and b_first->size()==1 ){CTXT.gemm('N', 'N', b_first->size(), a_count, a_first->size(), &alpha, base(b_first), b_first->size() , base(a_first), a_first->size() , &beta, base(c_first), c_first->size() );}
|
||||
else if( a_count==1 ){CTXT.gemm('N', 'N', b_first->size(), a_count, a_first->size(), &alpha, base(b_first), b_first. stride(), base(a_first), a_first->size() , &beta, base(c_first), c_first->size() );}
|
||||
else {CTXT.gemm('N', 'N', b_first->size(), a_count, a_first->size(), &alpha, base(b_first), b_first. stride(), base(a_first), a_first. stride(), &beta, base(c_first), c_first. stride());}
|
||||
}else if(a_first->stride()==1 and b_first->stride()==1 and c_first. stride()==1){
|
||||
if (a_count==1) {CTXT.gemm('T', 'T', a_count, b_first->size(), a_first->size(), &alpha, base(a_first), a_first. stride(), base(b_first), b_first->size() , &beta, base(c_first), a_first->size() );}
|
||||
else {CTXT.gemm('T', 'T', a_count, b_first->size(), a_first->size(), &alpha, base(a_first), a_first. stride(), base(b_first), b_first.stride(), &beta, base(c_first), c_first->stride());}
|
||||
}else if(a_first. stride()==1 and b_first->stride()==1 and c_first->stride()==1){
|
||||
if (a_count==1) {CTXT.gemm('N', 'T', c_first->size(), a_count, a_first->size(), &alpha, base(b_first), b_first. stride(), base(a_first), a_first->stride(), &beta, base(c_first), a_first->size() );}
|
||||
else {CTXT.gemm('N', 'T', c_first->size(), a_count, a_first->size(), &alpha, base(b_first), b_first. stride(), base(a_first), a_first->stride(), &beta, base(c_first), c_first.stride());}
|
||||
}else if(a_first. stride()==1 and b_first->stride()==1 and c_first. stride()==1){
|
||||
if (a_count==1) {CTXT.gemm('N', 'T', a_count, b_first->size(), a_first->size(), &alpha, base(a_first), a_first->stride(), base(b_first), a_first->size() , &beta, base(c_first), b_first->size() );}
|
||||
else {CTXT.gemm('N', 'T', a_count, b_first->size(), a_first->size(), &alpha, base(a_first), a_first->stride(), base(b_first), b_first. stride(), &beta, base(c_first), c_first->stride());}
|
||||
}else if(a_first->stride()==1 and b_first.stride()==1 and c_first. stride()==1){
|
||||
// NOTE(review): `b_first->size()` is tested for non-zero here, while the analogous branch above tests `b_first->size()==1` — confirm which is intended.
;;;; if(a_count==1 and b_first->size()){CTXT.gemm('N', 'N', c_first->size(), a_count, a_first->size(), &alpha, base(b_first), b_first->size() , base(a_first), a_first->size() , &beta, base(c_first), c_first->stride());}
|
||||
else if(a_count==1) {CTXT.gemm('N', 'T', c_first->size(), a_count, a_first->size(), &alpha, base(b_first), b_first->stride(), base(a_first), a_first->size() , &beta, base(c_first), c_first->stride());}
|
||||
else {CTXT.gemm('N', 'T', c_first->size(), a_count, a_first->size(), &alpha, base(b_first), b_first->stride(), base(a_first), a_first.stride() , &beta, base(c_first), c_first->stride());}
|
||||
}else if(a_first->stride()==1 and b_first. stride()==1 and c_first->stride()==1){
|
||||
if (a_count==1) {CTXT.gemm('T', 'N', a_count, c_first->size(), a_first->size(), &alpha, base(b_first), b_first->stride(), base(a_first), a_first->size() , &beta, base(c_first), c_first.stride());}
|
||||
else {CTXT.gemm('T', 'N', c_first->size(), a_count, a_first->size(), &alpha, base(b_first), b_first->stride(), base(a_first), a_first.stride(), &beta, base(c_first), c_first.stride());}
|
||||
}else if(a_first. stride()==1 and b_first.stride( )==1 and c_first. stride()==1){
|
||||
{CTXT.gemm('N', 'N', c_first->size(), a_count, a_first->size(), &alpha, base(a_first), a_first->stride(), base(b_first), b_first->stride(), &beta, base(c_first), c_first->stride());}
|
||||
}else if(a_first. stride()==1 and b_first.stride( )==1 and c_first->stride()==1){
|
||||
{CTXT.gemm('T', 'T', a_count, c_first->size(), a_first->size(), &alpha, base(b_first), b_first->stride(), base(a_first), a_first->stride(), &beta, base(c_first), c_first. stride());}
|
||||
}else assert(0);
|
||||
// case: only B conjugated; its data pointer is obtained with underlying(base(b_first))
}else if constexpr(!is_conjugated<It2DA>{} and is_conjugated<It2DB>{}){
|
||||
;;;;; if(a_first->stride()==1 and b_first->stride()==1 and c_first->stride()==1){
|
||||
// NOTE(review): both branches of the next if/else issue the same call.
if(b_first->size()==1) {CTXT.gemm('C', 'N', c_first->size(), a_count, a_first->size(), &alpha, underlying(base(b_first)), b_first->stride(), base(a_first), a_first->size() , &beta, base(c_first), c_first.stride());}
|
||||
else {CTXT.gemm('C', 'N', c_first->size(), a_count, a_first->size(), &alpha, underlying(base(b_first)), b_first->stride(), base(a_first), a_first->size() , &beta, base(c_first), c_first.stride());}
|
||||
}else if(a_first->stride()==1 and b_first. stride()==1 and c_first->stride()==1){
|
||||
if (a_count==1) {CTXT.gemm('C', 'N', a_count, c_first->size(), a_first->size(), &alpha, underlying(base(b_first)), b_first->stride(), base(a_first), a_first->size() , &beta, base(c_first), c_first.stride());}
|
||||
else {CTXT.gemm('C', 'N', c_first->size(), a_count, a_first->size(), &alpha, underlying(base(b_first)), b_first->stride(), base(a_first), a_first.stride(), &beta, base(c_first), c_first.stride());}
|
||||
}else if(a_first->stride()==1 and b_first. stride()==1 and c_first. stride()==1){
|
||||
{CTXT.gemm('C', 'N', c_first->size(), a_count, a_first->size(), &alpha, underlying(base(b_first)), b_first->stride(), base(a_first), a_first. stride(), &beta, base(c_first), c_first->stride());}
|
||||
}else if(a_first. stride()==1 and b_first. stride()==1 and c_first. stride()==1){
|
||||
{CTXT.gemm('C', 'T', c_first->size(), a_count, a_first->size(), &alpha, underlying(base(b_first)), b_first->stride(), base(a_first), a_first->stride(), &beta, base(c_first), c_first->stride());}
|
||||
}else if(a_first. stride()==1 and b_first. stride()==1 and c_first->stride()==1){
|
||||
{CTXT.gemm('C', 'T', a_count, c_first->size(), a_first->size(), &alpha, underlying(base(b_first)), b_first->stride(), base(a_first), a_first->stride(), &beta, base(c_first), c_first. stride());}
|
||||
}else assert(0);
|
||||
// case: only A conjugated; a single layout is handled
}else if constexpr( is_conjugated<It2DA>{} and !is_conjugated<It2DB>{}){
|
||||
;;;;; if(a_first. stride()==1 and b_first->stride()==1 and c_first->stride()==1){
|
||||
if (a_count==1) {CTXT.gemm('N', 'C', c_first->size(), a_count, a_first->size(), &alpha, base(b_first), b_first. stride(), underlying(base(a_first)), a_first->stride(), &beta, base(c_first), a_first->size() );}
|
||||
else {CTXT.gemm('N', 'C', c_first->size(), a_count, a_first->size(), &alpha, base(b_first), b_first. stride(), underlying(base(a_first)), a_first->stride(), &beta, base(c_first), c_first.stride());}
|
||||
}else assert(0);
|
||||
// case: both A and B conjugated; a single layout is handled
}else if constexpr( is_conjugated<It2DA>{} and is_conjugated<It2DB>{}){
|
||||
;;;;; if(a_first. stride()==1 and b_first. stride()==1 and c_first->stride()==1){
|
||||
{CTXT.gemm('C', 'C', a_count, c_first->size(), a_first->size(), &alpha, underlying(base(b_first)), b_first->stride(), underlying(base(a_first)), a_first->stride(), &beta, base(c_first), c_first. stride());}
|
||||
}else assert(0);
|
||||
}
|
||||
#undef CTXT
|
||||
}
|
||||
// end of the written output range
return c_first + a_count;
|
||||
// function-try-block handler: add the operand layout to the message and rethrow as std::logic_error
}catch(std::logic_error& e){
|
||||
using std::to_string;
|
||||
throw std::logic_error{
|
||||
"couldn't do "+std::string(__PRETTY_FUNCTION__)+" of layout a_count="+std::to_string(a_count)
|
||||
+" a_strides="+to_string(a_first.stride())+","+to_string(a_first->stride())+" a->size="+to_string(a_first->size())
|
||||
+" b_strides="+to_string(b_first.stride())+","+to_string(b_first->stride())+" b->size="+to_string(b_first->size())
|
||||
+" c_strides="+to_string(c_first.stride())+","+to_string(c_first->stride())+" c->size="+to_string(c_first->size())
|
||||
+" because " + e.what()
|
||||
};
|
||||
}
|
||||
|
||||
// Convenience overload: same as the context-taking gemm_n, using a
// default-constructed Context (blas::context unless specified).
template<class It2DA, class Size, class It2DB, class It2DC, class Context = blas::context> // TODO automatic deduction of context
auto gemm_n(
	typename It2DA::element alpha, It2DA a_first, Size a_count,
	It2DB b_first,
	typename It2DA::element beta, It2DC c_first
) -> decltype(gemm_n(Context{}, alpha, a_first, a_count, b_first, beta, c_first)){
	return gemm_n(Context{}, alpha, a_first, a_count, b_first, beta, c_first);
}
|
||||
|
||||
template<class Context, class A, class B, class C>
|
||||
C&& gemm(Context&& ctx, typename A::element alpha, A const& a, B const& b, typename A::element beta, C&& c){
|
||||
assert( size( a) == size( c) );
|
||||
if(not a.is_empty()) assert( size(~a) == size( b) );
|
||||
if constexpr(is_conjugated<C>{}){blas::gemm (std::forward<Context>(ctx), conj(alpha), conj(a), conj(b) , conj(beta), conj(c) );}
|
||||
else {blas::gemm_n(std::forward<Context>(ctx), alpha , begin(a), size(a), begin(b), beta , begin(c));}
|
||||
return std::forward<C>(c);
|
||||
}
|
||||
|
||||
template<class A, class B, class C>
|
||||
C&& gemm(typename A::element alpha, A const& a, B const& b, typename A::element beta, C&& c){
|
||||
return gemm(blas::context{}, alpha, a, b, beta, std::forward<C>(c));
|
||||
}
|
||||
|
||||
template<class ContextPtr, class Scalar, class ItA, class ItB, class DecayType>
|
||||
class gemm_range;
|
||||
|
||||
// Minimal "reference" produced by dereferencing a gemm_iterator: it carries
// only the extensions of a result row, no data.
template<class Ext>
struct gemm_reference{ // TODO implement this in terms of gemv_range
	Ext x; // stored extensions

	// member accessor
	auto extensions() const -> Ext const&{return x;}

	// free-function accessor found by ADL
	friend auto extensions(gemm_reference const& self) -> Ext const&{return self.x;}
};
|
||||
|
||||
// Iterator over the rows of a lazy product s*A*B.
// It holds a context pointer, the scalar, the current row iterator of A and the begin iterator of B.
// No product is computed on dereference; the work happens in the copy_n/copy friends below,
// which forward to blas::gemm_n.
template<class ContextPtr, class Scalar, class ItA, class ItB>
|
||||
class gemm_iterator{
|
||||
ContextPtr ctxtp_;
|
||||
Scalar s_;
|
||||
ItA a_it_;
|
||||
ItB b_begin_;
|
||||
// private: only the befriended gemm_range creates iterators
gemm_iterator(ContextPtr ctxtp, Scalar s, ItA a_it, ItB b_begin) : ctxtp_{ctxtp}, s_{s}, a_it_{a_it}, b_begin_{b_begin}{}
|
||||
template<class ContextPtr2, class Scalar2, class ItA2, class ItB2, class DecayType2>
|
||||
friend class gemm_range;
|
||||
public:
|
||||
gemm_iterator(gemm_iterator const&) = default;
|
||||
using difference_type = typename std::iterator_traits<ItA>::difference_type;
|
||||
using value_type = typename std::iterator_traits<ItA>::value_type;
|
||||
using pointer = void*;
|
||||
// dereferencing only exposes the extensions of a row of B
using reference = gemm_reference<decltype(b_begin_->extensions())>;
|
||||
using iterator_category = std::random_access_iterator_tag; // using iterator_category = std::input_iterator_tag;
|
||||
|
||||
// must stay after the member typedefs above, which iterator_traits reads
static_assert( std::is_base_of<std::random_access_iterator_tag, typename std::iterator_traits<gemm_iterator>::iterator_category>{} );
|
||||
|
||||
// arithmetic moves only the A iterator; B stays fixed
gemm_iterator& operator+=(difference_type n){a_it_ += n; return *this;}
|
||||
gemm_iterator& operator-=(difference_type n){a_it_ -= n; return *this;}
|
||||
|
||||
gemm_iterator& operator++(){return operator+=(1);} // required by random access concept requires even if not used explicitly
|
||||
gemm_iterator& operator--(){return operator-=(1);}
|
||||
|
||||
auto operator+(difference_type n) const{gemm_iterator ret{*this}; ret+=n; return ret;}
|
||||
|
||||
// distance in rows of A; both iterators must refer to the same B (assert only)
friend difference_type operator-(gemm_iterator const& a, gemm_iterator const& b){assert(a.b_begin_ == b.b_begin_);
|
||||
return a.a_it_ - b.a_it_;
|
||||
}
|
||||
// equality looks at the A position only
friend bool operator==(gemm_iterator const& a, gemm_iterator const& b){return a.a_it_ == b.a_it_;}
|
||||
friend bool operator!=(gemm_iterator const& a, gemm_iterator const& b){return a.a_it_ != b.a_it_;}
|
||||
|
||||
// Evaluates `count` rows of the product into d_first through gemm_n with beta = 0.
// Any std::exception is rethrown as std::logic_error with the sizes involved.
template<class ItOut>
|
||||
friend auto copy_n(gemm_iterator const& first, difference_type count, ItOut d_first)
|
||||
->decltype(blas::gemm_n(*std::declval<ContextPtr>(), std::declval<Scalar>(), std::declval<ItA>(), count, std::declval<ItB>(), 0., d_first)) try{
|
||||
return blas::gemm_n(*first.ctxtp_ , first.s_ , first.a_it_ , count, first.b_begin_ , 0., d_first);
|
||||
}catch(std::exception const& e){
|
||||
throw std::logic_error(
|
||||
"in " + std::string(__PRETTY_FUNCTION__) + "\nCouldn't decay product of arrays of size " + std::to_string(count) +"x"+ std::to_string(first.a_it_->size()) + " and " +
|
||||
std::to_string(first.a_it_->size())+ "x" +std::to_string(first.b_begin_->size()) + " into " + std::to_string(count) +"x" + std::to_string(first.b_begin_->size()) +
|
||||
"\nbecause\n"+e.what()
|
||||
);
|
||||
}
|
||||
|
||||
// range version: both ends must carry the same scalar (assert only)
template<class ItOut>
|
||||
friend auto copy(gemm_iterator const& first, gemm_iterator const& last, ItOut d_first){assert(first.s_ == last.s_);
|
||||
return copy_n(first, last - first, d_first);
|
||||
}
|
||||
|
||||
// the uninitialized_* variants simply delegate to the copy_n path
template<class ItOut>
|
||||
friend auto uninitialized_copy_n(gemm_iterator const& first, difference_type count, ItOut d_first){
|
||||
return copy_n(first, count, d_first);
|
||||
}
|
||||
|
||||
template<class ItOut>
|
||||
friend auto uninitialized_copy(gemm_iterator const& first, gemm_iterator const& last, ItOut d_first){assert( first.s_ == last.s_ );
|
||||
return uninitialized_copy_n(first, last - first, d_first);}
|
||||
|
||||
// yields a gemm_reference holding the extensions of the first row of B
reference operator*() const{return {b_begin_->extensions()};}
|
||||
};
|
||||
|
||||
template<class ContextPtr, class Scalar, class ItA, class ItB, class DecayType>
|
||||
class gemm_range{
|
||||
ContextPtr ctxtp_;
|
||||
Scalar s_;
|
||||
ItA a_begin_;
|
||||
ItA a_end_;
|
||||
ItB b_begin_;
|
||||
public:
|
||||
gemm_range(gemm_range const&) = delete;
|
||||
gemm_range(ContextPtr ctxtp, Scalar s, ItA a_first, ItA a_last, ItB b_first) : ctxtp_{ctxtp}, s_{s}, a_begin_{a_first}, a_end_{a_last}, b_begin_{b_first}{}
|
||||
using iterator = gemm_iterator<ContextPtr, Scalar, ItA, ItB>;
|
||||
using decay_type = DecayType;
|
||||
using size_type = typename decay_type::size_type;
|
||||
iterator begin() const{return {ctxtp_, s_, a_begin_, b_begin_};}
|
||||
iterator end() const{return {ctxtp_, s_, a_end_ , b_begin_};}
|
||||
friend auto begin(gemm_range const& self){return self.begin();}
|
||||
friend auto end (gemm_range const& self){return self.end ();}
|
||||
size_type size() const{return a_end_ - a_begin_;}
|
||||
typename decay_type::extensions_type extensions() const{return size()*b_begin_->extensions();}
|
||||
friend auto extensions(gemm_range const& self){return self.extensions();}
|
||||
// operator decay_type() const{return decay_type(*this);} // do not use curly { }
|
||||
decay_type operator+() const{return *this;}
|
||||
template<class Arr>
|
||||
friend Arr&& operator+=(Arr&& a, gemm_range const& gr){
|
||||
blas::gemm_n(*gr.ctxtp_, gr.s_, gr.a_begin_, gr.a_end_ - gr.a_begin_, gr.b_begin_, 1., a.begin());
|
||||
return std::forward<Arr>(a);
|
||||
}
|
||||
};
|
||||
|
||||
// Lazy product s*a*b bound to a context pointer. Enabled only when
// dereferencing a ContextPtr yields a type for which is_context holds.
// The result type decays to A2D::decay_type.
template<class ContextPtr, class Scalar, class A2D, class B2D, class=std::enable_if_t<is_context<decltype(*ContextPtr{})>{}> >
auto gemm(ContextPtr ctxtp, Scalar s, A2D const& a, B2D const& b)
-> gemm_range<ContextPtr, Scalar, typename A2D::const_iterator, typename B2D::const_iterator, typename A2D::decay_type/*B2D*/>{
	return {ctxtp, s, begin(a), end(a), begin(b)};
}
|
||||
|
||||
//#pragma warning (disable:1011)
|
||||
//#pragma diag_suppress 0117 //"implicit_return_from_non_void_function"
|
||||
//#pragma diag_suppress 940 //"implicit_return_from_non_void_function"
|
||||
// -Xcudafe "--diag_suppress=implicit_return_from_non_void_function"
|
||||
template< class Scalar, class A2D, class B2D>
|
||||
auto gemm( Scalar s, A2D const& a, B2D const& b){
|
||||
if constexpr(is_conjugated<A2D>{}){
|
||||
auto ctxtp = blas::default_context_of(underlying(a.base()));
|
||||
return blas::gemm(ctxtp, s, a, b);
|
||||
}else{
|
||||
auto ctxtp = blas::default_context_of(a.base());
|
||||
return blas::gemm(ctxtp, s, a, b);
|
||||
}
|
||||
}
|
||||
|
||||
namespace operators{
|
||||
template<class A2D, class B2D>
|
||||
auto operator*(A2D const& A, B2D const& B)
|
||||
->decltype(+blas::gemm(1., A, B)){
|
||||
return +blas::gemm(1., A, B);}
|
||||
}
|
||||
|
||||
}}}
|
||||
|
||||
#endif
|
||||
|
|
@ -1,152 +0,0 @@
|
|||
// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*-
|
||||
// © Alfredo A. Correa 2019-2020
|
||||
|
||||
#ifndef MULTI_ADAPTORS_BLAS_GEMV_HPP
|
||||
#define MULTI_ADAPTORS_BLAS_GEMV_HPP
|
||||
|
||||
#include "../blas/core.hpp"
|
||||
|
||||
#include "../blas/dot.hpp"
|
||||
|
||||
#include "./../../detail/../utility.hpp"
|
||||
|
||||
namespace boost{
|
||||
namespace multi::blas{
|
||||
|
||||
using core::gemv;
|
||||
|
||||
template<class Context, class A, class MIt, class Size, class XIt, class B, class YIt>
|
||||
auto gemv_n(Context&& ctxt, A a, MIt m_first, Size count, XIt x_first, B b, YIt y_first){
|
||||
assert(m_first->stride()==1 or m_first.stride()==1); // blas doesn't implement this case
|
||||
assert( x_first.base() != y_first.base() );
|
||||
if constexpr(not is_conjugated<MIt>{}){
|
||||
assert( y_first.base() != m_first.base() );
|
||||
;;;; if(m_first .stride()==1) std::forward<Context>(ctxt).gemv('N', count, m_first->size(), a, m_first.base() , m_first->stride(), x_first.base(), x_first.stride(), b, y_first.base(), y_first.stride());
|
||||
else if(m_first->stride()==1) std::forward<Context>(ctxt).gemv('T', m_first->size(), count, a, m_first.base() , m_first. stride(), x_first.base(), x_first.stride(), b, y_first.base(), y_first.stride());
|
||||
else assert(0);
|
||||
}else{
|
||||
assert( y_first.base() != underlying(m_first.base()) );
|
||||
;;;; if(m_first->stride()==1) std::forward<Context>(ctxt).gemv('C', m_first->size(), count, a, underlying(m_first.base()), m_first. stride(), x_first.base(), x_first.stride(), b, y_first.base(), y_first.stride());
|
||||
else if(m_first. stride()==1) assert(0); // not implemented in blas (use cblas?)
|
||||
else assert(0); // not implemented in blas
|
||||
}
|
||||
struct{
|
||||
MIt m_last;
|
||||
YIt y_last;
|
||||
} ret{m_first + count, y_first + count};
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<class A, class MIt, class Size, class XIt, class B, class YIt>
|
||||
auto gemv_n(A a, MIt m_first, Size count, XIt x_first, B b, YIt y_first){
|
||||
return gemv_n(blas::context{}, a, m_first, count, x_first, b, y_first);
|
||||
}
|
||||
|
||||
// Array-level gemv: forwards (a, m, v, b, w) to gemv_n on the array iterators and returns `w`.
// Sizes are checked with assert only.
template<class A, class M, class V, class B, class W>
W&& gemv(A const& a, M const& m, V const& v, B const& b, W&& w){
	assert(size( m) == size(w) );
	assert(size(~m) == size(v) );

	gemv_n(a, begin(m), size(m), begin(v), b, begin(w));

	return std::forward<W>(w);
}
|
||||
|
||||
// Iterator over the elements of a lazy product alpha*M*v.
// Holds the scalar, the current row iterator of M, the begin iterator of v and a context.
// The product is computed by the copy_n/copy friends, which forward to blas::gemv_n.
template<class Scalar, class It2D, class It1D, class Context>
|
||||
class gemv_iterator{
|
||||
Scalar alpha_ = 1.;
|
||||
It2D m_it_;
|
||||
It1D v_first_;
|
||||
Context ctxt_;
|
||||
public:
|
||||
using difference_type = typename std::iterator_traits<It2D>::difference_type;
|
||||
using value_type = typename std::iterator_traits<It1D>::value_type;
|
||||
using pointer = void;
|
||||
using reference = void;
|
||||
using iterator_category = std::random_access_iterator_tag;
|
||||
// using iterator_category = std::output_iterator_tag;
|
||||
// friend difference_type distance(gemv_iterator const& a, gemv_iterator const& b){assert(a.v_first_ == b.v_first_);
|
||||
// return b.m_it_ - a.m_it_;
|
||||
// }
|
||||
// distance in rows of M; both iterators must refer to the same vector (assert only)
friend difference_type operator-(gemv_iterator const& a, gemv_iterator const& b){assert(a.v_first_ == b.v_first_);
|
||||
return a.m_it_ - b.m_it_;
|
||||
}
|
||||
// Evaluates `count` elements of the product into `result` (gemv_n with beta = 0) and returns result + count.
// NOTE(review): this tests Context against `void`, whereas gemv_range::operator+= tests against `void*`;
// a `void` Context could not be stored in ctxt_ anyway — confirm which one is intended.
template<class It1DOut>
|
||||
friend auto copy_n(gemv_iterator first, difference_type count, It1DOut result){
|
||||
if constexpr
|
||||
(std::is_same<Context, void>{}) blas::gemv_n( first.alpha_, first.m_it_, count, first.v_first_, 0., result);
|
||||
else blas::gemv_n(first.ctxt_, first.alpha_, first.m_it_, count, first.v_first_, 0., result);
|
||||
return result + count;
|
||||
}
|
||||
template<class It1DOut>
|
||||
friend auto copy(gemv_iterator first, gemv_iterator last, It1DOut result){return copy_n(first, last - first, result);}
|
||||
// plain copy is used for uninitialized output, hence the trivially-default-constructible requirement
template<class It1DOut>
|
||||
friend auto uninitialized_copy(gemv_iterator first, gemv_iterator last, It1DOut result){
|
||||
static_assert(std::is_trivially_default_constructible<typename It1DOut::value_type>{});
|
||||
return copy(first, last, result);
|
||||
}
|
||||
gemv_iterator(Scalar alpha, It2D m_it, It1D v_first, Context ctxt)
|
||||
: alpha_{alpha}, m_it_{m_it}, v_first_{v_first}, ctxt_{ctxt}{}
|
||||
// dereference does not compute anything; it always returns 0.
value_type operator*() const{return 0.;}
|
||||
};
|
||||
|
||||
// Lazy range representing alpha*M*v: stores the scalar, the [begin, end) row iterators of M,
// the begin iterator of v and a context. It is evaluated by decay() / unary + or accumulated with +=.
template<class Scalar, class It2D, class It1D, class DecayType, class Context>
|
||||
class gemv_range{
|
||||
Scalar alpha_ = 1.;
|
||||
It2D m_begin_;
|
||||
It2D m_end_;
|
||||
It1D v_first_;
|
||||
Context ctxt_ = {};
|
||||
public:
|
||||
gemv_range(gemv_range const&) = delete;
|
||||
// without a context argument, ctxt_ keeps its default member initializer
gemv_range(Scalar alpha, It2D m_first, It2D m_last, It1D v_first)
|
||||
: alpha_{alpha}, m_begin_{m_first}, m_end_{m_last}, v_first_{v_first}{
|
||||
assert(m_begin_.stride() == m_end_.stride());
|
||||
}
|
||||
// NOTE(review): Context is the class template parameter, so `Context&&` is not a forwarding reference here;
// when Context is itself a reference type the member ctxt_ is a reference — confirm the lifetime of the referred context.
gemv_range(Context&& ctxt, Scalar alpha, It2D m_first, It2D m_last, It1D v_first)
|
||||
: alpha_{alpha}, m_begin_{m_first}, m_end_{m_last}, v_first_{v_first}, ctxt_{std::forward<Context>(ctxt)}{
|
||||
assert(m_begin_.stride() == m_end_.stride());
|
||||
}
|
||||
using iterator = gemv_iterator<Scalar, It2D, It1D, Context>;
|
||||
using decay_type = DecayType;
|
||||
iterator begin() const{return {alpha_, m_begin_, v_first_, ctxt_};}
|
||||
iterator end() const{return {alpha_, m_end_ , v_first_, ctxt_};}
|
||||
// NOTE(review): size_type is not declared in this class; presumably it resolves to a name from an enclosing namespace — confirm.
size_type size() const{return end() - begin();}
|
||||
// one-dimensional extension [0, size())
typename decay_type::extensions_type extensions() const{return typename decay_type::extensions_type{{0, size()}};}
|
||||
// materializes the product: default-constructs the result, then assigns this range to it
decay_type decay() const{
|
||||
decay_type ret;
|
||||
ret = *this;
|
||||
return ret;
|
||||
}
|
||||
friend auto operator+(gemv_range const& self){return self.decay();}
|
||||
// v += range : accumulates the product into v (gemv_n with beta = 1)
// NOTE(review): tests Context against `void*`, while gemv_iterator::copy_n tests against `void` — confirm.
template<class V>
|
||||
friend V&& operator+=(V&& v, gemv_range const& s){
|
||||
if constexpr
|
||||
(std::is_same<Context, void*>{}) blas::gemv_n( s.alpha_, s.m_begin_, s.m_end_ - s.m_begin_, s.v_first_, 1., v.begin());
|
||||
else blas::gemv_n(s.ctxt_, s.alpha_, s.m_begin_, s.m_end_ - s.m_begin_, s.v_first_, 1., v.begin());
|
||||
return std::forward<V>(v);
|
||||
}
|
||||
};
|
||||
|
||||
template<class Scalar, class M, class V>
|
||||
auto gemv(Scalar s, M const& m, V const& v)
|
||||
{//->decltype(gemv_range{s, m, v}){
|
||||
assert(size(~m) == size(v));
|
||||
return gemv_range<Scalar, typename M::const_iterator, typename V::const_iterator, typename V::decay_type, blas::context>(s, m.begin(), m.end(), v.begin());}
|
||||
|
||||
template<class Context, class Scalar, class M, class V>
|
||||
auto gemv(Context&& ctxt, Scalar s, M const& m, V const& v)
|
||||
//->decltype(gemv_ranges, m, v})
|
||||
{ assert(size(~m) == size(v));
|
||||
return gemv_range<Scalar, typename M::const_iterator, typename V::const_iterator, typename V::decay_type, Context&&>(std::forward<Context>(ctxt), s, m.begin(), m.end(), v.begin());}
|
||||
|
||||
namespace operators{
|
||||
template<class M, class V>
|
||||
auto operator%(M const& m, V const& v)
|
||||
->decltype(+blas::gemv(1., m, v)){
|
||||
return +blas::gemv(1., m, v);}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
@ -1,239 +0,0 @@
|
|||
#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*-
|
||||
$CXXX $CXXFLAGS -DADD_ $0 -o $0x `pkg-config --libs blas` -lboost_unit_test_framework&&$0x&&rm $0x;exit
|
||||
#endif
|
||||
// © Alfredo A. Correa 2019-2020
|
||||
|
||||
#ifndef MULTI_ADAPTORS_BLAS_GER_HPP
|
||||
#define MULTI_ADAPTORS_BLAS_GER_HPP
|
||||
|
||||
#include "../blas/core.hpp"
|
||||
|
||||
namespace boost{
|
||||
namespace multi{
|
||||
namespace blas{
|
||||
|
||||
using core::ger;
|
||||
|
||||
// Counted rank-1 update: calls ger with the bases and strides of the x, y and A iterators.
// Rows of A must have size x_n and unit stride (assert only).
// Returns A_first advanced by y_n.
template<class T, class It1, class Size1, class It2, class Size2, class Out>
Out ger_n(T alpha, It1 x_first, Size1 x_n, It2 y_first, Size2 y_n, Out A_first){
	assert( A_first->size() == x_n );
	assert( A_first->stride() == 1 );

	ger(
		x_n, y_n, alpha,
		base(x_first), stride(x_first),
		base(y_first), stride(y_first),
		base(A_first), stride(A_first)
	);

	return A_first + y_n;
}
|
||||
|
||||
// Iterator-pair rank-1 update: converts both ranges to counts and defers to ger_n.
// The two ends of each range must share the same stride (assert only).
template<class T, class It1, class It2, class Out>
Out ger(T alpha, It1 x_first, It1 x_last, It2 y_first, It2 y_last, Out A_first){
	assert( stride(x_first) == stride(x_last) );
	assert( stride(y_first) == stride(y_last) );

	return ger_n(
		alpha,
		x_first, std::distance(x_first, x_last),
		y_first, std::distance(y_first, y_last),
		A_first
	);
}
|
||||
|
||||
// Array-level rank-1 update of A with x and y.
// When A's leading stride is 1 the update is applied to rotated(A) with x and y
// swapped; otherwise it is applied to A directly. Returns A forwarded back.
template<class T, class X1D, class Y1D, class A2D>
A2D&& ger(T alpha, X1D const& x, Y1D const& y, A2D&& A){
	if(stride(A) != 1){
		assert( size(A) == size(y) );
		auto last = ger(alpha, begin(x), end(x), begin(y), end(y), begin(A));
		assert( end(A) == last );
	}else{
		auto last = ger(alpha, begin(y), end(y), begin(x), end(x), begin(rotated(A)));
		assert( end(rotated(A)) == last );
	}
	return std::forward<A2D>(A);
}
|
||||
|
||||
// Counted rank-1 update, gerc variant: calls gerc with the bases and strides of the iterators.
// Rows of A must have size x_n and unit stride (assert only).
// Returns A_first advanced by y_n.
template<class T, class It1, class Size1, class It2, class Size2, class Out>
Out gerc_n(T alpha, It1 x_first, Size1 x_n, It2 y_first, Size2 y_n, Out A_first){
	assert( A_first->size() == x_n );
	assert( A_first->stride() == 1 );

	gerc(
		x_n, y_n, alpha,
		base(x_first), stride(x_first),
		base(y_first), stride(y_first),
		base(A_first), stride(A_first)
	);

	return A_first + y_n;
}
|
||||
|
||||
// Iterator-range flavour of `gerc`: measures both input ranges and delegates
// to `gerc_n`. Debug builds verify that the first/last iterators of each
// range agree on their stride.
template<class T, class It1, class It2, class Out>
Out gerc(T alpha, It1 x_first, It1 x_last, It2 y_first, It2 y_last, Out A_first){
	assert( stride(x_first) == stride(x_last) );
	assert( stride(y_first) == stride(y_last) );

	auto const x_count = std::distance(x_first, x_last);
	auto const y_count = std::distance(y_first, y_last);
	return gerc_n(alpha, x_first, x_count, y_first, y_count, A_first);
}
|
||||
|
||||
// Array-level `gerc`: updates `A` in place from `x` and `y` scaled by `alpha`
// and returns it (a reference when called with an lvalue, a value otherwise).
//
// Two layouts are handled:
//  - stride(A) == 1: the update is applied to rotated(A) with the roles of
//    `x` and `y` exchanged;
//  - otherwise: the update is applied to `A` directly; size(A) must equal
//    size(y) (checked by assertion).
//
// NOTE(review): the unit-stride branch swaps `x` and `y` exactly like the
// non-conjugating `ger` does; for a conjugating update that swap may not be
// equivalent to updating the transposed matrix — confirm.
template<class T, class X1D, class Y1D, class A2D>
A2D gerc(T alpha, X1D const& x, Y1D const& y, A2D&& A){
	if(stride(A) == 1){
		auto e = gerc(alpha, begin(y), end(y), begin(x), end(x), begin(rotated(A)));
		assert( end(rotated(A)) == e );
		(void)e; // `e` is otherwise unused when NDEBUG removes the assert
	}else{
		assert( size(A) == size(y) );
		auto e = gerc(alpha, begin(x), end(x), begin(y), end(y), begin(A));
		assert( end(A) == e );
		(void)e; // `e` is otherwise unused when NDEBUG removes the assert
	}
	// Forward instead of `return A;`: when A2D is a value type, `A` is a named
	// rvalue reference and would be copied (not moved) in C++17.
	return std::forward<A2D>(A);
}
|
||||
|
||||
// Counted-iterator front end for the low-level `geru` backend call.
// Debug builds check that each element reached through `A_first` has size
// `x_n` and unit stride. Returns the output iterator advanced by `y_n`.
template<class T, class It1, class Size1, class It2, class Size2, class Out>
Out geru_n(T alpha, It1 x_first, Size1 x_n, It2 y_first, Size2 y_n, Out A_first){
	assert( A_first->size() == x_n );
	assert( A_first->stride() == 1 );

	auto const x_base = base(x_first);
	auto const x_inc  = stride(x_first);
	auto const y_base = base(y_first);
	auto const y_inc  = stride(y_first);
	auto const A_base = base(A_first);
	auto const A_ld   = stride(A_first);

	geru(x_n, y_n, alpha, x_base, x_inc, y_base, y_inc, A_base, A_ld);
	return A_first + y_n;
}
|
||||
|
||||
// Iterator-range flavour of `geru`: measures both input ranges and delegates
// to `geru_n`. Debug builds verify that the first/last iterators of each
// range agree on their stride.
template<class T, class It1, class It2, class Out>
Out geru(T alpha, It1 x_first, It1 x_last, It2 y_first, It2 y_last, Out A_first){
	assert( stride(x_first) == stride(x_last) );
	assert( stride(y_first) == stride(y_last) );

	auto const x_count = std::distance(x_first, x_last);
	auto const y_count = std::distance(y_first, y_last);
	return geru_n(alpha, x_first, x_count, y_first, y_count, A_first);
}
|
||||
|
||||
// Array-level `geru`: updates `A` in place from `x` and `y` scaled by `alpha`
// and returns it (a reference when called with an lvalue, a value otherwise).
//
// Two layouts are handled:
//  - stride(A) == 1: the update is applied to rotated(A) with the roles of
//    `x` and `y` exchanged;
//  - otherwise: the update is applied to `A` directly; size(A) must equal
//    size(y) (checked by assertion).
template<class T, class X1D, class Y1D, class A2D>
A2D geru(T alpha, X1D const& x, Y1D const& y, A2D&& A){
	if(stride(A) == 1){
		auto e = geru(alpha, begin(y), end(y), begin(x), end(x), begin(rotated(A)));
		assert( end(rotated(A)) == e );
		(void)e; // `e` is otherwise unused when NDEBUG removes the assert
	}else{
		assert( size(A) == size(y) );
		auto e = geru(alpha, begin(x), end(x), begin(y), end(y), begin(A));
		assert( end(A) == e );
		(void)e; // `e` is otherwise unused when NDEBUG removes the assert
	}
	// Forward instead of `return A;`: when A2D is a value type, `A` is a named
	// rvalue reference and would be copied (not moved) in C++17.
	return std::forward<A2D>(A);
}
|
||||
|
||||
}}}
|
||||
|
||||
#if not __INCLUDE_LEVEL__ // _TEST_MULTI_ADAPTORS_BLAS_GER
|
||||
|
||||
#define BOOST_TEST_MODULE "C++ Unit Tests for Multi blas ger"
|
||||
#define BOOST_TEST_DYN_LINK
|
||||
#include<boost/test/unit_test.hpp>
|
||||
|
||||
#include "../../array.hpp"
|
||||
#include "../../utility.hpp"
|
||||
|
||||
#include<complex>
|
||||
#include<cassert>
|
||||
#include<iostream>
|
||||
#include<numeric>
|
||||
#include<algorithm>
|
||||
|
||||
using std::cout;
|
||||
namespace multi = boost::multi;
|
||||
|
||||
// Smoke test for blas::ger on real matrices, both in natural layout and
// through rotated() views. Most scenarios only print the result; a single
// scenario checks a value (with a plain assert). The complex gerc/geru
// scenarios at the end are compiled out.
// Reference used by the original author (Mathematica):
//   a = {{2., 3.}, {1., 4.}, {1., 0.}}; GER[1, {1., 2., 5.}, {-2., 1.}, a]; Print[a] : {{0., 4.}, {-3., 6.}, {-9., 5.}}
BOOST_AUTO_TEST_CASE(multi_blas_ger){
	namespace blas = multi::blas;

	// 2x3 zero matrix updated in place, then printed.
	{
		multi::array<double, 2> A = {
			{0., 0. ,0.},
			{0., 0., 0.}
		};
		multi::array<double, 1> const x = { 0., 0., 1.};
		multi::array<double, 1> const y = { 0., 1.};
		blas::ger(1., x, y, A); // A = a*A + (y^T)(x)
		for(int i = 0; i != size(A); ++i){
			for(int j = 0; j != size(A[i]); ++j){
				std::cout << A[i][j] << ' ';
			}
			std::cout << std::endl;
		}
		std::cout << std::endl;
	//	assert( A[1][1] == 6. );
	}

	// 3x2 zero matrix updated through its rotated view, then printed.
	{
		multi::array<double, 2> A = {
			{0., 0.},
			{0., 0.},
			{0., 0.}
		};
		multi::array<double, 1> const x = {0., 0., 1.};
		multi::array<double, 1> const y = {0., 1.};
		blas::ger(1., x, y, rotated(A)); // A^T = a*A^T + (y^T)(x) and A = a*A + (x^T)y
		for(int i = 0; i != size(A); ++i){
			for(int j = 0; j != size(A[i]); ++j){
				std::cout << A[i][j] << ' ';
			}
			std::cout << std::endl;
		}
	//	std::cout << A[1][2] << std::endl;
	//	assert( A[1][2] == 1. );
	}

	// Scenario kept for reference only (entirely commented out).
	{
	//	multi::array<double, 2> A = {
	//		{2., 3., 6., 8.},
	//		{4., 1., 6., 8.},
	//		{0., 1., 6., 8.}
	//	};
	//	assert( A[1][2] == 6. );
	//	multi::array<double, 1> const x = { 0., 1., 0.};
	//	multi::array<double, 1> const y = { 0., 0., 1., 0.};
	//	multi::blas::ger(0., x, y, rotated(A));
	//	assert( A[1][1] == 4. );
	}

	// 3x4 matrix, no result check.
	// NOTE(review): here size(A) == 3 while size(y) == 4; the
	// `assert( size(A) == size(y) )` in the array overload of ger looks like
	// it would fire in a debug build — confirm.
	{
		multi::array<double, 2> A = {
			{2., 3., 6., 8.},
			{4., 1., 6., 8.},
			{0., 1., 6., 8.}
		};
		multi::array<double, 1> const x = { 1., 2., 5.};
		multi::array<double, 1> const y = {-2., 1., 1., 1.};
		blas::ger(1., x, y, A);
	//	assert( A[1][1] == 4. );
	}

	// 2x3 matrix updated through its rotated view; one element is checked.
	{
		multi::array<double, 2> a = {
			{2., 1., 1.},
			{3., 4., 0.}
		};
		multi::array<double, 1> const x = { 1., 2., 5.};
		multi::array<double, 1> const y = {-2., 1.};
		blas::ger(1., x, y, rotated(a));
		assert( a[1][1] == 6. );
	}
#if 0
	{
		multi::array<std::complex<double>, 2> a = {
			{2., 3.},
			{1., 4.},
			{1.,0.}
		};
		multi::array<std::complex<double>, 1> const x = { 1., 2., 5.};
		multi::array<std::complex<double>, 1> const y = {-2., 1.};
		multi::blas::gerc(1., x, y, a);
		assert( a[1][1] == 6. );
	}
	{
		multi::array<std::complex<double>, 2> a = {{2. + 1.*I, 3. + 4.*I}, {1.+3.*I, 4. + 2.*I}, {1. + 7.*I, 0.}};
		multi::array<std::complex<double>, 1> const x = { 1. + 1.*I, 2. + I*9., 5. + 4.*I};
		multi::array<std::complex<double>, 1> const y = {-2. + 8.*I, 1. + 1.*I};
		multi::blas::geru(1. + 2.*I, x, y, a); // a = alpha*outer(x, y) + a
		// a = {{2. + 1.*I, 3. + 4.*I}, {1. + 3.*I, 4. + 2.*I}, {1. + 7.*I, 0.}}; GER[1 + 2.*I, {1. + 1.*I, 2. + I*9., 5. + 4.*I}, {-2. + 8.*I, 1. + 1.*I}, a]; Print[a];
		// {{-20.-13. I,-1.+6. I},{-71.-151. I,-25.-1. I},{-105.-45. I,-17.+11. I}}
		std::cout << "a11 " << a[1][1] << std::endl;
		assert( a[1][1] == -25. - 1.*I );
	}
	{
		multi::array<std::complex<double>, 2> a = {
			{2. + 1.*I, 1. + 3.*I, 1. + 7.*I},
			{3. + 4.*I, 4. + 2.*I, 0. + 0.*I}
		};
		std::cout << "a = " << size(a) << std::endl;
		multi::array<std::complex<double>, 1> const x = { 1. + 1.*I, 2. + I*9., 5. + 4.*I};
		multi::array<std::complex<double>, 1> const y = {-2. + 8.*I, 1. + 1.*I};
		multi::blas::geru(1. + 2.*I, x, y, rotated(a)); // a = alpha*outer(x, y) + a
		// a = {{2. + 1.*I, 3. + 4.*I}, {1. + 3.*I, 4. + 2.*I}, {1. + 7.*I, 0.}}; GER[1 + 2.*I, {1. + 1.*I, 2. + I*9., 5. + 4.*I}, {-2. + 8.*I, 1. + 1.*I}, a]; Print[a];
		// {{-20.-13. I,-1.+6. I},{-71.-151. I,-25.-1. I},{-105.-45. I,-17.+11. I}}
		std::cout << "here a11 " << a[1][1] << std::endl;
		assert( a[1][1] == -25. - 1.*I );
	}
#endif

}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
|
@ -1,909 +0,0 @@
|
|||
#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*-
|
||||
$CXXX $CXXFLAGS $0 -o $0x -lboost_unit_test_framework `pkg-config --libs blas` \
|
||||
`#-Wl,-rpath,/usr/local/Wolfram/Mathematica/12.0/SystemFiles/Libraries/Linux-x86-64 -L/usr/local/Wolfram/Mathematica/12.0/SystemFiles/Libraries/Linux-x86-64 -lmkl_intel_ilp64 -lmkl_intel_thread -lmkl_core -liomp5` \
|
||||
-lboost_timer &&$0x&&rm $0x; exit
|
||||
#endif
|
||||
// © Alfredo A. Correa 2019-2020
|
||||
|
||||
#ifndef MULTI_ADAPTORS_BLAS_HERK_HPP
|
||||
#define MULTI_ADAPTORS_BLAS_HERK_HPP
|
||||
|
||||
#include "../blas/core.hpp"
|
||||
#include "../blas/copy.hpp"
|
||||
//#include "../blas/scal.hpp"
|
||||
#include "../blas/syrk.hpp" // fallback to real case
|
||||
|
||||
#include "../blas/side.hpp"
|
||||
#include "../blas/filling.hpp"
|
||||
|
||||
#include "../blas/operations.hpp"
|
||||
|
||||
#include "../../config/NODISCARD.hpp"
|
||||
|
||||
//#include<iostream> //debug
|
||||
//#include<type_traits> // void_t
|
||||
|
||||
namespace boost{
|
||||
namespace multi{namespace blas{
|
||||
|
||||
// Base-pointer accessor used by herk for arrays that are NOT conjugated
// views: simply returns base(a).
template<class A, std::enable_if_t<not is_conjugated<A>{}, int> =0>
auto base_aux(A&& a)
->decltype(base(a))
{
	return base(a);
}
|
||||
|
||||
// Base-pointer accessor used by herk for conjugated views: unwraps the
// base with underlying(), so the backend receives underlying(base(a)).
template<class A, std::enable_if_t< is_conjugated<A>{}, int> =0>
auto base_aux(A&& a)
->decltype(underlying(base(a)))
{
	return underlying(base(a));
}
|
||||
|
||||
using core::herk;
|
||||
|
||||
// Hermitian rank-k update for complex output arrays; selected when
// is_complex_array<C2D> holds. Writes into the `c_side` triangle of `c`
// using the backend `herk` (imported from `core`) and returns `c` forwarded.
//
// Debug-time preconditions: a.size() == c.size() and `c` is square.
// An empty `c` returns immediately. A conjugated `c` is handled by
// recursing on hermitized(c) with the flipped triangle.
//
// The backend arguments (triangle, 'N'/'C' operation, leading dimensions)
// are picked from whether `a` is a conjugated view and from whether
// stride(a) / stride(c) equal 1. Combinations without an implementation
// hit assert(0) (and therefore do nothing when NDEBUG is defined).
//
// If you get a compile error in one of the backend calls it might be due to
// a missing header providing the backend for your kind of pointer.
template<class AA, class BB, class A2D, class C2D, class = typename A2D::element_ptr, std::enable_if_t<is_complex_array<C2D>{}, int> =0>
C2D&& herk(filling c_side, AA alpha, A2D const& a, BB beta, C2D&& c)
{
	assert( a.size() == c.size() );
	assert( c.size() == rotated(c).size() );
	if(c.size()==0) return std::forward<C2D>(c);

	if constexpr(is_conjugated<C2D>{}){
		herk(flip(c_side), alpha, a, beta, hermitized(c));
		return std::forward<C2D>(c);
	}

	auto a_ptr = base_aux(a);
	auto c_ptr = base_aux(c);

	char const uplo_same    = (c_side==filling::upper)?'U':'L';
	char const uplo_swapped = (c_side==filling::upper)?'L':'U';
	bool const a_unit = (stride(a)==1);
	bool const c_unit = (stride(c)==1);

	if constexpr(is_conjugated<A2D>{}){
		if(a_unit){
			// with unit stride in both `a` and `c`, only size(a)==1 is handled
			if(not c_unit or size(a)==1){
				herk(uplo_swapped, 'N', size(c), size(rotated(a)), &alpha, a_ptr, stride(rotated(a)), &beta, c_ptr, stride(c));
			}else{
				assert(0);
			}
		}else if(c_unit){
			herk(uplo_same, 'C', size(c), size(rotated(a)), &alpha, a_ptr, stride( a ), &beta, c_ptr, stride(rotated(c)));
		}else{
			herk(uplo_swapped, 'C', size(c), size(rotated(a)), &alpha, a_ptr, stride( a ), &beta, c_ptr, stride( c ));
		}
	}else{
		if(not a_unit){
			if(not c_unit){
				herk(uplo_swapped, 'C', size(c), size(rotated(a)), &alpha, a_ptr, stride( a ), &beta, c_ptr, stride(c));
			}else if(size(a)==1){
				herk(uplo_swapped, 'N', size(c), size(rotated(a)), &alpha, a_ptr, stride(rotated(a)), &beta, c_ptr, stride(rotated(c)));
			}else{
				assert(0);
			}
		}else if(not c_unit){
			assert(0); // case not implemented
		}else{
			herk(uplo_same, 'N', size(c), size(rotated(a)), &alpha, a_ptr, stride(rotated(a)), &beta, c_ptr, stride(rotated(c)));
		}
	}
	return std::forward<C2D>(c);
}
|
||||
|
||||
// Fallback for non-complex output arrays: the update is delegated
// unchanged to `syrk`.
template<class AA, class BB, class A2D, class C2D, class = typename A2D::element_ptr, std::enable_if_t<not is_complex_array<C2D>{}, int> =0>
auto herk(filling c_side, AA alpha, A2D const& a, BB beta, C2D&& c)
->decltype(syrk(c_side, alpha, a, beta, std::forward<C2D>(c)))
{
	return syrk(c_side, alpha, a, beta, std::forward<C2D>(c));
}
|
||||
|
||||
//template<class AA, class BB, class A2D, class C2D, class = typename A2D::element_ptr>
|
||||
//auto herk(filling c_side, AA alpha, A2D const& a, BB beta, C2D&& c)
|
||||
//->decltype(herk_aux(c_side, alpha, a, beta, std::forward<C2D>(c), is_complex<C2D>{})){
|
||||
// return herk_aux(c_side, alpha, a, beta, std::forward<C2D>(c), is_complex<C2D>{});}
|
||||
|
||||
template<class AA, class A2D, class C2D, class = typename A2D::element_ptr>
|
||||
auto herk(filling c_side, AA alpha, A2D const& a, C2D&& c)
|
||||
->decltype(herk(c_side, alpha, a, 0., std::forward<C2D>(c))){
|
||||
return herk(c_side, alpha, a, 0., std::forward<C2D>(c));}
|
||||
|
||||
template<typename AA, class A2D, class C2D>
|
||||
auto herk(AA alpha, A2D const& a, C2D&& c)
|
||||
->decltype(herk(filling::lower, alpha, a, herk(filling::upper, alpha, a, std::forward<C2D>(c)))){
|
||||
return herk(filling::lower, alpha, a, herk(filling::upper, alpha, a, std::forward<C2D>(c)));}
|
||||
|
||||
// Unscaled two-argument form: same as herk(1., a, c).
template<class A2D, class C2D>
auto herk(A2D const& a, C2D&& c)
->decltype(herk(1., a, std::forward<C2D>(c)))
{
	return herk(1., a, std::forward<C2D>(c));
}
|
||||
|
||||
/*
|
||||
template<class A2D, class C2D>
|
||||
NODISCARD("when last argument is const")
|
||||
auto herk(A2D const& a, C2D const& c)
|
||||
->decltype(herk(1., a, decay(c))){
|
||||
return herk(1., a, decay(c));}
|
||||
*/
|
||||
|
||||
template<class AA, class A2D, class Ret = typename A2D::decay_type>
|
||||
NODISCARD("when argument is read-only")
|
||||
auto herk(AA alpha, A2D const& a)//->std::decay_t<decltype(herk(alpha, a, Ret({size(a), size(a)}, get_allocator(a))))>{
|
||||
{
|
||||
return herk(alpha, a, Ret({size(a), size(a)}));//Ret({size(a), size(a)}));//, get_allocator(a)));
|
||||
}
|
||||
|
||||
// Local numeric_limits: identical to std::numeric_limits for ordinary types.
template<class T>
struct numeric_limits : std::numeric_limits<T>{};

// For std::complex<T> a quiet_NaN() is provided whose real and imaginary
// parts are both the quiet NaN of the component type.
template<class T>
struct numeric_limits<std::complex<T>> : std::numeric_limits<std::complex<T>>{
	static std::complex<T> quiet_NaN(){
		T const nan = numeric_limits<T>::quiet_NaN();
		return std::complex<T>{nan, nan};
	}
};
|
||||
|
||||
template<class AA, class A2D, class Ret = typename A2D::decay_type>
|
||||
NODISCARD("because argument is read-only")
|
||||
auto herk(filling cs, AA alpha, A2D const& a)
|
||||
->std::decay_t<
|
||||
decltype(herk(cs, alpha, a, Ret({size(a), size(a)}, 0., get_allocator(a))))>{
|
||||
return herk(cs, alpha, a, Ret({size(a), size(a)},
|
||||
#ifdef NDEBUG
|
||||
numeric_limits<typename Ret::element_type>::quiet_NaN(),
|
||||
#endif
|
||||
get_allocator(a)
|
||||
));
|
||||
}
|
||||
|
||||
template<class A2D> auto herk(filling s, A2D const& a)
|
||||
->decltype(herk(s, 1., a)){
|
||||
return herk(s, 1., a);}
|
||||
|
||||
// Simplest form: unscaled, value-returning; same as herk(1., a).
template<class A2D>
auto herk(A2D const& a)
{
	return herk(1., a);
}
|
||||
|
||||
}}
|
||||
|
||||
}
|
||||
|
||||
#if not __INCLUDE_LEVEL__ // _TEST_MULTI_ADAPTORS_BLAS_HERK
|
||||
|
||||
#define BOOST_TEST_MODULE "C++ Unit Tests for Multi cuBLAS herk"
|
||||
#define BOOST_TEST_DYN_LINK
|
||||
#include<boost/test/unit_test.hpp>
|
||||
|
||||
#include "../../array.hpp"
|
||||
#include "../../adaptors/blas/gemm.hpp"
|
||||
#include "../../adaptors/blas/nrm2.hpp"
|
||||
|
||||
#include<iostream>
|
||||
#include<numeric>
|
||||
|
||||
namespace utf = boost::unit_test;
|
||||
namespace multi = boost::multi;
|
||||
|
||||
template<class T> void what(T&&) = delete;
|
||||
|
||||
template<class M> decltype(auto) print(M const& C){
|
||||
using std::cout;
|
||||
using boost::multi::size;
|
||||
for(int i = 0; i != size(C); ++i){
|
||||
for(int j = 0; j != size(C[i]); ++j) cout << C[i][j] << ' ';
|
||||
cout << std::endl;
|
||||
}
|
||||
return cout << std::endl;
|
||||
}
|
||||
|
||||
// Real 4x3 input: every herk calling convention must agree with the
// explicit product gemm(a, T(a)).
BOOST_AUTO_TEST_CASE(inq_case){
	using namespace multi::blas;
	multi::array<double, 2> const a = {
		{0, 1, 2},
		{3, 4, 5},
		{6, 7, 8},
		{9, 10, 11}
	};
	BOOST_REQUIRE( gemm(a, T(a))[1][2] == 86. );

	{ // output parameter
		multi::array<double, 2> c({4, 4});
		herk(1.0, a, c);
		BOOST_REQUIRE( c == gemm(a, T(a)) );
	}
	{ // returned value, explicit scale
		multi::array<double, 2> c = herk(1.0, a);
		BOOST_REQUIRE( c == gemm(a, T(a)) );
	}
	{ // returned value, implicit scale
		BOOST_REQUIRE( herk(a) == gemm(a, T(a)) );
	}
	{ // returned value, non-unit scale
		BOOST_REQUIRE( herk(2.0, a) == gemm(2.0, a, T(a)) );
	}
}
|
||||
|
||||
// Real 2x3 input: both off-diagonal elements are filled, and the
// value-returning form agrees with the output-parameter form.
BOOST_AUTO_TEST_CASE(multi_blas_herk_real){
	namespace blas = multi::blas;
	multi::array<double, 2> const a = {
		{ 1., 3., 4.},
		{ 9., 7., 1.}
	};
	{
		multi::array<double, 2> c({2, 2}, 9999);
		blas::herk(1., a, c);
		BOOST_REQUIRE( c[1][0] == 34 );
		BOOST_REQUIRE( c[0][1] == 34 );

		multi::array<double, 2> const c_copy = blas::herk(1., a);
		BOOST_REQUIRE( c == c_copy );
	}
}
|
||||
|
||||
// Single real row: the 1x1 result is the sum of squares of the row.
BOOST_AUTO_TEST_CASE(multi_blas_herk1x1_case){
	namespace blas = multi::blas;
	multi::array<double, 2> const A = {{1., 2., 3.}};
	multi::array<double, 2> B = blas::herk(A);
	BOOST_REQUIRE( size(B) == 1 );
	BOOST_REQUIRE( B[0][0] == 1.*1. + 2.*2. + 3.*3. );
}
|
||||
|
||||
// Single real row with scale 0.1: the 1x1 result is the scaled sum of squares.
BOOST_AUTO_TEST_CASE(multi_blas_herk1x1_case_scale){
	namespace blas = multi::blas;
	multi::array<double, 2> const A = {{1., 2., 3.}};
	multi::array<double, 2> B = blas::herk(0.1, A);
	BOOST_REQUIRE( size(B) == 1 );
	BOOST_TEST( B[0][0] == (1.*1. + 2.*2. + 3.*3.)*0.1 );
}
|
||||
|
||||
// Complex element type holding purely real values, single row.
BOOST_AUTO_TEST_CASE(multi_blas_herk1x1_complex_real_case){
	namespace blas = multi::blas;
	multi::array<complex, 2> const A = {{1., 2., 3.}};
	multi::array<complex, 2> B = blas::herk(1.0, A);
	BOOST_REQUIRE( size(B) == 1 );
	BOOST_REQUIRE( B[0][0] == 1.*1. + 2.*2. + 3.*3. );
}
|
||||
|
||||
// Complex element type holding real values, scale 0.1; the comparison is
// done with a floating-point tolerance.
BOOST_AUTO_TEST_CASE(multi_blas_herk1x1_complex_real_case_scale, *utf::tolerance(0.00001)){
	namespace blas = multi::blas;
	multi::array<complex, 2> const A = {{1., 2., 3.}};
	multi::array<complex, 2> B = blas::herk(0.1, A);
	BOOST_REQUIRE( size(B) == 1 );
	BOOST_TEST( real( B[0][0]/0.1 ) == 1.*1. + 2.*2. + 3.*3. );
}
|
||||
|
||||
// Single complex row: the 1x1 result is the sum of squared moduli, and its
// square root matches nrm2 of the row.
BOOST_AUTO_TEST_CASE(multi_blas_herk1x1_complex_case){
	namespace blas = multi::blas;
	multi::array<complex, 2> const A = {{1. + 2.*I, 2.+3.*I, 3. + 4.*I}};
	multi::array<complex, 2> B = blas::herk(A);
	BOOST_REQUIRE( size(B) == 1 );
	BOOST_REQUIRE( B[0][0] == std::norm(1. + 2.*I) + std::norm(2.+3.*I) + std::norm(3. + 4.*I) );

	BOOST_TEST( std::sqrt(real(blas::herk(A)[0][0])) == blas::nrm2(A[0])() );
}
|
||||
|
||||
// Single complex column passed as H(A), with an explicit triangle, explicit
// beta and an output parameter.
BOOST_AUTO_TEST_CASE(multi_blas_herk1x1_complex_case_hermitized_out_param){
	namespace blas = multi::blas;
	multi::array<complex, 2> const A = {{1. + 2.*I}, {2.+3.*I}, {3. + 4.*I}};
	multi::array<complex, 2> B({1, 1});
	BOOST_REQUIRE( size(B) == 1 );

	blas::herk(blas::filling::upper, 1.0, blas::H(A), 0.0, B);

	BOOST_REQUIRE( B[0][0] == std::norm(1. + 2.*I) + std::norm(2.+3.*I) + std::norm(3. + 4.*I) );

	BOOST_TEST( std::sqrt(real(B[0][0])) == blas::nrm2(blas::T(A)[0])() );
}
|
||||
|
||||
// Single complex column passed as H(A), value-returning form.
BOOST_AUTO_TEST_CASE(multi_blas_herk1x1_complex_case_hermitized){
	multi::array<complex, 2> A = {{1. + 2.*I}, {2.+3.*I}, {3. + 4.*I}};
	namespace blas = multi::blas;
	multi::array<complex, 2> B = blas::herk(blas::H(A));
	BOOST_REQUIRE( size(B) == 1 );
	BOOST_REQUIRE( B[0][0] == std::norm(1. + 2.*I) + std::norm(2.+3.*I) + std::norm(3. + 4.*I) );

	BOOST_TEST( std::sqrt(real(blas::herk(blas::H(A))[0][0])) == blas::nrm2(rotated(A)[0])() );
}
|
||||
|
||||
// Same as the hermitized case, but the result type is deduced with `auto`
// and must be a plain multi::array<complex, 2>.
BOOST_AUTO_TEST_CASE(multi_blas_herk1x1_complex_case_hermitized_auto){
	namespace blas = multi::blas;

	multi::array<complex, 2> A = {{1. + 2.*I}, {2.+3.*I}, {3. + 4.*I}};
	auto B = blas::herk(1., blas::hermitized(A));
	static_assert( std::is_same<decltype(B), multi::array<complex, 2>>{}, "!" );
	BOOST_REQUIRE( size(B) == 1 );
	BOOST_REQUIRE( B[0][0] == std::norm(1. + 2.*I) + std::norm(2.+3.*I) + std::norm(3. + 4.*I) );

	BOOST_TEST( std::sqrt(real(blas::herk(blas::H(A))[0][0])) == blas::nrm2(rotated(A)[0])() );
}
|
||||
|
||||
#if 1
|
||||
#if 1
|
||||
|
||||
// Complex 2x3 input combined with several views of the output (plain, H, T,
// H(T)). Each scenario pre-fills `c` with 9999 so that the triangle that
// must stay untouched can be checked. Scenarios that are commented out are
// kept for reference.
BOOST_AUTO_TEST_CASE(multi_blas_herk_complex_identity){
	namespace blas = multi::blas;
	multi::array<complex, 2> const a = {
		{ 1. + 3.*I, 3.- 2.*I, 4.+ 1.*I},
		{ 9. + 1.*I, 7.- 8.*I, 1.- 3.*I}
	};

	{ // c = a a†, lower triangle of `c`
		multi::array<complex, 2> c({2, 2}, 9999.);
		blas::herk(blas::filling::lower, 1., a, 0., c);
		BOOST_REQUIRE( c[1][0]==complex(50., -49.) );
		BOOST_REQUIRE( c[0][1]==9999. );
	}
	{ // same, written through H(c): lands in the upper triangle of `c`
		multi::array<complex, 2> c({2, 2}, 9999.);
		static_assert(blas::is_conjugated<decltype(blas::H(c))>{}, "!" );

		blas::herk(blas::filling::lower, 1., a, 0., blas::H(c));

		BOOST_REQUIRE( blas::H(c)[1][0]==complex(50., -49.) );
		BOOST_REQUIRE( blas::H(c)[0][1]==9999. );
	}
	{
	//	multi::array<complex, 2> c({2, 2}, 9999.);
	//	blas::herk(blas::filling::lower, 1., a, 0., blas::T(c)); // c†=c=aa†=(aa†)†, `c` in lower triangular
	//	BOOST_REQUIRE( transposed(c)[1][0]==complex(50., -49.) );
	//	BOOST_REQUIRE( transposed(c)[0][1]==9999. );
	}
	{
		multi::array<complex, 2> c({3, 3}, 9999.);
	//	herk(filling::lower, 1., transposed(a), 0., c); // c†=c=aT(aT)† not supported
	//	print(c);
	//	BOOST_REQUIRE( c[1][0]==complex(52., -90.) );
	//	BOOST_REQUIRE( c[0][1]==9999. );
	}
	{
		multi::array<complex, 2> c({3, 3}, 9999.);
	//	herk(filling::lower, 1., transposed(a), 0., hermitized(c)); // c†=c=aT(aT)† not supported
	//	BOOST_REQUIRE( hermitized(c)[1][0]==complex(52., -90.) );
	//	BOOST_REQUIRE( hermitized(c)[0][1]==9999. );
	}
	{ // T(a) into T(c) (the original note said "not supported", yet the call is enabled)
		multi::array<complex, 2> c({3, 3}, 9999.);
		herk(blas::filling::lower, 1., blas::T(a), 0., blas::T(c));
		BOOST_REQUIRE( transposed(c)[1][0]==complex(52., -90.) );
		BOOST_REQUIRE( transposed(c)[0][1]==9999. );
	}
	{ // T(a) into H(T(c)) (the original note said "not supported", yet the call is enabled)
		multi::array<complex, 2> c({3, 3}, 9999.);
		blas::herk(blas::filling::lower, 1., blas::T(a), 0., blas::H(blas::T(c)));
		BOOST_REQUIRE( blas::H(blas::T(c))[1][0]==complex(52., -90.) );
		BOOST_REQUIRE( blas::H(blas::T(c))[0][1]==9999. );
	}
	{
	//	multi::array<complex, 2> c({3, 3}, 9999.);
	//	using namespace multi::blas;
	//	blas::herk(blas::filling::lower, 1., blas::T(a), 0., c); // c†=c=aa†=(aa†)†, `c` in lower triangular
	//	BOOST_REQUIRE( c[1][0]==complex(50., -49.) );
	//	BOOST_REQUIRE( c[0][1]==9999. );
	}
#if 1
	{ // c = a a†, upper triangle of `c`
		multi::array<complex, 2> c({2, 2}, 9999.);
		blas::herk(blas::U, 1., a, 0., c);
		BOOST_REQUIRE( c[0][1]==complex(50., +49.) );
		BOOST_REQUIRE( c[1][0]==9999. );
	}
	{ // c = a a†, both triangles
		multi::array<complex, 2> c({2, 2}, 9999.);
		blas::herk(1., a, c);
		BOOST_REQUIRE( c[0][1]==complex(50., +49.) );
		BOOST_REQUIRE( c[1][0]==complex(50., -49.) );
	}
	{ // H(a) as input, lower triangle of a 3x3 `c`
		multi::array<complex, 2> c({3, 3}, 9999.);
		blas::herk(blas::L, 1., blas::H(a), 0., c);
		BOOST_REQUIRE( c[1][0]==complex(52., 90.) );
		BOOST_REQUIRE( c[0][1]==9999. );
	}
	{
	//	multi::array<complex, 2> c({3, 3}, 9999.);
	//	using namespace multi::blas;
	//	herk(filling::lower, 1., transposed(a), 0., c); // c†=c=aa†=(aa†)†, `c` in lower triangular
	//	BOOST_REQUIRE( c[0][1]==9999. );
	//	BOOST_REQUIRE( c[1][0]==complex(52., 90.) );
	}
#endif
}
|
||||
|
||||
#if 0
|
||||
|
||||
// Complex element type with real values: c = a†a written into one triangle
// of a 3x3 `c` pre-filled with 9999. The last two scenarios only declare
// `c`; their calls are commented out.
BOOST_AUTO_TEST_CASE(multi_blas_herk_complex_real_case){
	multi::array<complex, 2> const a = {
		{ 1., 3., 4.},
		{ 9., 7., 1.}
	};
	namespace blas = multi::blas;
	using blas::filling;
	using blas::transposed;
	using blas::hermitized;
	{ // lower triangle
		multi::array<complex, 2> c({3, 3}, 9999.);

		herk(filling::lower, 1., hermitized(a), 0., c);
		BOOST_REQUIRE( c[2][1]==complex(19.,0.) );
		BOOST_REQUIRE( c[1][2]==9999. );
	}
	{ // upper triangle
		multi::array<complex, 2> c({3, 3}, 9999.);
		herk(filling::upper, 1., hermitized(a), 0., c);
		BOOST_REQUIRE( c[1][2]==complex(19.,0.) );
		BOOST_REQUIRE( c[2][1]==9999. );
	}
	{
		multi::array<complex, 2> c({3, 3}, 9999.);
	//	herk(filling::upper, 1., hermitized(a), 0., transposed(c));//c†=c=a†a=(a†a)†, `c` in lower triangular
	//	print(transposed(c));
	//	BOOST_REQUIRE( c[1][2]==complex(19.,0.) );
	//	BOOST_REQUIRE( c[2][1]==9999. );
	}
	{
		multi::array<complex, 2> c({3, 3}, 9999.);
		using blas::transposed;
	//	herk(filling::upper, 1., transposed(a), 0., c);//c_†=c_=a_†a_=(a_†a_)†, `c_` in lower triangular
	//	BOOST_REQUIRE( c[2][1] == 9999. );
	//	BOOST_REQUIRE( c[1][2] == 19. );
	}
}
|
||||
|
||||
// Complex 2x3 input through the five-argument interface: a†a (3x3) and
// aa† (2x2), each written into the lower and into the upper triangle of a
// `c` pre-filled with 9999.
BOOST_AUTO_TEST_CASE(multi_blas_herk_complex_basic_transparent_interface){
	multi::array<complex, 2> const a = {
		{ 1. + 3.*I, 3.- 2.*I, 4.+ 1.*I},
		{ 9. + 1.*I, 7.- 8.*I, 1.- 3.*I}
	};
	namespace blas = multi::blas;
	using blas::filling;
	using blas::hermitized;
	{ // c = a†a, lower triangle
		multi::array<complex, 2> c({3, 3}, 9999.);
		herk(filling::lower, 1., hermitized(a), 0., c);
		BOOST_REQUIRE( c[2][1]==complex(41.,2.) );
		BOOST_REQUIRE( c[1][2]==9999. );
	}
	{ // c = a†a, upper triangle
		multi::array<complex, 2> c({3, 3}, 9999.);
		using multi::blas::herk;
		herk(filling::upper, 1., hermitized(a), 0., c);
		BOOST_REQUIRE( c[1][2]==complex(41., -2.) );
		BOOST_REQUIRE( c[2][1]==9999. );
	}
	{ // c = aa†, lower triangle
		multi::array<complex, 2> c({2, 2}, 9999.);
		using multi::blas::herk;
		herk(filling::lower, 1., a, 0., c);
		BOOST_REQUIRE( c[1][0]==complex(50., -49.) );
		BOOST_REQUIRE( c[0][1]==9999. );
	}
	{ // c = aa†, upper triangle
		multi::array<complex, 2> c({2, 2}, 9999.);
		using multi::blas::herk;
		herk(filling::upper, 1., a, 0., c);
		BOOST_REQUIRE( c[0][1]==complex(50., 49.) );
		BOOST_REQUIRE( c[1][0]==9999. );
	}
}
|
||||
|
||||
// Same four products as the transparent-interface test, with the names
// brought in through using-declarations / using-directives. The first
// scenario is commented out and kept for reference.
BOOST_AUTO_TEST_CASE(multi_blas_herk_complex_basic_enum_interface){
	multi::array<complex, 2> const a = {
		{ 1. + 3.*I, 3.- 2.*I, 4.+ 1.*I},
		{ 9. + 1.*I, 7.- 8.*I, 1.- 3.*I}
	};
	namespace blas = multi::blas;
	using blas::filling;
	using blas::hermitized;
	using blas::transposed;
	{
	//	multi::array<complex, 2> c({2, 2}, 8888.);
	//	std::cerr << "here" << std::endl;
	//	herk(filling::lower, 1., hermitized(transposed(a)), 0., c); //c†=c=a†a=(a†a)†, `c` in lower triangular
	//	print(c) << std::endl;
	//	std::cerr << "there" << std::endl;
	//	BOOST_REQUIRE( c[0][1]==complex(41.,2.) );
	//	BOOST_REQUIRE( c[1][0]==8888. );
	}
	{ // c = a†a, lower triangle
		multi::array<complex, 2> c({3, 3}, 9999.);
		herk(filling::lower, 1., hermitized(a), 0., c);
		BOOST_REQUIRE( c[2][1]==complex(41.,2.) );
		BOOST_REQUIRE( c[1][2]==9999. );
	}
	{ // c = a†a, upper triangle
		multi::array<complex, 2> c({3, 3}, 9999.);
		using namespace multi::blas;
		herk(filling::upper, 1., hermitized(a), 0., c);
		BOOST_REQUIRE( c[1][2]==complex(41., -2.) );
		BOOST_REQUIRE( c[2][1]==9999. );
	}
	{ // c = aa†, lower triangle
		multi::array<complex, 2> c({2, 2}, 9999.);
		using namespace multi::blas;
		herk(filling::lower, 1., a, 0., c);
		BOOST_REQUIRE( c[1][0]==complex(50., -49.) );
		BOOST_REQUIRE( c[0][1]==9999. );
	}
	{ // c = aa†, upper triangle
		multi::array<complex, 2> c({2, 2}, 9999.);
		using namespace multi::blas;
		herk(filling::upper, 1., a, 0., c);
		BOOST_REQUIRE( c[0][1]==complex(50., 49.) );
		BOOST_REQUIRE( c[1][0]==9999. );
	}
}
|
||||
|
||||
// Complex 2x3 input with explicit triangle selection, cross-checked against
// gemm for the value-returning forms. Every `c` is pre-filled with 9999 so
// the untouched triangle can be verified.
BOOST_AUTO_TEST_CASE(multi_blas_herk_complex_basic_explicit_enum_interface){
	multi::array<complex, 2> const a = {
		{ 1. + 3.*I, 3.- 2.*I, 4.+ 1.*I},
		{ 9. + 1.*I, 7.- 8.*I, 1.- 3.*I}
	};
	using namespace multi::blas;
	{ // c = a†a, lower triangle
		multi::array<complex, 2> c({3, 3}, 9999.);
		herk(filling::lower, 1., hermitized(a), 0., c);
		BOOST_REQUIRE( c[2][1]==complex(41.,2.) );
		BOOST_REQUIRE( c[1][2]==9999. );
	}
	BOOST_REQUIRE( herk(hermitized(a)) == gemm(hermitized(a), a) );
	{
		multi::array<complex, 2> c({3, 3}, 9999.);
	//	herk(filling::lower, 1., hermitized(a), 0., transposed(c)); // c†=c=a†a=(a†a)†, `c` in lower triangular
	//	print(transposed(c));
	//	BOOST_REQUIRE( c[2][1]==complex(41.,2.) );
	//	BOOST_REQUIRE( c[1][2]==9999. );
	}
	{ // H(T(a)) as input, written through transposed(c)
		multi::array<complex, 2> c({2, 2}, 9999.);
		herk(filling::lower, 1., hermitized(transposed(a)), 0., transposed(c));
		BOOST_REQUIRE( transposed(c)[1][0]==complex(50.,+49.) );
		BOOST_REQUIRE( transposed(c)[0][1]==9999. );
	}
//	BOOST_REQUIRE( herk(hermitized(transposed(a))) == gemm(hermitized(transposed(a)), transposed(a)) );
	{ // c = a†a, upper triangle
		multi::array<complex, 2> c({3, 3}, 9999.);
		herk(filling::upper, 1., hermitized(a), 0., c);
		BOOST_REQUIRE( c[1][2]==complex(41., -2.) );
		BOOST_REQUIRE( c[2][1]==9999. );
		BOOST_REQUIRE( herk(hermitized(a)) == gemm(hermitized(a), a) );
	}
	{ // c = aa†, lower triangle
		multi::array<complex, 2> c({2, 2}, 9999.);
		herk(filling::lower, 1., a, 0., c);
		BOOST_REQUIRE( c[1][0]==complex(50., -49.) );
		BOOST_REQUIRE( c[0][1]==9999. );
		BOOST_REQUIRE( herk(a) == gemm(a, hermitized(a)) );
	}
	{ // c = aa†, upper triangle
		multi::array<complex, 2> c({2, 2}, 9999.);
		herk(filling::upper, 1., a, 0., c);
		BOOST_REQUIRE( c[0][1]==complex(50., 49.) );
		BOOST_REQUIRE( c[1][0]==9999. );
		BOOST_REQUIRE( herk(a) == gemm(a, hermitized(a)) );
	}
	{ // c = 2 aa†, upper triangle
		multi::array<complex, 2> c({2, 2}, 9999.);
		herk(filling::upper, 2., a, 0., c);
		BOOST_REQUIRE( c[0][1]==complex(100., 98.) );
		BOOST_REQUIRE( c[1][0]==9999. );
		BOOST_REQUIRE( herk(2., a) == gemm(2., a, hermitized(a)) );
	}
	{ // c = aa†, upper triangle, explicit unit scale in the cross-check
		multi::array<complex, 2> c({2, 2}, 9999.);
		herk(filling::upper, 1., a, 0., c);
		BOOST_REQUIRE( c[0][1]==complex(50., 49.) );
		BOOST_REQUIRE( c[1][0]==9999. );
		BOOST_REQUIRE( herk(1., a) == gemm(1., a, hermitized(a)) );
	}
}
|
||||
|
||||
// Complex 2x3 input: single-triangle calls, the both-triangles overload,
// and two successive single-triangle calls that together fill all of `c`.
BOOST_AUTO_TEST_CASE(multi_blas_herk_complex_automatic_operator_interface){
	multi::array<complex, 2> const a = {
		{ 1. + 3.*I, 3.- 2.*I, 4.+ 1.*I},
		{ 9. + 1.*I, 7.- 8.*I, 1.- 3.*I}
	};
	{ // c = a†a, lower triangle
		multi::array<complex, 2> c({3, 3}, 9999.);
		namespace blas = multi::blas;
		using blas::filling;
		using blas::hermitized;
		herk(filling::lower, 1., hermitized(a), 0., c);
		BOOST_REQUIRE( c[2][1]==complex(41., 2.) );
		BOOST_REQUIRE( c[1][2]==9999. );
	}
	{ // c = aa†, lower triangle
		multi::array<complex, 2> c({2, 2}, 9999.);
		using multi:: blas::filling;
		herk(filling::lower, 1., a, 0., c);
		BOOST_REQUIRE( c[1][0]==complex(50., -49.) );
		BOOST_REQUIRE( c[0][1]==9999. );
	}
	{ // c = aa†, both triangles
		multi::array<complex, 2> c({2, 2}, 9999.);
		using multi::blas::herk;
		herk(1., a, c);
		BOOST_REQUIRE( c[1][0]==complex(50., -49.) );
		BOOST_REQUIRE( c[0][1]==complex(50., +49.) );
	}
	{ // c = a†a, lower then upper triangle
		multi::array<complex, 2> c({3, 3}, 9999.);
		namespace blas = multi::blas;
		using blas::filling;
		using blas::hermitized;
		herk(filling::lower, 1., hermitized(a), 0., c);
		herk(filling::upper, 1., hermitized(a), 0., c);
		BOOST_REQUIRE( c[2][1]==complex(41., 2.) );
		BOOST_REQUIRE( c[1][2]==complex(41., -2.) );
	}
}
|
||||
|
||||
// Checks the 4-argument herk(filling, alpha, A, C) form, i.e. the call without an explicit beta.
// Result arrays start filled with the sentinel 9999.; the triangle not selected by `filling` must keep it.
BOOST_AUTO_TEST_CASE(multi_blas_herk_complex_automatic_operator_interface_implicit_no_sum){
	multi::array<complex, 2> const a = {
		{1. + 3.*I, 3. - 2.*I, 4. + 1.*I},
		{9. + 1.*I, 7. - 8.*I, 1. - 3.*I}
	};
	{ // gram = a†a, lower triangle only
		namespace blas = multi::blas;
		using blas::filling;
		using blas::hermitized;
		multi::array<complex, 2> gram({3, 3}, 9999.);
		herk(filling::lower, 1., hermitized(a), gram);
		BOOST_REQUIRE( gram[2][1]==complex(41., 2.) );
		BOOST_REQUIRE( gram[1][2]==9999. );
	}
	{ // gram = aa†, lower triangle only
		using multi::blas::filling;
		multi::array<complex, 2> gram({2, 2}, 9999.);
		herk(filling::lower, 1., a, gram);
		BOOST_REQUIRE( gram[1][0]==complex(50., -49.) );
		BOOST_REQUIRE( gram[0][1]==9999. );
	}
}
|
||||
|
||||
// Checks herk in its in-place forms (with and without a filling argument) and in its value-returning forms,
// for both `a` (giving aa†, 2x2) and `hermitized(a)` (giving a†a, 3x3).
BOOST_AUTO_TEST_CASE(multi_blas_herk_complex_automatic_ordering_and_symmetrization){

	multi::array<complex, 2> const a = {
		{1. + 3.*I, 3. - 2.*I, 4. + 1.*I},
		{9. + 1.*I, 7. - 8.*I, 1. - 3.*I}
	};
	// these names are used by every sub-case below
	namespace blas = multi::blas;
	using blas::herk;
	using blas::hermitized;
	using blas::filling;

	{ // in place, upper only: a†a
		multi::array<complex, 2> out({3, 3}, 9999.);
		herk(filling::upper, 1., hermitized(a), out);
		BOOST_REQUIRE( out[2][1]==9999. ); // lower element keeps the sentinel
		BOOST_REQUIRE( out[1][2]==complex(41., -2.) );
	}
	{ // in place, no filling argument: a†a with both triangles checked
		multi::array<complex, 2> out({3, 3}, 9999.);
		herk(1., hermitized(a), out);
		BOOST_REQUIRE( out[2][1]==complex(41., +2.) );
		BOOST_REQUIRE( out[1][2]==complex(41., -2.) );
	}
	{ // in place, upper only: aa† (the hermitian counterpart is implicit)
		multi::array<complex, 2> out({2, 2}, 9999.);
		herk(filling::upper, 1., a, out);
		BOOST_REQUIRE( out[1][0] == 9999. );
		BOOST_REQUIRE( out[0][1] == complex(50., +49.) );
	}
	{ // in place, no filling argument: aa† with both triangles checked
		multi::array<complex, 2> out({2, 2}, 9999.);
		herk(1., a, out);
		BOOST_REQUIRE( out[1][0] == complex(50., -49.) );
		BOOST_REQUIRE( out[0][1] == complex(50., +49.) );
	}
	{ // returned by value, upper: aa† (only the upper element is checked)
		multi::array<complex, 2> out = herk(filling::upper, 1., a);
		BOOST_REQUIRE( out[0][1] == complex(50., +49.) );
	}
	{ // returned by value, no filling argument: aa†
		multi::array<complex, 2> out = herk(1., a);
		BOOST_REQUIRE( out[1][0] == complex(50., -49.) );
		BOOST_REQUIRE( out[0][1] == complex(50., +49.) );
	}
	{ // returned by value, upper: a†a
		multi::array<complex, 2> out = herk(filling::upper, 1., hermitized(a));

		BOOST_REQUIRE( size(hermitized(a))==3 );
		BOOST_REQUIRE( out[1][2] == complex(41., -2.) );
	}
	{ // returned by value, upper, no alpha: aa†
		multi::array<complex, 2> out = herk(filling::upper, a);
		BOOST_REQUIRE( out[0][1] == complex(50., +49.) );
	}
	{ // returned by value, upper, no alpha: a†a
		multi::array<complex, 2> out = herk(filling::upper, hermitized(a));
		BOOST_REQUIRE( out[1][2] == complex(41., -2.) );
	}
}
|
||||
|
||||
// herk on a single-row complex matrix with real entries: aa† is 1x1 and equals 1*1 + 3*3 + 4*4 = 26.
BOOST_AUTO_TEST_CASE(multi_blas_herk_complex_size1_real_case){
	multi::array<complex, 2> const a = {
		{1., 3., 4.}
	};
	using namespace multi::blas;
	{
		multi::array<complex, 2> out({1, 1}, 9999.);
		herk(filling::upper, 1., a, out); // out = aa†
		BOOST_TEST( out[0][0] == 26. );
	}
	// the value-returning form must agree with the explicit product
	BOOST_TEST( herk(a) == gemm(a, hermitized(a)) );
}
|
||||
|
||||
// herk on a single-row complex matrix: aa† is 1x1 and equals |1+4i|^2 + |3+2i|^2 + |4-i|^2 = 17 + 13 + 17 = 47.
BOOST_AUTO_TEST_CASE(multi_blas_herk_complex_size1){
	multi::array<complex, 2> const a = {
		{1. + 4.*I, 3. + 2.*I, 4. - 1.*I}
	};
	using namespace multi::blas;
	{
		multi::array<complex, 2> out({1, 1}, 9999.);
		herk(filling::upper, 1., a, out); // out = aa†
		BOOST_TEST( out[0][0] == 47. );
	}
}
|
||||
|
||||
// herk on default-constructed (empty) arrays: the call is only required to complete, nothing is asserted.
BOOST_AUTO_TEST_CASE(multi_blas_herk_complex_size0){
	multi::array<complex, 2> const a;
	using namespace multi::blas;
	{
		multi::array<complex, 2> out;
		herk(filling::upper, 1., a, out); // out = aa† with no elements
	}
}
|
||||
|
||||
|
||||
// Same call forms as the complex ordering/symmetrization test, on a complex-typed matrix whose entries are real.
// Only the upper-triangle element is asserted in every sub-case.
BOOST_AUTO_TEST_CASE(multi_blas_herk_complex_automatic_ordering_and_symmetrization_real_case){

	multi::array<complex, 2> const a = {
		{1., 3., 4.},
		{9., 7., 1.}
	};
	using namespace multi::blas;
	{ // in place: a†a
		multi::array<complex, 2> out({3, 3}, 9999.);
		herk(filling::upper, 1., hermitized(a), out);
		BOOST_REQUIRE( out[1][2]==19. );
	}
	{ // in place: aa†
		multi::array<complex, 2> out({2, 2}, 9999.);
		herk(filling::upper, 1., a, out);
		BOOST_REQUIRE( out[0][1] == 34. );
	}
	{ // returned by value: aa†
		multi::array<complex, 2> out = herk(filling::upper, 1., a);
		BOOST_REQUIRE( out[0][1] == 34. );
	}
	{ // returned by value: a†a
		multi::array<complex, 2> out = herk(filling::upper, 1., hermitized(a));
		BOOST_REQUIRE( size(hermitized(a))==3 );
		BOOST_REQUIRE( out[1][2]==19. );
	}
	{ // returned by value, no alpha: aa†
		multi::array<complex, 2> out = herk(filling::upper, a);
		BOOST_REQUIRE( out[0][1] == 34. );
	}
	{ // returned by value, no alpha: a†a
		multi::array<complex, 2> out = herk(filling::upper, hermitized(a));
		BOOST_REQUIRE( out[1][2]==19. );
	}
}
|
||||
|
||||
|
||||
// herk applied to an array of `double`: in-place and value-returning forms, including results stored as complex.
BOOST_AUTO_TEST_CASE(multi_blas_herk_real_automatic_ordering_and_symmetrization_real_case){

	multi::array<double, 2> const a = {
		{1., 3., 4.},
		{9., 7., 1.}
	};
	// shared by all sub-cases
	using multi::blas::herk;
	using multi::blas::hermitized;
	using multi::blas::filling;

	{ // the in-place call on hermitized(a) is currently disabled; only the array is constructed
		multi::array<double, 2> out({3, 3}, 9999.);
	}
	{ // in place, upper: aa†
		multi::array<double, 2> out({2, 2}, 9999.);
		herk(filling::upper, 1., a, out);
		BOOST_REQUIRE( out[0][1] == 34. );
	}
	{ // same call repeated on a fresh array
		multi::array<double, 2> out({2, 2}, 9999.);
		herk(filling::upper, 1., a, out);
		BOOST_REQUIRE( out[0][1] == 34. );
	}
	{ // returned by value, upper: aa†
		multi::array<double, 2> out = herk(filling::upper, 1., a);
		BOOST_REQUIRE( out[0][1] == 34. );
	}
	{ // returned by value into a complex array: aa†, both triangles checked
		multi::array<complex, 2> out = herk(a);
		BOOST_REQUIRE( out[1][0] == 34. );
		BOOST_REQUIRE( out[0][1] == 34. );
	}
	{ // returned by value into a complex array: a†a, both triangles checked
		multi::array<complex, 2> out = herk(hermitized(a));
		BOOST_REQUIRE( out[2][1]==19. );
		BOOST_REQUIRE( out[1][2]==19. );
	}
}
|
||||
|
||||
// Compile-and-run check that syrk and herk accept arrays of `double`; no element values are asserted.
BOOST_AUTO_TEST_CASE(multi_blas_herk_real_case){
	multi::array<double, 2> const a = {
		{1., 3., 4.},
		{9., 7., 1.}
	};
	using multi::blas::filling;
	{ // syrk, lower triangle: aa^T
		static_assert( not boost::multi::blas::is_complex_array<multi::array<double, 2>>{} , "!");
		multi::array<double, 2> out({2, 2}, 9999.);
		syrk(filling::lower, 1., a, 0., out);
	}
	{ // herk with the same arguments, lower triangle
		multi::array<double, 2> out({2, 2}, 9999.);
		herk(filling::lower, 1., a, 0., out);
	}
	{ // value-returning herk, upper triangle
		static_assert( not boost::multi::blas::is_complex_array<multi::array<double, 2>>{} , "!");
		multi::array<double, 2> out = herk(filling::upper, a);
	}
}
|
||||
|
||||
// herk on a single-row complex matrix with real entries, producing the 3x3 product a†a.
BOOST_AUTO_TEST_CASE(multi_blas_herk_complex_real_case_1d){
	multi::array<complex, 2> const a = {
		{1., 3., 4.},
	};
	namespace blas = multi::blas;
	using blas::filling;
	using blas::transposed;
	using blas::hermitized;
	{ // lower triangle only, explicit beta = 0
		multi::array<complex, 2> out({3, 3}, 9999.);
		herk(filling::lower, 1., hermitized(a), 0., out);
		print(out);
		BOOST_REQUIRE( out[2][1]==complex(12.,0.) ); // 4*3
		BOOST_REQUIRE( out[1][2]==9999. );           // upper element keeps the sentinel
	}
	{ // alpha = 2 and no filling argument: both triangles are checked
		multi::array<complex, 2> out({3, 3}, 9999.);
		herk(2., hermitized(a), out);

		BOOST_REQUIRE( out[2][1]==complex(24.,0.) );
		BOOST_REQUIRE( out[1][2]==complex(24.,0.) );
		multi::array<complex, 2> c_gemm({3, 3}); // comparison against gemm is currently disabled
	}
}
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
BOOST_AUTO_TEST_CASE(multi_blas_herk_complex_timing){
|
||||
multi::array<complex, 2> const a({4000, 4000}); std::iota(data_elements(a), data_elements(a) + num_elements(a), 0.2);
|
||||
multi::array<complex, 2> c({4000, 4000}, 9999.);
|
||||
boost::timer::auto_cpu_timer t;
|
||||
using multi::blas::herk;
|
||||
using multi::blas::hermitized;
|
||||
using multi::blas::filling;
|
||||
herk(filling::upper, 1., hermitized(a), c); // c†=c=a†a
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
|
@ -1,74 +0,0 @@
|
|||
#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*-
|
||||
$CXX $0 -o $0x `pkg-config --libs blas` -lboost_unit_test_framework&&$0x&&rm $0x;exit
|
||||
#endif
|
||||
// © Alfredo A. Correa 2019-2020
|
||||
#ifndef MULTI_ADAPTORS_BLAS_IAMAX_HPP
|
||||
#define MULTI_ADAPTORS_BLAS_IAMAX_HPP
|
||||
|
||||
#include "../blas/core.hpp"
|
||||
|
||||
namespace boost{
|
||||
namespace multi{
|
||||
namespace blas{
|
||||
|
||||
template<class It, class Size>
|
||||
auto iamax_n(It first, Size n){
|
||||
using core::iamax;
|
||||
return iamax(n, base(first), stride(first));
|
||||
// if you get an error here make sure that you are including (and linking) the appropriate BLAS backend for your memory type
|
||||
}
|
||||
|
||||
// Iterator-pair form: measures the range with std::distance and delegates to iamax_n.
// The trailing return type removes this overload when iamax_n is not callable for `It`.
template<class It>
auto iamax(It first, It last)
->decltype(iamax_n(first, std::distance(first, last)))
{
	return iamax_n(first, std::distance(first, last));
}
|
||||
|
||||
// Whole-array form: delegates to the iterator-pair overload over [begin(x), end(x)).
// Precondition (checked with assert): `offset(x)` is zero.
template<class X1D>
auto iamax(X1D const& x)
->decltype(iamax(begin(x), end(x)))
{
	assert( not offset(x) );
	return iamax(begin(x), end(x));
}
|
||||
|
||||
// Iterator to the element selected by iamax(x), i.e. begin(x) advanced by that index.
template<class X1D>
auto amax(X1D const& x){
	return begin(x) + iamax(x);
}
|
||||
|
||||
}}}
|
||||
|
||||
#if not __INCLUDE_LEVEL__ // _TEST_MULTI_ADAPTORS_BLAS_IAMAX
|
||||
|
||||
#define BOOST_TEST_MODULE "C++ Unit Tests for Multi BLAS iamax"
|
||||
#define BOOST_TEST_DYN_LINK
|
||||
#include<boost/test/unit_test.hpp>
|
||||
|
||||
#include "../../array.hpp"
|
||||
#include "../../utility.hpp"
|
||||
|
||||
#include<complex>
|
||||
#include<cassert>
|
||||
|
||||
using std::cout;
|
||||
namespace multi = boost::multi;
|
||||
namespace blas = multi::blas;
|
||||
|
||||
// iamax/amax on an increasing array of doubles: the last element (index 3, value 4.) must be selected.
BOOST_AUTO_TEST_CASE(multi_adaptors_blas_iamax_real){
	multi::array<double, 1> const values = {1., 2., 3., 4.};

	auto idx = blas::iamax(values);
	BOOST_REQUIRE( idx == 3 );
	BOOST_REQUIRE( values[blas::iamax(values)] == 4. );

	BOOST_REQUIRE( *blas::amax(values) == 4. );
}
|
||||
|
||||
using complex = std::complex<double>;
|
||||
|
||||
// Same check as the real case, with the values stored as std::complex<double>.
BOOST_AUTO_TEST_CASE(multi_adaptors_blas_iamax_complex){
	multi::array<complex, 1> const values = {1., 2., 3., 4.};
	auto idx = blas::iamax(values);
	BOOST_REQUIRE( idx == 3 );
	BOOST_REQUIRE( values[blas::iamax(values)] == 4. );
	BOOST_REQUIRE( *blas::amax(values) == 4. );
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
|
@ -1,194 +0,0 @@
|
|||
#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*-
|
||||
$CXXX $CXXFLAGS $0 -o $0x `pkg-config --libs blas` -lboost_unit_test_framework&&$0x&&rm $0x;exit
|
||||
#endif
|
||||
// © Alfredo A. Correa 2019-2020
|
||||
|
||||
#ifdef __CUDA_ARCH__
|
||||
//#define BOOST_NO_RTTI 1
|
||||
//#define BOOST_TYPE_INDEX_CTTI_USER_DEFINED_PARSING (39, 1, true, "T = ")
|
||||
#endif
|
||||
|
||||
#ifndef MULTI_ADAPTORS_BLAS_NRM2_HPP
|
||||
#define MULTI_ADAPTORS_BLAS_NRM2_HPP
|
||||
|
||||
#include "../blas/core.hpp"
|
||||
|
||||
#include "../../array.hpp"
|
||||
|
||||
#include<complex> // std::norm
|
||||
|
||||
namespace boost{
|
||||
namespace multi{
|
||||
namespace blas{
|
||||
|
||||
using core::nrm2;
|
||||
|
||||
using multi::base;
|
||||
using std::norm; // nvcc11 needs using std::FUNCTION and the FUNCTION (and it works in clang, gcc, culang, icc)
|
||||
|
||||
// Output-argument form: calls the low-level nrm2(size, base, stride, result_pointer) with `base(r)` as the
// destination and then hands `r` back to the caller, so the call can be used inside an expression.
template<class A1D, class A0D>
auto nrm2(A1D const& x, A0D&& r)
->decltype(nrm2(x.size(), x.base(), x.stride(), base(r)), std::forward<A0D>(r))
{
	return
		nrm2(x.size(), x.base(), x.stride(), base(r)),
		std::forward<A0D>(r)
	;
}
|
||||
|
||||
#if 0
|
||||
template<class A1D>
|
||||
auto nrm2(A1D const& x, double& r)
|
||||
->decltype(nrm2(x.size(), x.base(), x.stride(), &r), r){
|
||||
return nrm2(x.size(), x.base(), x.stride(), &r), r;}
|
||||
|
||||
template<class A1D>
|
||||
auto nrm2(A1D const& x, float& r)
|
||||
->decltype(nrm2(x.size(), x.base(), x.stride(), &r), r){
|
||||
return nrm2(x.size(), x.base(), x.stride(), &r), r;}
|
||||
#endif
|
||||
|
||||
template<
|
||||
class A1D, typename T = double, //decltype(norm(std::declval<typename A1D::value_type>())),
|
||||
class Alloc = typename std::allocator_traits<typename A1D::default_allocator_type>::template rebind_alloc<T>
|
||||
>
|
||||
NODISCARD("")
|
||||
auto nrm2(A1D const& x)
|
||||
//->std::decay_t<decltype(nrm2(x, multi::static_array<T, 0, Alloc>({}, x.get_allocator()) ))>{
|
||||
->std::decay_t<decltype(nrm2(x, multi::static_array<T, 0, Alloc>({})))>{ // x.get_allocator() in decltype doesn't work for icc
|
||||
return nrm2(x, multi::static_array<T, 0, Alloc>({}, x.get_allocator()));}
|
||||
|
||||
template<class Alloc, class A1D, typename T = decltype(norm(std::declval<typename A1D::value_type>())),
|
||||
class AllocR = typename std::allocator_traits<typename A1D::default_allocator_type>::template rebind_alloc<T>
|
||||
>
|
||||
NODISCARD("")
|
||||
auto nrm2(A1D const& x, AllocR const& alloc)
|
||||
->std::decay_t<decltype(blas::nrm2(x, multi::static_array<T, 0, AllocR>({}, alloc)))>{
|
||||
return blas::nrm2(x, multi::static_array<T, 0, AllocR>({}, alloc)) ;}
|
||||
|
||||
namespace operators{
|
||||
using std::norm;
|
||||
template<class A1D>//decltype(norm(std::declval<typename A1D::value_type>()))>
|
||||
NODISCARD("") auto operator^(A1D const& a, int n)
|
||||
->decltype(std::pow(blas::nrm2(a), n)){
|
||||
return std::pow(blas::nrm2(a), n);}
|
||||
}
|
||||
|
||||
}}}
|
||||
|
||||
#if not __INCLUDE_LEVEL__ // _TEST_MULTI_ADAPTORS_BLAS_NRM2
|
||||
|
||||
#define BOOST_TEST_MODULE "C++ Unit Tests for Multi BLAS nrm2"
|
||||
#define BOOST_TEST_DYN_LINK
|
||||
#include<boost/test/unit_test.hpp>
|
||||
|
||||
|
||||
#include "../../array.hpp"
|
||||
#include "../../complex.hpp"
|
||||
|
||||
//#include<thrust/complex.h>
|
||||
|
||||
#include<boost/mpl/list.hpp>
|
||||
|
||||
namespace multi = boost::multi;
|
||||
|
||||
// nrm2 of the second column (2, 6, 10) of a real 3x4 matrix, through every supported destination:
// a plain double, the returned value, an element of a 1D array and a 0D array.
BOOST_AUTO_TEST_CASE(multi_adaptor_multi_nrm2_real){
	namespace blas = multi::blas;
	multi::array<double, 2> const cA = {
		{1.,  2.,  3.,  4.},
		{5.,  6.,  7.,  8.},
		{9., 10., 11., 12.}
	};
	// reference value, recomputed on every use
	auto const expected = []{return std::sqrt( 2.*2. + 6.*6 + 10.*10.);};

	double out;
	BOOST_REQUIRE( blas::nrm2(rotated(cA)[1], out) == expected() );
	BOOST_REQUIRE( out == expected() );
	BOOST_REQUIRE( blas::nrm2(rotated(cA)[1]) == expected() );

	double by_value = blas::nrm2(rotated(cA)[1]);
	BOOST_REQUIRE( out == by_value );

	multi::array<double, 1> slots(4);
	blas::nrm2( rotated(cA)[1], slots[2]);
	BOOST_REQUIRE( slots[2] == expected() );

	multi::array<double, 0> scalar;
	blas::nrm2( rotated(cA)[1], scalar);
	BOOST_REQUIRE( scalar == expected() );

	BOOST_REQUIRE( blas::nrm2(rotated(cA)[1]) == expected() );

}
|
||||
|
||||
// The output-argument and value-returning forms of nrm2 must produce the same number.
BOOST_AUTO_TEST_CASE(multi_adaptor_blas_nrm2_operators){
	multi::array<double, 1> X = {1.1, 2.1, 3.1, 4.1};
	double out;
	multi::blas::nrm2(X, out);
	BOOST_REQUIRE( out == multi::blas::nrm2(X) );

}
|
||||
|
||||
// nrm2 of a column of a complex-typed matrix whose entries are all real; the result is a double.
BOOST_AUTO_TEST_CASE(multi_adaptor_multi_nrm2_complex_real_case){
	using complex = std::complex<double>;
	multi::array<complex, 2> const cA = {
		{1.,  2.,  3.,  4.},
		{5.,  6.,  7.,  8.},
		{9., 10., 11., 12.}
	};

	using multi::blas::nrm2;
	double out;
	BOOST_REQUIRE( nrm2(rotated(cA)[1], out) == std::sqrt( 2.*2. + 6.*6 + 10.*10.) );
	BOOST_REQUIRE( nrm2(rotated(cA)[1]) == out );
}
|
||||
|
||||
#if 0
|
||||
BOOST_AUTO_TEST_CASE(multi_adaptor_multi_nrm2_complex_real_case_thrust){
|
||||
using complex = thrust::complex<double>;
|
||||
multi::array<complex, 2> const cA = {
|
||||
{1., 2., 3., 4.},
|
||||
{5., 6., 7., 8.},
|
||||
{9., 10., 11., 12.}
|
||||
};
|
||||
|
||||
using multi::blas::nrm2;
|
||||
double n;
|
||||
BOOST_REQUIRE( nrm2(rotated(cA)[1], n) == std::sqrt( 2.*2. + 6.*6 + 10.*10.) );
|
||||
BOOST_REQUIRE( nrm2(rotated(cA)[1]) == n );
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(multi_adaptor_multi_nrm2_complex_real_case_types){
|
||||
boost::mpl::for_each<boost::mpl::list<
|
||||
std ::complex<double>,
|
||||
thrust::complex<double>//,
|
||||
// boost::multi::complex<double> // TODO make this work
|
||||
>>([](auto cplx){
|
||||
multi::array<decltype(cplx), 2> const cA = {
|
||||
{1., 2., 3., 4.},
|
||||
{5., 6., 7., 8.},
|
||||
{9., 10., 11., 12.}
|
||||
};
|
||||
|
||||
using multi::blas::nrm2;
|
||||
double n;
|
||||
BOOST_REQUIRE( nrm2(rotated(cA)[1], n) == std::sqrt( 2.*2. + 6.*6 + 10.*10.) );
|
||||
BOOST_REQUIRE( nrm2(rotated(cA)[1]) == n );
|
||||
});
|
||||
}
|
||||
#endif
|
||||
|
||||
// nrm2 of a genuinely complex column, compared against sqrt of the summed std::norm of its elements,
// followed by the `^` operator from multi::blas::operators for the powers -1 and 2.
BOOST_AUTO_TEST_CASE(multi_adaptor_multi_nrm2_complex){
	using complex = std::complex<double>;
	complex const I{0,1};
	multi::array<complex, 2> const cA = {
		{1.,  2. + 1.*I,  3.,  4.},
		{5.,  6. + 4.*I,  7.,  8.},
		{9., 10. - 3.*I, 11., 12.}
	};

	using multi::blas::nrm2;
	double out;
	BOOST_REQUIRE( nrm2(rotated(cA)[1], out) == std::sqrt( norm(cA[0][1]) + norm(cA[1][1]) + norm(cA[2][1]) ) );
	BOOST_REQUIRE( nrm2(rotated(cA)[1]) == std::sqrt( norm(cA[0][1]) + norm(cA[1][1]) + norm(cA[2][1]) ) );

	using namespace multi::blas::operators;
	// compared with a tolerance of 1e-15
	BOOST_TEST_REQUIRE( (rotated(cA)[1]^-1) == 1/std::sqrt(norm(cA[0][1]) + norm(cA[1][1]) + norm(cA[2][1])) , boost::test_tools::tolerance(1e-15) );
	BOOST_TEST_REQUIRE( (rotated(cA)[1]^2) == norm(cA[0][1]) + norm(cA[1][1]) + norm(cA[2][1]) , boost::test_tools::tolerance(1e-15) );
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
|
@ -1,283 +0,0 @@
|
|||
// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*-
|
||||
// © Alfredo A. Correa 2019-2021
|
||||
|
||||
#ifndef MULTI_ADAPTORS_BLAS_NUMERIC_HPP
|
||||
#define MULTI_ADAPTORS_BLAS_NUMERIC_HPP
|
||||
|
||||
#include "../../memory/pointer_traits.hpp"
|
||||
#include "../../array_ref.hpp"
|
||||
#include "../../complex.hpp"
|
||||
|
||||
#include "numeric/is_complex.hpp"
|
||||
|
||||
namespace boost{
|
||||
namespace multi::blas{
|
||||
|
||||
// Plain aggregate with a `real` member followed by an `imag` member; real() and imag() in this header
// reinterpret complex elements as Complex_<T> and project one of the two members out with member_cast.
template<class T> struct Complex_{T real; T imag;};
|
||||
|
||||
template<
|
||||
class A, typename Complex = typename std::decay_t<A>::element, typename T=typename Complex::value_type,
|
||||
class=std::enable_if_t<blas::numeric::is_complex_of<Complex, T>::value>
|
||||
>
|
||||
auto real(A&& a)
|
||||
->decltype(std::forward<A>(a).template reinterpret_array_cast<Complex_<T>>().template member_cast<T>(&Complex_<T>::real)){
|
||||
return std::forward<A>(a).template reinterpret_array_cast<Complex_<T>>().template member_cast<T>(&Complex_<T>::real);}
|
||||
|
||||
template<
|
||||
class A, class Complex = typename std::decay_t<A>::element_type, typename T=typename Complex::value_type,
|
||||
class=std::enable_if_t<blas::numeric::is_complex_of<Complex, T>::value>
|
||||
>
|
||||
auto imag(A&& a)
|
||||
->decltype(std::forward<A>(a).template reinterpret_array_cast<Complex_<T>>().template member_cast<T>(&Complex_<T>::imag)){
|
||||
return std::forward<A>(a).template reinterpret_array_cast<Complex_<T>>().template member_cast<T>(&Complex_<T>::imag);}
|
||||
|
||||
template<class ComplexArr, class ComplexElem = typename std::decay_t<ComplexArr>::element, typename RealElem = typename ComplexElem::value_type,
|
||||
class=std::enable_if_t<blas::numeric::is_complex_of<ComplexElem, RealElem>::value>
|
||||
>
|
||||
auto real_doubled(ComplexArr&& a){ // produces a real view of complex array with the last dimension duplicated and with interleaved real imaginary parts
|
||||
return std::forward<ComplexArr>(a).template reinterpret_array_cast<RealElem>(2).rotated().flatted().unrotated();
|
||||
}
|
||||
|
||||
template<class Ref, class Involution> class involuted;
|
||||
|
||||
template<class It, class F, class Reference = involuted<typename std::iterator_traits<It>::reference, F> > class involuter;
|
||||
|
||||
template<class Ref, class Involution>
|
||||
class involuted{
|
||||
protected:
|
||||
Ref r_; // [[no_unique_address]]
|
||||
Involution f_;
|
||||
public:
|
||||
using decay_type =std::decay_t<decltype(std::declval<Involution>()(std::declval<Ref>()))>;
|
||||
constexpr explicit involuted(Ref r, Involution f = {}) : r_{std::forward<Ref>(r)}, f_{f}{}
|
||||
involuted& operator=(involuted const& other)=delete;//{r_ = other.r_; return *this;}
|
||||
public:
|
||||
involuted(involuted const&) = delete;
|
||||
involuted(involuted&&) = default; // for C++14
|
||||
constexpr decay_type decay() const&{return f_(r_);}
|
||||
constexpr operator decay_type() &{return f_(r_);}
|
||||
constexpr operator decay_type() const&{return f_(r_);}
|
||||
constexpr operator decay_type() &&{return f_(r_);}
|
||||
constexpr auto operator*(decay_type const& other) const{return f_(r_)*other;}
|
||||
constexpr decltype(auto) operator&()&&{return involuter<decltype(&std::declval<Ref>()), Involution>{&r_, f_};}
|
||||
template<class DecayType>
|
||||
constexpr auto operator=(DecayType&& other)&
|
||||
->decltype(r_=f_(std::forward<DecayType>(other)), *this){
|
||||
return r_=f_(std::forward<DecayType>(other)), *this;}
|
||||
template<class DecayType>
|
||||
constexpr auto operator=(DecayType&& other)&&
|
||||
->decltype(r_=f_(std::forward<DecayType>(other)), *this){
|
||||
return r_=f_(std::forward<DecayType>(other)), *this;}
|
||||
template<class DecayType>
|
||||
constexpr auto operator==(DecayType&& other) const
|
||||
->decltype(this->operator decay_type()==other){
|
||||
return this->operator decay_type()==other;}
|
||||
template<class DecayType>
|
||||
constexpr auto operator!=(DecayType&& other) const
|
||||
->decltype(this->operator decay_type()!=other){
|
||||
return this->operator decay_type()!=other;}
|
||||
|
||||
friend constexpr auto operator==(decay_type const& other, involuted const& self){
|
||||
return other == self.operator decay_type();}
|
||||
|
||||
template<class DecayType, std::enable_if_t<not std::is_base_of<involuted, DecayType>{}, int> =0>
|
||||
friend constexpr auto operator==(DecayType&& other, involuted const& self){
|
||||
return other == self.operator decay_type();}
|
||||
template<class DecayType, std::enable_if_t<not std::is_base_of<involuted, DecayType>{}, int> =0>
|
||||
friend constexpr auto operator!=(DecayType&& other, involuted const& self){
|
||||
return other != self.operator decay_type();}
|
||||
// auto imag() const{return static_cast<decay_type>(*this).imag();}
|
||||
template<class Any> friend constexpr Any& operator<<(Any&& a, involuted const& self)
|
||||
// ->decltype(a << self.operator decay_type())
|
||||
{
|
||||
return a << self.operator decay_type();}
|
||||
constexpr auto conj() const&{return adl_conj(operator decay_type());}
|
||||
template<class T = void*>
|
||||
friend constexpr auto imag(involuted const& self, T = nullptr)
|
||||
->decltype(adl_imag(std::declval<decay_type>())){
|
||||
return adl_imag(self.operator decay_type());}
|
||||
};
|
||||
|
||||
#if defined(__cpp_deduction_guides)
|
||||
template<class T, class F> involuted(T&&, F)->involuted<T const, F>;
|
||||
//template<class T, class F> involuted(T&, F)->involuted<T&, F>;
|
||||
//template<class T, class F> involuted(T const&, F)->involuted<T const&, F>;
|
||||
#endif
|
||||
|
||||
//template<class It, class F>
|
||||
//class involuter;
|
||||
|
||||
template<class It, class F>
|
||||
auto get_allocator(involuter<It, F> const& s);
|
||||
|
||||
template<class It, class F>
|
||||
auto default_allocator_of(involuter<It, F> const& iv){
|
||||
return default_allocator_of(iv.it_);
|
||||
}
|
||||
|
||||
template<class It, class F, class Reference>
|
||||
class involuter{// : public std::iterator_traits<It>{
|
||||
It it_; // [[no_unique_address]]
|
||||
F f_;
|
||||
template<class, class, class> friend class involuter;
|
||||
public:
|
||||
using difference_type = typename std::iterator_traits<It>::difference_type;
|
||||
using value_type = typename std::iterator_traits<It>::value_type;
|
||||
using pointer = involuter<It, F>;//svoid; // typename std::iterator_traits<It>::pointer
|
||||
using reference = Reference;
|
||||
using iterator_category = typename std::iterator_traits<It>::iterator_category;
|
||||
using element_type = typename std::pointer_traits<It>::element_type;
|
||||
template<class U> using rebind = involuter<typename std::pointer_traits<It>::template rebind<U>, F>;
|
||||
|
||||
involuter() = default;
|
||||
constexpr explicit involuter(It it, F f = {}) : it_{std::move(it)}, f_{std::move(f)}{}
|
||||
involuter(involuter const& other) = default;
|
||||
// template<class Other, > constexpr involuter(Other const& other) : it_{other.it_}, f_{other.f_}{}
|
||||
|
||||
template<class Other, typename = decltype(_implicit_cast<It>(typename Other::underlying_type{}))>
|
||||
// cppcheck-suppress noExplicitConstructor
|
||||
constexpr involuter(Other const& o) : it_{o.it_}, f_{o.f_}{}
|
||||
template<class Other, typename = decltype(_explicit_cast<It>(typename Other::underlying_type{}))>
|
||||
constexpr explicit involuter(Other const& o, int = 0) : it_{o.it_}, f_{o.f_}{}
|
||||
|
||||
constexpr auto operator*() const {return reference{*it_, f_};}
|
||||
bool operator==(involuter const& o) const{return it_==o.it_;}
|
||||
bool operator!=(involuter const& o) const{return it_!=o.it_;}
|
||||
constexpr involuter& operator+=(typename involuter::difference_type n){it_+=n; return *this;}
|
||||
constexpr auto operator+(typename involuter::difference_type n) const{return involuter{it_+n, f_};}
|
||||
// decltype(auto) operator->() const{
|
||||
// return &const_cast<reference&>(reinterpret_cast<reference const&>(*this));
|
||||
// return reference{*it_, f_};
|
||||
// return involuter<typename std::iterator_traits<It>::pointer, F>{&*it_, f_};
|
||||
// }
|
||||
auto operator-(involuter const& other) const{return it_-other.it_;}
|
||||
explicit operator bool() const{return it_;}
|
||||
using underlying_type = It;
|
||||
friend constexpr underlying_type underlying(involuter const& self){return self.it_;}
|
||||
constexpr explicit operator It() const {return underlying(*this);}
|
||||
template<class Itt, class FF> friend auto get_allocator(involuter<Itt, FF> const&);
|
||||
friend auto default_allocator_of(involuter const& inv){
|
||||
using multi::default_allocator_of;
|
||||
return default_allocator_of(inv.it_);
|
||||
}
|
||||
using default_allocator_type = typename multi::pointer_traits<It>::default_allocator_type;
|
||||
friend auto get_allocator(involuter const& inv){
|
||||
using boost::multi::get_allocator;
|
||||
return get_allocator(inv.it_);
|
||||
}
|
||||
};
|
||||
|
||||
template<class It, class F>
|
||||
auto get_allocator(involuter<It, F> const& inv){
|
||||
using multi::get_allocator;
|
||||
return get_allocator(inv.it_);
|
||||
}
|
||||
|
||||
// Proxy reference whose access function is std::negate<>.
template<class Ref> using negated = involuted<Ref, std::negate<>>;
|
||||
// Iterator adaptor whose access function is std::negate<>.
template<class It> using negater = involuter<It, std::negate<>>;
|
||||
|
||||
#if 1
|
||||
struct conjugate{
|
||||
template<class T>
|
||||
decltype(auto) operator()(T&& a) const{
|
||||
// using std::conj; /*for doubles?*/
|
||||
// using std::conj;
|
||||
// std::complex<double> A = static_cast<std::complex<double>>(a);
|
||||
return multi::adl_conj(std::forward<T>(a)); // this is needed by icc
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
namespace detail{
|
||||
template<class Ref> struct conjugated : involuted<Ref, conjugate>{
|
||||
using involuted<Ref, conjugate>::involuted;
|
||||
template<class Other>
|
||||
conjugated(conjugated<Other> const& other) : involuted<Ref, conjugate>{static_cast<involuted<Ref, conjugate> const&>(other)}{}
|
||||
auto real() const{return static_cast<typename conjugated::decay_type>(*this).real();}
|
||||
auto imag() const{return static_cast<typename conjugated::decay_type>(*this).imag();}
|
||||
friend auto imag(conjugated const& self){return self.imag();}
|
||||
friend auto real(conjugated const& self){return self.real();}
|
||||
public:
|
||||
decltype(auto) operator->() const{return this;}
|
||||
// friend auto conj(conjugated const& self){
|
||||
// return conjugate{}(static_cast<typename conjugated::decay_type>(self));
|
||||
// }
|
||||
};
|
||||
}
|
||||
#endif
|
||||
|
||||
// Proxy reference whose access function is `conjugate`.
template<class Ref> using conjugated = involuted<Ref, conjugate>;
|
||||
|
||||
// Iterator adaptor whose access function is `conjugate`; the reference type is left at involuter's default.
template<class It> using conjugater = involuter<It, conjugate>;//, conjugated<typename std::iterator_traits<It>::reference> >;
|
||||
|
||||
template<class It> auto make_conjugater(It it){return conjugater<It>{it};}
|
||||
template<class It> It make_conjugater(conjugater<It> it){return underlying(it);}
|
||||
|
||||
/// Imaginary part of a conjugated reference, read from its decayed value.
template<class T>
auto imag(involuted<T, conjugate> const& inv){
	return inv.decay().imag();
}

/// Real part of a conjugated reference, read from its decayed value.
template<class T>
auto real(involuted<T, conjugate> const& inv){
	return inv.decay().real();
}
|
||||
|
||||
// Detection of a free function `imag(x)` reachable by unqualified lookup.
// The templated probe is viable only if `imag(value)` is well-formed;
// otherwise overload resolution falls back to the variadic probe.
template<class T>
auto has_imag_fun_aux(T const& value) -> decltype(imag(value), std::true_type{});

auto has_imag_fun_aux(...) -> std::false_type;

/// `std::true_type` when `imag(x)` is callable for an `x` of type `T`, `std::false_type` otherwise.
template<class T>
struct has_imag_fun : decltype(has_imag_fun_aux(std::declval<T>())){};
|
||||
|
||||
|
||||
// Detection of a member function `x.imag()`.
// The templated probe is viable only if `value.imag()` is well-formed;
// otherwise overload resolution falls back to the variadic probe.
template<class T>
auto has_imag_mem_aux(T const& value) -> decltype(value.imag(), std::true_type{});

auto has_imag_mem_aux(...) -> std::false_type;

/// `std::true_type` when `x.imag()` is callable on a const `x` of type `T`, `std::false_type` otherwise.
template<class T>
struct has_imag_mem : decltype(has_imag_mem_aux(std::declval<T>())){};
|
||||
|
||||
// True when `T` offers an imaginary part either as a free function (`has_imag_fun`) or as a member (`has_imag_mem`).
template<class T> struct has_imag : std::integral_constant<bool, (has_imag_fun<T>{} or has_imag_mem<T>{})>{};
|
||||
|
||||
template<class A = void> struct is_complex_array{
|
||||
template<class T> static auto _(T const& t) -> has_imag<T>;
|
||||
constexpr operator bool() const{return decltype(_(*base(std::declval<A>()))){};}
|
||||
template<class AA> constexpr auto operator()(AA&&){return _(*base(std::declval<A>()));}
|
||||
};
|
||||
|
||||
// In this header a value type counts as complex exactly when `has_imag<V>` holds.
template<class V> struct is_complex : has_imag<V>{};
|
||||
|
||||
template<class A = void> struct is_conjugated{
|
||||
template<class It> static std::true_type _(conjugater<It> a);
|
||||
static std::false_type _(... );
|
||||
constexpr operator bool() const{return decltype(_(base(std::declval<A>()))){};}
|
||||
template<class AA> constexpr auto operator()(AA&&){return _(base(std::declval<A>()));}
|
||||
};
|
||||
|
||||
template<class A, class D = std::decay_t<A>, typename Elem=typename D::element_type, typename Ptr=typename D::element_ptr,
|
||||
std::enable_if_t<not is_complex_array<A>{}, int> =0>
|
||||
A&& conj(A&& a){
|
||||
// return multi::static_array_cast<Elem, conjugater<Ptr>>(a);
|
||||
return std::forward<A>(a);
|
||||
}
|
||||
|
||||
template<class A, class D = std::decay_t<A>, typename Elem=typename D::element_type, typename Ptr=typename D::element_ptr,
|
||||
std::enable_if_t<not is_conjugated<A>{} and is_complex_array<A>{}, int> =0>
|
||||
decltype(auto) conj(A&& a){
|
||||
// return multi::static_array_cast<Elem, conjugater<Ptr>>(a);
|
||||
return std::forward<A>(a).template static_array_cast<Elem, conjugater<Ptr>>();
|
||||
}
|
||||
|
||||
template<class A, class D = std::decay_t<A>, typename Elem=typename D::element_type, typename Ptr=typename D::element_ptr::underlying_type,
|
||||
std::enable_if_t< is_conjugated<A>{}, int> =0>
|
||||
auto conj(A&& a)
|
||||
->decltype(std::forward<A>(a).template static_array_cast<Elem, Ptr>()){
|
||||
return std::forward<A>(a).template static_array_cast<Elem, Ptr>();}
|
||||
// return multi::static_array_cast<Elem, Ptr>(a);}
|
||||
// return multi::static_array_cast<Elem, Ptr>(a);}
|
||||
|
||||
}
|
||||
|
||||
template<class It, class F, class Reference>
|
||||
auto default_allocator_of(multi::blas::involuter<It, F, Reference> it){
|
||||
return multi::default_allocator_of(underlying(it));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
namespace std{
|
||||
// template<> struct is_convertible<boost::multi::blas::Complex_<double>*, std::complex<double>*> : std::true_type{};
|
||||
// template<class T> struct is_convertible<boost::multi::blas::Complex_<double>*, T*> : boost::multi::blas::numeric::is_complex_of<T, double>{};
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
@ -1,93 +0,0 @@
|
|||
#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*-
|
||||
$CXXX $CXXFLAGS $0 -o $0x -lboost_unit_test_framework&&$0x&&rm $0x;exit
|
||||
#endif
|
||||
// © Alfredo A. Correa 2020
|
||||
|
||||
#ifndef MULTI_ADAPTORS_BLAS_NUMERIC_IS_COMPLEX_HPP
|
||||
#define MULTI_ADAPTORS_BLAS_NUMERIC_IS_COMPLEX_HPP
|
||||
|
||||
#include<complex>
|
||||
#include<type_traits>
|
||||
|
||||
namespace boost{
|
||||
namespace multi{
|
||||
namespace blas{
|
||||
namespace numeric{
|
||||
|
||||
using std::true_type;
|
||||
using std::false_type;
|
||||
|
||||
// Detection of a free function `real(x)` reachable by unqualified lookup.
// The templated probe is viable only when `real(value)` is well-formed;
// the variadic probe is the fallback.
template<class T>
auto has_real_fun_aux(T const& value) -> decltype(real(value), std::true_type{});

auto has_real_fun_aux(...) -> std::false_type;

/// `std::true_type` when `real(x)` is callable for an `x` of type `T`, `std::false_type` otherwise.
template<class T>
struct has_real_fun : decltype(has_real_fun_aux(std::declval<T>())){};

/// Value shortcut for `has_real_fun<T>::value`.
template<class T>
constexpr bool has_real_fun_v = has_real_fun<T>::value;
|
||||
|
||||
// Detection of a member function `x.real()`.
// The templated probe is viable only when `value.real()` is well-formed;
// the variadic probe is the fallback.
template<class T>
auto has_real_aux(T const& value) -> decltype(value.real(), std::true_type{});

auto has_real_aux(...) -> std::false_type;

/// `std::true_type` when `x.real()` is callable on a const `x` of type `T`, `std::false_type` otherwise.
template<class T>
struct has_real : decltype(has_real_aux(std::declval<T>())){};

/// Value shortcut for `has_real<T>::value`.
template<class T>
constexpr bool has_real_v = has_real<T>::value;
|
||||
|
||||
// Detection of a free function `imag(x)` reachable by unqualified lookup.
// The templated probe is viable only when `imag(value)` is well-formed;
// the variadic probe is the fallback.
template<class T>
auto has_imag_fun_aux(T const& value) -> decltype(imag(value), std::true_type{});

auto has_imag_fun_aux(...) -> std::false_type;

/// `std::true_type` when `imag(x)` is callable for an `x` of type `T`, `std::false_type` otherwise.
template<class T>
struct has_imag_fun : decltype(has_imag_fun_aux(std::declval<T>())){};

/// Value shortcut for `has_imag_fun<T>::value`.
template<class T>
constexpr bool has_imag_fun_v = has_imag_fun<T>::value;
|
||||
|
||||
// Detection of a member function `x.imag()`.
// The templated probe is viable only when `value.imag()` is well-formed;
// the variadic probe is the fallback.
template<class T>
auto has_imag_aux(T const& value) -> decltype(value.imag(), std::true_type{});

auto has_imag_aux(...) -> std::false_type;

/// `std::true_type` when `x.imag()` is callable on a const `x` of type `T`, `std::false_type` otherwise.
template<class T>
struct has_imag : decltype(has_imag_aux(std::declval<T>())){};

/// Value shortcut for `has_imag<T>::value`.
template<class T>
constexpr bool has_imag_v = has_imag<T>::value;
|
||||
|
||||
template<class T> struct is_complex : std::integral_constant<bool,
|
||||
(has_real_v<T> or has_real_fun_v<T>) and (has_imag_v<T> or has_imag_fun_v<T>)
|
||||
>{};
|
||||
|
||||
// Probe pair: when `value.real()` is well-formed the first overload reports
// whether its type is exactly `V`; otherwise the variadic fallback says "no".
template<class V, class T>
auto real_is_aux(T const& value) -> std::is_same<decltype(value.real()), V>;

template<class>
auto real_is_aux(...) -> std::false_type;

/// True when `T` has a member `real()` whose return type is exactly `V`.
template<class T, class V>
struct real_is : decltype(real_is_aux<V>(std::declval<T>())){};
|
||||
|
||||
// Probe pair: when `value.imag()` is well-formed the first overload reports
// whether its type is exactly `V`; otherwise the variadic fallback says "no".
template<class V, class T>
auto imag_is_aux(T const& value) -> std::is_same<decltype(value.imag()), V>;

template<class>
auto imag_is_aux(...) -> std::false_type;

/// True when `T` has a member `imag()` whose return type is exactly `V`.
template<class T, class V>
struct imag_is : decltype(imag_is_aux<V>(std::declval<T>())){};
|
||||
|
||||
// True when both member `real()` and member `imag()` of `T` return exactly `V` (see `real_is` / `imag_is`).
template<class T, class V> struct is_complex_of : std::integral_constant<bool, real_is<T, V>::value and imag_is<T, V>::value>{};
|
||||
|
||||
}}}}
|
||||
|
||||
#if not __INCLUDE_LEVEL__
|
||||
|
||||
#define BOOST_TEST_MODULE "C++ Unit Tests for Multi BLAS numeric is_complex"
|
||||
#define BOOST_TEST_DYN_LINK
|
||||
#include<boost/test/unit_test.hpp>
|
||||
|
||||
#include<thrust/complex.h>
|
||||
|
||||
#include "../../../complex.hpp"
|
||||
#include "boost/mpl/list.hpp"
|
||||
|
||||
namespace multi = boost::multi;
|
||||
|
||||
BOOST_AUTO_TEST_CASE(multi_blas_is_complex){
|
||||
namespace numeric = multi::blas::numeric;
|
||||
|
||||
boost::mpl::for_each<boost::mpl::list<double, float, long double>>([](auto f){
|
||||
using F = decltype(f);
|
||||
static_assert( not numeric::is_complex<F>{}, "!");
|
||||
|
||||
static_assert( numeric::is_complex<std::complex<F>>{}, "!");
|
||||
static_assert( numeric::is_complex<thrust::complex<F>>{}, "!");
|
||||
static_assert( numeric::is_complex<multi::complex<F>>{}, "!");
|
||||
|
||||
static_assert( numeric::is_complex_of<std::complex<F>, F>{}, "!");
|
||||
static_assert( not numeric::is_complex_of<F, F>{}, "!");
|
||||
});
|
||||
|
||||
|
||||
static_assert( not numeric::is_complex_of<std::complex<double>, float>{}, "!");
|
||||
static_assert( not numeric::is_complex_of<double, float>{}, "!");
|
||||
|
||||
static_assert( numeric::is_complex<std::complex<double> const&>{}, "!");
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
|
@ -1,141 +0,0 @@
|
|||
#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*-
|
||||
$CXXX $CXXFLAGS $0 -o $0x `pkg-config --libs blas` -lboost_unit_test_framework&&$0x&&rm $0x;exit
|
||||
#endif
|
||||
// © Alfredo A. Correa 2019-2020
|
||||
|
||||
#ifndef MULTI_ADAPTORS_BLAS_OPERATIONS_HPP
|
||||
#define MULTI_ADAPTORS_BLAS_OPERATIONS_HPP
|
||||
|
||||
#include "../blas/numeric.hpp"
|
||||
|
||||
namespace boost{
|
||||
namespace multi{
|
||||
namespace blas{
|
||||
|
||||
// Transposition of a const array, expressed as `rotated(m)`.
template<class M> decltype(auto) transposed(M const& m){return rotated(m);}
|
||||
|
||||
template<class A, typename D=std::decay_t<A>, typename E=typename D::element_type>
|
||||
decltype(auto) conjugated_transposed(A&& a){
|
||||
return transposed(blas::conj(std::forward<A>(a)));
|
||||
}
|
||||
|
||||
/// No-op operation: perfectly forwards its argument back to the caller.
template<class A>
auto identity(A&& a) -> decltype(auto){
	return std::forward<A>(a);
}
|
||||
|
||||
/// Tag-dispatched form, `std::true_type`: conjugate *and* transpose.
template<class A>
auto hermitized(A&& a, std::true_type) -> decltype(auto){
	return conjugated_transposed(std::forward<A>(a));
}

/// Tag-dispatched form, `std::false_type`: transpose only.
template<class A>
auto hermitized(A&& a, std::false_type) -> decltype(auto){
	return transposed(std::forward<A>(a));
}

/// Untagged form: always goes through `conjugated_transposed`.
template<class A>
auto hermitized(A&& a) -> decltype(auto){
	return conjugated_transposed(std::forward<A>(a));
}
|
||||
|
||||
/// Forwarding overload of `transposed`: rotates the array while preserving
/// the value category of the argument.
template<class A>
auto transposed(A&& a) -> decltype(auto){
	return rotated(std::forward<A>(a));
}
|
||||
|
||||
//template<class A, std::enable_if_t<std::decay_t<A>::dimensionality == 2, int> =0>
|
||||
//decltype(auto) H(A&& a){return hermitized(std::forward<A>(a));}
|
||||
|
||||
namespace operators{
|
||||
|
||||
// `H` is a function object, so it can be used both as `H(a)` and as the
// right-hand side of `a ^ H`.
MAYBE_UNUSED constexpr static struct {
	// 1D arguments: plain conjugation; kept for compatibility but deprecated in favour of blas::C.
	template<class A, std::enable_if_t<std::decay_t<A>::dimensionality == 1, int> =0>
	[[deprecated("use blas::C instead of blas::H for conjugated vectors to avoid confusions")]]
	decltype(auto) operator()(A&& a) const{
		return blas::conj(std::forward<A>(a));
	}

	// 2D arguments: hermitian conjugate.
	template<class A, std::enable_if_t<std::decay_t<A>::dimensionality == 2, int> =0>
	decltype(auto) operator()(A&& a) const{
		return hermitized(std::forward<A>(a));
	}
} H;
|
||||
|
||||
/// `a ^ op` is sugar for `op(a)` (e.g. `A ^ H`).
/// Removed from overload resolution when `op` is not callable with `a`.
template<class A, class Op>
auto operator^(A&& a, Op op)
->decltype(op(std::forward<A>(a)))
{
	return op(std::forward<A>(a));
}
|
||||
}
|
||||
|
||||
using operators::H;
|
||||
|
||||
template<class A, std::enable_if_t<std::decay_t<A>::dimensionality == 1, int> =0>
|
||||
decltype(auto) C(A&& a){return blas::conj(std::forward<A>(a));}
|
||||
template<class A, std::enable_if_t<std::decay_t<A>::dimensionality == 2, int> =0>
|
||||
decltype(auto) C(A&& a){return hermitized(std::forward<A>(a));}
|
||||
|
||||
namespace operators{
|
||||
|
||||
template<class A>
|
||||
auto operator*(A&& a)
|
||||
->decltype(blas::conj(std::forward<A>(a))){
|
||||
return blas::conj(std::forward<A>(a));}
|
||||
|
||||
}
|
||||
|
||||
//template<class A, std::enable_if_t<std::decay_t<A>::dimensionality == 1, int> =0>
|
||||
//[[deprecated("use blas::C instead of blas::H for conjugated vectors to avoid confusions")]]
|
||||
//decltype(auto) H(A&& a){return blas::conj(std::forward<A>(a));}
|
||||
|
||||
// Short spelling of `transposed`.
template<class A> decltype(auto) T(A&& a){return transposed(std::forward<A>(a));}
|
||||
// Short spelling of `identity` (no operation applied).
template<class A> decltype(auto) N(A&& a){return identity (std::forward<A>(a));}
|
||||
|
||||
}}
|
||||
|
||||
}
|
||||
|
||||
#if not __INCLUDE_LEVEL__
|
||||
|
||||
#define BOOST_TEST_MODULE "C++ Unit Tests for Multi blas operations"
|
||||
#define BOOST_TEST_DYN_LINK
|
||||
#include<boost/test/unit_test.hpp>
|
||||
|
||||
#include "../../array.hpp"
|
||||
|
||||
using std::cout;
|
||||
template<class M> decltype(auto) print(M const& C){
|
||||
using boost::multi::size;
|
||||
for(int i = 0; i != size(C); ++i){
|
||||
for(int j = 0; j != size(C[i]); ++j) cout<< C[i][j] <<' ';
|
||||
cout<<std::endl;
|
||||
}
|
||||
return cout<<"---"<<std::endl;
|
||||
}
|
||||
|
||||
namespace multi = boost::multi;
|
||||
|
||||
BOOST_AUTO_TEST_CASE(m){
|
||||
using complex = std::complex<double>; constexpr complex I{0., 1.};
|
||||
|
||||
namespace blas = multi::blas;
|
||||
multi::array<complex, 2> const A = {
|
||||
{1. - 3.*I, 6. + 2.*I},
|
||||
{8. + 2.*I, 2. + 4.*I},
|
||||
{2. - 1.*I, 1. + 1.*I}
|
||||
};
|
||||
using blas::hermitized;
|
||||
BOOST_REQUIRE( hermitized(A)[0][1] == conj(A[1][0]) );
|
||||
|
||||
static_assert( blas::is_conjugated<decltype(blas::H(A))>{}, "!" );
|
||||
BOOST_REQUIRE( blas::H(A)[0][1] == conj(A[1][0]) );
|
||||
|
||||
using blas::transposed;
|
||||
BOOST_REQUIRE( transposed(A)[0][1] == A[1][0] );
|
||||
|
||||
static_assert( not blas::is_conjugated<decltype(blas::T(A))>{}, "!" );
|
||||
BOOST_REQUIRE( blas::T(A)[0][1] == A[1][0] );
|
||||
|
||||
using namespace blas::operators;
|
||||
BOOST_REQUIRE( (*~A)[0][1] == conj(A[1][0]) );
|
||||
BOOST_REQUIRE( (~*A)[0][1] == conj(A[1][0]) );
|
||||
BOOST_REQUIRE( ( ~A)[0][1] == A[1][0] );
|
||||
BOOST_REQUIRE( ( *A)[0][1] == conj(A[0][1]) );
|
||||
|
||||
}
|
||||
|
||||
// A 2D array of std::complex<double> must be recognised as a complex array.
BOOST_AUTO_TEST_CASE(is_complex_array_test){
	using complex_matrix = multi::array<std::complex<double>, 2>;
	static_assert(multi::blas::is_complex_array<complex_matrix>{}, "!");
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
|
@ -1,47 +0,0 @@
|
|||
// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*-
|
||||
// © Alfredo A. Correa 2019-2020
|
||||
|
||||
#ifndef MULTI_ADAPTORS_BLAS_SCAL_HPP
|
||||
#define MULTI_ADAPTORS_BLAS_SCAL_HPP
|
||||
|
||||
#include "../blas/core.hpp"
|
||||
|
||||
namespace boost{
|
||||
namespace multi::blas{
|
||||
|
||||
using core::scal;
|
||||
|
||||
/// Counted form: hands `count`, the address of `a`, and the base pointer and
/// stride of `first` to the low-level `scal` routine.
/// Removed from overload resolution when that low-level call is ill-formed.
template<class A, class It, class Size>
auto scal_n(A const& a, It first, Size count)
->decltype(scal(count, &a, first.base(), first.stride()), void())
{
	scal(count, &a, first.base(), first.stride());
}
|
||||
|
||||
template<class A, class It1D>
|
||||
auto scal(A const& a, It1D first, It1D last)
|
||||
->decltype(blas::scal_n(a, first, last - first)){
|
||||
return blas::scal_n(a, first, last - first);}
|
||||
|
||||
template<class A, class X1D> // don't do this: ", typename Elem = typename X1D::element_type>"
|
||||
auto scal(A const& a, X1D&& x)
|
||||
->decltype(blas::scal(a, x.begin(), x.end()), std::forward<X1D>(x)){
|
||||
return blas::scal(a, x.begin(), x.end()), std::forward<X1D>(x);}
|
||||
|
||||
/// Deferred scaling: stores a scalar so that `x *= scal(alpha)` ends up
/// calling `scal(alpha, x)`.
template<class A>
class scal_range{
public:
	using scalar_type = A;

	explicit scal_range(A const& alpha) : alpha_{alpha}{}

	/// Applies the stored scalar to `x` through `scal` and forwards the result.
	template<class X1D>
	friend auto operator*=(X1D&& x, scal_range const& self)
	->decltype(std::forward<X1D>(scal(std::declval<scalar_type const&>(), x)))
	{
		return std::forward<X1D>(scal(self.alpha_, x));
	}

private:
	A alpha_; // the scalar factor, copied at construction
};
|
||||
|
||||
// One-argument form: builds a `scal_range` holding `a`, for use as the right-hand side of `x *= scal(a)`.
template<class A> auto scal(A const& a){return scal_range<A>{a};}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
@ -1,51 +0,0 @@
|
|||
#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*-
|
||||
$CXXX $CXXFLAGS $0 -o $0.$X `pkg-config --libs blas` -lboost_unit_test_framework&&$0.$X&&rm $0.$X;exit
|
||||
#endif
|
||||
// © Alfredo A. Correa 2019-2020
|
||||
|
||||
#ifndef MULTI_ADAPTORS_BLAS_SIDE_HPP
|
||||
#define MULTI_ADAPTORS_BLAS_SIDE_HPP
|
||||
|
||||
#include "../blas/core.hpp"
|
||||
#include "../blas/operations.hpp"
|
||||
#include "../../array_ref.hpp"
|
||||
|
||||
namespace boost{
|
||||
namespace multi{
|
||||
namespace blas{
|
||||
|
||||
//enum class SIDE : char{L='L', R='R'};
|
||||
|
||||
// Side selector; the enumerators carry the characters 'L' and 'R'.
enum side : char{
	left  = 'L',
	right = 'R'//,
//	pre_multiply = 'R',
//	post_multiply = 'L'
};

// Returns the opposite side (left <-> right).
// Declared `inline` because this is a non-template function defined in a header:
// without it, every translation unit including the header emits its own external
// definition and the program fails to link (one-definition rule).
inline side swap(side s){
	switch(s){
		case side::left:  return side::right;
		case side::right: return side::left;
	} __builtin_unreachable(); // both enumerators are handled above
}
|
||||
|
||||
}}}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#if defined(__INCLUDE_LEVEL__) and not __INCLUDE_LEVEL__
|
||||
|
||||
#define BOOST_TEST_MODULE "C++ Unit Tests for Multi BLAS adaptors side"
|
||||
#define BOOST_TEST_DYN_LINK
|
||||
#include<boost/test/unit_test.hpp>
|
||||
|
||||
// Exercises blas::swap(side): it must map each side to the other one and be
// its own inverse. (The test previously returned without checking anything.)
BOOST_AUTO_TEST_CASE(multi_adaptors_blas_side){
	namespace blas = boost::multi::blas;
	BOOST_REQUIRE( blas::swap(blas::side::left ) == blas::side::right );
	BOOST_REQUIRE( blas::swap(blas::side::right) == blas::side::left  );
	BOOST_REQUIRE( blas::swap(blas::swap(blas::side::left)) == blas::side::left );
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
|
@ -1,60 +0,0 @@
|
|||
#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*-
|
||||
$CXX $0 -o $0x `pkg-config --libs blas` -lboost_unit_test_framework&&$0x&&rm $0x;exit
|
||||
#endif
|
||||
// © Alfredo A. Correa 2019-2020
|
||||
#ifndef MULTI_ADAPTORS_BLAS_SWAP_HPP
|
||||
#define MULTI_ADAPTORS_BLAS_SWAP_HPP
|
||||
|
||||
#include "../blas/core.hpp"
|
||||
|
||||
namespace boost{
|
||||
namespace multi{
|
||||
namespace blas{
|
||||
|
||||
/// Exchanges the elements of the strided range [first, last) with the range
/// of the same length starting at `first2`, by forwarding the element count,
/// base pointers and strides to the five-argument `swap` overload.
///
/// `first` and `last` delimit the *same* range and therefore share the type
/// `It1`; `It2` is the (possibly different) iterator type of the second range.
/// Declaring `last` as `It2` made template deduction fail whenever the two
/// ranges had different iterator types.
///
/// @return iterator one past the last exchanged element of the second range.
template<class It1, class It2>
It2 swap(It1 first, It1 last, It2 first2){
	assert(stride(first) == stride(last)); // both ends must walk the range with the same step
	using std::distance;
	auto d = distance(first, last);
	swap(d, base(first), stride(first), base(first2), stride(first2));
	return first2 + d;
}
|
||||
|
||||
/// Range form: exchanges the contents of two 1D ranges of equal size through
/// the iterator overload, then hands `y` back (perfectly forwarded).
/// Both ranges are required (by assertion) to have zero offset.
template<class X1D, class Y1D>
Y1D&& swap(X1D&& x, Y1D&& y){
	assert( size(x) == size(y) );
	assert( offset(x) == 0 && offset(y) == 0 );
	swap( begin(x), end(x), begin(y) );
	return std::forward<Y1D>(y);
}
|
||||
|
||||
}}}
|
||||
|
||||
#if not __INCLUDE_LEVEL__ // _TEST_MULTI_ADAPTORS_BLAS_SWAP
|
||||
|
||||
#define BOOST_TEST_MODULE "C++ Unit Tests for Multi BLAS swap"
|
||||
#define BOOST_TEST_DYN_LINK
|
||||
#include<boost/test/unit_test.hpp>
|
||||
|
||||
#include "../../array.hpp"
|
||||
#include "../../utility.hpp"
|
||||
|
||||
#include "../blas/dot.hpp"
|
||||
|
||||
namespace multi = boost::multi;
|
||||
|
||||
// Swapping columns 1 and 3 of a 3x4 matrix, addressed through rotated(A),
// must be visible in the original array.
BOOST_AUTO_TEST_CASE(multi_blas_swap, *boost::unit_test::tolerance(0.00001) ){
	multi::array<double, 2> A = {
		{1.,  2.,  3.,  4.},
		{5.,  6.,  7.,  8.},
		{9., 10., 11., 12.}
	};
	// called fully qualified: an unqualified call can become ambiguous with (friend) multi::swap
	multi::blas::swap(rotated(A)[1], rotated(A)[3]);
	BOOST_REQUIRE( A[0][1] == 4. );
	BOOST_REQUIRE( A[0][3] == 2. );
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
|
@ -1,444 +0,0 @@
|
|||
#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*-
|
||||
$CXX $0 -o $0x -lboost_unit_test_framework -lboost_timer \
|
||||
`pkg-config --libs blas` \
|
||||
`#-Wl,-rpath,/usr/local/Wolfram/Mathematica/12.0/SystemFiles/Libraries/Linux-x86-64 -L/usr/local/Wolfram/Mathematica/12.0/SystemFiles/Libraries/Linux-x86-64 -lmkl_intel_ilp64 -lmkl_intel_thread -lmkl_core -liomp5` \
|
||||
&&$0x&&rm $0x;exit
|
||||
#endif
|
||||
// © Alfredo A. Correa 2019-2020
|
||||
|
||||
#ifndef MULTI_ADAPTORS_BLAS_SYRK_HPP
|
||||
#define MULTI_ADAPTORS_BLAS_SYRK_HPP
|
||||
|
||||
#include "../blas/core.hpp"
|
||||
|
||||
#include "../blas/numeric.hpp"
|
||||
#include "../blas/filling.hpp"
|
||||
|
||||
namespace boost{
|
||||
namespace multi{namespace blas{
|
||||
|
||||
using core::syrk;
|
||||
|
||||
template<typename AA, typename BB, class A2D, class C2D>
|
||||
auto syrk(filling c_side, AA alpha, A2D const& a, BB beta, C2D&& c)
|
||||
->decltype(syrk('\0', '\0', size(c), size(a), alpha, base(a), stride(rotated(a)), beta, base(c), stride(c)), std::forward<C2D>(c)){
|
||||
assert( size(c) == size(rotated(c)) );
|
||||
if(stride(a)==1)
|
||||
if(stride(c)==1) syrk(flip(c_side)==filling::upper?'L':'U', 'N', size(c), size(a ), alpha, base(a), stride(rotated(a)), beta, base(c), stride(rotated(c)));
|
||||
else syrk(c_side ==filling::upper?'L':'U', 'N', size(c), size(rotated(a)), alpha, base(a), stride(rotated(a)), beta, base(c), stride( c ));
|
||||
else
|
||||
if(stride(c)==1) syrk(flip(c_side)==filling::upper?'L':'U', 'T', size(c), size(rotated(a)), alpha, base(a), stride(a), beta, base(c), stride(rotated(c)));
|
||||
else syrk(c_side ==filling::upper?'L':'U', 'T', size(c), size(rotated(a)), alpha, base(a), stride(a), beta, base(c), stride( c ));
|
||||
return std::forward<C2D>(c);
|
||||
}
|
||||
|
||||
template<typename AA, class A2D, class C2D>
|
||||
auto syrk(filling c_side, AA alpha, A2D const& a, C2D&& c)
|
||||
->decltype(syrk(c_side, alpha, a, 0., std::forward<C2D>(c))){
|
||||
return syrk(c_side, alpha, a, 0., std::forward<C2D>(c));}
|
||||
|
||||
template<typename AA, class A2D, class C2D>
|
||||
auto syrk(AA alpha, A2D const& a, C2D&& c)
|
||||
->decltype(syrk(filling::upper, alpha, a, syrk(filling::lower, alpha, a, std::forward<C2D>(c)))){
|
||||
return syrk(filling::upper, alpha, a, syrk(filling::lower, alpha, a, std::forward<C2D>(c)));}
|
||||
|
||||
/// Value-returning form: allocates a `size(a)` x `size(a)` result of type
/// `Ret` with the allocator of `a`, fills it through the in-place overload
/// and returns it by value.
template<typename AA, class A2D, class Ret = typename A2D::decay_type>
NODISCARD("because input argument is const") // this decay in the return type is important
auto syrk(AA alpha, A2D const& a)
->std::decay_t<decltype(syrk(alpha, a, Ret({size(a), size(a)}, get_allocator(a))))>
{
	Ret result({size(a), size(a)}, get_allocator(a));
	return syrk(alpha, a, std::move(result));
}
|
||||
|
||||
template<class A2D>
|
||||
NODISCARD("")
|
||||
auto syrk(A2D const& A)
|
||||
->decltype(syrk(1., A)){
|
||||
return syrk(1., A);}
|
||||
|
||||
}}}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#if not __INCLUDE_LEVEL__ // _TEST_MULTI_ADAPTORS_BLAS_SYRK
|
||||
|
||||
#define BOOST_TEST_MODULE "C++ Unit Tests for Multi cuBLAS syrk"
|
||||
#define BOOST_TEST_DYN_LINK
|
||||
#include<boost/test/unit_test.hpp>
|
||||
|
||||
#include "../blas/gemm.hpp"
|
||||
|
||||
#include "../../array.hpp"
|
||||
#include "../../utility.hpp"
|
||||
|
||||
#include <boost/timer/timer.hpp>
|
||||
|
||||
#include<complex>
|
||||
#include<cassert>
|
||||
#include<iostream>
|
||||
#include<numeric>
|
||||
#include<algorithm>
|
||||
|
||||
//#include<catch.hpp>
|
||||
|
||||
using std::cout;
|
||||
using std::cerr;
|
||||
|
||||
namespace multi = boost::multi;
|
||||
|
||||
template<class M> decltype(auto) print(M const& C){
|
||||
using boost::multi::size;
|
||||
for(int i = 0; i != size(C); ++i){
|
||||
for(int j = 0; j != size(C[i]); ++j)
|
||||
std::cout << C[i][j] << ' ';
|
||||
std::cout << std::endl;
|
||||
}
|
||||
return std::cout << std::endl;
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(multi_blas_syrk_real){
|
||||
multi::array<double, 2> const a = {
|
||||
{ 1., 3., 4.},
|
||||
{ 9., 7., 1.}
|
||||
};
|
||||
{
|
||||
multi::array<double, 2> c({3, 3}, 9999.);
|
||||
namespace blas = multi::blas;
|
||||
using blas::filling;
|
||||
using blas::transposed;
|
||||
syrk(filling::lower, 1., transposed(a), 0., c); // c⸆=c=a⸆a=(a⸆a)⸆, `c` in lower triangular
|
||||
BOOST_REQUIRE( c[2][1] == 19. );
|
||||
BOOST_REQUIRE( c[1][2] == 9999. );
|
||||
}
|
||||
{
|
||||
multi::array<double, 2> c({3, 3}, 9999.);
|
||||
namespace blas = multi::blas;
|
||||
using blas::filling;
|
||||
using blas::transposed;
|
||||
syrk(filling::upper, 1., transposed(a), 0., c); // c⸆=c=a⸆a=(a⸆a)⸆, `c` in lower triangular
|
||||
BOOST_REQUIRE( c[1][2] == 19. );
|
||||
BOOST_REQUIRE( c[2][1] == 9999. );
|
||||
}
|
||||
{
|
||||
multi::array<double, 2> c({2, 2}, 9999.);
|
||||
namespace blas = multi::blas;
|
||||
using blas::filling;
|
||||
using blas::syrk;
|
||||
syrk(filling::lower, 1., a, 0., c); // c⸆=c=a⸆a=(a⸆a)⸆, `c` in lower triangular
|
||||
BOOST_REQUIRE( c[1][0] == 34. );
|
||||
BOOST_REQUIRE( c[0][1] == 9999. );
|
||||
}
|
||||
{
|
||||
multi::array<double, 2> c({2, 2}, 9999.);
|
||||
namespace blas = multi::blas;
|
||||
using blas::filling;
|
||||
syrk(filling::upper, 1., a, 0., c); // c⸆=c=a⸆a=(a⸆a)⸆, a⸆a, `c` in lower triangular
|
||||
BOOST_REQUIRE( c[0][1] == 34. );
|
||||
BOOST_REQUIRE( c[1][0] == 9999. );
|
||||
}
|
||||
{
|
||||
multi::array<double, 2> c({2, 2}, 9999.);
|
||||
namespace blas = multi::blas;
|
||||
using blas::filling;
|
||||
syrk(filling::upper, 1., a, 0., c); // c⸆=c=a⸆a=(a⸆a)⸆, a⸆a, `c` in lower triangular
|
||||
BOOST_REQUIRE( c[0][1] == 34. );
|
||||
BOOST_REQUIRE( c[1][0] == 9999. );
|
||||
}
|
||||
}
|
||||
|
||||
// Degenerate shape: a single-row `a` and a 1x1 `c`. The test only requires
// that the call goes through; no value is checked.
BOOST_AUTO_TEST_CASE(multi_blas_syrk_real_special_case){
	namespace blas = multi::blas;
	using blas::filling;

	multi::array<double, 2> const a = {
		{ 1., 3., 4.},
	};
	multi::array<double, 2> c({1, 1}, 9999.);

	syrk(filling::lower, 1., a, 0., c); // c⸆=c=aa⸆=(aa⸆)⸆, `c` in lower triangular
	//BOOST_REQUIRE( c[1][0] == 34. );
	//BOOST_REQUIRE( c[0][1] == 9999. );
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(multi_blas_syrk_complex_real_case){
|
||||
using complex = std::complex<double>;
|
||||
multi::array<complex, 2> const a = {
|
||||
{ 1., 3., 4.},
|
||||
{ 9., 7., 1.}
|
||||
};
|
||||
{
|
||||
multi::array<complex, 2> c({3, 3}, 9999.);
|
||||
namespace blas = multi::blas;
|
||||
using blas::filling;
|
||||
using blas::transposed;
|
||||
syrk(filling::lower, 1., transposed(a), 0., c); // c⸆=c=a⸆a=(a⸆a)⸆, `c` in lower triangular
|
||||
BOOST_REQUIRE( c[2][1] == 19. );
|
||||
BOOST_REQUIRE( c[1][2] == 9999. );
|
||||
}
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(multi_blas_syrk_complex){
|
||||
using complex = std::complex<double>;
|
||||
constexpr auto const I = complex{0., 1.};
|
||||
multi::array<complex, 2> const a = {
|
||||
{ 1. + 3.*I, 3.- 2.*I, 4.+ 1.*I},
|
||||
{ 9. + 1.*I, 7.- 8.*I, 1.- 3.*I}
|
||||
};
|
||||
{
|
||||
multi::array<complex, 2> c({3, 3}, 9999.);
|
||||
namespace blas = multi::blas;
|
||||
using blas::filling;
|
||||
using blas::transposed;
|
||||
syrk(filling::lower, 1., transposed(a), 0., c); // c⸆=c=a⸆a=(a⸆a)⸆, `c` in lower triangular
|
||||
BOOST_REQUIRE( c[2][1] == complex(-3., -34.) );
|
||||
BOOST_REQUIRE( c[1][2] == 9999. );
|
||||
}
|
||||
{
|
||||
multi::array<complex, 2> c({2, 2}, 9999.);
|
||||
namespace blas = multi::blas;
|
||||
using blas::filling;
|
||||
syrk(filling::lower, 1., a, 0., c); // c⸆=c=aa⸆=(aa⸆)⸆, `c` in lower triangular
|
||||
BOOST_REQUIRE( c[1][0] == complex(18., -21.) );
|
||||
BOOST_REQUIRE( c[0][1] == 9999. );
|
||||
}
|
||||
{
|
||||
multi::array<complex, 2> c({2, 2}, 9999.);
|
||||
namespace blas = multi::blas;
|
||||
using blas::filling;
|
||||
syrk(filling::upper, 1., a, 0., c); // c⸆=c=aa⸆=(aa⸆)⸆, `c` in upper triangular
|
||||
BOOST_REQUIRE( c[0][1] == complex(18., -21.) );
|
||||
BOOST_REQUIRE( c[1][0] == 9999. );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
BOOST_AUTO_TEST_CASE(multi_blas_syrk_automatic_operation_complex){
|
||||
using complex = std::complex<double>;
|
||||
constexpr auto const I = complex{0., 1.};
|
||||
multi::array<complex, 2> const a = {
|
||||
{ 1. + 3.*I, 3.- 2.*I, 4.+ 1.*I},
|
||||
{ 9. + 1.*I, 7.- 8.*I, 1.- 3.*I}
|
||||
};
|
||||
{
|
||||
multi::array<complex, 2> c({2, 2}, 9999.);
|
||||
using multi::blas::filling;
|
||||
syrk(filling::lower, 1., a, 0., c); // c⸆=c=aa⸆=(aa⸆)⸆, `c` in lower triangular
|
||||
BOOST_REQUIRE( c[1][0]==complex(18., -21.) );
|
||||
BOOST_REQUIRE( c[0][1]==9999. );
|
||||
}
|
||||
{
|
||||
multi::array<complex, 2> c({3, 3}, 9999.);
|
||||
namespace blas = multi::blas;
|
||||
using blas::filling;
|
||||
using blas::transposed;
|
||||
syrk(filling::lower, 1., transposed(a), 0., c); // c⸆=c=a⸆a=(aa⸆)⸆, `c` in lower triangular
|
||||
BOOST_REQUIRE( c[2][1]==complex(-3.,-34.) );
|
||||
BOOST_REQUIRE( c[1][2]==9999. );
|
||||
}
|
||||
{
|
||||
multi::array<complex, 2> c({3, 3}, 9999.);
|
||||
namespace blas = multi::blas;
|
||||
using blas::filling;
|
||||
using blas::transposed;
|
||||
syrk(filling::lower, 1., rotated(a), 0., c); // c⸆=c=a⸆a=(aa⸆)⸆, `c` in lower triangular
|
||||
BOOST_REQUIRE( c[2][1]==complex(-3.,-34.) );
|
||||
BOOST_REQUIRE( c[1][2]==9999. );
|
||||
}
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(multi_blas_syrk_automatic_operation_real){
|
||||
multi::array<double, 2> const a = {
|
||||
{ 1., 3., 4.},
|
||||
{ 9., 7., 1.}
|
||||
};
|
||||
{
|
||||
multi::array<double, 2> c({2, 2}, 9999.);
|
||||
using multi::blas::filling;
|
||||
syrk(filling::lower, 1., a, 0., c); // c⸆=c=aa⸆=(aa⸆)⸆, `c` in lower triangular
|
||||
BOOST_REQUIRE( c[1][0] == 34. );
|
||||
BOOST_REQUIRE( c[0][1] == 9999. );
|
||||
}
|
||||
{
|
||||
multi::array<double, 2> c({2, 2}, 9999.);
|
||||
using multi::blas::filling;
|
||||
syrk(filling::upper, 1., a, 0., c); // c⸆=c=aa⸆=(aa⸆)⸆, `c` in upper triangular
|
||||
BOOST_REQUIRE( c[0][1] == 34. );
|
||||
BOOST_REQUIRE( c[1][0] == 9999. );
|
||||
}
|
||||
{
|
||||
multi::array<double, 2> c({3, 3}, 9999.);
|
||||
using multi::blas::filling;
|
||||
syrk(filling::lower, 1., rotated(a), 0., c); // c⸆=c=a⸆a=(a⸆a)⸆, `c` in lower triangular
|
||||
BOOST_REQUIRE( c[2][1] == 19. );
|
||||
BOOST_REQUIRE( c[1][2] == 9999. );
|
||||
}
|
||||
{
|
||||
multi::array<double, 2> c({3, 3}, 9999.);
|
||||
namespace blas = multi::blas;
|
||||
using blas::transposed;
|
||||
using blas::filling;
|
||||
syrk(filling::lower, 1., transposed(a), 0., c); // c⸆=c=a⸆a=(a⸆a)⸆, `c` in lower triangular
|
||||
BOOST_REQUIRE( c[2][1] == 19. );
|
||||
BOOST_REQUIRE( c[1][2] == 9999. );
|
||||
}
|
||||
{
|
||||
multi::array<double, 2> c({3, 3}, 9999.);
|
||||
namespace blas = multi::blas;
|
||||
using blas::transposed;
|
||||
using blas::filling;
|
||||
syrk(filling::upper, 1., transposed(a), 0., c); // c⸆=c=a⸆a=(a⸆a)⸆, `c` in upper triangular
|
||||
BOOST_REQUIRE( c[1][2] == 19. );
|
||||
BOOST_REQUIRE( c[2][1] == 9999. );
|
||||
}
|
||||
{
|
||||
multi::array<double, 2> c({2, 2}, 9999.);
|
||||
using multi::blas::filling;
|
||||
using multi::blas::transposed;
|
||||
syrk(filling::upper, 1., a, 0., transposed(c)); // c⸆=c=aa⸆=(aa⸆)⸆, `c` in upper triangular
|
||||
BOOST_REQUIRE( c[0][1] == 9999. );
|
||||
BOOST_REQUIRE( c[1][0] == 34. );
|
||||
}
|
||||
}
|
||||
|
||||
// Overload without `beta`: the lower triangle must be overwritten with the
// product while the strictly upper entry keeps its sentinel value.
BOOST_AUTO_TEST_CASE(multi_blas_syrk_automatic_implicit_zero){
	using multi::blas::filling;

	multi::array<double, 2> const a = {
		{ 1., 3., 4.},
		{ 9., 7., 1.}
	};
	multi::array<double, 2> c({2, 2}, 9999.);

	syrk(filling::lower, 1., a, c); // c⸆=c=aa⸆=(aa⸆)⸆, `c` in lower triangular
	BOOST_REQUIRE( c[1][0] == 34. );
	BOOST_REQUIRE( c[0][1] == 9999. );
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(multi_blas_syrk_automatic_symmetrization){
|
||||
multi::array<double, 2> const a = {
|
||||
{ 1., 3., 4.},
|
||||
{ 9., 7., 1.}
|
||||
};
|
||||
{
|
||||
multi::array<double, 2> c({2, 2}, 9999.);
|
||||
using multi::blas::syrk;
|
||||
using multi::blas::gemm;
|
||||
using multi::blas::T;
|
||||
syrk(1., a, c); // c⸆=c=aa⸆=(aa⸆)⸆
|
||||
BOOST_REQUIRE( c[1][0] == 34. );
|
||||
BOOST_REQUIRE( c[0][1] == 34. );
|
||||
BOOST_REQUIRE( syrk(a) == gemm(a, T(a)) );
|
||||
}
|
||||
{
|
||||
using multi::blas::syrk;
|
||||
multi::array<double, 2> c = syrk(1., a); // c⸆=c=aa⸆=(aa⸆)⸆
|
||||
BOOST_REQUIRE( c[1][0] == 34. );
|
||||
BOOST_REQUIRE( c[0][1] == 34. );
|
||||
}
|
||||
{
|
||||
using multi::blas::syrk;
|
||||
multi::array<double, 2> c = syrk(a); // c⸆=c=aa⸆=(aa⸆)⸆
|
||||
BOOST_REQUIRE( c[1][0] == 34. );
|
||||
BOOST_REQUIRE( c[0][1] == 34. );
|
||||
}
|
||||
{
|
||||
using multi::blas::transposed;
|
||||
using multi::blas::syrk;
|
||||
multi::array<double, 2> c = syrk(transposed(a)); // c⸆=c=a⸆a=(a⸆a)⸆
|
||||
BOOST_REQUIRE( c[2][1] == 19. );
|
||||
BOOST_REQUIRE( c[1][2] == 19. );
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#if 0
|
||||
{
|
||||
|
||||
{
|
||||
multi::array<complex, 2> C({2, 2}, 9999.);
|
||||
syrk(1., rotated(A), rotated(C)); // C^T=C=A*A^T=(A*A^T)^T
|
||||
assert( C[1][0] == complex(18., -21.) );
|
||||
}
|
||||
{
|
||||
multi::array<complex, 2> C({2, 2}, 9999.);
|
||||
syrk(rotated(A), rotated(C)); // C^T=C=A*A^T=(A*A^T)^T
|
||||
assert( C[1][0] == complex(18., -21.) );
|
||||
}
|
||||
{
|
||||
complex C[2][2];
|
||||
using multi::rotated;
|
||||
syrk(rotated(A), rotated(C)); // C^T=C=A*A^T=(A*A^T)^T
|
||||
assert( C[1][0] == complex(18., -21.) );
|
||||
}
|
||||
{
|
||||
auto C = syrk(1., A); // C = C^T = A^T*A, C is a value type matrix (with C-ordering, information is everywhere)
|
||||
assert( C[1][2]==complex(-3.,-34.) );
|
||||
}
|
||||
{
|
||||
// what(rotated(syrk(A)));
|
||||
multi::array C = rotated(syrk(A)); // C = C^T = A^T*A, C is a value type matrix (with C-ordering, information is in upper triangular part)
|
||||
print(C) <<"---\n";
|
||||
}
|
||||
|
||||
}
|
||||
#if 0
|
||||
{
|
||||
multi::array<complex, 2> const A = {
|
||||
{ 1. + 3.*I, 3.- 2.*I, 4.+ 1.*I},
|
||||
{ 9. + 1.*I, 7.- 8.*I, 1.- 3.*I}
|
||||
};
|
||||
auto C = rotated(syrk(A)).decay(); // C = C^T = A^T*A, C is a value type matrix (with C-ordering, information is in upper triangular part)
|
||||
print(C) <<"---\n";
|
||||
// print(C) <<"---\n";
|
||||
}
|
||||
return 0;
|
||||
{
|
||||
multi::array<complex, 2> const A = {
|
||||
{ 1. + 3.*I, 3.- 2.*I, 4.+ 1.*I},
|
||||
{ 9. + 1.*I, 7.- 8.*I, 1.- 3.*I}
|
||||
};
|
||||
auto C = syrk(rotated(A)); // C = C^T = A^T*A, C is a value type matrix (with C-ordering)
|
||||
print(C) <<"---\n";
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(multi_blas_syrk_herk_fallback){
	using multi::blas::filling;

	multi::array<double, 2> const rect = {
		{ 1., 3., 4.},
		{ 9., 7., 1.}
	};
	// the 9999. sentinel shows which entries syrk leaves untouched
	multi::array<double, 2> gram({2, 2}, 9999.);

	syrk(filling::lower, 1., rect, 0., gram); // gram = rect*rect⸆, only the lower triangle is written

	BOOST_REQUIRE( gram[1][0] == 34. );
	BOOST_REQUIRE( gram[0][1] == 9999. );
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
|
@ -1,95 +0,0 @@
|
|||
# -*-indent-tabs-mode:nil;c-basic-offset:2;tab-width:4;autowrap:nil;-*-
|
||||
#[=[Multi Test suite can be run like this:
|
||||
mkdir -p build
|
||||
cd build
|
||||
cmake .. [-DENABLE_CUDA=1]
|
||||
make -j
|
||||
ctest -j --output-on-failure [-T memcheck]
|
||||
exit
|
||||
#]=]
|
||||
cmake_minimum_required(VERSION 3.11)

set(CMAKE_VERBOSE_MAKEFILE ON)

project(boost-multi-adaptors-blas-test VERSION 0.1 LANGUAGES CXX)

find_package(Boost REQUIRED COMPONENTS unit_test_framework)

# BLAS library, plus the directory holding cblas.h when it can be located.
find_package(BLAS REQUIRED)
find_path(BLAS_INCLUDE_DIRS cblas.h
  /usr/include
  /usr/local/include
  $ENV{BLAS_HOME}/include)

link_libraries(${BLAS_LIBRARIES})
# include_directories() is directory-scoped: it takes neither a target name nor
# a PRIVATE keyword (that is the target_include_directories() signature), and
# TEST_EXE is not defined yet at this point.
# The guard avoids passing a *-NOTFOUND value when cblas.h was not located.
if(BLAS_INCLUDE_DIRS)
  include_directories(${BLAS_INCLUDE_DIRS})
endif()

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# Optional CUDA build of the tests, requested with -DENABLE_CUDA=1 or by defining CXXCUDA.
if(ENABLE_CUDA OR DEFINED CXXCUDA)
  enable_language(CUDA)
  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr -Xcudafe \"--diag_suppress=implicit_return_from_non_void_function\"")
endif()

# CUDA_FOUND is also written into config.hpp by configure_file() below.
find_package(CUDA QUIET)

if (CUDA_FOUND)
  message("CUDA found")
  include_directories(${CUDA_INCLUDE_DIRS})
else()
  message("CUDA not found")
endif()

enable_testing()
list(APPEND CMAKE_CTEST_ARGUMENTS "--output-on-failure") # needs cmake 3.17
include(CTest)

# Generated header consumed by the tests (#include "config.hpp").
configure_file("config.hpp.in" ${CMAKE_BINARY_DIR}/config.hpp)

include_directories(${CMAKE_BINARY_DIR})
#file(GLOB TEST_SRCS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.cpp)
# Explicit list of test sources; each one becomes its own executable and CTest test.
set(TEST_SRCS
  axpy.cpp
  copy.cpp
  dot.cpp
  herk.cpp
  gemv.cpp
  gemm.cpp
  numeric.cpp
  scal.cpp
  traits.cpp
  trsm.cpp
)

foreach(TEST_FILE ${TEST_SRCS})
  set(TEST_EXE "${TEST_FILE}.x")
  add_executable            (${TEST_EXE} ${TEST_FILE})
  if(ENABLE_CUDA OR DEFINED CXXCUDA)
    # compile the very same source with the CUDA compiler
    set_source_files_properties(${TEST_FILE} PROPERTIES LANGUAGE CUDA)
    target_compile_options  (${TEST_EXE} PRIVATE -std=c++17)
  endif()
# target_compile_features   (${TEST_EXE} PUBLIC cxx_std_17)
  target_compile_definitions(${TEST_EXE} PRIVATE "BOOST_PP_VARIADICS")
  target_compile_definitions(${TEST_EXE} PRIVATE ${Boost_DEFINITIONS})
  target_include_directories(${TEST_EXE} PRIVATE ${Boost_INCLUDE_DIRS})
  target_link_libraries     (${TEST_EXE} PRIVATE ${Boost_LIBRARIES})
  target_link_directories   (${TEST_EXE} PRIVATE ${Boost_LIBRARY_DIRS})
  # Host-compiler warning flags: skipped under the same condition that switches
  # the source to the CUDA language above, so they never reach the CUDA compiler.
  if(NOT (ENABLE_CUDA OR DEFINED CXXCUDA))
    target_compile_options  (${TEST_EXE} PRIVATE
      -Werror -Wall -Wextra -fno-common
      $<$<CXX_COMPILER_ID:GNU>:
        -Wpedantic -Wformat-truncation -fstack-usage>#-Wconversion
      $<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>>:
        -Wpedantic -Wmove>
      $<$<CXX_COMPILER_ID:Intel>:
        -wd161 -diag-disable=remark -Warray-bounds -Wchar-subscripts -Wcomment -Wenum-compare -Wformat -Wuninitialized -Wmaybe-uninitialized -Wmain -Wnarrowing -Wnonnull -Wparentheses -Wpointer-sign -Wreorder -Wno-return-type -Wsign-compare -Wsequence-point -Wtrigraphs -Wunused-function -Wunused-but-set-variable -Wunused-variable -Wwrite-strings -Werror -diag-error:3846
      >
      $<$<CXX_COMPILER_ID:MSVC>:
        /W4>)
  endif()
  # Passing the target name lets CMake substitute the executable's real location.
  add_test(NAME ${TEST_EXE} COMMAND ${TEST_EXE})
endforeach()
|
||||
|
|
@ -1,78 +0,0 @@
|
|||
#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*-
|
||||
$CXX $0 -o $0x `pkg-config --libs blas` -lcudart -lcublas -lboost_unit_test_framework&&$0x&&rm $0x;exit
|
||||
#endif
|
||||
// © Alfredo A. Correa 2019-2020
|
||||
|
||||
#define BOOST_TEST_MODULE "C++ Unit Tests for Multi BLAS asum"
|
||||
#define BOOST_TEST_DYN_LINK
|
||||
#include<boost/test/unit_test.hpp>
|
||||
|
||||
#include "../../blas/asum.hpp"
|
||||
#include "../../blas/cuda.hpp"
|
||||
#include "../../../array.hpp"
|
||||
#include "../../../adaptors/cuda.hpp"
|
||||
|
||||
#include<complex>
|
||||
#include<numeric>
|
||||
|
||||
using std::cout;
|
||||
namespace multi = boost::multi;
|
||||
|
||||
BOOST_AUTO_TEST_CASE(multi_blas_asum_double){
	// asum of a real row is compared with a hand-rolled sum of absolute values
	multi::array<double, 2> const mat = {
		{1.,  2.,  3.,  4.},
		{5.,  6.,  7.,  8.},
		{9., 10., 11., 12.}
	};
	auto const add_abs = [](auto&& acc, auto&& elem){return acc + std::abs(elem);};
	auto const expected = std::accumulate(begin(mat[1]), end(mat[1]), 0., add_abs);

	using multi::blas::asum;
	BOOST_REQUIRE(asum(mat[1]) == expected);
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(multi_blas_asum_complex){
	// the reference for complex elements accumulates |re| + |im| per element
	using Z = std::complex<double>;
	Z const I{0, 1};

	multi::array<Z, 2> const mat = {
		{1. + 2.*I,  2.       ,  3.       ,  4.},
		{5.       ,  6. + 3.*I,  7.       ,  8.},
		{9.       , 10.       , 11.+ 4.*I , 12.}
	};
	auto const add_abs1 = [](auto&& acc, auto&& elem){return acc + std::abs(real(elem)) + std::abs(imag(elem));};
	auto const expected = std::accumulate(begin(mat[1]), end(mat[1]), 0., add_abs1);

	using multi::blas::asum;
	BOOST_REQUIRE(asum(mat[1]) == expected);
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(multi_blas_asum_double_cuda){
	// same data as the host test, stored in a multi::cuda::array
	multi::cuda::array<double, 2> const dev_mat = {
		{1.,  2.,  3.,  4.},
		{5.,  6.,  7.,  8.},
		{9., 10., 11., 12.}
	};
	using multi::blas::asum;
	BOOST_REQUIRE(asum(dev_mat[1]) == 26 ); // 5 + 6 + 7 + 8
}
|
||||
|
||||
using complex = std::complex<double>; constexpr complex I{0, 1};
|
||||
|
||||
BOOST_AUTO_TEST_CASE(multi_blas_asum_complex_cuda){
	namespace blas = multi::blas;

	multi::cuda::array<complex, 2> const dev_mat = {
		{1. + 2.*I,  2.       ,  3.       ,  4.},
		{5.       ,  6. + 3.*I,  7.       ,  8.},
		{9.       , 10.       , 11.+ 4.*I , 12.}
	};

	// whole row, then the same row through an explicit {0, 4} range
	BOOST_REQUIRE( blas::asum(dev_mat[1]) == 29. );
	BOOST_REQUIRE( blas::asum(dev_mat[1]({0, 4})) == 29. );
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(multi_blas_asum_complex_cuda_mutable){
	// non-const array this time; results are compared against a complex value
	using Z = std::complex<double>;
	Z const I{0, 1};

	multi::cuda::array<Z, 2> dev_mat = {
		{1. + 2.*I,  2.       ,  3.       ,  4.},
		{5.       ,  6. + 3.*I,  7.       ,  8.},
		{9.       , 10.       , 11.+ 4.*I , 12.}
	};
	using multi::blas::asum;
	BOOST_REQUIRE( asum(dev_mat[1]) == Z{29.} );
	BOOST_REQUIRE( asum(dev_mat[1]({0, 4})) == Z{29.} );
}
|
||||
|
||||
|
|
@ -1,150 +0,0 @@
|
|||
// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*-
|
||||
// © Alfredo A. Correa 2019-2020
|
||||
|
||||
#define BOOST_TEST_MODULE "C++ Unit Tests for Multi BLAS axpy"
|
||||
#define BOOST_TEST_DYN_LINK
|
||||
#include<boost/test/unit_test.hpp>
|
||||
|
||||
#include "config.hpp"
|
||||
|
||||
#include "../../../array.hpp"
|
||||
#include "../../blas.hpp"
|
||||
|
||||
#include<complex>
|
||||
|
||||
namespace multi = boost::multi;
|
||||
namespace blas = multi::blas;
|
||||
|
||||
BOOST_AUTO_TEST_CASE(multi_blas_axpy_real){
	multi::array<double, 2> mat = {
		{1.,  2.,  3.,  4.},
		{5.,  6.,  7.,  8.},
		{9., 10., 11., 12.}
	};
	auto const before = mat; // snapshot taken before row 1 is updated
	multi::array<double, 1> const x = mat[2];

	blas::axpy(2., x, mat[1]); // daxpy: mat[1] += 2*x
	BOOST_REQUIRE( mat[1][2] == 2.*x[2] + before[1][2] );
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(multi_blas_axpy_double){
	multi::array<double, 2> const ref = {
		{1.,  2.,  3.,  4.},
		{5.,  6.,  7.,  8.},
		{9., 10., 11., 12.}
	};
	multi::array<double, 2> work = ref;
	multi::array<double, 1> const x = ref[2];

	blas::axpy(2., x, work[1]); // work[1] += 2*x
	BOOST_REQUIRE( work[1][2] == 2.*x[2] + ref[1][2] );

	// Second part: build a conjugate by running axpy on the real and imaginary views.
	using complex = std::complex<double>; complex const I = {0, 1};
	multi::array<complex, 1> zs = {1. + 2.*I, 3. + 4.*I, 4. - 8.*I};
	multi::array<complex, 1> zs_conj(size(zs), complex{0.});

	blas::axpy(+1., blas::real(zs), blas::real(zs_conj));
	blas::axpy(-1., blas::imag(zs), blas::imag(zs_conj));

	BOOST_REQUIRE( zs_conj[2] == std::conj(zs[2]) );
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(multi_blas_axpy_complex){
	using complex = std::complex<double>;

	multi::array<complex, 2> mat = {
		{1.,  2.,  3.,  4.},
		{5.,  6.,  7.,  8.},
		{9., 10., 11., 12.}
	};
	auto const before = mat;
	multi::array<complex, 1> const x = mat[2];

	blas::axpy(2., x, mat[1]); // zaxpy; the real scalar 2. is promoted to complex automatically
	BOOST_REQUIRE( mat[1][2] == 2.*x[2] + before[1][2] );
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(multi_blas_axpy_complex_as_operator_plus_equal){
	using complex = std::complex<double>;

	multi::array<complex, 2> mat = {
		{1.,  2.,  3.,  4.},
		{5.,  6.,  7.,  8.},
		{9., 10., 11., 12.}
	};
	auto const before = mat;
	multi::array<complex, 1> const x = mat[2];

	// two-argument axpy(alpha, x) is applied to the row through `+=`
	mat[1] += blas::axpy(2., x);
	BOOST_REQUIRE( mat[1][2] == 2.*x[2] + before[1][2] );
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(multi_blas_axpy_complex_as_operator_minus_equal){
	using complex = std::complex<double>;

	multi::array<complex, 2> mat = {
		{1.,  2.,  3.,  4.},
		{5.,  6.,  7.,  8.},
		{9., 10., 11., 12.}
	};
	auto const before = mat;
	multi::array<complex, 1> const x = mat[2];

	// two-argument axpy(alpha, x) is applied to the row through `-=`
	mat[1] -= blas::axpy(2., x);
	BOOST_REQUIRE( mat[1][2] == -2.*x[2] + before[1][2] );
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(multi_blas_axpy_complex_context){
	using complex = std::complex<double>;

	multi::array<complex, 2> mat = {
		{1.,  2.,  3.,  4.},
		{5.,  6.,  7.,  8.},
		{9., 10., 11., 12.}
	};
	auto const before = mat;
	multi::array<complex, 1> const x = mat[2];

	// same update as the plain overload, with a temporary blas::context as first argument
	blas::axpy(blas::context{}, 2., x, mat[1]);
	BOOST_REQUIRE( mat[1][2] == 2.*x[2] + before[1][2] );
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(multi_blas_axpy_operator_minus){
	using complex = std::complex<double>;

	using blas::operators::operator+;
	using blas::operators::operator-;
	using blas::operators::operator-=;

	// differences and sums of whole 1D arrays
	multi::array<complex, 1> u = {10., 11., 12., 13.};
	multi::array<complex, 1> v = u;

	BOOST_REQUIRE( (u - v)[0] == 0. );
	BOOST_REQUIRE( (v - u)[0] == 0. );

	BOOST_REQUIRE( (u - (v+v))[0] == -u[0] );
	BOOST_REQUIRE( ((u+u) - v)[0] == +u[0] );

	// a row of a 2D array minus a 1D array
	multi::array<complex, 2> mat = {{1., 2.}, {3., 4.}};
	multi::array<complex, 1> row0 = {1., 2.};
	BOOST_REQUIRE( (mat[0] - row0)[0] == 0. );
	BOOST_REQUIRE( (mat[0] - row0)[1] == 0. );

	// in-place subtraction
	multi::array<complex, 1> lhs = {10., 11., 12., 13.};
	multi::array<complex, 1> rhs = {10., 11., 12., 13.};
	lhs -= rhs;
	BOOST_REQUIRE( lhs[0] == 0. );
}
|
||||
|
||||
#if CUDA_FOUND
|
||||
#include<thrust/complex.h>
|
||||
BOOST_AUTO_TEST_CASE(multi_blas_axpy_complex_thrust){
	// same check as the std::complex test, with thrust::complex elements
	using complex = thrust::complex<double>;

	multi::array<complex, 2> mat = {
		{1.,  2.,  3.,  4.},
		{5.,  6.,  7.,  8.},
		{9., 10., 11., 12.}
	};
	auto const before = mat;
	multi::array<complex, 1> const x = mat[2];

	blas::axpy(2., x, mat[1]); // the real scalar 2. is promoted to complex automatically
	BOOST_REQUIRE( mat[1][2] == 2.*x[2] + before[1][2] );
}
|
||||
#endif
|
||||
|
|
@ -1,7 +0,0 @@
|
|||
#ifndef MULTI_ADAPTORS_BLAS_TEST_CONFIG_HPP
|
||||
#define MULTI_ADAPTORS_BLAS_TEST_CONFIG_HPP
|
||||
|
||||
// Template line: CMake's configure_file() rewrites it as a #define of CUDA_FOUND to 0 or 1.
#cmakedefine01 CUDA_FOUND
|
||||
|
||||
#endif
|
||||
|
|
@ -1,159 +0,0 @@
|
|||
// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*-
|
||||
|
||||
#include "../../blas.hpp"
|
||||
#include "../../../array.hpp"
|
||||
|
||||
#include<complex>
|
||||
|
||||
#include "config.hpp"
|
||||
|
||||
#define BOOST_TEST_MODULE "C++ Unit Tests for Multi BLAS copy"
|
||||
#define BOOST_TEST_DYN_LINK
|
||||
#include<boost/test/unit_test.hpp>
|
||||
|
||||
namespace multi = boost::multi;
|
||||
namespace blas = multi::blas;
|
||||
|
||||
BOOST_AUTO_TEST_CASE(multi_blas_copy_n){
	// iterator + count interface
	multi::array<double, 1> const src = {1., 2., 3., 4.};
	multi::array<double, 1> dst = {5., 6., 7., 8.};

	blas::copy_n(src.begin(), src.size(), dst.begin());
	BOOST_REQUIRE( dst == src );
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(multi_blas_copy_it){
	// iterator-pair interface
	multi::array<double, 1> const src = {1., 2., 3., 4.};
	multi::array<double, 1> dst = {5., 6., 7., 8.};

	blas::copy(src.begin(), src.end(), dst.begin());
	BOOST_REQUIRE( dst == src );
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(multi_blas_copy){
	multi::array<double, 1> const src = {1., 2., 3., 4.};
	{ // two-argument form: copy(source, destination)
		multi::array<double, 1> dst = {5., 6., 7., 8.};
		blas::copy(src, dst); // segmentation fault in clang-11
		BOOST_REQUIRE( dst == src );
	}
	{ // one-argument form assigned to an array of the same size
		multi::array<double, 1> dst = {5., 6., 7., 8.};
		BOOST_REQUIRE( size(dst) == size(src) );
		dst = blas::copy(src);
		BOOST_REQUIRE( dst == src );
	}
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(multi_adaptors_blas_test_copy_real){
	multi::array<double, 2> mat = {
		{1.,  2.,  3.,  4.},
		{5.,  6.,  7.,  8.},
		{9., 10., 11., 12.}
	};
	BOOST_REQUIRE( mat[0][2] ==  3. );
	BOOST_REQUIRE( mat[2][2] == 11. );

	// row 0 -> row 2: the source is unchanged, the destination now matches it
	blas::copy(mat[0], mat[2]);
	BOOST_REQUIRE( mat[0][2] == 3. );
	BOOST_REQUIRE( mat[2][2] == 3. );

//	multi::blas::copy(begin(A[1]), end(A[1]), begin(A[2])); // dcopy
	// row 1 -> row 2 through explicit index ranges
	blas::copy( mat[1]({0, size(mat[1])}), mat[2]({0, size(mat[1])}) );
	BOOST_REQUIRE( mat[1][3] == 8. );
	BOOST_REQUIRE( mat[2][3] == 8. );

	// rotated(mat)[3] is column 3 of mat, copied into a fresh 1D array
	multi::array<double, 1> col3 = blas::copy(rotated(mat)[3]); // dcopy
	BOOST_REQUIRE( col3[1] == mat[1][3] );
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(multi_blas_copy_row){
	multi::array<double, 2> const mat = {
		{1., 2., 3.},
		{4., 5., 6.},
		{7., 8., 9.}
	};
	// rotated(mat)[0] is the first column of mat
	multi::array<double, 1> col0(3);
	blas::copy(rotated(mat)[0], col0);
	BOOST_REQUIRE( col0 == rotated(mat)[0] );
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(multi_adaptors_blas_test_copy_complex){
	// Copies a complex row onto another row of the same array.
	using complex = std::complex<double>; constexpr complex I{0, 1};
	multi::array<complex, 2> A = {
		{1. + 3.*I,  2. + 4.*I,  3. + 5.*I,  4. + 6.*I},
		{5.       ,  6.       ,  7.       ,  8.       },
		{9.       , 10.       , 11.       , 12.       }
	};
	blas::copy(A[0], A[2]);
	BOOST_REQUIRE( A[0][2] == 3. + 5.*I ); // source row is left untouched
	BOOST_REQUIRE( A[2][2] == 3. + 5.*I ); // destination row received the data (was 11.)
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(multi_blas_copy_context){
	// same two forms as multi_blas_copy, with an explicit context as first argument
	multi::array<double, 1> const src = {1., 2., 3., 4.};
	blas::context ctx;
	{
		multi::array<double, 1> dst = {5., 6., 7., 8.};
		blas::copy(ctx, src, dst);
		BOOST_REQUIRE( src == dst );
	}
	{
		multi::array<double, 1> dst = {5., 6., 7., 8.};
		BOOST_REQUIRE( size(dst) == size(src) );
		dst = blas::copy(ctx, src);
		BOOST_REQUIRE( src == dst );
	}
}
|
||||
|
||||
#if CUDA_FOUND
|
||||
#include<thrust/complex.h>
|
||||
|
||||
BOOST_AUTO_TEST_CASE(multi_adaptors_blas_copy_thrust){
	// copy between two arrays of thrust::complex<double>
	using tcomplex = thrust::complex<double>;

	multi::array<tcomplex, 1> const src(10, tcomplex{});
	multi::array<tcomplex, 1> dst(10);

	blas::copy(src, dst);
	BOOST_REQUIRE( src == dst );
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(multi_adaptors_blas_text_copy_interop){
	// copy from std::complex elements into thrust::complex elements
	using scomplex = std::complex<double>;
	using tcomplex = thrust::complex<double>;

	static_assert( std::is_convertible<scomplex, tcomplex>{} );
	static_assert( std::is_convertible<tcomplex, scomplex>{} );

	multi::array<scomplex, 1> src(10, scomplex{});
	multi::array<tcomplex, 1> dst(10);

	blas::copy(src, dst);
	BOOST_REQUIRE( src == dst );
}
|
||||
#endif
|
||||
|
||||
//BOOST_AUTO_TEST_CASE(multi_adaptors_blas_test_copy_cuda_complex){
|
||||
// namespace cuda = multi::cuda;
|
||||
// cuda::array<complex, 2> A = {
|
||||
// {1. + 3.*I, 2. + 4.*I, 3. + 5.*I, 4. + 6.*I},
|
||||
// {5., 6., 7., 8.},
|
||||
// {9., 10., 11., 12.}
|
||||
// };
|
||||
|
||||
// blas::copy(A[0], A[2]);
|
||||
// BOOST_REQUIRE( A[0][2] == 3. + 5.*I );
|
||||
// BOOST_REQUIRE( A[2][2] == 3. + 5.*I );
|
||||
//}
|
||||
|
||||
//BOOST_AUTO_TEST_CASE(multi_adaptors_blas_test_copy_cuda_managed_complex){
|
||||
// namespace cuda = multi::cuda;
|
||||
// namespace blas = multi::blas;
|
||||
|
||||
// cuda::managed::array<complex, 2> A = {
|
||||
// {1. + 3.*I, 2. + 4.*I, 3. + 5.*I, 4. + 6.*I},
|
||||
// {5., 6., 7., 8.},
|
||||
// {9., 10., 11., 12.}
|
||||
// };
|
||||
// blas::copy(A[0], A[2]);
|
||||
// BOOST_REQUIRE( A[0][2] == 3. + 5.*I );
|
||||
// BOOST_REQUIRE( A[2][2] == 3. + 5.*I );
|
||||
//}
|
||||
|
||||
|
||||
|
|
@ -1,407 +0,0 @@
|
|||
// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*-
|
||||
// © Alfredo A. Correa 2019-2021
|
||||
|
||||
#define BOOST_TEST_MODULE "C++ Unit Tests for Multi BLAS dot"
|
||||
#define BOOST_TEST_DYN_LINK
|
||||
#include<boost/test/unit_test.hpp>
|
||||
|
||||
#include "config.hpp"
|
||||
|
||||
#include "../../blas/dot.hpp"
|
||||
|
||||
#include "../../../array.hpp"
|
||||
//#include "../../blas/cuda.hpp"
|
||||
//#include "../../../adaptors/cuda.hpp"
|
||||
|
||||
#include<cassert>
|
||||
#include<complex>
|
||||
#include<numeric>
|
||||
#include<type_traits>
|
||||
|
||||
namespace multi = boost::multi;
|
||||
namespace blas = multi::blas;
|
||||
|
||||
BOOST_AUTO_TEST_CASE(blas_dot_context){
	// dot with an explicit context (passed by pointer); unary + turns the result into a value
	multi::array<float, 1> const x = {1.,2.,3.};
	multi::array<float, 1> const y = {1.,2.,3.};

	blas::context ctxt;
	auto res = +blas::dot(&ctxt, x, y);

	BOOST_REQUIRE( res == std::inner_product(begin(x), end(x), begin(y), 0.F) );
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(blas_dot_no_context){
	// same as blas_dot_context, relying on the overload without a context argument
	multi::array<float, 1> const x = {1.,2.,3.};
	multi::array<float, 1> const y = {1.,2.,3.};

	auto res = +blas::dot(x, y);

	BOOST_REQUIRE( res == std::inner_product(begin(x), end(x), begin(y), 0.F) );
}
|
||||
|
||||
|
||||
BOOST_AUTO_TEST_CASE(blas_dot_no_context_out_param){
	// result delivered through the third (output) argument
	multi::array<float, 1> const x = {1.,2.,3.};
	multi::array<float, 1> const y = {1.,2.,3.};

	float res = NAN; // sentinel, replaced by the call below
	blas::dot(x, y, res);

	BOOST_REQUIRE( res == std::inner_product(begin(x), end(x), begin(y), 0.F) );
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(blas_dot_no_context_out_param_complex){
	// Complex dot through the output-parameter overload.
	using complex = std::complex<double>;
	multi::array<complex, 1> const A = {1.,2.,3.};
	multi::array<complex, 1> const B = {1.,2.,3.};
	complex C;
	blas::dot(A, B, C);
	// No conjugation was requested, so the reference is the plain product sum,
	// as in multi_blas_dot_impl_complex. A conjugating reference would only
	// agree here because every imaginary part is zero.
	BOOST_REQUIRE( C == std::inner_product(begin(A), end(A), begin(B), complex{0.}) );
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(blas_dot_no_context_out_param_complex_C){
	using complex = std::complex<double>; complex const I{0., 1.};

	multi::array<complex, 1> const x = {1.,2., 3.};
	multi::array<complex, 1> const y = {1.,2. + 2.*I, 3.};

	// blas::C(x) marks the first operand as conjugated; the reference does the same by hand
	auto const conj_times = [](auto const& a, auto const& b){return conj(a)*b;};

	complex res;
	blas::dot(blas::C(x), y, res);

	BOOST_REQUIRE( res == std::inner_product(begin(x), end(x), begin(y), complex{0.}, std::plus<>{}, conj_times) );
}
|
||||
|
||||
#if defined(CUDA_FOUND) and CUDA_FOUND
|
||||
#include<thrust/complex.h>
|
||||
BOOST_AUTO_TEST_CASE(blas_dot_no_context_out_param_complex_C_thrust){
	// thrust::complex counterpart of blas_dot_no_context_out_param_complex_C
	using complex = thrust::complex<double>; complex const I{0., 1.};

	multi::array<complex, 1> const x = {1.,2., 3.};
	multi::array<complex, 1> const y = {1.,2. + 2.*I, 3.};

	auto const conj_times = [](auto& a, auto& b){return conj(a)*b;};

	complex res;
	blas::dot(blas::C(x), y, res);

	BOOST_REQUIRE( res == std::inner_product(begin(x), end(x), begin(y), complex{0.}, std::plus<>{}, conj_times) );
}
|
||||
#endif
|
||||
|
||||
BOOST_AUTO_TEST_CASE(multi_blas_dot_strided){
	// Dot product of two rows of a 2D array, through dot_n (iterators) and dot (ranges).
	multi::array<double, 2> const CA = {
		{1.,  2.,  3.,  4.},
		{5.,  6.,  7.,  8.},
		{9., 10., 11., 12.}
	};
	double d = std::numeric_limits<double>::quiet_NaN(); // sentinel, overwritten by dot_n
	blas::dot_n(begin(CA[1]), size(CA[1]), begin(CA[2]), &d);
	// std::inner_product takes (first1, last1, first2, init). The previous
	// (first1, first2, last1) order only worked because row 2 starts exactly
	// where row 1 ends in memory.
	BOOST_REQUIRE( d == std::inner_product(begin(CA[1]), end(CA[1]), begin(CA[2]), 0.) );

	double d2 = blas::dot(CA[1], CA[2]);
	BOOST_REQUIRE( d == d2 );
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(multi_blas_dot_strided_context){
	// Same as multi_blas_dot_strided, with dot_n receiving an explicit context pointer.
	multi::array<double, 2> const CA = {
		{1.,  2.,  3.,  4.},
		{5.,  6.,  7.,  8.},
		{9., 10., 11., 12.}
	};
	double d = std::numeric_limits<double>::quiet_NaN(); // sentinel, overwritten by dot_n
	blas::context ctxt;
	blas::dot_n(&ctxt, begin(CA[1]), size(CA[1]), begin(CA[2]), &d);
	// std::inner_product takes (first1, last1, first2, init). The previous
	// (first1, first2, last1) order only worked because row 2 starts exactly
	// where row 1 ends in memory.
	BOOST_REQUIRE( d == std::inner_product(begin(CA[1]), end(CA[1]), begin(CA[2]), 0.) );

	double d2 = blas::dot(CA[1], CA[2]);
	BOOST_REQUIRE( d == d2 );
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(multi_blas_dot_1d_real){
	using blas::dot;

	multi::array<float, 1> lhs = {1., 2., 3.};
	multi::array<float, 1> rhs = {1., 2., 3.};

	// 1*1 + 2*2 + 3*3; the comparison is exercised with the result on either side
	BOOST_REQUIRE( 14. == dot(lhs, rhs) );
	BOOST_REQUIRE( dot(lhs, rhs) == 14. );
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(multi_blas_dot_impl_real){
	// Exercises the value-returning and output-parameter forms of dot on rows of a real 2D array.
	// The references use the canonical std::inner_product(first1, last1, first2, init)
	// order; the previous (first1, first2, last1) order only worked because
	// consecutive rows are adjacent in memory.
	multi::array<double, 2> const cA = {
		{1.,  2.,  3.,  4.},
		{5.,  6.,  7.,  8.},
		{9., 10., 11., 12.}
	};
	{ // result converted to double
		double d = blas::dot(cA[1], cA[2]);
		BOOST_REQUIRE( d==std::inner_product(begin(cA[1]), end(cA[1]), begin(cA[2]), 0.) );
	}
	{ // output parameter
		double d = NAN;
		blas::dot(cA[1], cA[2], d);
		BOOST_REQUIRE( d==std::inner_product(begin(cA[1]), end(cA[1]), begin(cA[2]), 0.) );
	}
	{ // the output-parameter form also returns something comparable to the result
		double d = NAN;
		auto d2 = blas::dot(cA[1], cA[2], d);
		BOOST_REQUIRE( d==d2 );
	}
//	{
//		multi::array<double, 0> d;
//		auto d2 = blas::dot(cA[1], cA[2], d);
//		BOOST_REQUIRE( d == std::inner_product(begin(cA[1]), end(cA[1]), begin(cA[2]), 0.) );
//	}
	{ // symmetry in the arguments
		double d = blas::dot(cA[1], cA[2]);
		BOOST_REQUIRE( d == std::inner_product(begin(cA[1]), end(cA[1]), begin(cA[2]), 0.) );
		BOOST_REQUIRE( blas::dot(cA[1], cA[2]) == blas::dot(cA[2], cA[1]) );
	}
//	{
//		double s;
//		blas::dot(cA[1], cA[1], s);
//		BOOST_REQUIRE( std::sqrt(s)==blas::nrm2(cA[1]) );
//	}
	{
	//	auto d1 = blas::dot(cA[1], cA[1]);
	//	auto d2 = blas::dot(blas::conj(cA[1]), cA[1]);
	}
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(inq_case){
	using blas::dot;
	using blas::hermitized;
	using blas::conj;

	multi::array<double, 1> ones (10, +1.0);
	multi::array<double, 1> minus(10, -1.0);

	// plain product and the product with a hermitized first operand agree
	auto plain = dot(ones, minus);
	auto herm  = dot(hermitized(ones), minus);
	BOOST_REQUIRE(plain == herm);

	// conjugation doesn't do anything for real array
	auto conjd = dot(blas::conj(ones), minus);
	BOOST_REQUIRE(conjd == plain);

	auto via_C = dot(blas::C(ones), minus);
	BOOST_REQUIRE(via_C == plain);

	// the result deduced by `auto` is not a bare double...
	static_assert( not std::is_same<decltype(via_C), double>{}, "!" );

	// ...but it can initialize one
	using blas::C;
	double as_double = dot(C(ones), minus);

	BOOST_REQUIRE( as_double == via_C );
}
|
||||
|
||||
#if 1
|
||||
|
||||
BOOST_AUTO_TEST_CASE(multi_blas_dot_impl_complex){
	using complex = std::complex<double>; complex const I{0, 1};

	multi::array<complex, 2> const A = {
		{1. +    I,  2. + 3.*I,  3.+2.*I,  4.-9.*I},
		{5. + 2.*I,  6. + 6.*I,  7.+2.*I,  8.-3.*I},
		{9. + 1.*I, 10. + 9.*I, 11.+1.*I, 12.+2.*I}
	};
	{ // no conjugation, output parameter
		complex res;
		blas::dot(A[1], A[2], res);
		BOOST_TEST_REQUIRE( res == std::inner_product(begin(A[1]), end(A[1]), begin(A[2]), complex{0}) );
	}
	{ // no conjugation, returned value
		complex res = blas::dot(A[1], A[2]);
		BOOST_TEST_REQUIRE( res == std::inner_product(begin(A[1]), end(A[1]), begin(A[2]), complex{0}) );
	}
	{ // second operand conjugated
		complex res = blas::dot(A[1], blas::C(A[2]));
		BOOST_TEST_REQUIRE( res == std::inner_product(begin(A[1]), end(A[1]), begin(A[2]), complex{0}, std::plus<>{}, [](auto a, auto b){return a*conj(b);}) );
	}
	{ // first operand conjugated through blas::C
		complex res = blas::dot(blas::C(A[1]), A[2]);
		BOOST_TEST_REQUIRE( res == std::inner_product(begin(A[1]), end(A[1]), begin(A[2]), complex{}, std::plus<>{}, [](auto a, auto b){return conj(a)*b;}) );
	}
	{ // first operand conjugated through blas::conj
		complex res = blas::dot(blas::conj(A[1]), A[2]);
		BOOST_TEST_REQUIRE( res == std::inner_product(begin(A[1]), end(A[1]), begin(A[2]), complex{}, std::plus<>{}, [](auto a, auto b){return conj(a)*b;}) );
	}
//	{
//		complex c = blas::dot(blas::C(A[1]), blas::C(A[2]));
//		BOOST_TEST_REQUIRE( c == inner_product(begin(A[1]), end(A[1]), begin(A[2]), complex{}, std::plus<>{}, [](auto a, auto b){return conj(a)*conj(b);}) );
//	}
	{ // first operand conjugated, reference seeded with complex{0}
		complex res = blas::dot(blas::C(A[1]), A[2]);
		BOOST_TEST_REQUIRE( res == std::inner_product(begin(A[1]), end(A[1]), begin(A[2]), complex{0}, std::plus<>{}, [](auto a, auto b){return conj(a)*b;}) );
	}
//	{
//		complex c = blas::dot(blas::C(A[1]), blas::C(A[2]));
//		BOOST_TEST_REQUIRE( c == std::inner_product(begin(A[1]), end(A[1]), begin(A[2]), complex{0}, std::plus<>{}, [](auto a, auto b){return conj(a)*conj(b);}) );
//	}
}
|
||||
|
||||
#include "config.hpp" // cuda found
|
||||
#if defined(CUDA_FOUND) and CUDA_FOUND
|
||||
|
||||
#include<thrust/complex.h>
|
||||
|
||||
BOOST_AUTO_TEST_CASE(multi_blas_dot_impl_complex_thrust){
	using complex = std::complex<double>; complex const I{0, 1};

	multi::array<complex, 2> const A = {
		{1. +    I,  2. + 3.*I,  3.+2.*I,  4.-9.*I},
		{5. + 2.*I,  6. + 6.*I,  7.+2.*I,  8.-3.*I},
		{9. + 1.*I, 10. + 9.*I, 11.+1.*I, 12.+2.*I}
	};
	// unconjugated reference product of rows 1 and 2, shared by the checks below
	auto const inner = std::inner_product(begin(A[1]), end(A[1]), begin(A[2]), complex{0.});

	{ // lowest level: blas::core with raw base pointers and strides
		complex res;
		blas::core::dotu(size(A[1]), A[1].base(), A[1].stride(), A[2].base(), A[2].stride(), &res);
		BOOST_REQUIRE( res.real() == inner.real() );
		BOOST_REQUIRE( res.imag() == inner.imag() );
	}
	{ // same call through blas::context
		complex res;
		blas::context::dotu(size(A[1]), A[1].base(), A[1].stride(), A[2].base(), A[2].stride(), &res);
		BOOST_REQUIRE( res.real() == inner.real() );
		BOOST_REQUIRE( res.imag() == inner.imag() );
	}
	{ // iterator + count interface
		complex res;
		blas::dot_n(begin(A[1]), size(A[1]), begin(A[2]), &res);
		BOOST_REQUIRE( res == inner );
	}
	{ // range interface, output parameter
		complex res;
		blas::dot(A[1], A[2], res);
		BOOST_REQUIRE( res == inner );
	}
	{ // range interface, returned value
		complex res = blas::dot(A[1], A[2]);
		BOOST_REQUIRE( res == inner );
	}
	{ // returned object turned into a value with unary +
		BOOST_REQUIRE( +blas::dot(A[1], A[2]) == inner );
	}
	{ // output parameter again, reference computed in place
		complex res;
		blas::dot(A[1], A[2], res);
		BOOST_REQUIRE( res == std::inner_product(begin(A[1]), end(A[1]), begin(A[2]), complex{0}) );
	}
	{ // returned value again, reference computed in place
		complex res = blas::dot(A[1], A[2]);
		BOOST_REQUIRE( res == std::inner_product(begin(A[1]), end(A[1]), begin(A[2]), complex{0}) );
	}
	{ // second operand conjugated
		complex res = blas::dot(A[1], blas::C(A[2]));
		BOOST_REQUIRE( res == std::inner_product(begin(A[1]), end(A[1]), begin(A[2]), complex{0}, std::plus<>{}, [](auto a, auto b){return a*conj(b);}) );
	}
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
BOOST_AUTO_TEST_CASE(blas_dot){
|
||||
// multi::array<float, 1> const A = {1.,2.,3.};
|
||||
// multi::array<float, 1> const B = {1.,2.,3.};
|
||||
// {
|
||||
// float f = blas::dot(A, B); // uses cast operator decay
|
||||
// BOOST_REQUIRE( f == std::inner_product(begin(A), end(A), begin(B), 0.f) );
|
||||
// }
|
||||
// {
|
||||
// float f2;
|
||||
// *multi::array_ptr<float, 0>(&f2, {}) = blas::dot(A, B); // uses custom copy
|
||||
// BOOST_REQUIRE( f2 == std::inner_product(begin(A), end(A), begin(B), 0.f) );
|
||||
// }
|
||||
// {
|
||||
// multi::array<float, 0> F = blas::dot(A, B);
|
||||
// BOOST_REQUIRE( F() == std::inner_product(begin(A), end(A), begin(B), 0.f) );
|
||||
// }
|
||||
|
||||
// using complex = std::complex<double>; complex const I{0, 1};
|
||||
// {
|
||||
// multi::array<complex, 1> const A = {I, 2.*I, 3.*I};
|
||||
// BOOST_TEST( blas::dot(A, A).decay() == std::inner_product(begin(A), end(A), begin(A), complex{0.}) );
|
||||
// }
|
||||
// {
|
||||
// multi::array<complex, 1> const A = {I, 1. + 2.*I, 3.*I};
|
||||
// multi::array<complex, 1> const B = {I, 1. + 2.*I, 3.*I};
|
||||
|
||||
// BOOST_TEST( blas::dot(A, B).decay() == std::inner_product(begin(A), end(A), begin(B), complex{0.}, std::plus<>{}, [](auto&& a, auto&& b){return a*b;}) );
|
||||
//// BOOST_REQUIRE(
|
||||
//// std::inner_product(begin(A), end(A), begin( B ), std::complex<double>{0.}, std::plus<>{}, [](auto&& a, auto&& b){return a*std::conj(b);})
|
||||
//// ==s
|
||||
//// std::inner_product(begin(A), end(A), begin(blas::C(B)), std::complex<double>{0.}, std::plus<>{}, [](auto&& a, auto&& b){return a*b;})
|
||||
//// );
|
||||
// BOOST_REQUIRE( blas::dot(A, blas::C(B)).decay() == std::inner_product(begin(A), end(A), begin(B), complex{0.}, std::plus<>{}, [](auto&& a, auto&& b){return a*std::conj(b);}) );
|
||||
// }
|
||||
// {
|
||||
// multi::array<complex, 1> const a = {1. + I, 2. + 3.*I, 3. + 2.*I, 4. - 9.*I};
|
||||
// multi::array<complex, 1> const b = {5. + 2.*I, 6. + 6.*I, 7. + 2.*I, 8. - 3.*I};
|
||||
// {
|
||||
// multi::array<complex, 0> c({}, complex{});
|
||||
// blas::dot(a, b, c);
|
||||
// BOOST_TEST( c() == 19. - 27.*I );
|
||||
// }
|
||||
// }
|
||||
// {
|
||||
// cuda::array<complex, 1> const acu = {1. + I, 2. + 3.*I, 3. + 2.*I, 4. - 9.*I};
|
||||
// cuda::array<complex, 1> const bcu = {5. + 2.*I, 6. + 6.*I, 7. + 2.*I, 8. - 3.*I};
|
||||
|
||||
// {
|
||||
// cuda::array<complex, 0> ccu;
|
||||
// blas::dot(acu, bcu, ccu);
|
||||
// BOOST_REQUIRE( ccu() == 19. - 27.*I );
|
||||
// }
|
||||
// BOOST_REQUIRE( blas::C(bcu)[1] == 6. - 6.*I );
|
||||
// {
|
||||
// cuda::array<complex, 0> ccu;
|
||||
// static_assert( multi::blas::is_complex_array<multi::array<complex, 1>>{}, "!" );
|
||||
// static_assert( multi::blas::is_complex_array<cuda::array<complex, 1>>{}, "!" );
|
||||
// blas::dot(acu, blas::C(bcu), ccu);
|
||||
// BOOST_REQUIRE( ccu() == 121. - 43.*I );
|
||||
// }
|
||||
// {
|
||||
// auto const ccu = blas::dot(acu, blas::C(bcu));
|
||||
// BOOST_REQUIRE( ccu() == 121. - 43.*I );
|
||||
// }
|
||||
// {
|
||||
// cuda::array<complex, 1> ccu = {1, 2, 3};
|
||||
// blas::dot(acu, blas::C(bcu), ccu[0]);
|
||||
// BOOST_REQUIRE( ccu[0] == 121. - 43.*I );
|
||||
// }
|
||||
// {
|
||||
// cuda::array<complex, 2> ccu({1, 1});
|
||||
// blas::dot(acu, blas::C(bcu), ccu[0][0]);
|
||||
// BOOST_REQUIRE( ccu[0][0] == 121. - 43.*I );
|
||||
// }
|
||||
// }
|
||||
// {
|
||||
// namespace cuda = multi::cuda;
|
||||
// cuda::managed::array<complex, 1> const amcu = {1. + I, 2. + 3.*I, 3. + 2.*I, 4. - 9.*I};
|
||||
// cuda::managed::array<complex, 1> const bmcu = {5. + 2.*I, 6. + 6.*I, 7. + 2.*I, 8. - 3.*I};
|
||||
// {
|
||||
// cuda::managed::array<complex, 0> cmcu;
|
||||
// blas::dot(amcu, bmcu, cmcu);
|
||||
// BOOST_REQUIRE( cmcu() == 19.- I*27. );
|
||||
// }
|
||||
// {
|
||||
// cuda::array<complex, 1> cmcu = {1, 2, 3};
|
||||
// blas::dot(amcu, blas::C(bmcu), cmcu[0]);
|
||||
// BOOST_REQUIRE( cmcu[0] == complex(121., -43.) );
|
||||
// }
|
||||
// }
|
||||
// {
|
||||
// using complex = std::complex<double>; complex const I{0, 1};
|
||||
// cuda::array<complex, 1> const acu = {1. + I, 2. + 3.*I, 3. + 2.*I, 4. - 9.*I};
|
||||
// cuda::array<complex, 1> const bcu = {5. + 2.*I, 6. + 6.*I, 7. + 2.*I, 8. - 3.*I};
|
||||
// {
|
||||
// cuda::array<complex, 0> ccu;
|
||||
// blas::dot(acu, bcu, ccu);
|
||||
// BOOST_REQUIRE( ccu() == 19. - 27.*I );
|
||||
// }
|
||||
// }
|
||||
// {
|
||||
// using complex = thrust::complex<double>; complex const I{0, 1};
|
||||
// cuda::managed::array<complex, 1> const acu = {1. + I, 2. + 3.*I, 3. + 2.*I, 4. - 9.*I};
|
||||
// cuda::managed::array<complex, 1> const bcu = {5. + 2.*I, 6. + 6.*I, 7. + 2.*I, 8. - 3.*I};
|
||||
// {
|
||||
// cuda::managed::array<complex, 0> ccu;
|
||||
// blas::dot(acu, bcu, ccu);
|
||||
// BOOST_REQUIRE( ccu() == 19. - 27.*I );
|
||||
// }
|
||||
// }
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
|
@ -1,277 +0,0 @@
|
|||
// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*-
|
||||
// © Alfredo A. Correa 2020
|
||||
|
||||
#define BOOST_TEST_MODULE "C++ Unit Tests for Multi BLAS gemv"
|
||||
#define BOOST_TEST_DYN_LINK
|
||||
#include<boost/test/unit_test.hpp>
|
||||
|
||||
#include "config.hpp"
|
||||
|
||||
#include "../../../adaptors/blas/gemv.hpp"
|
||||
#include "../../../array.hpp"
|
||||
|
||||
#include "../../../array.hpp"
|
||||
#include "../../../utility.hpp"
|
||||
|
||||
#include "../../blas/axpy.hpp"
|
||||
#include "../../blas/dot.hpp"
|
||||
#include "../../blas/gemm.hpp"
|
||||
#include "../../blas/nrm2.hpp"
|
||||
|
||||
#include<random>
|
||||
|
||||
namespace multi = boost::multi;
|
||||
namespace blas = multi::blas;
|
||||
|
||||
// Debugging aid: the function is deleted for every argument type, so any call
// `what(expr)` is a compile error (presumably used to make the compiler report the deduced T).
template<class T> void what(T&&) = delete;
|
||||
|
||||
/// Reference (non-BLAS) matrix-vector product, used to cross-check blas::gemv.
///
/// For each row `r` of `m` (in iteration order) writes the inner product of `r` with `x`
/// into the corresponding position of `y`.
/// @param m range of rows; each row is itself a range
/// @param x input vector, iterated in lockstep with each row
/// @param y output range; must provide at least as many elements as `m` has rows
template<class M, class VI, class VO>
void MV(M const& m, VI const& x, VO&& y){
	std::transform(
		begin(m), end(m), begin(y),
		[&x](auto&& row){
			// Accumulate in the type of `0. + a*b` rather than a hard-coded double:
			// this is still double for real elements, but also works for complex ones.
			using accumulator = decltype(0. + (*begin(row))*(*begin(x)));
			return std::inner_product(begin(row), end(row), begin(x), accumulator{});
		}
	);
}
|
||||
|
||||
// Exercises the different ways of calling gemv (iterator form, returned expression,
// assignment, construction, accumulation, in-place update) on one real 3x4 matrix.
BOOST_AUTO_TEST_CASE(multi_blas_gemv){//, *utf::tolerance(0.0001)){

	multi::array<double, 2> const A = {
		{ 9., 24., 30., 9.},
		{ 4., 10., 12., 7.},
		{14., 16., 36., 1.}
	};
	multi::array<double, 1> const x = {1.1, 2.1, 3.1, 4.1};

	{ // iterator-based interface
		multi::array<double, 1> y(size(A));
		blas::gemv_n(1., begin(A), size(A), begin(x), 0., begin(y));
		BOOST_REQUIRE_CLOSE( y[1] , 91.3                , 0.0001 );
		BOOST_REQUIRE_CLOSE( y[2] , +blas::dot(A[2], x) , 0.0001 );
	}
	{ // iterator-based interface on a transposed view of a transposed copy
		multi::array<double, 1> y(size(A));
		multi::array<double, 2> const At = ~A;
		blas::gemv_n(1., begin(~At), size(~At), begin(x), 0., begin(y));
		BOOST_REQUIRE_CLOSE( y[1] , 91.3                , 0.0001 );
		BOOST_REQUIRE_CLOSE( y[2] , +blas::dot(A[2], x) , 0.0001 );
	}
	{ // copy the returned object element-wise and compare with the reference MV
		multi::array<double, 1> y(size(A));
		auto product = blas::gemv(1., A, x);
		copy_n(product.begin(), product.size(), y.begin());
		BOOST_REQUIRE_CLOSE( y[1] , 91.3 , 0.00001 );

		multi::array<double, 1> y_reference(size(A));
		MV(A, x, y_reference);
		BOOST_REQUIRE_CLOSE( y_reference[0] , y[0] , 0.00001 );
	}
	{ // assignment to an existing array
		multi::array<double, 1> y(size(A));
		y = blas::gemv(1., A, x);
		BOOST_REQUIRE_CLOSE( y[1] , 91.3 , 0.00001 );
	}
	{ // construction from the result
		multi::array<double, 1> y = blas::gemv(1., A, x);
		BOOST_REQUIRE_CLOSE( y[1] , 91.3 , 0.00001 );
	}
	{ // accumulation into a zero-initialized array
		multi::array<double, 1> y(size(A), 0.);
		y += blas::gemv(1., A, x);
		BOOST_REQUIRE_CLOSE( y[1] , 91.3 , 0.00001 );
	}
	{ // five-argument form: y = a*M*x + b*y
		multi::array<double, 1> y = {4., 5., 6.};
		blas::gemv(1.1, A, x, 1., y);
		BOOST_REQUIRE_CLOSE( y[1] , 105.43 , 0.00001 );
	}
}
|
||||
|
||||
// Real-valued gemv: scaled in-place update, agreement with row-wise dot products,
// a one-row matrix compared against dot, and the operator shorthand on a transposed copy.
BOOST_AUTO_TEST_CASE(multi_blas_gemv_real){//, *utf::tolerance(0.0001)){
	namespace blas = multi::blas;
	using std::abs;

	multi::array<double, 2> const A = {
		{ 9., 24., 30., 9.},
		{ 4., 10., 12., 7.},
		{14., 16., 36., 1.}
	};
	multi::array<double, 1> const x = {1.1, 2.1, 3.1, 4.1};

	{
		double const alpha = 1.1;
		double const beta  = 1.2;
		multi::array<double, 1> y = {4., 5., 6.};
		blas::gemv(alpha, A, x, beta, y); // y = alpha*A*x + beta*y

		multi::array<double, 1> const expected = {214.02, 106.43, 188.37};
		BOOST_REQUIRE( abs(y[1] - expected[1]) < 2e-14 );
	}
	{
		// every component of A*x must match the dot product of the corresponding row with x
		auto y = +blas::gemv(1., A, x);
		for(int row = 0; row != 3; ++row){
			BOOST_REQUIRE_CLOSE( y[row] , +blas::dot(A[row], x) , 0.00001 );
		}
	}
	{
		// a matrix made of the single row u, applied to w, must give dot(u, w)
		multi::array<double, 1> const u = {1., 2., 3.};
		multi::array<double, 1> const w = {4., 5., 6.};
		multi::array<double, 1> const product = blas::gemv(1., multi::array<double, 2>({u}), w);
		BOOST_REQUIRE( product[0] == blas::dot(u, w) );
	}
	{
		using blas::operators::operator^;
		using blas::operators::operator-;
		using blas::operators::operator%;
		BOOST_REQUIRE_SMALL( ((~+~A)%x - A%x)^2 , 1e-13 );
	}
}
|
||||
|
||||
// gemv with real scalar coefficients applied to std::complex arrays holding real values;
// the whole result vector is compared with the expected one through the norm of the difference.
BOOST_AUTO_TEST_CASE(multi_blas_gemv_real_complex){
	namespace blas = multi::blas;
	using complex = std::complex<double>;
	using std::abs;

	multi::array<complex, 2> const A = {
		{ 9., 24., 30., 9.},
		{ 4., 10., 12., 7.},
		{14., 16., 36., 1.}
	};
	multi::array<complex, 1> const x = {1.1, 2.1, 3.1, 4.1};

	{
		double const alpha = 1.1;
		double const beta  = 1.2;
		multi::array<complex, 1> y = {4., 5., 6.};
		blas::gemv(alpha, A, x, beta, y); // y = alpha*A*x + beta*y

		multi::array<complex, 1> const expected = {214.02, 106.43, 188.37};

		using blas::operators::operator-;
		double const distance = blas::nrm2(y - expected);
		BOOST_REQUIRE_SMALL( distance , 1e-13 );
	}
}
|
||||
|
||||
#if defined(CUDA_FOUND) and CUDA_FOUND
#include<thrust/complex.h>
// Same real-coefficient gemv check as multi_blas_gemv_real_complex, with thrust::complex elements.
// Results are compared with a tolerance: they come out of floating-point arithmetic,
// so exact equality with a decimal literal is not guaranteed.
BOOST_AUTO_TEST_CASE(multi_blas_gemv_real_complex_thrust){
	namespace blas = multi::blas;
	using complex = thrust::complex<double>;
	using std::abs; // abs for thrust::complex is expected to be found through ADL
	multi::array<complex, 2> const M = {
		{ 9., 24., 30., 9.},
		{ 4., 10., 12., 7.},
		{14., 16., 36., 1.}
	};
	multi::array<complex, 1> const X = {1.1, 2.1, 3.1, 4.1};
	{
		multi::array<complex, 1> Y = {4., 5., 6.};
		double const a = 1.1;
		double const b = 1.2;
		blas::gemv(a, M, X, b, Y); // y = a*M*x + b*y

		// previously computed but never compared against Y
		multi::array<complex, 1> const Y3 = {214.02, 106.43, 188.37};
		for(int i = 0; i != 3; ++i){
			BOOST_REQUIRE( abs(Y[i] - Y3[i]) < 1e-12 );
		}
	}
	{
		multi::array<complex, 1> Y = {4., 5., 6.};
		blas::gemv(1.1, M, X, 1., Y); // y = a*M*x + b*y
		BOOST_REQUIRE( abs(Y[1] - complex{105.43, 0.}) < 1e-12 );
	}
}
#endif
|
||||
|
||||
// Complex gemv on a 3x3 matrix: product with the matrix itself, with a transposed view
// of an explicit transposed copy, and with the transposed view of the matrix.
BOOST_AUTO_TEST_CASE(multi_blas_gemv_complex){
	namespace blas = multi::blas;
	using complex = std::complex<double>; std::complex<double> const I{0, 1};
	using std::abs;

	multi::array<complex, 2> const A = {
		{2. + 3.*I, 2. + 1.*I, 1. + 2.*I},
		{4. + 2.*I, 2. + 4.*I, 3. + 1.*I},
		{7. + 1.*I, 1. + 5.*I, 0. + 3.*I}
	};
	multi::array<complex, 1> const x = {1. + 2.*I, 2. + 1.*I, 9. + 2.*I};

	// A*x, obtained from A and from ~(copy of ~A)
	multi::array<complex, 1> const Ax = {4. + 31.*I, 25. + 35.*I, -4. + 53.*I};
	BOOST_REQUIRE(( +blas::gemv(1., A, x) == Ax ));

	auto At = +~A;
	BOOST_REQUIRE(( +blas::gemv(1., ~At, x) == Ax ));

	// (~A)*x, obtained from the view ~A and from the explicit copy At
	multi::array<complex, 1> const Atx = {63. + 38.*I, -1. + 62.*I, -4. + 36.*I};
	BOOST_REQUIRE( +blas::gemv(1., ~A, x) == Atx );
	BOOST_REQUIRE( +blas::gemv(1., ~A, x) == +blas::gemv(1., At, x) );

	// disabled checks on conjugated operands, kept from the original:
//	BOOST_REQUIRE( +blas::gemv(1., *M, X) == (multi::array<complex, 1>{26. - 15.*I, 45. - 3.*I, 22. - 23.*I}) );
//	BOOST_REQUIRE( +blas::gemv(1., ~*M, X) == (multi::array<complex, 1>{83. + 6.*I, 31. - 46.*I, 18. - 26.*I}) ); // not supported by blas
}
|
||||
|
||||
// Rows of the product expression identity*B are used directly (no named temporary)
// and must reproduce the rows of B exactly.
BOOST_AUTO_TEST_CASE(multi_blas_gemv_temporary){
	using complex = std::complex<double>;

	multi::array<complex, 2> const identity = {
		{1., 0., 0.},
		{0., 1., 0.},
		{0., 0., 1.}
	};

	// 3x3 matrix filled element by element from a fixed-seed normal generator
	auto const B = []{
		multi::array<complex, 2> ret({3, 3});
		std::normal_distribution<> dist{};
		std::mt19937 gen{1}; // NOLINT(cert-msc32-c,cert-msc51-cpp): test purposes
		std::generate(
			ret.elements().begin(), ret.elements().end(),
			[&dist, &gen]{return complex{dist(gen), dist(gen)};}
		);
		return ret;
	}();

	using blas::operators::operator^;
	using blas::operators::operator-;
	using blas::operators::operator*;
	for(int row = 0; row != 3; ++row){
		BOOST_REQUIRE( (((identity*B)[row] - B[row])^2) == 0. );
	}
}
|
||||
|
||||
// Same call forms as multi_blas_gemv, but every call receives an explicit blas::context first.
BOOST_AUTO_TEST_CASE(multi_blas_gemv_context){//, *utf::tolerance(0.0001)){

	multi::array<double, 2> const A = {
		{ 9., 24., 30., 9.},
		{ 4., 10., 12., 7.},
		{14., 16., 36., 1.}
	};
	multi::array<double, 1> const x = {1.1, 2.1, 3.1, 4.1};

	blas::context ctxt;

	{ // iterator-based interface
		multi::array<double, 1> y(size(A));
		blas::gemv_n(ctxt, 1., begin(A), size(A), begin(x), 0., begin(y));
		BOOST_REQUIRE_CLOSE( y[1] , 91.3                , 0.0001 );
		BOOST_REQUIRE_CLOSE( y[2] , +blas::dot(A[2], x) , 0.0001 );
	}
	{ // iterator-based interface on a transposed view of a transposed copy
		multi::array<double, 1> y(size(A));
		multi::array<double, 2> const At = ~A;
		blas::gemv_n(ctxt, 1., begin(~At), size(~At), begin(x), 0., begin(y));
		BOOST_REQUIRE_CLOSE( y[1] , 91.3                , 0.00001 );
		BOOST_REQUIRE_CLOSE( y[2] , +blas::dot(A[2], x) , 0.00001 );
	}
	{ // copy the returned object element-wise
		multi::array<double, 1> y(size(A));
		auto&& product = blas::gemv(ctxt, 1., A, x);
		copy_n(product.begin(), product.size(), y.begin());
		BOOST_REQUIRE_CLOSE( y[1] , 91.3 , 0.00001 );
	}
	{ // assignment to an existing array
		multi::array<double, 1> y(size(A));
		y = blas::gemv(ctxt, 1., A, x);
		BOOST_REQUIRE_CLOSE( y[1] , 91.3 , 0.00001 );
	}
	{ // construction from the result
		multi::array<double, 1> y = blas::gemv(ctxt, 1., A, x);
		BOOST_REQUIRE_CLOSE( y[1] , 91.3 , 0.00001 );
	}
	{ // accumulation into a zero-initialized array
		multi::array<double, 1> y(size(A), 0.);
		y += blas::gemv(ctxt, 1., A, x);
		BOOST_REQUIRE_CLOSE( y[1] , 91.3 , 0.00001 );
	}
	{ // accumulation of a scaled product into existing values
		multi::array<double, 1> y = {4., 5., 6.};
		y += blas::gemv(ctxt, 1.1, A, x);
		BOOST_REQUIRE_CLOSE( y[1] , 105.43 , 0.00001 );
	}
}
|
|
@ -1,272 +0,0 @@
|
|||
#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*-
|
||||
$CXX $0 -o $0x -lcudart -lcublas -lboost_unit_test_framework `pkg-config --libs blas`&&$0x&&rm $0x;exit
|
||||
#endif
|
||||
// © Alfredo A. Correa 2019-2020
|
||||
|
||||
#define BOOST_TEST_MODULE "C++ Unit Tests for Multi BLAS herk"
|
||||
#define BOOST_TEST_DYN_LINK
|
||||
#include<boost/test/unit_test.hpp>
|
||||
|
||||
//#include "../../../adaptors/cuda.hpp" // multi::cuda ns
|
||||
|
||||
//#include "../../../adaptors/blas/cuda.hpp"
|
||||
#include "../../../adaptors/blas/gemm.hpp"
|
||||
#include "../../../adaptors/blas/herk.hpp"
|
||||
|
||||
#include "../../../array.hpp"
|
||||
|
||||
namespace multi = boost::multi;
|
||||
//namespace cuda = multi::cuda;
|
||||
|
||||
|
||||
// herk on a 2x3 complex matrix: checks the two off-diagonal entries of the 2x2 result,
// the value-returning overload, and agreement with gemm(A, H(A)).
BOOST_AUTO_TEST_CASE(multi_blas_herk){
	namespace blas = multi::blas;
	using complex = std::complex<double>; constexpr complex I{0, 1};

	multi::array<complex, 2> const A = {
		{ 1. + 3.*I, 3.- 2.*I, 4.+ 1.*I},
		{ 9. + 1.*I, 7.- 8.*I, 1.- 3.*I}
	};

	// the output starts filled with a sentinel so untouched entries would be noticed
	multi::array<complex, 2> result({2, 2}, 9999.);
	blas::herk(A, result);
	BOOST_REQUIRE( result[1][0] == complex(50., -49.) );
	BOOST_REQUIRE( result[0][1] == complex(50., +49.) );

	multi::array<complex, 2> const returned = blas::herk(1., A);
	BOOST_REQUIRE( result == returned );

	BOOST_REQUIRE( +blas::gemm(1., A, blas::H(A)) == blas::herk(A) );
}
|
||||
|
||||
//BOOST_AUTO_TEST_CASE(multi_blas_cuda_herk_complex){
|
||||
// namespace blas = multi::blas;
|
||||
// multi::array<complex, 2> const a = {
|
||||
// { 1. + 3.*I, 3.- 2.*I, 4.+ 1.*I},
|
||||
// { 9. + 1.*I, 7.- 8.*I, 1.- 3.*I}
|
||||
// };
|
||||
// {
|
||||
// cuda::array<complex, 2> const acu = a;
|
||||
// BOOST_REQUIRE(a == acu);
|
||||
|
||||
// cuda::array<complex, 2> ccu({2, 2}, 9999.);
|
||||
// blas::herk(acu, ccu);
|
||||
// BOOST_REQUIRE( ccu[1][0] == complex(50., -49.) );
|
||||
// BOOST_REQUIRE( ccu[0][1] == complex(50., +49.) );
|
||||
|
||||
// cuda::array<complex, 2> const ccu_copy = blas::herk(1., acu);
|
||||
// BOOST_REQUIRE( blas::herk(1., acu) == ccu );
|
||||
// }
|
||||
// {
|
||||
// cuda::managed::array<complex, 2> const amcu = a; BOOST_REQUIRE(a == amcu);
|
||||
// cuda::managed::array<complex, 2> cmcu({2, 2}, 9999.);
|
||||
|
||||
// blas::herk(1., amcu, cmcu);
|
||||
// BOOST_REQUIRE( cmcu[1][0] == complex(50., -49.) );
|
||||
// BOOST_REQUIRE( cmcu[0][1] == complex(50., +49.) );
|
||||
|
||||
// cuda::managed::array<complex, 2> const cmcu_copy = blas::herk(1., amcu);
|
||||
// BOOST_REQUIRE( cmcu_copy == cmcu );
|
||||
// }
|
||||
// {
|
||||
// multi::array<complex, 2> c({3, 3}, 9999.);
|
||||
// blas::herk(1., blas::H(a), c);
|
||||
// BOOST_REQUIRE( c[2][1] == complex(41, +2) );
|
||||
// BOOST_REQUIRE( c[1][2] == complex(41, -2) );
|
||||
|
||||
// multi::array<complex, 2> const c_copy = blas::herk(1., blas::H(a));
|
||||
// BOOST_REQUIRE( c_copy == c );
|
||||
// }
|
||||
// {
|
||||
// cuda::array<complex, 2> const acu = a;
|
||||
// BOOST_REQUIRE(a == acu);
|
||||
|
||||
// cuda::array<complex, 2> ccu({3, 3}, 9999.);
|
||||
|
||||
// blas::herk(1., blas::H(acu), ccu);
|
||||
// BOOST_REQUIRE( ccu[2][1] == complex(41, +2) );
|
||||
// BOOST_REQUIRE( ccu[1][2] == complex(41, -2) );
|
||||
|
||||
// cuda::array<complex, 2> const ccu_copy = blas::herk(1., blas::H(acu));
|
||||
// BOOST_REQUIRE( ccu_copy == ccu );
|
||||
// }
|
||||
// {
|
||||
// cuda::managed::array<complex, 2> const acu = a; BOOST_REQUIRE(a == acu);
|
||||
// cuda::managed::array<complex, 2> ccu({3, 3}, 9999.);
|
||||
|
||||
// blas::herk(1., blas::H(acu), ccu);
|
||||
// BOOST_REQUIRE( ccu[2][1] == complex(41, +2) );
|
||||
// BOOST_REQUIRE( ccu[1][2] == complex(41, -2) );
|
||||
|
||||
// cuda::managed::array<complex, 2> const ccu_copy = blas::herk(1., blas::H(acu));
|
||||
// BOOST_REQUIRE( ccu_copy == ccu );
|
||||
// }
|
||||
//}
|
||||
|
||||
//BOOST_AUTO_TEST_CASE(multi_blas_cuda_herk_n_complex){
|
||||
// namespace blas = multi::blas;
|
||||
// multi::array<complex, 2> const a = {
|
||||
// { 1. + 3.*I, 3.- 2.*I, 4.+ 1.*I},
|
||||
// { 9. + 1.*I, 7.- 8.*I, 1.- 3.*I}
|
||||
// };
|
||||
// blas::context ctxt;
|
||||
// {
|
||||
// multi::array<complex, 2> c({2, 2}, 9999.);
|
||||
// blas::herk_n(ctxt, blas::filling::upper, 1., a.begin(), a.size(), 0., c.begin());
|
||||
// BOOST_TEST_REQUIRE( c[0][1] == complex(50., +49.) );
|
||||
// BOOST_TEST_REQUIRE( c[1][0] == 9999. );
|
||||
// }
|
||||
// {
|
||||
// multi::array<complex, 2> c({2, 2}, 9999.);
|
||||
// blas::herk_n(ctxt, blas::filling::lower, 1., a.begin(), a.size(), 0., c.begin());
|
||||
// BOOST_TEST_REQUIRE( c[0][1] == 9999. );
|
||||
// BOOST_TEST_REQUIRE( c[1][0] == complex(50., -49.) );
|
||||
// }
|
||||
// {
|
||||
// multi::array<complex, 2> c({2, 2}, 9999.);
|
||||
// blas::herk_n(ctxt, blas::filling::lower, 1., a.begin(), a.size(), 0., c.begin());
|
||||
// blas::herk_n(ctxt, blas::filling::upper, 1., a.begin(), a.size(), 0., c.begin());
|
||||
// BOOST_TEST_REQUIRE( c[0][1] == complex(50., +49.) );
|
||||
// BOOST_TEST_REQUIRE( c[1][0] == complex(50., -49.) );
|
||||
// }
|
||||
// {
|
||||
// multi::array<complex, 2> c({3, 3}, 9999.);
|
||||
// blas::herk_n(ctxt, blas::filling::lower, 1., blas::H(a).begin(), blas::H(a).size(), 0., c.begin());
|
||||
// BOOST_TEST_REQUIRE( c[1][2] == 9999. );
|
||||
// BOOST_TEST_REQUIRE( c[2][1] == complex(41., +2.) );
|
||||
// }
|
||||
// {
|
||||
// multi::array<complex, 2> c({3, 3}, 9999.);
|
||||
// blas::herk_n(ctxt, blas::filling::upper, 1., blas::H(a).begin(), blas::H(a).size(), 0., c.begin());
|
||||
// BOOST_TEST_REQUIRE( c[1][2] == complex(41., -2.) );
|
||||
// BOOST_TEST_REQUIRE( c[2][1] == 9999. );
|
||||
// }
|
||||
// {
|
||||
// multi::array<complex, 2> c({3, 3}, 9999.);
|
||||
// blas::herk_n(ctxt, blas::filling::lower, 1., blas::H(a).begin(), blas::H(a).size(), 0., c.begin());
|
||||
// blas::herk_n(ctxt, blas::filling::upper, 1., blas::H(a).begin(), blas::H(a).size(), 0., c.begin());
|
||||
// BOOST_TEST_REQUIRE( c[1][2] == complex(41., -2.) );
|
||||
// BOOST_TEST_REQUIRE( c[2][1] == complex(41., +2.) );
|
||||
// }
|
||||
// {
|
||||
// multi::array<complex, 2> c({3, 3}, 9999.);
|
||||
// blas::herk_n(ctxt, 1., blas::H(a).begin(), blas::H(a).size(), c.begin());
|
||||
// BOOST_TEST_REQUIRE( c[1][2] == complex(41., -2.) );
|
||||
// BOOST_TEST_REQUIRE( c[2][1] == complex(41., +2.) );
|
||||
// }
|
||||
//}
|
||||
|
||||
//BOOST_AUTO_TEST_CASE(multi_blas_cuda_herk_row){
|
||||
// namespace blas = multi::blas;
|
||||
// auto const a = []{
|
||||
// multi::array<complex, 2> ret({1, 100});
|
||||
// std::generate(begin(ret[0]), end(ret[0]), [c=complex{1, 2}]()mutable{return c+=2.;});
|
||||
// return ret;
|
||||
// }();
|
||||
// BOOST_REQUIRE( size(a) == 1 );
|
||||
// {
|
||||
// BOOST_REQUIRE( +blas::gemm(1., a, blas::H(a)) == blas::herk(a) );
|
||||
|
||||
// cuda::array<complex, 2> const agpu = a;
|
||||
// BOOST_REQUIRE( blas::gemm(agpu, blas::H(agpu)) == blas::herk(agpu) );
|
||||
|
||||
// cuda::managed::array<complex, 2> const amng = a;
|
||||
// BOOST_REQUIRE( blas::gemm(amng, blas::H(amng)) == blas::herk(amng) );
|
||||
// }
|
||||
//}
|
||||
|
||||
//#if 1
|
||||
//BOOST_AUTO_TEST_CASE(multi_blas_cuda_herk_real){
|
||||
// namespace blas = multi::blas;
|
||||
// multi::array<double, 2> const a = {
|
||||
// { 1., 3., 4.},
|
||||
// { 9., 7., 1.}
|
||||
// };
|
||||
// {
|
||||
// multi::array<double, 2> c({2, 2}, 9999);
|
||||
// blas::herk(1., a, c);
|
||||
// BOOST_REQUIRE( c[1][0] == 34 );
|
||||
// BOOST_REQUIRE( c[0][1] == 34 );
|
||||
|
||||
// // multi::array<double, 2> const c_copy = blas::herk(1., a);
|
||||
// // BOOST_REQUIRE( c == c_copy );
|
||||
// }
|
||||
// {
|
||||
// cuda::array<double, 2> acu = a;
|
||||
// BOOST_REQUIRE(a == acu);
|
||||
|
||||
// cuda::array<double, 2> ccu({2, 2}, 9999.);
|
||||
|
||||
// // blas::herk(acu, ccu);
|
||||
// // BOOST_REQUIRE( ccu[1][0] == 34 );
|
||||
// // BOOST_REQUIRE( ccu[0][1] == 34 );
|
||||
|
||||
// // cuda::array<double, 2> const ccu_copy = blas::herk(1., acu);
|
||||
// // BOOST_REQUIRE( herk(1., acu) == ccu );
|
||||
// }
|
||||
|
||||
//}
|
||||
//#endif
|
||||
|
||||
#if 0
|
||||
{
|
||||
cuda::array<double, 2> const acu = a; BOOST_REQUIRE(a == acu);
|
||||
// cuda::array<double, 2> ccu({2, 2}, 9999.);
|
||||
using multi::blas::herk;
|
||||
cuda::array<double, 2> ccu = herk(acu);
|
||||
BOOST_REQUIRE( ccu[1][0] == 34 );
|
||||
BOOST_REQUIRE( ccu[0][1] == 34 );
|
||||
|
||||
cuda::array<double, 2> const ccu_copy = herk(1., acu);
|
||||
BOOST_REQUIRE( herk(1., acu) == ccu );
|
||||
}
|
||||
{
|
||||
cuda::managed::array<double, 2> const amcu = a; BOOST_REQUIRE(a == amcu);
|
||||
cuda::managed::array<double, 2> cmcu({2, 2}, 9999.);
|
||||
using multi::blas::herk;
|
||||
herk(1., amcu, cmcu);
|
||||
BOOST_REQUIRE( cmcu[1][0] == 34 );
|
||||
BOOST_REQUIRE( cmcu[0][1] == 34 );
|
||||
|
||||
cuda::managed::array<double, 2> const cmcu_copy = herk(1., amcu);
|
||||
BOOST_REQUIRE( cmcu_copy == cmcu );
|
||||
}
|
||||
if(0){
|
||||
multi::array<double, 2> c({3, 3}, 9999.);
|
||||
using multi::blas::herk;
|
||||
using multi::blas::hermitized;
|
||||
herk(1., hermitized(a), c);
|
||||
BOOST_REQUIRE( c[2][1] == 19 );
|
||||
BOOST_REQUIRE( c[1][2] == 19 );
|
||||
|
||||
multi::array<double, 2> const c_copy = herk(1., hermitized(a));
|
||||
BOOST_REQUIRE( c_copy == c );
|
||||
}
|
||||
if(0){
|
||||
cuda::array<double, 2> const acu = a; BOOST_REQUIRE(acu == a);
|
||||
cuda::array<double, 2> ccu({3, 3}, 9999.);
|
||||
using multi::blas::herk;
|
||||
using multi::blas::hermitized;
|
||||
herk(1., hermitized(acu), ccu);
|
||||
BOOST_REQUIRE( ccu[2][1] == 19 );
|
||||
BOOST_REQUIRE( ccu[1][2] == 19 );
|
||||
|
||||
cuda::array<double, 2> const c_copy = herk(1., hermitized(a));
|
||||
BOOST_REQUIRE( c_copy == ccu );
|
||||
}
|
||||
if(0){
|
||||
cuda::managed::array<double, 2> const amcu = a; BOOST_REQUIRE(amcu == a);
|
||||
cuda::managed::array<double, 2> cmcu({3, 3}, 9999.);
|
||||
using multi::blas::herk;
|
||||
using multi::blas::hermitized;
|
||||
herk(1., hermitized(amcu), cmcu);
|
||||
BOOST_REQUIRE( cmcu[2][1] == 19 );
|
||||
BOOST_REQUIRE( cmcu[1][2] == 19 );
|
||||
|
||||
cuda::managed::array<double, 2> const c_copy = herk(1., hermitized(a));
|
||||
BOOST_REQUIRE( c_copy == cmcu );
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
|
@ -1,49 +0,0 @@
|
|||
#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*-
|
||||
$CXX $0 -o $0x `pkg-config --libs blas` -lcudart -lcublas -lboost_unit_test_framework&&$0x&&rm $0x;exit
|
||||
#endif
|
||||
// © Alfredo A. Correa 2019-2020
|
||||
|
||||
#define BOOST_TEST_MODULE "C++ Unit Tests for Multi BLAS/cuBLAS iamax"
|
||||
#define BOOST_TEST_DYN_LINK
|
||||
#include<boost/test/unit_test.hpp>
|
||||
|
||||
#include "../../blas/iamax.hpp"
|
||||
|
||||
#include "../../../array.hpp"
|
||||
#include "../../../adaptors/cuda.hpp"
|
||||
#include "../../../adaptors/blas/cuda.hpp"
|
||||
|
||||
#include<complex>
|
||||
|
||||
using std::cout;
|
||||
namespace multi = boost::multi;
|
||||
namespace blas = multi::blas;
|
||||
|
||||
using complex = std::complex<double>; constexpr complex I{0, 1};
|
||||
|
||||
// iamax on a row of a complex matrix must agree with std::max_element
// when elements are ordered by |real| + |imag|.
BOOST_AUTO_TEST_CASE(multi_adaptors_blas_iamax){
	using blas::iamax;

	multi::array<complex, 2> const A = {
		{1. + 2.*I, 2.       , 3.        ,  4.},
		{5.       , 6. + 3.*I, 7.        ,  8.},
		{9.       , 10.      , 11.+ 4.*I , 12.}
	};

	auto const less_by_abs_sum = [](auto const& lhs, auto const& rhs){
		using std::abs;
		auto const lhs_sum = abs(real(lhs)) + abs(imag(lhs));
		auto const rhs_sum = abs(real(rhs)) + abs(imag(rhs));
		return lhs_sum < rhs_sum;
	};

	// position of the maximum, then the value found at that position
	BOOST_REQUIRE( iamax(A[1]) == std::max_element(begin(A[1]), end(A[1]), less_by_abs_sum) - begin(A[1]) );
	BOOST_REQUIRE( A[1][iamax(A[1])] == *std::max_element(begin(A[1]), end(A[1]), less_by_abs_sum) );
}
|
||||
|
||||
// Same data as the host test, stored in a multi::cuda::array; row 1 is expected to give index 1.
BOOST_AUTO_TEST_CASE(multi_adaptors_blas_iamax_cuda){
	using blas::iamax;

	multi::cuda::array<complex, 2> const Agpu = {
		{1. + 2.*I, 2.       , 3.        ,  4.},
		{5.       , 6. + 3.*I, 7.        ,  8.},
		{9.       , 10.      , 11.+ 4.*I , 12.}
	};

	BOOST_REQUIRE( iamax(Agpu[1]) == 1 );
}
|
||||
|
||||
|
|
@ -1,47 +0,0 @@
|
|||
#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4-*-
|
||||
$CXX $0 -o $0x `pkg-config --libs blas` -lcudart -lcublas -lboost_unit_test_framework&&$0x&&rm $0x;exit
|
||||
#endif
|
||||
// © Alfredo A. Correa 2019-2020
|
||||
|
||||
#define BOOST_TEST_MODULE "C++ Unit Tests for Multi cuBLAS nrm2"
|
||||
#define BOOST_TEST_DYN_LINK
|
||||
#include<boost/test/unit_test.hpp>
|
||||
|
||||
#include "../../blas.hpp"
|
||||
#include "../../../array.hpp"
|
||||
#include "../../../adaptors/cuda.hpp"
|
||||
#include "../../../adaptors/blas/cuda.hpp"
|
||||
|
||||
#include<complex>
|
||||
|
||||
namespace multi = boost::multi;
|
||||
|
||||
using complex = std::complex<double>; constexpr complex I{0,1};
|
||||
|
||||
// nrm2 on host and CUDA arrays.
BOOST_AUTO_TEST_CASE(multi_blas_nrm2){
	namespace blas = multi::blas;
	multi::array<double, 2> const A = {
		{1., 2., 3., 4.},
		{5., 6., 7., 8.},
		{9., 10., 11., 12.}
	};
	{
		// nrm2 and sqrt(dot) come from different routines, so they are compared
		// with a relative tolerance instead of exact equality.
		double const norm     = blas::nrm2(A[1]);
		double const expected = std::sqrt(blas::dot(A[1], A[1]));
		BOOST_REQUIRE_CLOSE( norm , expected , 1e-10 );
	}
	{
		// named Z so that it does not shadow the real matrix A above
		multi::array<complex, 1> Z = {1.+I, 3.+2.*I, 3.+4.*I};
		BOOST_REQUIRE( blas::dot(Z, Z)() == (1.+I)*(1.+I) + (3.+2.*I)*(3.+2.*I) + (3.+4.*I)*(3.+4.*I) );
	}
	{
		// result written into a zero-dimensional CUDA array; no value is checked here
		multi::cuda::array<double, 2> const Agpu = A;
		multi::cuda::static_array<double, 0> n = 1.2;
		blas::nrm2(Agpu[1], n);
	}
	{
		multi::cuda::array<double, 2> Agpu = A;
		double n = 99.;
		blas::nrm2(Agpu[1], n); // cuda supports putting scalar results in CPU
		double n2{blas::nrm2(Agpu[1])};
		BOOST_REQUIRE( n == n2 );
	}
}
|
||||
|
|
@ -1,253 +0,0 @@
|
|||
// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*-
|
||||
// © Alfredo A. Correa 2019-2021
|
||||
|
||||
#define BOOST_TEST_MODULE "C++ Unit Tests for Multi BLAS numeric"
|
||||
#define BOOST_TEST_DYN_LINK
|
||||
#include<boost/test/unit_test.hpp>
|
||||
|
||||
#include "config.hpp"
|
||||
|
||||
#include "../../../array.hpp"
|
||||
#include "../../blas/numeric.hpp"
|
||||
#include "../../blas/operations.hpp"
|
||||
|
||||
#include<complex>
|
||||
|
||||
namespace multi = boost::multi;
|
||||
|
||||
// blas::real / blas::imag applied to a 1D complex array give its real / imaginary parts.
BOOST_AUTO_TEST_CASE(multi_adaptors_blas_test_numeric_imag){
	namespace blas = multi::blas;
	using complex = std::complex<double>; constexpr complex I{0, 1};

	multi::array<complex, 1> z = { 1. + 2.*I, 3. + 5.*I, 9. + 2.*I };

	BOOST_REQUIRE( blas::imag(z)[2] == 2. );
	BOOST_REQUIRE( blas::real(z)[2] == 9. );
}
|
||||
|
||||
// Conjugating pointers and conjugated views: pointer assignment from mutable to const data,
// the is_complex_array / is_conjugated traits, element access, and conj(conj(B)) == B.
BOOST_AUTO_TEST_CASE(multi_blas_numeric_real_conjugated){
	namespace blas = multi::blas;
	using complex = std::complex<double>; complex const I{0, 1};

	multi::array<complex, 2> B = {
		{1. - 3.*I, 6. + 2.*I},
		{8. + 2.*I, 2. + 4.*I},
		{2. - 1.*I, 1. + 1.*I}
	};
	BOOST_REQUIRE( B[0][0] == 1. - 3.*I );

	multi::array<complex, 2> const Bconst = {
		{1. - 3.*I, 6. + 2.*I},
		{8. + 2.*I, 2. + 4.*I},
		{2. - 1.*I, 1. + 1.*I}
	};
	BOOST_REQUIRE( Bconst[0][0] == 1. - 3.*I );

	// a conjugater built over mutable data is assigned to one typed after const data
	auto mutable_conjugater = blas::make_conjugater(B.data_elements());
	decltype(blas::make_conjugater(Bconst.data_elements())) const_conjugater;
	const_conjugater = mutable_conjugater;
	BOOST_REQUIRE( *const_conjugater == 1. + 3.*I );

//	static_assert( multi::blas::is_complex_array<multi::array<thrust::complex<double>, 2>>{}, "!");
	static_assert(     blas::is_complex_array<decltype(B)>{} );
	static_assert( not blas::is_conjugated   <decltype(B)>{} );

	auto&& conjugated = blas::conj(B);
	static_assert( blas::is_conjugated   <decltype(conjugated)>{} );
	static_assert( blas::is_complex_array<decltype(conjugated)>{} );

	BOOST_REQUIRE( conjugated[0][0] == 1. + 3.*I );
	BOOST_REQUIRE( imag(*base(conjugated)) == +3 );
//	BOOST_TEST_REQUIRE( base(Bconj)->imag() == +3 );
	BOOST_REQUIRE( rotated(conjugated)[1][0] == conjugated[0][1] );
//	BOOST_REQUIRE( base(Bconj) == -3.*I );

	// conjugating twice gives back the original; element-wise it matches std::conj
	BOOST_REQUIRE( blas::conj(conjugated) == B );
	BOOST_REQUIRE( blas::conj(B)[1][0] == std::conj(B[1][0]) );
}
|
||||
|
||||
// conj / transposed / hermitized / real / imag / real_doubled views of a 3x2 complex array.
BOOST_AUTO_TEST_CASE(multi_blas_numeric_decay){

	using complex = std::complex<double>; complex const I{0, 1};

	multi::array<complex, 2> B = {
		{ 1. - 3.*I, 6. + 2.*I},
		{ 8. + 2.*I, 2. + 4.*I},
		{ 2. - 1.*I, 1. + 1.*I}
	};

	namespace blas = multi::blas;
	multi::array<complex, 2> conjB = blas::conj(B);

	BOOST_REQUIRE( conjB[2][1] == std::conj(B[2][1]) );
	BOOST_REQUIRE( blas::conj(B)[2][1] == std::conj(B[2][1]) );

	BOOST_REQUIRE( blas::transposed(B)[1][2] == B[2][1] );
	BOOST_REQUIRE( blas::transposed(B) == ~B );

	// B is 3x2, so hermitized(B) is 2x3: the valid pairing is H(B)[1][2] <-> conj(B)[2][1].
	// The previous indices ([2][1] and [1][2] respectively) were out of range on both sides.
	BOOST_REQUIRE( blas::hermitized(B)[1][2] == blas::conj(B)[2][1] );
	BOOST_REQUIRE( blas::hermitized(B) == blas::conj(blas::transposed(B)) );

	BOOST_REQUIRE( blas::real(B)[2][1] == std::real(B[2][1]) );
	BOOST_REQUIRE( blas::imag(B)[2][1] == std::imag(B[2][1]) );

	// real_doubled is expected to expose each complex element as two consecutive reals
	multi::array<double, 2> B_real_doubled = {
		{ 1., -3., 6., 2.},
		{ 8.,  2., 2., 4.},
		{ 2., -1., 1., 1.}
	};
	BOOST_REQUIRE( blas::real_doubled(B) == B_real_doubled );

}
|
||||
|
||||
#if defined(CUDA_FOUND) and CUDA_FOUND
#include<thrust/complex.h>

// blas::conj decays into a plain array of thrust::complex holding the conjugated elements.
BOOST_AUTO_TEST_CASE(multi_blas_numeric_decay_thrust){

	using complex = thrust::complex<double>; complex const I{0, 1};

	multi::array<complex, 2> B = {
		{1. - 3.*I, 6. + 2.*I},
		{8. + 2.*I, 2. + 4.*I},
		{2. - 1.*I, 1. + 1.*I}
	};

	namespace blas = multi::blas;
	multi::array<complex, 2> conjB = blas::conj(B);
	// B is 3x2: [2][1] is the last element; the previous index [1][2] was out of range
	BOOST_REQUIRE( conjB[2][1] == conj(B[2][1]) );
}
#endif
|
||||
|
||||
//#if defined(CUDA_FOUND) and CUDA_FOUND
|
||||
//#include "../../blas/cuda.hpp"
|
||||
//#include "../../../adaptors/cuda.hpp"
|
||||
//namespace cuda = multi::cuda;
|
||||
|
||||
//BOOST_AUTO_TEST_CASE(multi_adaptors_blas_test_numeric_imag_cuda){
|
||||
// cuda::array<complex, 1> a = { 1. + 2.*I, 3. + 5.*I, 9. + 2.*I };
|
||||
// namespace blas = multi::blas;
|
||||
// BOOST_REQUIRE( blas::imag(a)[2] == 2. );
|
||||
//}
|
||||
|
||||
//BOOST_AUTO_TEST_CASE(multi_adaptors_blas_test_numeric_imag_cuda_managed){
|
||||
// cuda::managed::array<complex, 1> a = { 1. + 2.*I, 3. + 5.*I, 9. + 2.*I };
|
||||
// using multi::blas::imag;
|
||||
// BOOST_REQUIRE( imag(a)[2] == 2. );
|
||||
//}
|
||||
|
||||
//BOOST_AUTO_TEST_CASE(multi_adaptors_blas_test_numeric_hermitized_cuda){
|
||||
// cuda::array<complex, 2> const a = {
|
||||
// { 1. + 2.*I, 3. + 5.*I, 9. + 2.*I },
|
||||
// { 1. + 2.*I, 3. + 5.*I, 9. + 2.*I },
|
||||
// { 1. + 2.*I, 3. + 5.*I, 9. + 2.*I },
|
||||
// };
|
||||
// using multi::blas::hermitized;
|
||||
// hermitized(a);
|
||||
//}
|
||||
//#endif
|
||||
|
||||
// Real/imaginary part views compared against explicit real arrays, plus read and write
// access through a hermitized view.
BOOST_AUTO_TEST_CASE(multi_blas_numeric_real_imag_part){
	namespace blas = multi::blas;
	using complex = std::complex<double>; complex const I{0., 1.};

	// a real array used to construct a complex one
	multi::array<double, 2> A = {
		{1., 3., 4.},
		{9., 7., 1.}
	};
	multi::array<complex, 2> Acplx = A;
	BOOST_REQUIRE( Acplx[1][1] == A[1][1] );

	multi::array<complex, 2> B = {
		{1. - 3.*I, 6. + 2.*I},
		{8. + 2.*I, 2. + 4.*I},
		{2. - 1.*I, 1. + 1.*I}
	};
	multi::array<double, 2> Breal = {
		{1., 6.},
		{8., 2.},
		{2., 1.}
	};
	multi::array<double, 2> Bimag = {
		{-3., +2.},
		{+2., +4.},
		{-1., +1.}
	};

	using multi::blas::imag;
	using multi::blas::real;

	// comparison is checked with the view on either side
	BOOST_REQUIRE( Breal == real(B) );
	BOOST_REQUIRE( real(B) == Breal );
	BOOST_REQUIRE( imag(B) == Bimag );

	BOOST_REQUIRE( B[1][0] == 8. + 2.*I );
	BOOST_REQUIRE( B[1][0].imag() == 2. );

	BOOST_REQUIRE( blas::hermitized(B)[1][2] == std::conj( B[2][1] ) );

	// writing through the hermitized view stores the conjugate in B
	blas::hermitized(B)[1][2] = 20. + 30.*I;
	BOOST_REQUIRE( B[2][1] == 20. - 30.*I );

//	using multi::blas::hermitized;
//	BOOST_REQUIRE( hermitized(B)[0][1] == 8. - 2.*I );
//	BOOST_REQUIRE( imag(hermitized(B)[0][1]) == -2. );
}
|
||||
|
||||
#if 0
|
||||
|
||||
namespace cuda = multi::cuda;
|
||||
{
|
||||
cuda::array<complex, 2> Bgpu = B;
|
||||
using multi::blas::imag;
|
||||
BOOST_REQUIRE( imag(Bgpu)[1][1] == imag(B)[1][1] );
|
||||
BOOST_REQUIRE( real(Bgpu)[1][1] == real(B)[1][1] );
|
||||
}
|
||||
{
|
||||
cuda::managed::array<complex, 2> Bgpu = B;
|
||||
using multi::blas::imag;
|
||||
BOOST_REQUIRE( imag(Bgpu)[1][1] == imag(B)[1][1] );
|
||||
BOOST_REQUIRE( real(Bgpu)[1][1] == real(B)[1][1] );
|
||||
}
|
||||
|
||||
multi::array_ref<double, 2> rB(reinterpret_cast<double*>(data_elements(B)), {size(B), 2*size(*begin(B))});
|
||||
|
||||
auto&& Bconj = multi::static_array_cast<complex, multi::blas::detail::conjugater<complex*>>(B);
|
||||
assert( size(Bconj) == size(B) );
|
||||
assert( conj(B[1][2]) == Bconj[1][2] );
|
||||
|
||||
// auto&& BH = multi::blas::hermitized(B);
|
||||
// assert( BH[1][2] == conj(B[2][1]) );
|
||||
// std::cout << BH[1][2] << " " << B[2][1] << std::endl;
|
||||
|
||||
// auto&& BH1 = multi::static_array_cast<complex, multi::blas::detail::conjugater<complex*>>(rotated(B));
|
||||
// auto&& BH2 = rotated(multi::static_array_cast<complex, multi::blas::detail::conjugater<complex*>>(B));
|
||||
|
||||
// what( BH1, BH2 );
|
||||
// using multi::blas::imag;
|
||||
|
||||
// assert( real(A)[1][2] == 1. );
|
||||
// assert( imag(A)[1][2] == -3. );
|
||||
|
||||
// print(A) <<"--\n";
|
||||
// print(real(A)) <<"--\n";
|
||||
// print(imag(A)) <<"--\n";
|
||||
|
||||
multi::array<complex, 2> C({2, 2});
|
||||
multi::array_ref<double, 2> rC(reinterpret_cast<double*>(data_elements(C)), {size(C), 2*size(*begin(C))});
|
||||
|
||||
// gemm('T', 'T', 1., A, B, 0., C);
|
||||
// gemm('T', 'T', 1., A, B, 0., C);
|
||||
// gemm('T', 'T', 1., real(A), B, 0., C);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
|
@ -1,51 +0,0 @@
|
|||
#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*-
|
||||
$CXX $0 -o $0x `pkg-config --libs blas` -lcudart -lcublas -lboost_unit_test_framework&&$0x&&rm $0x;exit
|
||||
#endif
|
||||
// © Alfredo A. Correa 2019-2020
|
||||
|
||||
#define BOOST_TEST_MODULE "C++ Unit Tests for Multi BLAS operations and cuda"
|
||||
#define BOOST_TEST_DYN_LINK
|
||||
#include<boost/test/unit_test.hpp>
|
||||
|
||||
#include "../../blas/dot.hpp"
|
||||
|
||||
#include "../../../array.hpp"
|
||||
#include "../../blas/cuda.hpp"
|
||||
|
||||
#include "../../../adaptors/cuda.hpp"
|
||||
#include "../../../complex.hpp"
|
||||
|
||||
#include<complex>
|
||||
#include<cassert>
|
||||
#include<numeric>
|
||||
|
||||
using std::cout;
|
||||
namespace multi = boost::multi;
|
||||
namespace blas = multi::blas;
|
||||
|
||||
using complex = std::complex<double>; constexpr complex I{0, 1};
|
||||
|
||||
// Checks that element 1 of blas::C(x) equals conj(x[1]), first for a
// multi::array and then for a cuda::array holding the same values.
BOOST_AUTO_TEST_CASE(blas_conjugated_cpu){
	namespace cuda = multi::cuda;

	multi::array<complex, 1> const host = {5. + 2.*I, 6. + 6.*I, 7. + 2.*I, 8. - 3.*I};
	BOOST_REQUIRE( blas::C(host)[1] == conj(host[1]) );

	cuda::array<complex, 1> const device = {5. + 2.*I, 6. + 6.*I, 7. + 2.*I, 8. - 3.*I};
	BOOST_REQUIRE( blas::C(device)[1] == conj(device[1]) );
}
|
||||
|
||||
// Placeholder test case: the whole body is disabled with `#if 0`, so nothing is checked when it runs.
BOOST_AUTO_TEST_CASE(blas_conjugated_gpu){
|
||||
#if 0
|
||||
cuda::array<complex, 1> const acu = {1. + I, 2. + 3.*I, 3. + 2.*I, 4. - 9.*I};
|
||||
cuda::array<complex, 1> const bcu = {5. + 2.*I, 6. + 6.*I, 7. + 2.*I, 8. - 3.*I};
|
||||
|
||||
{
|
||||
cuda::array<complex, 0> ccu;
|
||||
blas::dot(acu, bcu, ccu);
|
||||
BOOST_REQUIRE( ccu() == 19. - 27.*I );
|
||||
}
|
||||
// NOTE(review): bcu[1] is 6. + 6.*I while 2. - 3.*I is the conjugate of acu[1]; confirm the intended operand before re-enabling.
BOOST_REQUIRE( blas::C(bcu)[1] == 2. - 3.*I );
|
||||
#endif
|
||||
}
|
||||
|
|
@ -1,153 +0,0 @@
|
|||
// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*-
|
||||
// © Alfredo A. Correa 2019-2020
|
||||
|
||||
#define BOOST_TEST_MODULE "C++ Unit Tests for Multi BLAS scal"
|
||||
#define BOOST_TEST_DYN_LINK
|
||||
#include<boost/test/unit_test.hpp>
|
||||
|
||||
#include "../../blas/scal.hpp"
|
||||
|
||||
#include "../../../array.hpp"
|
||||
|
||||
#include<complex>
|
||||
|
||||
namespace multi = boost::multi;
|
||||
namespace blas = multi::blas;
|
||||
|
||||
// Scales the last row of a 3x4 matrix by 2 through the (factor, iterator, count)
// form blas::scal_n and checks that only that row changed.
BOOST_AUTO_TEST_CASE(multi_adaptors_blas_test_scal_n){
	multi::array<double, 2> mat = {
		{1.,  2.,  3.,  4.},
		{5.,  6.,  7.,  8.},
		{9., 10., 11., 12.}
	};
	BOOST_REQUIRE( (mat[0][2] == 3.) && (mat[2][2] == 11.) );

	blas::scal_n(2., mat[2].begin(), mat[2].size());

	// row 0 is untouched, row 2 is doubled
	BOOST_REQUIRE( mat[0][2] == 3. && mat[2][2] == 11.*2. );
}
|
||||
|
||||
// Scales the last row of a 3x4 matrix by 2 through the (factor, first, last)
// iterator form of blas::scal and checks that only that row changed.
BOOST_AUTO_TEST_CASE(multi_adaptors_blas_test_scal_it){
	multi::array<double, 2> mat = {
		{1.,  2.,  3.,  4.},
		{5.,  6.,  7.,  8.},
		{9., 10., 11., 12.}
	};
	BOOST_REQUIRE( mat[0][2] ==  3. );
	BOOST_REQUIRE( mat[2][2] == 11. );

	blas::scal(2., mat[2].begin(), mat[2].end());

	// row 0 is untouched, row 2 is doubled
	BOOST_REQUIRE( mat[0][2] == 3. );
	BOOST_REQUIRE( mat[2][2] == 11.*2. );
}
|
||||
|
||||
// Deleted catch-all function template: any call what(expr) is a compile error.
// Presumably a debugging aid so that the compiler diagnostic shows the deduced type T.
template<class T> void what(T&&) = delete;
|
||||
|
||||
// Exercises the range form blas::scal(factor, row): a factor of 1 must compare
// equal to (and have the same address as) the row itself; a factor of 2 doubles
// the row in place.
BOOST_AUTO_TEST_CASE(multi_adaptors_blas_test_scal_real){
	multi::array<double, 2> mat = {
		{1.,  2.,  3.,  4.},
		{5.,  6.,  7.,  8.},
		{9., 10., 11., 12.}
	};
	BOOST_REQUIRE( mat[0][2] ==  3. );
	BOOST_REQUIRE( mat[2][2] == 11. );

	// scaling by one: same values, same address, and a copy (unary +) with equal values
	BOOST_REQUIRE(  blas::scal(1., mat[2]) ==  mat[2] );
	BOOST_REQUIRE( &blas::scal(1., mat[2]) == &mat[2] );
	BOOST_REQUIRE( +blas::scal(1., mat[2]) ==  mat[2] );

	blas::scal(2., mat[2]);
	BOOST_REQUIRE( mat[0][2] == 3. && mat[2][2] == 11.*2. );

	// the address identity still holds after the in-place scaling
	BOOST_REQUIRE( &blas::scal(1., mat[2]) == &mat[2] );
}
|
||||
|
||||
//BOOST_AUTO_TEST_CASE(multi_adaptors_blas_test_scal_complex_real_case){
|
||||
// using complex = std::complex<double>;
|
||||
// multi::array<complex, 2> A = {
|
||||
// {1., 2., 3., 4.},
|
||||
// {5., 6., 7., 8.},
|
||||
// {9., 10., 11., 12.}
|
||||
// };
|
||||
// BOOST_TEST( A[0][2] == 3. );
|
||||
// BOOST_TEST( A[2][2] == 11. );
|
||||
|
||||
// blas::scal(2., A[2]); // zscal (2. is promoted to complex later)
|
||||
// BOOST_TEST( A[0][2] == 3. );
|
||||
// BOOST_REQUIRE( A[2][2] == 11.*2. );
|
||||
|
||||
// blas::scal(1./2, A[2]); // zdscal
|
||||
// BOOST_TEST( A[0][2] == 3. );
|
||||
// BOOST_TEST( A[2][1] == 10. );
|
||||
// BOOST_TEST( A[2][2] == 11. );
|
||||
|
||||
//}
|
||||
|
||||
//BOOST_AUTO_TEST_CASE(multi_adaptors_blas_test_scal_complex){
|
||||
// multi::array<complex, 2> A = {
|
||||
// {1. + 2.*I, 2. + 3.*I, 3. + 4.*I, 4. + 5.*I},
|
||||
// {5. + 2.*I, 6. + 3.*I, 7. + 4.*I, 8. + 5.*I},
|
||||
// {1. + 1.*I, 2. + 2.*I, 3. + 3.*I, 4. + 4.*I}
|
||||
// };
|
||||
// blas::scal(2., A[1]); // zscal (2. is promoted to complex later)
|
||||
// BOOST_TEST( A[1][2] == 14. + 8.*I );
|
||||
|
||||
// blas::scal(3.*I, A[0]);
|
||||
// BOOST_TEST( A[0][1] == (2. + 3.*I)*3.*I );
|
||||
|
||||
// blas::scal(2., blas::imag(A[2]));
|
||||
// assert( A[2][1] == 2. + 4.*I );
|
||||
//}
|
||||
|
||||
////BOOST_AUTO_TEST_CASE(multi_adaptors_blas_test_scal_cuda_noconst){
|
||||
//// namespace cuda = multi::cuda;
|
||||
//// cuda::array<complex, 2> A = {
|
||||
//// {1. + 2.*I, 2. + 3.*I, 3. + 4.*I, 4. + 5.*I},
|
||||
//// {5. + 2.*I, 6. + 3.*I, 7. + 4.*I, 8. + 5.*I},
|
||||
//// {1. + 1.*I, 2. + 2.*I, 3. + 3.*I, 4. + 4.*I}
|
||||
//// };
|
||||
//// blas::scal(2., A[1]); // zscal (2. is promoted to complex later)
|
||||
//// BOOST_REQUIRE( A[1][2] == 14. + 8.*I );
|
||||
|
||||
//// cuda::array<complex, 1> a = {1. + 10.*I, 2. + 20.*I, 3. + 30.*I};
|
||||
//// blas::scal(2., a);
|
||||
//// BOOST_REQUIRE(( a[1] == complex{4, 40} ));
|
||||
|
||||
////// blas::scal(3., blas::imag(a)); // gives internal compilation error in gcc
|
||||
////// BOOST_REQUIRE(( a[1] == complex{4, 120} ));
|
||||
////}
|
||||
|
||||
////BOOST_AUTO_TEST_CASE(multi_adaptors_blas_test_scal_cuda_const){
|
||||
//// namespace cuda = multi::cuda;
|
||||
//// cuda::array<complex, 2> const A = {
|
||||
//// {1. + 2.*I, 2. + 3.*I, 3. + 4.*I, 4. + 5.*I},
|
||||
//// {5. + 2.*I, 6. + 3.*I, 7. + 4.*I, 8. + 5.*I},
|
||||
//// {1. + 1.*I, 2. + 2.*I, 3. + 3.*I, 4. + 4.*I}
|
||||
//// };
|
||||
//// auto A1cpy = blas::scal(2., A[1]); // zscal (2. is promoted to complex later)
|
||||
//// BOOST_REQUIRE( A1cpy[2] == 14. + 8.*I );
|
||||
|
||||
////// cuda::array<complex, 1> a = {1. + 10.*I, 2. + 20.*I, 3. + 30.*I};
|
||||
////// blas::scal(2., a);
|
||||
////// BOOST_REQUIRE(( a[1] == complex{4, 40} ));
|
||||
|
||||
////// blas::scal(3., blas::imag(a));
|
||||
////// BOOST_REQUIRE(( a[1] == complex{4, 120} ));
|
||||
////}
|
||||
|
||||
//#if 0
|
||||
//BOOST_AUTO_TEST_CASE(multi_adaptors_blas_test_scal_cuda_managed){
|
||||
// cuda::managed::array<complex, 2> A = {
|
||||
// {1. + 2.*I, 2. + 3.*I, 3. + 4.*I, 4. + 5.*I},
|
||||
// {5. + 2.*I, 6. + 3.*I, 7. + 4.*I, 8. + 5.*I},
|
||||
// {1. + 1.*I, 2. + 2.*I, 3. + 3.*I, 4. + 4.*I}
|
||||
// };
|
||||
// using blas::scal;
|
||||
// scal(2., A[1]);
|
||||
// BOOST_REQUIRE( A[1][2] == 14. + 8.*I );
|
||||
|
||||
// scal(2., blas::imag(A[1]));
|
||||
// BOOST_REQUIRE( A[1][2] == 14. + 16.*I );
|
||||
//}
|
||||
//#endif
|
||||
|
|
@ -1,90 +0,0 @@
|
|||
#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*-
|
||||
$CXX $0 -o $0x `pkg-config --libs blas` -lboost_unit_test_framework&&$0x&&rm $0x; exit
|
||||
#endif
|
||||
// © Alfredo A. Correa 2019-2020
|
||||
|
||||
#define BOOST_TEST_MODULE "C++ Unit Tests for Multi BLAS swap"
|
||||
#define BOOST_TEST_DYN_LINK
|
||||
#include<boost/test/unit_test.hpp>
|
||||
|
||||
#include "../../blas.hpp"
|
||||
|
||||
#include "../../../array.hpp"
|
||||
|
||||
#include<complex>
|
||||
#include<cassert>
|
||||
|
||||
using std::cout;
|
||||
namespace multi = boost::multi;
|
||||
|
||||
// Swap tests: each scope swaps two rows or two columns with multi::blas::swap
// and then swaps them back with the unqualified swap / std::swap_ranges.
// NOTE(review): the test-case name and the tolerance decorator do not match the
// swap content (all checks below are exact comparisons); kept unchanged here.
BOOST_AUTO_TEST_CASE(lapack_potrf, *boost::unit_test::tolerance(0.00001) ){
	{	// rows of a real matrix
		multi::array<double, 2> mat = {
			{1.,  2.,  3.,  4.},
			{5.,  6.,  7.,  8.},
			{9., 10., 11., 12.}
		};
		BOOST_REQUIRE( mat[0][2] ==  3. );
		BOOST_REQUIRE( mat[2][2] == 11. );

		multi::blas::swap(mat[0], mat[2]); // blas swap
		BOOST_REQUIRE( mat[0][2] == 11. );
		BOOST_REQUIRE( mat[2][2] ==  3. );

		swap(mat[0], mat[2]); // built-in swap
		BOOST_REQUIRE( mat[0][2] ==  3. );
		BOOST_REQUIRE( mat[2][2] == 11. );
	}
	{	// columns of a real matrix, reached through rotated()
		multi::array<double, 2> mat = {
			{1.,  2.,  3.,  4.},
			{5.,  6.,  7.,  8.},
			{9., 10., 11., 12.}
		};
		BOOST_REQUIRE( mat[0][0] == 1. );
		BOOST_REQUIRE( mat[0][3] == 4. );

		multi::blas::swap(rotated(mat)[0], rotated(mat)[3]); // blas swap (deep)
		BOOST_REQUIRE( mat[0][0] == 4. );
		BOOST_REQUIRE( mat[0][3] == 1. );

		swap(rotated(mat)[0], rotated(mat)[3]); // built-in swap (deep)
		BOOST_REQUIRE( mat[0][0] == 1. );
		BOOST_REQUIRE( mat[0][3] == 4. );
	}
	{	// columns of a complex matrix
		using complex = std::complex<double>; complex const I{0, 1};
		multi::array<complex, 2> mat = {
			{1.+ 2.*I,  2.,  3.,  4. + 3.*I},
			{5.,  6.,  7.,  8.},
			{9., 10., 11., 12.}
		};
		BOOST_REQUIRE( mat[0][0] == 1.+ 2.*I );
		BOOST_REQUIRE( mat[0][3] == 4. + 3.*I );

		multi::blas::swap(rotated(mat)[0], rotated(mat)[3]); // blas swap (deep)
		BOOST_REQUIRE( mat[0][0] == 4. + 3.*I );
		BOOST_REQUIRE( mat[0][3] == 1.+ 2.*I );

		swap(rotated(mat)[0], rotated(mat)[3]); // built-in swap (deep)
		BOOST_REQUIRE( mat[0][0] == 1.+ 2.*I );
		BOOST_REQUIRE( mat[0][3] == 4. + 3.*I );
	}
	{	// iterator form; the blas call covers all but the last element of row 0
		multi::array<double, 2> mat = {
			{1.,  2.,  3.,  4.},
			{5.,  6.,  7.,  8.},
			{9., 10., 11., 12.}
		};
		BOOST_REQUIRE( mat[0][2] ==  3. );
		BOOST_REQUIRE( mat[2][2] == 11. );

		auto last = multi::blas::swap(begin(mat[0]), end(mat[0]) - 1, begin(mat[2])); // blas swap
		BOOST_REQUIRE( last == end(mat[2]) - 1 );
		BOOST_REQUIRE( mat[0][2] == 11. );
		BOOST_REQUIRE( mat[2][2] ==  3. );

		using std::swap_ranges;
		swap_ranges(begin(mat[0]), end(mat[0]), begin(mat[2])); // built-in swap
		BOOST_REQUIRE( mat[0][2] ==  3. );
		BOOST_REQUIRE( mat[2][2] == 11. );
	}
}
|
||||
|
|
@ -1,33 +0,0 @@
|
|||
#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*-
|
||||
$CXXX $CXXFLAGS $0 -o $0.$X `pkg-config --cflags --libs blas cuda-11.0` -lboost_unit_test_framework&&$0.$X&&rm $0.$X;exit
|
||||
#endif
|
||||
|
||||
#include "../../blas/traits.hpp"
|
||||
|
||||
#define BOOST_TEST_MODULE "C++ Unit Tests for Multi BLAS traits"
|
||||
#define BOOST_TEST_DYN_LINK
|
||||
#include<boost/test/unit_test.hpp>
|
||||
|
||||
#include "./config.hpp"
|
||||
|
||||
#include<complex>
|
||||
|
||||
namespace multi = boost::multi;
|
||||
namespace blas = multi::blas;
|
||||
|
||||
// Compile-time checks of the blas type traits for the built-in real types and
// for std::complex; nothing is evaluated at run time.
BOOST_AUTO_TEST_CASE(multi_adaptors_blas_traits){
	// real types
	static_assert( blas::is_s<float >{}, "is_s must hold for float" );
	static_assert( blas::is_d<double>{}, "is_d must hold for double" );

	// complex types
	static_assert( blas::is_c<std::complex<float>>{},  "is_c must hold for std::complex<float>" );
	static_assert( blas::is_z<std::complex<double>>{}, "is_z must hold for std::complex<double>" );
}
|
||||
|
||||
#if CUDA_FOUND
|
||||
#include<thrust/complex.h>
|
||||
// Compile-time checks that the complex blas traits also hold for thrust::complex.
BOOST_AUTO_TEST_CASE(multi_adaptors_blas_traits_thrust){
	static_assert( blas::is_c<thrust::complex<float>>{},  "is_c must hold for thrust::complex<float>" );
	static_assert( blas::is_z<thrust::complex<double>>{}, "is_z must hold for thrust::complex<double>" );
}
|
||||
#endif
|
||||
|
|
@ -1,604 +0,0 @@
|
|||
#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*-
|
||||
$CXX $0 -o $0x -lcudart -lcublas `pkg-config --libs blas` -lboost_unit_test_framework&&$0x&&rm $0x;exit
|
||||
#endif
|
||||
// © Alfredo A. Correa 2019-2021
|
||||
|
||||
#define BOOST_TEST_MODULE "C++ Unit Tests for Multi BLAS trsm"
|
||||
#define BOOST_TEST_DYN_LINK
|
||||
#include<boost/test/unit_test.hpp>
|
||||
|
||||
//#include "../../../memory/adaptors/cuda/managed/ptr.hpp"
|
||||
|
||||
#include "../../../adaptors/blas/gemm.hpp"
|
||||
#include "../../../adaptors/blas/trsm.hpp"
|
||||
//#include "../../../adaptors/blas/cuda.hpp"
|
||||
|
||||
//#include "../../../adaptors/cuda.hpp"
|
||||
#include "../../../array.hpp"
|
||||
|
||||
#include <config.hpp>
|
||||
|
||||
namespace multi = boost::multi;
|
||||
|
||||
template<class Matrix>
|
||||
auto triangular(multi::blas::filling f, Matrix const& m){
|
||||
auto ret =+ m;
|
||||
switch(f){
|
||||
case multi::blas::filling::upper:
|
||||
for(multi::size_type i = 0; i != size( ret); ++i){
|
||||
for(multi::size_type j = 0; j != std::min(i, size(~ret)); ++j){
|
||||
ret[i][j] = 0.;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case multi::blas::filling::lower:
|
||||
for(multi::size_type j = 0; j != size(~ret); ++j){
|
||||
for(multi::size_type i = 0; i != std::min(j, size( ret)); ++i){
|
||||
ret[i][j] = 0.;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Calls trsm on default-constructed (empty) matrices; there is no assertion,
// the test only requires the call to complete.
BOOST_AUTO_TEST_CASE(multi_blas_trsm_double_0x0){//, *utf::tolerance(0.00001)){
	namespace blas = multi::blas;
	multi::array<double, 2> const A;
	multi::array<double, 2> rhs;
	// B=Solve(A.X=alpha*B, X) B=A⁻¹B, B⊤=B⊤.(A⊤)⁻¹, A upper triangular (implicit zeros below)
	blas::trsm(blas::side::left, blas::filling::upper, blas::diagonal::general, 1., A, rhs);
}
|
||||
|
||||
// trsm with a 1x1 matrix A = {10}: the result must be alpha*B/10, and
// multiplying back with gemm must reproduce alpha times the original B.
BOOST_AUTO_TEST_CASE(multi_blas_trsm_double_1x1){//, *utf::tolerance(0.00001)){
	namespace blas = multi::blas;
	double const tol = 0.00001;
	multi::array<double, 2> const A = {
		{10.,},
	};
	{	// alpha = 1, single right-hand-side element
		multi::array<double, 2> rhs = {
			{3.,},
		};
		auto const rhs_orig = rhs;
		// B=Solve(A.X=alpha*B, X) B=A⁻¹B, B⊤=B⊤.(A⊤)⁻¹, A upper triangular (implicit zeros below)
		blas::trsm(blas::side::left, blas::filling::upper, blas::diagonal::general, 1., A, rhs);
		BOOST_REQUIRE_CLOSE( rhs[0][0] , 3./10. , tol );
		BOOST_REQUIRE_CLOSE( (+blas::gemm(1., A, rhs))[0][0] , rhs_orig[0][0] , tol );
	}
	{	// alpha = 2, single right-hand-side element
		multi::array<double, 2> rhs = {
			{3.,},
		};
		auto const rhs_orig = rhs;
		blas::trsm(blas::side::left, blas::filling::upper, blas::diagonal::general, 2., A, rhs);
		BOOST_REQUIRE_CLOSE( rhs[0][0] , 2.*3./10. , tol );
		BOOST_REQUIRE_CLOSE( (+blas::gemm(1., A, rhs))[0][0] , 2.*rhs_orig[0][0] , tol );
	}
	{	// alpha = 1, one row with three right-hand sides
		multi::array<double, 2> rhs = {
			{3., 4., 5.},
		};
		auto const rhs_orig = rhs;
		blas::trsm(blas::side::left, blas::filling::upper, blas::diagonal::general, 1., A, rhs);
		BOOST_REQUIRE_CLOSE( rhs[0][0] , 3./10. , tol );
		BOOST_REQUIRE_CLOSE( rhs[0][1] , 4./10. , tol );
		BOOST_REQUIRE_CLOSE( rhs[0][2] , 5./10. , tol );
		BOOST_REQUIRE_CLOSE( (+blas::gemm(1., A, rhs))[0][1] , rhs_orig[0][1] , tol );
	}
}
|
||||
|
||||
// trsm with a 3x3 upper-triangular real matrix. The entries below the diagonal
// are NAN on purpose: the solve is requested with filling::upper, so the test
// only passes if they are ignored. For the gemm round-trip checks a copy with
// those entries zeroed (triangular(...)) is used instead.
// The round-trip checks use BOOST_REQUIRE_CLOSE rather than exact equality,
// since a solve followed by a product need not reproduce the input bit-for-bit.
BOOST_AUTO_TEST_CASE(multi_blas_trsm_real_square){//, *utf::tolerance(0.00001)){
	namespace blas = multi::blas;
	multi::array<double, 2> const A = {
		{ 1.,  3.,  4.},
		{ NAN,  7.,  1.},
		{ NAN,  NAN,  8.}
	};
	auto const A_cpy = triangular(blas::filling::upper, A);
	{	// A and B both passed directly
		multi::array<double, 2> B = {
			{1., 3., 4.},
			{2., 7., 1.},
			{3., 4., 2.}
		};
		auto const B_cpy = B;
		// B=Solve(A.X=alpha*B, X) B=A⁻¹B, B⊤=B⊤.(A⊤)⁻¹, A upper triangular (implicit zeros below)
		blas::trsm(blas::side::left, blas::filling::upper, 1., A, B);
		BOOST_REQUIRE_CLOSE( B[1][2] , 0.107143 , 0.001 );
		BOOST_REQUIRE_CLOSE( (+blas::gemm(1., A_cpy, B))[1][2] , B_cpy[1][2] , 0.00001 );
	}
	{	// A passed as the transposed view of its transposed copy
		auto const AT =+ ~A;
		auto const AT_cpy = triangular(blas::filling::lower, AT);
		multi::array<double, 2> B = {
			{1., 3., 4.},
			{2., 7., 1.},
			{3., 4., 2.}
		};
		auto const B_cpy = B;
		blas::trsm(blas::side::left, blas::filling::upper, 1., blas::T(AT), B);
		BOOST_REQUIRE_CLOSE( B[1][2] , 0.107143 , 0.001 );
		BOOST_REQUIRE_CLOSE( (+blas::gemm(1., blas::T(AT_cpy), B))[1][2] , B_cpy[1][2] , 0.00001 );
	}
	{	// both A and B passed as transposed views
		auto const AT =+ ~A;
		auto const AT_cpy = triangular(blas::filling::lower, AT);
		multi::array<double, 2> const B = {
			{1., 3., 4.},
			{2., 7., 1.},
			{3., 4., 2.}
		};
		auto BT =+ ~B;
		blas::trsm(blas::side::left, blas::filling::upper, 1., blas::T(AT), blas::T(BT));
		BOOST_REQUIRE_CLOSE( blas::T(BT)[1][2] , 0.107143 , 0.001 );
		BOOST_REQUIRE_CLOSE( (+blas::gemm(1., blas::T(AT_cpy), blas::T(BT)))[1][2] , B[1][2] , 0.00001 );
	}
	{	// only B passed as a transposed view
	//	auto const AT =+ ~A;
		multi::array<double, 2> const B = {
			{1., 3., 4.},
			{2., 7., 1.},
			{3., 4., 2.}
		};
		auto BT =+ ~B;
		blas::trsm(blas::side::left, blas::filling::upper, 1., A, blas::T(BT));
		BOOST_REQUIRE_CLOSE( (~BT)[1][2] , 0.107143 , 0.001 );
	}
}
|
||||
|
||||
// Complex trsm with blas::H(A) and a 3x3 right-hand side; one element of the
// result is compared with a reference value. Entries of A below the diagonal
// are NAN.
BOOST_AUTO_TEST_CASE(multi_blas_trsm_complex){//, *utf::tolerance(0.00001)){
	namespace blas = multi::blas;
	using complex = std::complex<double>;
	complex const I{0, 1};
	double const tol = 0.0001;

	multi::array<complex, 2> const A = {
		{ 1. + 2.*I,  3. - 1.*I,  4. + 9.*I},
		{NAN       ,  7. + 4.*I,  1. + 8.*I},
		{NAN       , NAN       ,  8. + 2.*I}
	};
	multi::array<complex, 2> rhs = {
		{1. - 9.*I, 3. + 2.*I, 4. + 3.*I},
		{2. - 2.*I, 7. - 2.*I, 1. - 1.*I},
		{3. + 1.*I, 4. + 8.*I, 2. + 7.*I}
	};

	// B=alpha Inv[A†].B, B†=B†.Inv[A], Solve(A†.X=B, X), Solve(X†.A=B†, X), A is upper triangular (with implicit zeros below)
	blas::trsm(blas::side::left, blas::filling::lower, 2.+1.*I, blas::H(A), rhs);

	BOOST_REQUIRE_CLOSE( real(rhs[1][2]) ,  2.33846   , tol );
	BOOST_REQUIRE_CLOSE( imag(rhs[1][2]) , -0.0923077 , tol );
}
|
||||
|
||||
// Complex trsm with blas::H(A) and a rectangular 3x2 right-hand side; one
// element of the result is compared with a reference value.
BOOST_AUTO_TEST_CASE(multi_blas_trsm_complex_rectangular){//, *utf::tolerance(0.00001)){
	namespace blas = multi::blas;
	using complex = std::complex<double>;
	complex const I{0, 1};
	double const tol = 0.0001;

	multi::array<complex, 2> const A = {
		{ 1. + 2.*I,  3. - 1.*I,  4. + 9.*I},
		{NAN       ,  7. + 4.*I,  1. + 8.*I},
		{NAN       , NAN       ,  8. + 2.*I}
	};
	multi::array<complex, 2> rhs = {
		{1. - 9.*I, 3. + 2.*I},
		{2. - 2.*I, 7. - 2.*I},
		{3. + 1.*I, 4. + 8.*I}
	};

	// B=alpha Inv[A†].B, B†=B†.Inv[A], Solve(A†.X=B, X), Solve(X†.A=B†, X), A is upper triangular (with implicit zeros below)
	blas::trsm(blas::side::left, blas::filling::lower, 2.+1.*I, blas::H(A), rhs);

	BOOST_REQUIRE_CLOSE( real(rhs[2][0]) , -4.16471 , tol );
	BOOST_REQUIRE_CLOSE( imag(rhs[2][0]) ,  8.25882 , tol );
}
|
||||
|
||||
// Complex trsm with blas::H(A) and a single-column (3x1) right-hand side; one
// element of the result is compared with a reference value.
BOOST_AUTO_TEST_CASE(multi_blas_trsm_complex_column){//, *utf::tolerance(0.00001)){
	namespace blas = multi::blas;
	using complex = std::complex<double>;
	complex const I{0, 1};
	double const tol = 0.0001;

	multi::array<complex, 2> const A = {
		{ 1. + 2.*I,  3. - 1.*I,  4. + 9.*I},
		{NAN       ,  7. + 4.*I,  1. + 8.*I},
		{NAN       , NAN       ,  8. + 2.*I}
	};
	multi::array<complex, 2> column = {
		{1. - 9.*I},
		{2. - 2.*I},
		{3. + 1.*I}
	};

	// B=alpha Inv[A†].B, B†=B†.Inv[A], Solve(A†.X=B, X), Solve(X†.A=B†, X), A is upper triangular (with implicit zeros below)
	blas::trsm(blas::side::left, blas::filling::lower, 2.+1.*I, blas::H(A), column);

	BOOST_REQUIRE_CLOSE( real(column[2][0]) , -4.16471 , tol );
	BOOST_REQUIRE_CLOSE( imag(column[2][0]) ,  8.25882 , tol );
}
|
||||
|
||||
// Same inputs and expectations as multi_blas_trsm_complex_column: complex trsm
// with blas::H(A) and a 3x1 right-hand side held in a multi::array.
BOOST_AUTO_TEST_CASE(multi_blas_trsm_complex_column_cpu){//, *utf::tolerance(0.00001)){
	namespace blas = multi::blas;
	using complex = std::complex<double>;
	complex const I{0, 1};
	double const tol = 0.0001;

	multi::array<complex, 2> const A = {
		{ 1. + 2.*I,  3. - 1.*I,  4. + 9.*I},
		{NAN       ,  7. + 4.*I,  1. + 8.*I},
		{NAN       , NAN       ,  8. + 2.*I}
	};
	multi::array<complex, 2> column = {
		{1. - 9.*I},
		{2. - 2.*I},
		{3. + 1.*I}
	};

	// B=alpha Inv[A†].B, B†=B†.Inv[A], Solve(A†.X=B, X), Solve(X†.A=B†, X), A is upper triangular (with implicit zeros below)
	blas::trsm(blas::side::left, blas::filling::lower, 2.+1.*I, blas::H(A), column);

	BOOST_REQUIRE_CLOSE( real(column[2][0]) , -4.16471 , tol );
	BOOST_REQUIRE_CLOSE( imag(column[2][0]) ,  8.25882 , tol );
}
|
||||
|
||||
// 1x1 real matrix A = {2} applied to a 1x3 right-hand side, first stored as a
// row and then as the transposed view of a 3x1 column; each result element
// must equal the original divided by A[0][0].
BOOST_AUTO_TEST_CASE(multi_blas_trsm_hydrogen_inq_case_real){//, *utf::tolerance(0.00001)){
	namespace blas = multi::blas;
	multi::array<double, 2> const A = {{2.,},};
	{	// right-hand side stored as one row
		multi::array<double, 2> row = {{1., 2., 3.},};
		auto const row_orig = row;
		blas::trsm(blas::side::left, blas::filling::lower, 1., A, row);
		BOOST_REQUIRE( row[0][1] == row_orig[0][1]/A[0][0] );
	}
	{	// right-hand side stored as one column, passed transposed
		multi::array<double, 2> column = {
			{1.},
			{2.},
			{3.},
		};
		auto const column_orig = column;
		blas::trsm(blas::side::left, blas::filling::lower, 1., A, blas::T(column));
		BOOST_REQUIRE( blas::T(column)[0][1] == blas::T(column_orig)[0][1]/A[0][0] );
	}
}
|
||||
|
||||
// 1x1 complex matrix A = {2}: a left solve on blas::H(B1) and a right solve
// with blas::H(A) on B2 start from equal columns and must leave B1 == B2.
BOOST_AUTO_TEST_CASE(multi_blas_trsm_hydrogen_inq_case_complex){//, *utf::tolerance(0.00001)){
	namespace blas = multi::blas;
	using complex = std::complex<double>;
	multi::array<complex, 2> const A = {{2.,},};
	{	// row right-hand side, left solve
		multi::array<complex, 2> row = {{1., 2., 3.},};
		auto const row_orig = row;
		blas::trsm(blas::side::left, blas::filling::lower, 1., A, row);
		BOOST_REQUIRE( row[0][1] == row_orig[0][1]/A[0][0] );
	}

	// two equal 3x1 columns, solved in the two different ways below
	multi::array<complex, 2> B1 = {
		{1.},
		{2.},
		{3.},
	};
	multi::array<complex, 2> B2 = {
		{1.},
		{2.},
		{3.},
	};
	{	// left solve on the hermitized view of B1
	//	auto const B_cpy = B1;
		blas::trsm(blas::side::left, blas::filling::lower, 1., A, blas::H(B1));
	//	BOOST_REQUIRE( (+blas::gemm(1., A, blas::H(B1)))[0][1] == blas::H(B_cpy)[0][1] );
	}
	{	// right solve on B2 with the hermitized A
		auto const B2_orig = B2;
		blas::trsm(blas::side::right, blas::filling::upper, 1., blas::H(A), B2);
	//	BOOST_REQUIRE( (+blas::gemm(1., A, blas::H(B)))[0][1] == blas::H(B_cpy)[0][1] );
		BOOST_REQUIRE( (+blas::gemm(1., B2, blas::H(A)))[1][0] == B2_orig[1][0] );
	}
	BOOST_REQUIRE( B1 == B2 );
}
|
||||
|
||||
// trsm with a 3x3 upper-triangular real matrix (NAN below the diagonal) and
// non-square right-hand sides (3x4 and 3x1), passed directly or as transposed
// views. gemm round-trips use A_cpy, the copy of A with the lower part zeroed.
BOOST_AUTO_TEST_CASE(multi_blas_trsm_real_nonsquare){//, *utf::tolerance(0.00001)){
	namespace blas = multi::blas;
	multi::array<double, 2> const A = {
		{ 1.,  3.,  4.},
		{ NAN,  7.,  1.},
		{ NAN,  NAN,  8.}
	};
	auto const A_cpy = triangular(blas::filling::upper, A);
	{	// 3x4 right-hand side, direct and through a transposed copy
		multi::array<double, 2> rhs = {
			{1., 3., 4., 8.},
			{2., 7., 1., 9.},
			{3., 4., 2., 1.},
		};
		auto const rhs_orig =+ rhs;
		multi::array<double, 2> rhs_t =+ ~rhs;
		BOOST_REQUIRE( rhs_t == ~rhs );

		// B=Solve(A.X=alpha*B, X) B=A⁻¹B, B⊤=B⊤.(A⊤)⁻¹, A upper triangular (implicit zeros below)
		blas::trsm(blas::side::left, blas::filling::upper, 1., A, rhs);
		BOOST_REQUIRE_CLOSE( rhs[1][2] , 0.107143 , 0.001);
		BOOST_REQUIRE_CLOSE( (+blas::gemm(1., A_cpy, rhs))[1][2] , rhs_orig[1][2] , 0.001);

		auto const rhs_t_orig = rhs_t;
		blas::trsm(blas::side::left, blas::filling::upper, 1., A, blas::T(rhs_t));
		BOOST_REQUIRE_CLOSE( blas::T(rhs_t)[1][2], 0.107143, 0.001 );

		BOOST_REQUIRE_CLOSE( (+blas::gemm(1., A_cpy, blas::T(rhs_t)))[1][2] , blas::T(rhs_t_orig)[1][2] , 0.00001 );
	}
	{	// same right-hand side, A passed as the transposed view of its transpose
		multi::array<double, 2> rhs = {
			{1., 3., 4., 8.},
			{2., 7., 1., 9.},
			{3., 4., 2., 1.},
		};
		multi::array<double, 2> AT = ~A;
		multi::array<double, 2> rhs_t = ~rhs;

		// B=Solve(A.X=alpha*B, X) B=A⁻¹B, B⊤=B⊤.(A⊤)⁻¹, A upper triangular (implicit zeros below)
		blas::trsm(blas::side::left, blas::filling::upper, 1., blas::T(AT), rhs);
		BOOST_REQUIRE_CLOSE( rhs[1][2] , 0.107143 , 0.001 );

		blas::trsm(blas::side::left, blas::filling::upper, 1., blas::T(AT), blas::T(rhs_t));
		BOOST_REQUIRE_CLOSE( (~rhs_t)[1][2] , 0.107143, 0.001 );
	}
	{	// single column, alpha = 1
		multi::array<double, 2> column = {
			{1.},
			{2.},
			{3.},
		};
		auto const column_orig =+ column;
		// B=Solve(A.X=alpha*B, X) B=A⁻¹B, B⊤=B⊤.(A⊤)⁻¹, A upper triangular (implicit zeros below)
		blas::trsm(blas::side::left, blas::filling::upper, 1., A, column);
		BOOST_REQUIRE_CLOSE( column[2][0] , 0.375 , 0.00001 );
		BOOST_REQUIRE_CLOSE( (+blas::gemm(1., A_cpy, column))[1][0] , column_orig[1][0] , 0.00001 );
	}
	{	// single column, alpha = 1.2
		multi::array<double, 2> column = {
			{1.},
			{2.},
			{3.},
		};
		auto const column_orig =+ column;
		blas::trsm(blas::side::left, blas::filling::upper, 1.2, A, column);
		BOOST_REQUIRE_CLOSE( (+blas::gemm(1., A_cpy, column))[1][0] , 1.2*column_orig[1][0] , 0.00001 );
		BOOST_REQUIRE_CLOSE( (+blas::gemm(1./1.2, A_cpy, column))[1][0] , column_orig[1][0] , 0.00001 );
	}
	{	// single column stored as a 1x3 row and passed transposed
		multi::array<double, 2> column = {
			{1.},
			{2.},
			{3.},
		};
		multi::array<double, 2> column_t = rotated(column);
		blas::trsm(blas::side::left, blas::filling::upper, 1., A, blas::T(column_t));
		BOOST_REQUIRE_CLOSE( (~column_t)[2][0] , 0.375 , 0.00001);
	}
}
|
||||
|
||||
// Complex trsm applied to the hermitized view of a 2x3 matrix B; the result is
// read back through B itself and one imaginary part is compared with a
// reference value.
BOOST_AUTO_TEST_CASE(multi_blas_trsm_complex_nonsquare_default_diagonal_hermitized_gemm_check_no_const){//, *utf::tolerance(0.00001)){
	namespace blas = multi::blas;
	using complex = std::complex<double>; complex const I{0, 1};
	multi::array<complex, 2> const A = {
		{ 1. + 4.*I,  3.,  4.- 10.*I},
		{ 0.,  7.- 3.*I,  1.},
		{ 0.,  0.,  8.- 2.*I}
	};
	multi::array<complex, 2> B = {
		{1. + 1.*I, 2. + 1.*I, 3. + 1.*I},
		{5. + 3.*I, 9. + 3.*I, 1. - 1.*I}
	};
	// every name below is qualified with blas::, so no using-declarations are needed
	blas::trsm(blas::side::left, blas::filling::upper, 1., A, blas::H(B)); // B†←A⁻¹.B†, B←B.A⁻¹†, B←(A⁻¹.B†)†
	BOOST_REQUIRE_CLOSE( imag(B[1][2]) , -0.147059 , 0.001);
}
|
||||
|
||||
// Complex trsm whose return value is captured: once with blas::H(A) on a 3x2
// matrix, and twice with A on the hermitized view of a 2x3 matrix (alpha = 1
// and alpha = 2). Selected elements are compared with reference values.
BOOST_AUTO_TEST_CASE(multi_blas_trsm_complex_nonsquare_default_diagonal_hermitized_gemm_check){//, *utf::tolerance(0.00001)){
	using complex = std::complex<double>; complex const I{0, 1};
	multi::array<complex, 2> const A = {
		{ 1. + 4.*I,  3.,  4.- 10.*I},
		{ 0.,  7.- 3.*I,  1.},
		{ 0.,  0.,  8.- 2.*I}
	};
	namespace blas = multi::blas;
	double const tol = 0.001;
	{	// hermitized A, plain 3x2 right-hand side
		multi::array<complex, 2> rhs = {
			{1. + 1.*I, 5. + 3.*I},
			{2. + 1.*I, 9. + 3.*I},
			{3. + 1.*I, 1. - 1.*I},
		};
		auto S = blas::trsm(blas::side::left, blas::filling::lower, 1., blas::H(A), rhs); // S = A⁻¹†.B, S† = B†.A⁻¹
		BOOST_REQUIRE_CLOSE( real(S[2][1]) , 1.71608 , tol );
	}
	{	// plain A, hermitized 2x3 right-hand side, alpha = 1
		multi::array<complex, 2> rhs = {
			{1. + 1.*I, 2. + 1.*I, 3. + 1.*I},
			{5. + 3.*I, 9. + 3.*I, 1. - 1.*I}
		};
		auto S =+ blas::trsm(blas::side::left, blas::filling::upper, 1., A, blas::H(rhs)); // S = A⁻¹B†, S†=B.A⁻¹†, S=(B.A⁻¹)†, B <- S†, B <- B.A⁻¹†
		BOOST_REQUIRE_CLOSE( imag(S[2][1])   , +0.147059 , tol );
		BOOST_REQUIRE_CLOSE( imag(rhs[1][2]) , -0.147059 , tol );
	}
	{	// plain A, hermitized 2x3 right-hand side, alpha = 2
		multi::array<complex, 2> rhs = {
			{1. + 1.*I, 2. + 1.*I, 3. + 1.*I},
			{5. + 3.*I, 9. + 3.*I, 1. - 1.*I}
		};
		auto S =+ blas::trsm(blas::side::left, blas::filling::upper, 2., A, blas::H(rhs)); // S = A⁻¹B†, S†=B.A⁻¹†, S=(B.A⁻¹)†, B <- S†, B <- B.A⁻¹†
		BOOST_REQUIRE_CLOSE( imag(S[2][1])   , +0.147059*2. , tol );
		BOOST_REQUIRE_CLOSE( imag(rhs[1][2]) , -0.147059*2. , tol );
	}
}
|
||||
|
||||
// 1x1 real trsm with A = {4} and B = {5}: the returned value must equal
// alpha*5/4 exactly, with an explicit diagonal argument (alpha = 3) and with
// the default one (alpha = 1, exercised twice).
BOOST_AUTO_TEST_CASE(multi_blas_trsm_real_1x1_check){//, *utf::tolerance(0.00001)){
	namespace blas = multi::blas;
	multi::array<double, 2> const A = {
		{ 4.},
	};
	{	// explicit diagonal::general, alpha = 3
		multi::array<double, 2> rhs = {
			{5.},
		};
		auto S =+ blas::trsm(blas::side::left, blas::filling::upper, blas::diagonal::general, 3., A, rhs);
		BOOST_REQUIRE( S[0][0] == 3.*5./4. );
	}
	{	// default diagonal, alpha = 1
		multi::array<double, 2> rhs = {
			{5.},
		};
		auto S =+ blas::trsm(blas::side::left, blas::filling::upper, 1., A, rhs);
		BOOST_REQUIRE( S[0][0] == 1.*5./4. );
	}
	{	// same call as the previous scope
		multi::array<double, 2> rhs = {
			{5.},
		};
		auto S =+ blas::trsm(blas::side::left, blas::filling::upper, 1., A, rhs);
		BOOST_REQUIRE( S[0][0] == 1.*5./4. );
	}
}
|
||||
|
||||
// 1x1 complex trsm with a complex alpha: multiplying the solution back by A
// must give alpha times the original B, and multiplying by A/alpha must give
// the original B (real and imaginary parts are checked separately).
BOOST_AUTO_TEST_CASE(multi_blas_trsm_complex_1x1_check){//, *utf::tolerance(0.00001)){
	using complex = std::complex<double>; complex const I = complex{0, 1};
	multi::array<complex, 2> const A = {
		{ 4. + 2.*I},
	};
	namespace blas = multi::blas;
	double const tol = 0.00001;
	{
		multi::array<complex, 2> rhs = {
			{5. + 1.*I},
		};
		auto const rhs_orig =+ rhs;

		blas::trsm(blas::side::left, blas::filling::upper, 3.+5.*I, A, rhs);

		// A.X compared with alpha*B
		BOOST_REQUIRE_CLOSE( real((+blas::gemm(1., A, rhs))[0][0]) , real((3.+5.*I)*rhs_orig[0][0]) , tol );
		BOOST_REQUIRE_CLOSE( imag((+blas::gemm(1., A, rhs))[0][0]) , imag((3.+5.*I)*rhs_orig[0][0]) , tol );

		// (1/alpha).A.X compared with B
		BOOST_REQUIRE_CLOSE( real((+blas::gemm(1./(3.+5.*I), A, rhs))[0][0]) , real(rhs_orig[0][0]) , tol );
		BOOST_REQUIRE_CLOSE( imag((+blas::gemm(1./(3.+5.*I), A, rhs))[0][0]) , imag(rhs_orig[0][0]) , tol );
	}
}
|
||||
|
||||
#if defined(CUDA_FOUND) and CUDA_FOUND
|
||||
#include<thrust/complex.h>
|
||||
|
||||
// Repeats the hermitized, non-square trsm checks with thrust::complex<double>
// as the element type (this case is compiled only under the enclosing CUDA_FOUND guard).
// Each sub-case checks one reference value and that the returned object
// compares equal to the (possibly hermitized) right-hand side that was passed in.
BOOST_AUTO_TEST_CASE(multi_blas_trsm_complex_thrust_nonsquare_default_diagonal_hermitized_gemm_check){//, *utf::tolerance(0.00001)){
|
||||
namespace blas = multi::blas;
|
||||
using complex = thrust::complex<double>; complex const I{0, 1};
|
||||
multi::array<complex, 2> const A = {
|
||||
{ 1. + 4.*I, 3. , 4.- 10.*I},
|
||||
{ 0. , 7.- 3.*I, 1. },
|
||||
{ 0. , 0. , 8.- 2.*I}
|
||||
};
|
||||
{
|
||||
// Hermitized A (lower filling) with a plain 3x2 B, alpha = 1.
{
|
||||
multi::array<complex, 2> B = {
|
||||
{1. + 1.*I, 5. + 3.*I},
|
||||
{2. + 1.*I, 9. + 3.*I},
|
||||
{3. + 1.*I, 1. - 1.*I},
|
||||
};
|
||||
auto S = blas::trsm(blas::side::left, blas::filling::lower, 1., blas::H(A), B); // S = A⁻¹†.B, S† = B†.A⁻¹
|
||||
BOOST_REQUIRE_CLOSE( S[2][1].real() , 1.71608 , 0.001 );
|
||||
BOOST_REQUIRE( S == B );
|
||||
}
|
||||
// Plain A (upper filling) with a hermitized 2x3 B, alpha = 1.
{
|
||||
multi::array<complex, 2> B = {
|
||||
{1. + 1.*I, 2. + 1.*I, 3. + 1.*I},
|
||||
{5. + 3.*I, 9. + 3.*I, 1. - 1.*I}
|
||||
};
|
||||
auto S =+ blas::trsm(blas::side::left, blas::filling::upper, 1., A, blas::H(B)); // S = A⁻¹B†, S†=B.A⁻¹†, S=(B.A⁻¹)†, B <- S†, B <- B.A⁻¹†
|
||||
BOOST_REQUIRE_CLOSE( B[1][2].imag() , -0.147059 , 0.001 );
|
||||
BOOST_REQUIRE( S == blas::H(B) );
|
||||
}
|
||||
// Same as the previous sub-case with alpha = 2.; the reference value is doubled.
{
|
||||
multi::array<complex, 2> B = {
|
||||
{1. + 1.*I, 2. + 1.*I, 3. + 1.*I},
|
||||
{5. + 3.*I, 9. + 3.*I, 1. - 1.*I}
|
||||
};
|
||||
auto S =+ blas::trsm(blas::side::left, blas::filling::upper, 2., A, blas::H(B)); // S = A⁻¹B†, S†=B.A⁻¹†, S=(B.A⁻¹)†, B <- S†, B <- B.A⁻¹†
|
||||
BOOST_REQUIRE_CLOSE( B[1][2].imag() , -0.147059*2. , 0.001 );
|
||||
BOOST_REQUIRE( S == blas::H(B) );
|
||||
}
|
||||
}
|
||||
}
|
||||
//BOOST_AUTO_TEST_CASE(multi_blas_trsm_complex_column_cuda, *utf::tolerance(0.00001)){
|
||||
// namespace cuda = multi::cuda;
|
||||
// cuda::array<complex, 2> A = {
|
||||
// { 1., 3., 4.},
|
||||
// {NAN, 7., 1.},
|
||||
// {NAN, NAN, 8.}
|
||||
// };
|
||||
//// multi::cuda::array<complex, 2> const B = {
|
||||
//// {1.},
|
||||
//// {2.},
|
||||
//// {3.}
|
||||
//// };
|
||||
// namespace blas = multi::blas;
|
||||
//// auto Bcpy = blas::trsm(blas::filling::upper, 1., A, B); // B ⬅ α Inv[A].B, B† ⬅ B†.Inv[A], Solve(A†.X=B, X), Solve(X†.A=B†, X), A is upper triangular (with implicit zeros below)
|
||||
//// multi::array<complex, 2> Bcpu = Bcpy;
|
||||
//// BOOST_TEST_REQUIRE( std::real(Bcpu[2][0]) == 0.375 );
|
||||
//// BOOST_TEST_REQUIRE( std::imag(Bcpu[2][0]) == 0. );
|
||||
//}
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
|
||||
//template<class T> void what(T&&) = delete;
|
||||
|
||||
BOOST_AUTO_TEST_CASE(multi_blas_trsm_double_column_cuda, *utf::tolerance(0.00001)){
|
||||
multi::cuda::array<double, 2> const A = {
|
||||
{ 1., 3., 4.},
|
||||
{NAN, 7., 1.},
|
||||
{NAN, NAN, 8.}
|
||||
};
|
||||
multi::cuda::array<double, 2> B = {
|
||||
{1.},
|
||||
{2.},
|
||||
{3.}
|
||||
};
|
||||
namespace blas = multi::blas;
|
||||
using blas::filling;
|
||||
using blas::hermitized;
|
||||
trsm(filling::upper, 1., A, B); // B=alpha Inv[A†].B, B†=B†.Inv[A], Solve(A†.X=B, X), Solve(X†.A=B†, X), A is upper triangular (with implicit zeros below)
|
||||
BOOST_REQUIRE( B[2][0] == 0.375 );
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(multi_blas_trsm_complex_column_cuda2, *utf::tolerance(0.00001)){
|
||||
multi::cuda::array<complex, 2> const A = {
|
||||
{ 1. + 2.*I, 3. - 1.*I, 4. + 9.*I},
|
||||
{NAN , 7. + 4.*I, 1. + 8.*I},
|
||||
{NAN , NAN , 8. + 2.*I}
|
||||
};
|
||||
multi::cuda::array<complex, 2> B = {
|
||||
{1. - 9.*I},
|
||||
{2. - 2.*I},
|
||||
{3. + 1.*I}
|
||||
};
|
||||
namespace blas = multi::blas;
|
||||
using blas::filling;
|
||||
using blas::hermitized;
|
||||
trsm(filling::lower, 2.+1.*I, hermitized(A), B); // B=alpha Inv[A†].B, B†=B†.Inv[A], Solve(A†.X=B, X), Solve(X†.A=B†, X), A is upper triangular (with implicit zeros below)
|
||||
multi::array<complex, 2> Bcpu = B;
|
||||
BOOST_REQUIRE( real(Bcpu[2][0]) == -4.16471 );
|
||||
BOOST_REQUIRE( imag(Bcpu[2][0]) == 8.25882 );
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(multi_blas_cuda_trsm_complex, *utf::tolerance(0.00001)){
|
||||
multi::cuda::array<complex, 2> const A = {
|
||||
{ 1. + 2.*I, 3. - 1.*I, 4. + 9.*I},
|
||||
{NAN , 7. + 4.*I, 1. + 8.*I},
|
||||
{NAN , NAN , 8. + 2.*I}
|
||||
};
|
||||
multi::cuda::array<complex, 2> const B = {
|
||||
{1. - 9.*I, 3. + 2.*I, 4. + 3.*I},
|
||||
{2. - 2.*I, 7. - 2.*I, 1. - 1.*I},
|
||||
{3. + 1.*I, 4. + 8.*I, 2. + 7.*I}
|
||||
};
|
||||
|
||||
namespace blas = multi::blas;
|
||||
using blas::filling;
|
||||
using blas::hermitized;
|
||||
// auto C = trsm(filling::lower, 2.+1.*I, hermitized(A), B); // B=alpha Inv[A†].B, B†=B†.Inv[A], Solve(A†.X=B, X), Solve(X†.A=B†, X), A is upper triangular (with implicit zeros below)
|
||||
auto C = trsm(filling::lower, 1., hermitized(A), B); // B=alpha Inv[A†].B, B†=B†.Inv[A], Solve(A†.X=B, X), Solve(X†.A=B†, X), A is upper triangular (with implicit
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(multi_blas_cuda_managed_trsm_complex, *utf::tolerance(0.00001)){
|
||||
multi::cuda::managed::array<complex, 2> const A = {
|
||||
{ 1. + 2.*I, 3. - 1.*I, 4. + 9.*I},
|
||||
{NAN , 7. + 4.*I, 1. + 8.*I},
|
||||
{NAN , NAN , 8. + 2.*I}
|
||||
};
|
||||
multi::cuda::managed::array<complex, 2> const B = {
|
||||
{1. - 9.*I, 3. + 2.*I, 4. + 3.*I},
|
||||
{2. - 2.*I, 7. - 2.*I, 1. - 1.*I},
|
||||
{3. + 1.*I, 4. + 8.*I, 2. + 7.*I}
|
||||
};
|
||||
|
||||
namespace blas = multi::blas;
|
||||
using blas::filling;
|
||||
using blas::hermitized;
|
||||
auto C = trsm(filling::lower, 2.+1.*I, hermitized(A), B); // B=alpha Inv[A†].B, B†=B†.Inv[A], Solve(A†.X=B, X), Solve(X†.A=B†, X), A is upper triangular (with implicit zeros below)
|
||||
}
|
||||
#endif
|
|
@ -1,111 +0,0 @@
|
|||
#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*-
|
||||
$CXX $0 -o $0x -lcudart -lcublas -lboost_unit_test_framework `pkg-config --libs blas`&&$0x&&rm $0x;exit
|
||||
#endif
|
||||
// © Alfredo A. Correa 2019-2020
|
||||
|
||||
#define BOOST_TEST_MODULE "C++ Unit Tests for Multi cuBLAS trsv"
|
||||
#define BOOST_TEST_DYN_LINK
|
||||
#include<boost/test/unit_test.hpp>
|
||||
|
||||
#include "../../../memory/adaptors/cuda/managed/ptr.hpp"
|
||||
|
||||
#include "../../../adaptors/blas/trsv.hpp"
|
||||
#include "../../../adaptors/blas/cuda.hpp"
|
||||
|
||||
#include "../../../adaptors/cuda.hpp"
|
||||
#include "../../../array.hpp"
|
||||
|
||||
namespace multi = boost::multi;
|
||||
|
||||
// Debug helper: streams a two-dimensional container to std::cout, one row per
// line with elements separated by a space, then emits one extra newline.
// Returns the expression `cout<<std::endl` (i.e. whatever that stream expression yields).
// Each std::endl also flushes the stream.
template<class M> decltype(auto) print(M const& C){
|
||||
using multi::size; using std::cout;
|
||||
for(int i = 0; i != size(C); ++i){
|
||||
for(int j = 0; j != size(C[i]); ++j) cout<< C[i][j] <<' ';
|
||||
cout<<std::endl;
|
||||
}
|
||||
return cout<<std::endl;
|
||||
}
|
||||
|
||||
namespace utf = boost::unit_test;
|
||||
|
||||
using complex = std::complex<double>;
|
||||
complex const I{0, 1};
|
||||
|
||||
namespace multi = boost::multi;
|
||||
namespace blas = multi::blas;
|
||||
|
||||
// Host-memory baseline: calls blas::trsv (upper filling, general diagonal) on a
// 3x3 complex matrix and checks the real parts of the three entries of b
// against reference values, using the tolerance given in the decorator.
// The case name says "trsm" although the routine exercised is trsv.
// The entries below the diagonal of A are NAN — presumably to show they are never read.
BOOST_AUTO_TEST_CASE(multi_blas_trsm_complex_cpu, *utf::tolerance(0.00001)){
|
||||
|
||||
multi::array<complex, 2> const A = {
|
||||
{ 1. + 1.*I, 3. - 2.*I, 4. + 1.*I},
|
||||
{NAN , 7. - 10.*I, 1. + 2.*I},
|
||||
{NAN , NAN , 8. + 1.*I}
|
||||
};
|
||||
multi::array<complex, 1> b = {1. + 2.*I, 3. + 1.*I, 4. + 5.*I};
|
||||
blas::trsv(blas::filling::upper, blas::diagonal::general, A, b);
|
||||
BOOST_TEST_REQUIRE( real(b[0]) == -1.37259 );
|
||||
BOOST_TEST_REQUIRE( real(b[1]) == 0.2127 );
|
||||
BOOST_TEST_REQUIRE( real(b[2]) == 0.569231 );
|
||||
}
|
||||
|
||||
// Same data and reference values as the host-memory case, but A and b are
// cuda::managed::array objects and the tolerance is looser (0.0001).
// The elements of b are read directly (no copy to a host array) for the checks.
// The case name says "trsm" although the routine exercised is trsv.
BOOST_AUTO_TEST_CASE(multi_blas_trsm_complex_cuda, *utf::tolerance(0.0001)){
|
||||
namespace cuda = multi::cuda;
|
||||
cuda::managed::array<complex, 2> const A = {
|
||||
{ 1. + 1.*I, 3. - 2.*I, 4. + 1.*I},
|
||||
{NAN , 7. - 10.*I, 1. + 2.*I},
|
||||
{NAN , NAN , 8. + 1.*I}
|
||||
};
|
||||
cuda::managed::array<complex, 1> b = {1. + 2.*I, 3. + 1.*I, 4. + 5.*I};
|
||||
blas::trsv(blas::filling::upper, blas::diagonal::general, A, b);
|
||||
|
||||
BOOST_TEST_REQUIRE( real(b[0]) == -1.37259 );
|
||||
BOOST_TEST_REQUIRE( real(b[1]) == 0.2127 );
|
||||
BOOST_TEST_REQUIRE( real(b[2]) == 0.569231 );
|
||||
}
|
||||
|
||||
// Managed-memory variant that uses the trsv overload without a diagonal
// argument, then copies b into a host multi::array before checking the real
// parts against the same reference values as the other complex cases.
BOOST_AUTO_TEST_CASE(multi_blas_trsm_complex_cuda_managed, *utf::tolerance(0.00001)){
|
||||
namespace cuda = multi::cuda;
|
||||
cuda::managed::array<complex, 2> const A = {
|
||||
{ 1. + 1.*I, 3. - 2.*I, 4. + 1.*I},
|
||||
{NAN , 7. - 10.*I, 1. + 2.*I},
|
||||
{NAN , NAN , 8. + 1.*I}
|
||||
};
|
||||
cuda::managed::array<complex, 1> b = {1. + 2.*I, 3. + 1.*I, 4. + 5.*I};
|
||||
blas::trsv(blas::filling::upper, A, b); // this operation happens in GPU when #include "adaptors/blas/cuda.hpp"
|
||||
|
||||
// host copy used for the comparisons
multi::array<complex, 1> const b_cpu = b;
|
||||
BOOST_TEST_REQUIRE( real(b_cpu[0]) == -1.37259 );
|
||||
BOOST_TEST_REQUIRE( real(b_cpu[1]) == 0.2127 );
|
||||
BOOST_TEST_REQUIRE( real(b_cpu[2]) == 0.569231 );
|
||||
}
|
||||
|
||||
// Real-valued managed-memory variant: calls trsv (upper filling, default
// diagonal) on a 3x3 double matrix, copies b to a host array and checks all
// three entries against reference values within the decorator's tolerance.
BOOST_AUTO_TEST_CASE(multi_blas_trsm_double_cuda_managed, *utf::tolerance(0.00001)){
|
||||
namespace cuda = multi::cuda;
|
||||
cuda::managed::array<double, 2> const A = {
|
||||
{ 1., 3., 4.},
|
||||
{NAN, 7., 1.},
|
||||
{NAN, NAN, 8.}
|
||||
};
|
||||
cuda::managed::array<double, 1> b = {1., 3., 4.};
|
||||
|
||||
blas::trsv(blas::filling::upper, A, b); // this operation happens in GPU when #include "adaptors/blas/cuda.hpp"
|
||||
// host copy used for the comparisons
multi::array<double, 1> const b_cpu = b;
|
||||
BOOST_TEST_REQUIRE( b_cpu[0] == -2.07143 );
|
||||
BOOST_TEST_REQUIRE( b_cpu[1] == 0.357143 );
|
||||
BOOST_TEST_REQUIRE( b_cpu[2] == 0.5 );
|
||||
}
|
||||
|
||||
// Variant using multi::cuda::array (not the managed flavour) for both A and b.
// Same data and reference values as the other complex cases; the elements of b
// are read through real(b[i]) directly, without an explicit host copy.
BOOST_AUTO_TEST_CASE(multi_blas_trsm_complex_cuda2, *utf::tolerance(0.00001)){
|
||||
namespace blas = multi::blas;
|
||||
multi::cuda::array<complex, 2> const A = {
|
||||
{ 1. + 1.*I, 3. - 2.*I, 4. + 1.*I},
|
||||
{NAN , 7. - 10.*I, 1. + 2.*I},
|
||||
{NAN , NAN , 8. + 1.*I}
|
||||
};
|
||||
multi::cuda::array<complex, 1> b = {1. + 2.*I, 3. + 1.*I, 4. + 5.*I};
|
||||
blas::trsv(blas::filling::upper, blas::diagonal::general, A, b);
|
||||
BOOST_TEST_REQUIRE( real(b[0]) == -1.37259 );
|
||||
BOOST_TEST_REQUIRE( real(b[1]) == 0.2127 );
|
||||
BOOST_TEST_REQUIRE( real(b[2]) == 0.569231 );
|
||||
}
|
||||
|
|
@ -1,41 +0,0 @@
|
|||
// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*-
|
||||
// © Alfredo A. Correa 2019-2020
|
||||
|
||||
#ifndef MULTI_ADAPTORS_BLAS_TRAITS_HPP
|
||||
#define MULTI_ADAPTORS_BLAS_TRAITS_HPP
|
||||
|
||||
#include<complex>
|
||||
#include<type_traits>
|
||||
|
||||
namespace boost{
namespace multi::blas{

// Type detectors for the four BLAS element kinds (s, d, c, z).
//
// Each `is_X<T>` derives from std::true_type or std::false_type and exposes
// `archetype`, the standard type that kind corresponds to. Detection is
// structural rather than by exact type:
//  - real kinds (s, d): same size as the archetype, `T/T` converts to the
//    archetype, and T is neither an integer nor an enumeration type;
//  - complex kinds (c, z): same size as the archetype and `.real()`/`.imag()`
//    both yield the matching real kind.
//
// Integer and enumeration types are excluded explicitly: `int` has the size of
// `float` and `int/int` converts to `float`, so without the exclusion an
// integer array would be classified as BLAS single precision.

// single precision real
template<class F, class=std::enable_if_t<
	sizeof(F)==sizeof(float )
	and not std::is_integral<std::decay_t<F>>::value and not std::is_enum<std::decay_t<F>>::value
	and std::is_convertible<decltype(std::declval<F&&>()/std::declval<F&&>()), float>{}
>>
std::true_type is_s_aux(F&&);
std::false_type is_s_aux(...); // fallback when the constrained overload is not viable

template<class T> struct is_s : decltype(is_s_aux(std::declval<T>())){using archetype = float;};

// double precision real
template<class D, class=std::enable_if_t<
	sizeof(D)==sizeof(double)
	and not std::is_integral<std::decay_t<D>>::value and not std::is_enum<std::decay_t<D>>::value
	and std::is_convertible<decltype(std::declval<D&&>()/std::declval<D&&>()), double>{}
>>
std::true_type is_d_aux(D&&);
std::false_type is_d_aux(...);

template<class T> struct is_d : decltype(is_d_aux(std::declval<T>())){using archetype = double;};

// single precision complex: real and imaginary parts must themselves be of kind s
template<class C, class=std::enable_if_t<sizeof(C)==sizeof(std::complex<float>) and is_s<decltype(std::declval<C>().real())>{} and is_s<decltype(std::declval<C>().imag())>{}>>
std::true_type is_c_aux(C&&);
std::false_type is_c_aux(...);

template<class C> struct is_c : decltype(is_c_aux(std::declval<C>())){using archetype = std::complex<float>;};

// double precision complex: real and imaginary parts must themselves be of kind d
template<class Z, class=std::enable_if_t<sizeof(Z)==sizeof(std::complex<double>) and is_d<decltype(std::declval<Z>().real())>{} and is_d<decltype(std::declval<Z>().imag())>{}>>
std::true_type is_z_aux(Z&&);
std::false_type is_z_aux(...);

template<class Z> struct is_z : decltype(is_z_aux(std::declval<Z>())){using archetype = std::complex<double>;};

}

}
|
||||
#endif
|
||||
|
|
@ -1,93 +0,0 @@
|
|||
#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*-
|
||||
$CXXX $CXXFLAGS $0 -o $0.$X -lboost_unit_test_framework `pkg-config --cflags --libs blas` -lboost_timer&&$0.$X&&rm $0.$X;exit
|
||||
#endif
|
||||
// © Alfredo A. Correa 2019-2020
|
||||
|
||||
#ifndef MULTI_ADAPTORS_BLAS_TRSM_HPP
|
||||
#define MULTI_ADAPTORS_BLAS_TRSM_HPP
|
||||
|
||||
#include "../blas/core.hpp"
|
||||
|
||||
#include "../blas/operations.hpp" // uplo
|
||||
#include "../blas/filling.hpp"
|
||||
#include "../blas/side.hpp"
|
||||
|
||||
namespace boost{
|
||||
namespace multi::blas{
|
||||
|
||||
// Diagonal flag for triangular routines. The enumerators carry the character
// codes 'U' and 'N'; trsm casts the value to char and passes it on to the
// context's trsm call (presumably the BLAS DIAG argument — confirm against the backend).
// `general` is an alias of `non_unit`.
// NOTE(review): the header guarded by MULTI_ADAPTORS_BLAS_TRSV_HPP declares an
// enum with the same name and enumerators; including both headers in one
// translation unit would redefine it — confirm.
enum class diagonal : char{
|
||||
unit = 'U',
|
||||
non_unit = 'N', general = non_unit
|
||||
};
|
||||
|
||||
using core::trsm;
|
||||
|
||||
template<class Context, class A2D, class B2D>
|
||||
decltype(auto) trsm(Context&& ctxt, blas::side a_side, blas::filling a_fill, blas::diagonal a_diag, typename A2D::element_type alpha, A2D const& a, B2D&& b) try{
|
||||
;;;; if(a_side == blas::side::left ) assert(size(~a) >= size( b));
|
||||
else if(a_side == blas::side::right) assert(size( a) >= size(~b));
|
||||
|
||||
assert( stride( a) == 1 or stride(~a) == 1 );
|
||||
assert( stride( b) == 1 or stride(~b) == 1 );
|
||||
|
||||
if(size(b)!=0){
|
||||
#define CTXT std::forward<Context>(ctxt)
|
||||
;;;; if constexpr(not is_conjugated<A2D>{} and not is_conjugated<B2D>{}){
|
||||
;;;; if(stride( a)==1 and stride( b)==1) CTXT->trsm((char) (a_side), (char)-a_fill, 'N', (char)a_diag, size( b), size(~b), alpha , base(a) , stride(~a), base(b) , stride(~b));
|
||||
else if(stride(~a)==1 and stride(~b)==1) CTXT->trsm((char)swap(a_side), (char)+a_fill, 'N', (char)a_diag, size(~b), size( b), alpha , base(a) , stride( a), base(b) , stride( b));
|
||||
else if(stride( a)==1 and stride(~b)==1) CTXT->trsm((char)swap(a_side), (char)-a_fill, 'T', (char)a_diag, size(~b), size( b), alpha , base(a) , stride(~a), base(b) , stride( b));
|
||||
else if(stride(~a)==1 and stride( b)==1) CTXT->trsm((char) (a_side), (char)+a_fill, 'T', (char)a_diag, size( b), size(~b), alpha , base(a) , stride( a), base(b) , stride(~b));
|
||||
else assert(0 && "not implemented in blas");
|
||||
}else if constexpr( is_conjugated<A2D>{} and not is_conjugated<B2D>{}){
|
||||
;;;; if(stride( a)==1 and stride(~b)==1) CTXT->trsm((char)swap(a_side), (char)-a_fill, 'C', (char)a_diag, size(~b), size( b), alpha , underlying(base(a)), stride(~a), base(b) , stride( b));
|
||||
else if(stride(~a)==1 and stride( b)==1) CTXT->trsm((char) (a_side), (char)+a_fill, 'C', (char)a_diag, size( b), size(~b), alpha , underlying(base(a)), stride( a), base(b) , stride(~b));
|
||||
else if(stride( a)==1 and stride( b)==1) assert(0 && "not implemented in blas");
|
||||
else if(stride(~a)==1 and stride(~b)==1) assert(0 && "not implemented in blas");
|
||||
else assert(0 && "not implemented in blas");
|
||||
}else if constexpr(not is_conjugated<A2D>{} and is_conjugated<B2D>{}){
|
||||
;;;; if(stride(~a)==1 and stride( b)==1) CTXT->trsm((char) (a_side), (char)+a_fill, 'C', (char)a_diag, size( b), size(~b), conj(alpha), base(a) , stride( a), underlying(base(b)), stride(~b));
|
||||
else if(stride( a)==1 and stride(~b)==1) CTXT->trsm((char)swap(a_side), (char)-a_fill, 'C', (char)a_diag, size(~b), size( b), conj(alpha), base(a) , stride(~a), underlying(base(b)), stride( b));
|
||||
else if(stride(~a)==1 and stride(~b)==1) assert(0);
|
||||
else if(stride( a)==1 and stride( b)==1) assert(0);
|
||||
else assert(0 && "not implemented in blas");
|
||||
}else if constexpr( is_conjugated<A2D>{} and is_conjugated<B2D>{}){
|
||||
;;;; if(stride( a)==1 and stride(~b)==1) CTXT->trsm((char)swap(a_side), (char)-a_fill, 'T', (char)a_diag, size(~b), size( b), conj(alpha), underlying(base(a)), stride(~a), underlying(base(b)), stride( b));
|
||||
else if(stride(~a)==1 and stride( b)==1) CTXT->trsm((char) (a_side), (char)+a_fill, 'T', (char)a_diag, size( b), size(~b), conj(alpha), underlying(base(a)), stride( a), underlying(base(b)), stride(~b));
|
||||
else if(stride(~a)==1 and stride(~b)==1) assert(0 && "not implemented in blas");
|
||||
else if(stride( a)==1 and stride( b)==1) assert(0 && "not implemented in blas");
|
||||
else assert(0 && "not implemented in blas");
|
||||
}
|
||||
#undef CTXT
|
||||
}
|
||||
return std::forward<B2D>(b);
|
||||
}catch(std::logic_error& le){
|
||||
using std::to_string;
|
||||
throw std::logic_error{
|
||||
"couldn't do "+std::string(__PRETTY_FUNCTION__)+" of layout a_side="+ (char)a_side +" a_fill="+ (char)a_fill +" a_diag="+(char)a_diag+" alpha=xx"
|
||||
+" a_conj="+ to_string(is_conjugated<A2D>{}) +" a_strides="+to_string(stride(a)) +","+ to_string(stride(~a))+" a_sizes="+to_string(size(a)) +","+ to_string(size(~a))
|
||||
+" b_conj="+ to_string(is_conjugated<B2D>{}) +" b_strides="+to_string(stride(b)) +","+ to_string(stride(~b))+" b_sizes="+to_string(size(b)) +","+ to_string(size(~b))
|
||||
+" because " + le.what()
|
||||
};
|
||||
}
|
||||
|
||||
// Context-free overload with an explicit diagonal flag.
// Obtains a context with default_context_of from the base of `a` (from the
// underlying base when `a` is a conjugated view) and forwards every argument
// to the context-taking overload; returns whatever that overload returns.
template<class A2D, class B2D>
|
||||
decltype(auto) trsm(blas::side a_side, blas::filling a_fill, blas::diagonal a_diag, typename A2D::element_type alpha, A2D const& a, B2D&& b){
|
||||
if constexpr(not is_conjugated<A2D>{}) return trsm(default_context_of( a.base() ), a_side, a_fill, a_diag, alpha, a, std::forward<B2D>(b));
|
||||
else return trsm(default_context_of(underlying(a.base())), a_side, a_fill, a_diag, alpha, a, std::forward<B2D>(b));
|
||||
}
|
||||
|
||||
// Context-taking overload without a diagonal argument: supplies
// blas::diagonal::general and forwards to the full overload.
// The trailing return type repeats the forwarded call, so this overload is
// removed from overload resolution when that call is ill-formed.
template<class Context, class A2D, class B2D>
|
||||
auto trsm(Context&& ctxt, blas::side a_side, blas::filling a_fill, typename A2D::element_type alpha, A2D const& a, B2D&& b)
|
||||
->decltype(trsm(std::forward<Context>(ctxt), a_side, a_fill, blas::diagonal::general, alpha, a, std::forward<B2D>(b))){
|
||||
return trsm(std::forward<Context>(ctxt), a_side, a_fill, blas::diagonal::general, alpha, a, std::forward<B2D>(b));}
|
||||
|
||||
// Context-free overload without a diagonal argument.
// Obtains a context with default_context_of from the base of `a` (from the
// underlying base when `a` is a conjugated view) and forwards to the
// context-taking overload that has no diagonal parameter.
// Both `if constexpr` branches return, which some front ends still flag as a
// missing return (see the trailing note on the closing brace).
template<class A2D, class B2D>
|
||||
decltype(auto) trsm(blas::side a_side, blas::filling a_fill, typename A2D::element_type alpha, A2D const& a, B2D&& b){
|
||||
if constexpr(not is_conjugated<A2D>{}) return trsm(default_context_of( a.base() ), a_side, a_fill, alpha, a, std::forward<B2D>(b));
|
||||
else return trsm(default_context_of(underlying(a.base())), a_side, a_fill, alpha, a, std::forward<B2D>(b));
|
||||
} // EDG based compilers (e.g. nvcc) need option: -Xcudafe \"--diag_suppress=implicit_return_from_non_void_function\""
|
||||
|
||||
}}
|
||||
|
||||
#endif
|
||||
|
|
@ -1,609 +0,0 @@
|
|||
#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*-
|
||||
$CXX $0 -o $0x `pkg-config --libs blas` -lcudart -lboost_unit_test_framework&&$0x&&rm $0x;exit
|
||||
#endif
|
||||
// © Alfredo A. Correa 2019-2020
|
||||
|
||||
#ifndef MULTI_ADAPTORS_BLAS_TRSV_HPP
|
||||
#define MULTI_ADAPTORS_BLAS_TRSV_HPP
|
||||
|
||||
#include "../blas/core.hpp"
|
||||
|
||||
#include "../blas/operations.hpp" // uplo
|
||||
#include "../blas/filling.hpp"
|
||||
#include "../blas/side.hpp"
|
||||
|
||||
#include "../../config/NODISCARD.hpp"
|
||||
|
||||
namespace boost{
|
||||
namespace multi{namespace blas{
|
||||
|
||||
//enum DIAG : char{U='U', N='N'};
|
||||
|
||||
// Diagonal flag for triangular routines. The enumerators carry the character
// codes 'U' and 'N'; trsv casts the value to char and passes it to the
// low-level trsv call (presumably the BLAS DIAG argument — confirm against the backend).
// `general` is an alias of `non_unit`.
// NOTE(review): the header guarded by MULTI_ADAPTORS_BLAS_TRSM_HPP declares an
// enum with the same name and enumerators; including both headers in one
// translation unit would redefine it — confirm.
enum class diagonal : char{//typename std::underlying_type<char>::type{
|
||||
unit = 'U',
|
||||
non_unit = 'N', general = non_unit
|
||||
};
|
||||
|
||||
using core::trsv;
|
||||
|
||||
// Returns the pointer-like object to hand to the low-level trsv call.
// Overload for arguments that are not conjugated views: returns base(a) as is.
template<class A, std::enable_if_t<not is_conjugated<A>{}, int> =0>
|
||||
auto trsv_base(A&& a){return base(a);}
|
||||
|
||||
// Overload for conjugated views: strips the conjugating wrapper with underlying(base(a)).
template<class A, std::enable_if_t< is_conjugated<A>{}, int> =0>
|
||||
auto trsv_base(A&& a){return underlying(base(a));}
|
||||
|
||||
// Triangular solve with a vector right-hand side.
//
// Chooses the arguments of the low-level `trsv` call from the layout of `a`
// (which of its two indices has unit stride) and from whether `a` is a
// conjugated view, then returns `x` forwarded with its original value category.
// The storage of `x` is handed to the routine (which, by BLAS TRSV convention,
// overwrites it with the solution — confirm for other backends).
//
// a_nonzero_side  which triangle of `a` is populated.
// a_diag          diagonal flag, forwarded as a character.
//
// The trailing return type checks only the 'N' form of the call for well-formedness.
// Unsupported layouts are reported through `assert(0)`, so in builds with NDEBUG
// they fall through and `x` is returned unchanged.
template<class A2D, class X1D>
|
||||
auto trsv(filling a_nonzero_side, diagonal a_diag, A2D const& a, X1D&& x)
|
||||
->decltype(trsv(static_cast<char>(flip(a_nonzero_side)), 'N', static_cast<char>(a_diag), size(x), trsv_base(a), stride(rotated(a)), trsv_base(x), stride(x)), std::forward<X1D>(x))
|
||||
{
|
||||
// if(is_conjugated(x)) trsv(a_nonzero_side, a_diag, conjugated(a), conjugated(std::forward<X1D>(x)));
|
||||
{
|
||||
auto base_a = trsv_base(a);
|
||||
auto base_x = trsv_base(x);
|
||||
// plain `a`: op 'N' with flipped filling when the first index is contiguous, op 'T' when the second one is
if(not is_conjugated<A2D>{}){
|
||||
if(stride( a )==1) trsv(static_cast<char>(flip(a_nonzero_side)), 'N', static_cast<char>(a_diag), size(x), base_a, stride(rotated(a)), base_x, stride(x));
|
||||
else if(stride(rotated(a))==1) trsv(static_cast<char>( a_nonzero_side ), 'T', static_cast<char>(a_diag), size(x), base_a, stride( a ), base_x, stride(x));
|
||||
else assert(0);
|
||||
// conjugated `a`: only the layout that maps to op 'C' is handled
}else{
|
||||
if(stride( a )==1) assert(0); //TODO fallback to trsm?
|
||||
else if(stride(rotated(a))==1) trsv(static_cast<char>( a_nonzero_side ), 'C', static_cast<char>(a_diag), size(x), base_a, stride( a ), base_x, stride(x));
|
||||
else assert(0);
|
||||
}
|
||||
}
|
||||
return std::forward<X1D>(x);
|
||||
}
|
||||
|
||||
// Overload without a diagonal argument: supplies diagonal::general and
// forwards to the full overload. The trailing return type repeats the
// forwarded call, so this overload drops out of overload resolution when
// that call is ill-formed.
template<class A2D, class X1D>
|
||||
auto trsv(filling a_nonzero_side, A2D const& a, X1D&& x)
|
||||
->decltype(trsv(a_nonzero_side, diagonal::general, a, std::forward<X1D>(x))){
|
||||
return trsv(a_nonzero_side, diagonal::general, a, std::forward<X1D>(x));}
|
||||
|
||||
#if 0
|
||||
|
||||
|
||||
#if 1
|
||||
template<class A2D, class X1D, class Ret = typename X1D::decay_type>
|
||||
Ret trsv(filling a_nonzero_side, diagonal a_diag, A2D const& a, X1D const& x, void* = 0){
|
||||
return trsv(a_nonzero_side, a_diag, a, Ret{x});}
|
||||
|
||||
template<class A2D, class X1D, class Ret = typename X1D::decay_type>
|
||||
Ret trsv(filling a_nonzero_side, A2D const& a, X1D const& x, void* = 0){
|
||||
return trsv(a_nonzero_side, a, Ret{x});}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
}}}
|
||||
|
||||
#if not __INCLUDE_LEVEL__ // _TEST_MULTI_ADAPTORS_BLAS_TRSV
|
||||
|
||||
#define BOOST_TEST_MODULE "C++ Unit Tests for Multi.BLAS trsv"
|
||||
#define BOOST_TEST_DYN_LINK
|
||||
#include<boost/test/unit_test.hpp>
|
||||
|
||||
#include "../blas/gemm.hpp"
|
||||
|
||||
#include "../../array.hpp"
|
||||
|
||||
#include<iostream>
|
||||
|
||||
namespace multi = boost::multi;
|
||||
|
||||
// Debug helper: streams the elements of a one-dimensional container to
// std::cout separated by spaces, followed by a newline (std::endl, which also flushes).
// There is no return statement, so the deduced return type is void.
template<class M> decltype(auto) print_1D(M const& C){
|
||||
using boost::multi::size; using std::cout;
|
||||
for(int i = 0; i != size(C); ++i)
|
||||
cout<< C[i] <<' ';
|
||||
cout<<std::endl;
|
||||
}
|
||||
|
||||
// Debug helper: streams a two-dimensional container to std::cout, one row per
// line with elements separated by a space, then emits one extra newline.
// Returns the expression `cout<<std::endl` (i.e. whatever that stream expression yields).
// Each std::endl also flushes the stream.
template<class M> decltype(auto) print(M const& C){
|
||||
using boost::multi::size; using std::cout;
|
||||
for(int i = 0; i != size(C); ++i){
|
||||
for(int j = 0; j != size(C[i]); ++j)
|
||||
cout<< C[i][j] <<' ';
|
||||
cout<<std::endl;
|
||||
}
|
||||
return cout<<std::endl;
|
||||
}
|
||||
|
||||
namespace utf = boost::unit_test;
|
||||
namespace blas = multi::blas;
|
||||
|
||||
// Real 3x3 checks of blas::trsv, compared within the tolerance given in the decorator.
// Sub-case 1 solves with A as an upper-filled matrix; sub-case 2 passes
// blas::T(A) with lower filling. In both, the entries below the diagonal of A
// are NAN — presumably to show they are never read.
// A third sub-case (hermitized A) is disabled with `#if 0`.
BOOST_AUTO_TEST_CASE(multi_blas_trsv_real_square, *utf::tolerance(0.0001)){
|
||||
|
||||
|
||||
{
|
||||
multi::array<double, 2> const A = {
|
||||
{ 1., 3., 4.},
|
||||
{ NAN, 7., 1.},
|
||||
{ NAN, NAN, 8.}
|
||||
};
|
||||
multi::array<double, 1> b = {1., 3., 4.};
|
||||
blas::trsv(blas::filling::upper, blas::diagonal::general, A, b); // B<-Solve(A.X==B), B<-A⁻¹.B, B⊤<-(A⁻¹.B)⊤, B<-B⊤.A⁻¹⊤
|
||||
BOOST_TEST( b[0] == -2.07143 );
|
||||
BOOST_TEST( b[1] == 0.357143 );
|
||||
BOOST_TEST( b[2] == 0.5 );
|
||||
}
|
||||
{
|
||||
multi::array<double, 2> const A = {
|
||||
{ 1., 3., 4.},
|
||||
{ NAN, 7., 1.},
|
||||
{ NAN, NAN, 8.}
|
||||
};
|
||||
multi::array<double, 1> b = {1., 3., 4.};
|
||||
blas::trsv(blas::filling::lower, blas::diagonal::general, blas::T(A), b); // B<-Solve(A.X==B), B<-A⊤⁻¹.B, B⊤<-(A⊤⁻¹.B)⊤, B<-B⊤.A⁻¹
|
||||
// b equals the first column of the transposed matrix, so the expected solution is (1, 0, 0)
BOOST_TEST( b[0] == 1. );
|
||||
BOOST_TEST( b[1] == 0. );
|
||||
BOOST_TEST( b[2] == 0. );
|
||||
}
|
||||
#if 0
|
||||
{
|
||||
multi::array<double, 1> b = {3., 3., 1.};
|
||||
// trsv(filling::lower, diagonal::general, hermitized(A), b); // B<-Solve(A.X==B), B<-A⊤⁻¹.B, B⊤<-(A⊤⁻¹.B)⊤, B<-B⊤.A⁻¹
|
||||
// BOOST_TEST( b[0] == 3. );
|
||||
// BOOST_TEST( b[1] == -0.857143 );
|
||||
// BOOST_TEST( b[2] == -1.26786 );
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#if 0
|
||||
using complex = std::complex<double>;
|
||||
|
||||
BOOST_AUTO_TEST_CASE(multi_blas_trsv_complex_real_case_square, *utf::tolerance(0.00001)){
|
||||
multi::array<complex, 2> const A = {
|
||||
{ 1., 3., 4.},
|
||||
{NAN, 7., 1.},
|
||||
{NAN, NAN, 8.}
|
||||
};
|
||||
using blas::filling;
|
||||
using blas::diagonal;
|
||||
using blas::transposed;
|
||||
using blas::hermitized;
|
||||
using blas::conjugated;
|
||||
using blas::trsv;
|
||||
{
|
||||
multi::array<complex, 1> b = {1., 3., 4.};
|
||||
blas::trsv(filling::upper, diagonal::general, A, b); // B<-Solve(A.X==B), B<-A⁻¹.B, B⊤<-(A⁻¹.B)⊤, B<-B⊤.A⁻¹⊤
|
||||
BOOST_TEST( real(b[0]) == -2.07143 );
|
||||
BOOST_TEST( real(b[1]) == 0.357143 );
|
||||
BOOST_TEST( real(b[2]) == 0.5 );
|
||||
}
|
||||
{
|
||||
multi::array<complex, 1> const b = {1., 3., 4.};
|
||||
auto b_copy = blas::trsv(filling::upper, A, b); // B<-Solve(A.X==B), B<-A⁻¹.B, B⊤<-(A⁻¹.B)⊤, B<-B⊤.A⁻¹⊤
|
||||
BOOST_TEST( real(b[0]) == 1. );
|
||||
BOOST_TEST( real(b_copy[0]) == -2.07143 );
|
||||
BOOST_TEST( real(b_copy[1]) == 0.357143 );
|
||||
BOOST_TEST( real(b_copy[2]) == 0.5 );
|
||||
}
|
||||
{
|
||||
multi::array<complex, 1> const b = {1., 3., 4.};
|
||||
auto b_copy = blas::trsv(filling::upper, diagonal::general, A, b); // B<-Solve(A.X==B), B<-A⁻¹.B, B⊤<-(A⁻¹.B)⊤, B<-B⊤.A⁻¹⊤
|
||||
BOOST_TEST( real(b[0]) == 1. );
|
||||
BOOST_TEST( real(b_copy[0]) == -2.07143 );
|
||||
BOOST_TEST( real(b_copy[1]) == 0.357143 );
|
||||
BOOST_TEST( real(b_copy[2]) == 0.5 );
|
||||
}
|
||||
{
|
||||
multi::array<complex, 1> b = {3., 3., 1.};
|
||||
trsv(filling::lower, diagonal::general, transposed(A), b); // B<-Solve(A.X==B), B<-A⊤⁻¹.B, B⊤<-(A⊤⁻¹.B)⊤, B<-B⊤.A⁻¹
|
||||
BOOST_TEST( real(b[0]) == 3. );
|
||||
BOOST_TEST( real(b[1]) == -0.857143 );
|
||||
BOOST_TEST( real(b[2]) == -1.26786 );
|
||||
}
|
||||
{
|
||||
multi::array<complex, 1> b = {3., 3., 1.};
|
||||
// trsv(filling::lower, diagonal::general, hermitized(A), b); // B<-Solve(A.X==B), B<-A⊤⁻¹.B, B⊤<-(A⊤⁻¹.B)⊤, B<-B⊤.A⁻¹
|
||||
// BOOST_TEST( real(b[0]) == 3. );
|
||||
// BOOST_TEST( real(b[1]) == -0.857143 );
|
||||
// BOOST_TEST( real(b[2]) == -1.26786 );
|
||||
}
|
||||
{
|
||||
multi::array<complex, 1> b = {3., 3., 1.};
|
||||
// trsv(filling::lower, diagonal::general, hermitized(A), conjugated(b)); // B<-Solve(A.X==B), B<-A⊤⁻¹.B, B⊤<-(A⊤⁻¹.B)⊤, B<-B⊤.A⁻¹
|
||||
// BOOST_TEST( real(b[0]) == 3. );
|
||||
// BOOST_TEST( real(b[1]) == -0.857143 );
|
||||
// BOOST_TEST( real(b[2]) == -1.26786 );
|
||||
}
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(multi_blas_trsv_complex_square, *utf::tolerance(0.00001)){
|
||||
namespace blas = multi::blas;
|
||||
|
||||
multi::array<complex, 2> const A = {
|
||||
{ 1. + 1.*I, 3. - 2.*I, 4. + 1.*I},
|
||||
{NAN , 7. - 10.*I, 1. + 2.*I},
|
||||
{NAN , NAN , 8. + 1.*I}
|
||||
};
|
||||
using blas::filling;
|
||||
using blas::diagonal;
|
||||
using blas::transposed;
|
||||
using blas::hermitized;
|
||||
using blas::conjugated;
|
||||
using blas::trsv;
|
||||
{
|
||||
multi::array<complex, 1> b = {1. + 2.*I, 3. + 1.*I, 4. + 5.*I};
|
||||
blas::trsv(filling::upper, diagonal::general, A, b); // B<-Solve(A.X==B), B<-A⁻¹.B, B⊤<-(A⁻¹.B)⊤, B<-B⊤.A⁻¹⊤
|
||||
BOOST_TEST( real(b[0]) == -1.37259 );
|
||||
BOOST_TEST( real(b[1]) == 0.2127 );
|
||||
BOOST_TEST( real(b[2]) == 0.569231 );
|
||||
}
|
||||
{
|
||||
multi::array<complex, 1> b = {1. + 2.*I, 3. + 1.*I, 4. + 5.*I};
|
||||
trsv(filling::lower, diagonal::general, transposed(A), b); // B<-Solve(A.X==B), B<-A⊤⁻¹.B, B⊤<-(A⊤⁻¹.B)⊤, B<-B⊤.A⁻¹
|
||||
BOOST_TEST( real(b[0]) == 1.5 ); BOOST_TEST( imag(b[0]) == 0.5 );
|
||||
BOOST_TEST( real(b[1]) == -0.285235 ); BOOST_TEST( imag(b[1]) == -0.0503356 );
|
||||
BOOST_TEST( real(b[2]) == -0.129272 ); BOOST_TEST( imag(b[2]) == 0.28126 );
|
||||
}
|
||||
{
|
||||
multi::array<complex, 1> b = {1. + 2.*I, 3. + 1.*I, 4. + 5.*I};
|
||||
trsv(filling::upper, diagonal::general, blas::H(A), b); // B<-Solve(A.X==B), B<-A⊤⁻¹.B, B⊤<-(A⊤⁻¹.B)⊤, B<-B⊤.A⁻¹
|
||||
print_1D(b);
|
||||
BOOST_TEST( real(b[0]) == -0.661693 ); BOOST_TEST( imag(b[0]) == -1.13934 );
|
||||
BOOST_TEST( real(b[1]) == 0.135261 ); BOOST_TEST( imag(b[1]) == -0.0283944 );
|
||||
BOOST_TEST( real(b[2]) == 0.415385 ); BOOST_TEST( imag(b[2]) == 0.676923 );
|
||||
}
|
||||
{
|
||||
multi::array<complex, 1> b = {1. - 2.*I, 3. - 1.*I, 4. - 5.*I};
|
||||
trsv(filling::upper, diagonal::general, blas::H(A), blas::conj(b)); // B<-Solve(A.X==B), B<-A⊤⁻¹.B, B⊤<-(A⊤⁻¹.B)⊤, B<-B⊤.A⁻¹
|
||||
// print_1D(b);
|
||||
// BOOST_TEST( real(conjugated(b)[0]) == -0.661693 ); BOOST_TEST( imag(conjugated(b)[0]) == -1.13934 );
|
||||
// BOOST_TEST( real(conjugated(b)[1]) == 0.135261 ); BOOST_TEST( imag(conjugated(b)[1]) == -0.0283944 );
|
||||
// BOOST_TEST( real(conjugated(b)[2]) == 0.415385 ); BOOST_TEST( imag(conjugated(b)[2]) == 0.676923 );
|
||||
}
|
||||
{
|
||||
multi::array<complex, 1> b = {1. + 2.*I, 3. + 1.*I, 4. + 5.*I};
|
||||
// trsv(filling::lower, diagonal::general, hermitized(A), b); // B<-Solve(A.X==B), B<-A⊤⁻¹.B, B⊤<-(A⊤⁻¹.B)⊤, B<-B⊤.A⁻¹
|
||||
// BOOST_TEST( real(b[0]) == -0.5 ); BOOST_TEST( imag(b[0]) == 1.5 );
|
||||
// BOOST_TEST( real(b[1]) == 0.184564 ); BOOST_TEST( imag(b[1]) == -0.620805 );
|
||||
// BOOST_TEST( real(b[2]) == 0.691791 ); BOOST_TEST( imag(b[2]) == 0.0227155 );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#if 0
|
||||
BOOST_AUTO_TEST_CASE(multi_blas_trsm_double_1x1, *utf::tolerance(0.00001)){
|
||||
multi::array<double, 2> const A = {
|
||||
{10.,},
|
||||
};
|
||||
using multi::blas::side;
|
||||
using multi::blas::filling;
|
||||
using multi::blas::diagonal;
|
||||
{
|
||||
multi::array<double, 2> B = {
|
||||
{3.,},
|
||||
};
|
||||
trsm(filling::upper, diagonal::general, 1., A, B); // B=Solve(A.X=alpha*B, X) B=A⁻¹B, B⊤=B⊤.(A⊤)⁻¹, A upper triangular (implicit zeros below)
|
||||
BOOST_TEST( B[0][0] == 3./10. );
|
||||
}
|
||||
{
|
||||
multi::array<double, 2> B = {
|
||||
{3.,},
|
||||
};
|
||||
trsm(filling::upper, diagonal::general, 2., A, B); // B=Solve(A.X=alpha*B, X) B=A⁻¹B, B⊤=B⊤.(A⊤)⁻¹, A upper triangular (implicit zeros below)
|
||||
BOOST_TEST( B[0][0] == 2.*3./10. );
|
||||
}
|
||||
{
|
||||
multi::array<double, 2> B = {
|
||||
{3., 4., 5.},
|
||||
};
|
||||
trsm(filling::upper, diagonal::general, 1., A, B); // B=Solve(A.X=alpha*B, X) B=A⁻¹B, B⊤=B⊤.(A⊤)⁻¹, A upper triangular (implicit zeros below)
|
||||
BOOST_TEST( B[0][1] == 4./10. );
|
||||
}
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(multi_blas_trsm_double_0x0, *utf::tolerance(0.00001)){
|
||||
multi::array<double, 2> const A;
|
||||
using multi::blas::side;
|
||||
using multi::blas::filling;
|
||||
using multi::blas::diagonal;
|
||||
{
|
||||
multi::array<double, 2> B;
|
||||
trsm(filling::upper, diagonal::general, 1., A, B); // B=Solve(A.X=alpha*B, X) B=A⁻¹B, B⊤=B⊤.(A⊤)⁻¹, A upper triangular (implicit zeros below)
|
||||
}
|
||||
}
|
||||
|
||||
// In-place trsm with a 3x3 upper-triangular A and non-square right-hand sides,
// passing A and B either directly or as rotated() views of separately stored copies.
BOOST_AUTO_TEST_CASE(multi_blas_trsm_real_nonsquare, *utf::tolerance(0.00001)) {
	using multi::blas::side;
	using multi::blas::filling;
	using multi::blas::diagonal;

	multi::array<double, 2> const A = {
		{ 1.,  3.,  4.},
		{ 0.,  7.,  1.},
		{ 0.,  0.,  8.}
	};

	{  // 3x4 right-hand side: B directly, then through rotated(BT)
		multi::array<double, 2> B = {
			{1., 3., 4., 8.},
			{2., 7., 1., 9.},
			{3., 4., 2., 1.},
		};
		multi::array<double, 2> BT = rotated(B);  // copy taken before B is overwritten

		trsm(filling::upper, diagonal::general, 1., A, B);  // B <- A⁻¹.B
		BOOST_TEST( B[1][2] == 0.107143 );

		trsm(filling::upper, diagonal::general, 1., A, rotated(BT));
		BOOST_TEST( rotated(BT)[1][2] == 0.107143 );
	}
	{  // same right-hand side, with A passed as rotated(AT)
		multi::array<double, 2> B = {
			{1., 3., 4., 8.},
			{2., 7., 1., 9.},
			{3., 4., 2., 1.},
		};
		multi::array<double, 2> AT = rotated(A);
		multi::array<double, 2> BT = rotated(B);

		trsm(filling::upper, diagonal::general, 1., rotated(AT), B);  // B <- A⁻¹.B
		BOOST_TEST( B[1][2] == 0.107143 );

		trsm(filling::upper, diagonal::general, 1., rotated(AT), rotated(BT));
		print(rotated(BT));
		BOOST_TEST( rotated(BT)[1][2] == 0.107143 );
	}
	{  // 3x1 right-hand side (single column)
		multi::array<double, 2> B = {
			{1.},
			{2.},
			{3.},
		};
		trsm(filling::upper, diagonal::general, 1., A, B);  // B <- A⁻¹.B
		BOOST_TEST( B[2][0] == 0.375 );
	}
	{  // 3x1 right-hand side passed as rotated(BT)
		multi::array<double, 2> B = {
			{1.},
			{2.},
			{3.},
		};
		multi::array<double, 2> BT = rotated(B);
		trsm(filling::upper, diagonal::general, 1., A, rotated(BT));
		BOOST_TEST( rotated(BT)[2][0] == 0.375 );
	}
}
|
||||
|
||||
// Uses the value-returning form of trsm and checks the solution by multiplying it back with gemm:
// gemm(1., A, trsm(..., A, B)) is compared element by element against B.
BOOST_AUTO_TEST_CASE(multi_blas_trsm_real_nonsquare_default_diagonal_gemm_check, *utf::tolerance(0.00001)) {
	using multi::blas::side;
	using multi::blas::filling;
	using multi::blas::diagonal;

	multi::array<double, 2> const A = {
		{ 1.,  3.,  4.},
		{ 0.,  7.,  1.},
		{ 0.,  0.,  8.}
	};

	{
		using multi::blas::gemm;

		multi::array<double, 2> const B = {
			{1.},// 3., 4.},
			{2.},// 7., 1.},
			{3.},// 4., 2.},
		};

		{  // A and B passed directly
			auto S = trsm(filling::upper, diagonal::general, 1., A, B);
			BOOST_REQUIRE( S[2][0] == 0.375 );
			auto Bck = gemm(1., A, S);
			BOOST_REQUIRE( Bck[2][0] == 3. );
			for(int i = 0; i < 3; ++i) {
				for(int j = 0; j < size(rotated(B)); ++j) {
					BOOST_CHECK_SMALL(Bck[i][j]-B[i][j], 0.00001);
				}
			}
		}
		{  // B passed as rotated(BT)
			multi::array<double, 2> const BT = rotated(B);
			auto Bck = gemm(1., A, trsm(filling::upper, diagonal::general, 1., A, rotated(BT)));
			for(int i = 0; i < 3; ++i) {
				for(int j = 0; j < size(rotated(B)); ++j) {
					BOOST_CHECK_SMALL(Bck[i][j]-B[i][j], 0.00001);
				}
			}
		}
		{  // A passed as rotated(AT)
			auto const AT = rotated(A);
			auto Bck = gemm(1., rotated(AT), trsm(filling::upper, diagonal::general, 1., rotated(AT), B));
			for(int i = 0; i < 3; ++i) {
				for(int j = 0; j < size(rotated(B)); ++j) {
					BOOST_CHECK_SMALL(Bck[i][j]-B[i][j], 0.00001);
				}
			}
		}
		{  // both operands passed as rotated views; failures here abort the test case
			auto const AT = *rotated(A);
			auto const BT = *rotated(B);
			auto const Bck = gemm(1., A, trsm(filling::upper, diagonal::general, 1., rotated(AT), rotated(BT)));
			for(int i = 0; i < 3; ++i) {
				for(int j = 0; j < size(rotated(B)); ++j) {
					BOOST_REQUIRE_SMALL(Bck[i][j]-B[i][j], 0.00001);
				}
			}
		}
		{  // disabled: trsm/gemm overloads without filling/diagonal/alpha arguments
			auto const AT = *rotated(A);
			auto const BT = *rotated(B);
			using multi::blas::trsm;
		//	auto const Bck=gemm(A, trsm(rotated(AT), rotated(BT)));
		//	for(int i{};i<3;++i)for(int j{};j<size(rotated(B));++j) BOOST_CHECK_SMALL(Bck[i][j]-B[i][j], 0.00001);
		}
		{  // disabled: same as above with A and B passed directly
			using multi::blas::trsm;
		//	auto const Bck=gemm(A, trsm(A, B));
		//	for(int i{};i<3;++i)for(int j{};j<size(rotated(B));++j) BOOST_CHECK_SMALL(Bck[i][j]-B[i][j], 0.00001);
		}
	}
}
|
||||
|
||||
// Value-returning trsm on 1x1 real matrices, called with progressively fewer optional arguments.
BOOST_AUTO_TEST_CASE(multi_blas_trsm_real_1x1_check, *utf::tolerance(0.00001)) {
	using multi::blas::side;
	using multi::blas::filling;
	using multi::blas::diagonal;

	multi::array<double, 2> const A = {{4.}};

	{
		multi::array<double, 2> const B = {{5.}};

		{  // explicit diagonal and alpha = 3
			auto S = trsm(filling::upper, diagonal::general, 3., A, B);
			BOOST_REQUIRE( S[0][0] == 3.*5./4. );
		}
		{  // diagonal argument omitted
			auto S = trsm(filling::upper, 1., A, B);
			BOOST_REQUIRE( S[0][0] == 1.*5./4. );
		}
		{  // diagonal and alpha omitted
			auto S = trsm(filling::upper, A, B);
			BOOST_REQUIRE( S[0][0] == 1.*5./4. );
		}
	}
}
|
||||
|
||||
// Value-returning trsm on 1x1 complex matrices with a complex alpha;
// the expected value is alpha*B/A computed directly from the single elements.
BOOST_AUTO_TEST_CASE(multi_blas_trsm_complex_1x1_check, *utf::tolerance(0.00001)) {
	using complex = std::complex<double>;
	complex const I = complex{0, 1};

	using multi::blas::side;
	using multi::blas::filling;
	using multi::blas::diagonal;

	multi::array<complex, 2> const A = {
		{ 4. + 2.*I},
	};

	{
		using multi::blas::gemm;

		multi::array<complex, 2> const B = {
			{5. + 1.*I},
		};

		{
			auto S = trsm(filling::upper, diagonal::general, 3.+5.*I, A, B);
			BOOST_TEST( real(S[0][0]) == real((3.+5.*I)*B[0][0]/A[0][0]) );
			BOOST_TEST( imag(S[0][0]) == imag((3.+5.*I)*B[0][0]/A[0][0]) );
		}
	}
}
|
||||
|
||||
// Value-returning trsm with a complex 3x3 upper-triangular A and a 3x1 right-hand side,
// for every combination of direct operands and rotated() views of separately stored copies.
BOOST_AUTO_TEST_CASE(multi_blas_trsm_complex_nonsquare_default_diagonal_one_check, *utf::tolerance(0.00001)) {
	using complex = std::complex<double>;
	complex const I{0, 1};

	using multi::blas::side;
	using multi::blas::filling;
	using multi::blas::diagonal;

	multi::array<complex, 2> const A = {
		{ 1. + 4.*I,  3.,  4.- 10.*I},
		{ 0.,  7.- 3.*I,  1.},
		{ 0.,  0.,  8.- 2.*I}
	};

	{
		using multi::blas::gemm;

		multi::array<complex, 2> const B = {
			{1. + 1.*I},
			{2. + 1.*I},
			{3. + 1.*I},
		};

		{  // A and B passed directly
			auto S = trsm(filling::upper, diagonal::general, 1., A, B);
			BOOST_TEST( real(S[2][0]) == 0.323529 );
		}
		{  // B passed as rotated(BT)
			auto const BT = +rotated(B);
			auto S = trsm(filling::upper, diagonal::general, 1., A, rotated(BT));
			BOOST_TEST( real(S[2][0]) == 0.323529 );
		}
		{  // A passed as rotated(AT)
			auto const AT = +rotated(A);
			auto S = trsm(filling::upper, diagonal::general, 1., rotated(AT), B);
			BOOST_TEST( real(S[2][0]) == 0.323529 );
		}
		{  // both passed as rotated views
			auto const AT = +rotated(A);
			auto const BT = +rotated(B);
			auto S = trsm(filling::upper, diagonal::general, 1., rotated(AT), rotated(BT));
			BOOST_TEST( real(S[2][0]) == 0.323529 );
		}
	}
}
|
||||
|
||||
// Value-returning trsm with a complex 3x3 upper-triangular A and a 3x2 right-hand side,
// for every combination of direct operands and rotated() views of separately stored copies.
BOOST_AUTO_TEST_CASE(multi_blas_trsm_complex_nonsquare_default_diagonal_gemm_check, *utf::tolerance(0.00001)) {
	using complex = std::complex<double>;
	complex const I{0, 1};

	using multi::blas::side;
	using multi::blas::filling;
	using multi::blas::diagonal;

	multi::array<complex, 2> const A = {
		{ 1. + 4.*I,  3.,  4.- 10.*I},
		{ 0.,  7.- 3.*I,  1.},
		{ 0.,  0.,  8.- 2.*I}
	};

	{
		using multi::blas::gemm;

		multi::array<complex, 2> const B = {
			{1. + 1.*I, 5. + 3.*I},
			{2. + 1.*I, 9. + 3.*I},
			{3. + 1.*I, 1. - 1.*I},
		};

		{  // A and B passed directly
			auto S = trsm(filling::upper, diagonal::general, 1., A, B);  // S = Ainv.B
			BOOST_TEST( real(S[2][1]) == 0.147059 );
		}
		{  // B passed as rotated(BT)
			auto const BT = +rotated(B);
			auto S = trsm(filling::upper, diagonal::general, 1., A, rotated(BT));
			BOOST_TEST( real(S[2][1]) == 0.147059 );
		}
		{  // A passed as rotated(AT)
			auto const AT = +rotated(A);
			auto S = trsm(filling::upper, diagonal::general, 1., rotated(AT), B);
			BOOST_TEST( real(S[2][1]) == 0.147059 );
		}
		{  // both passed as rotated views
			auto const AT = +rotated(A);
			auto const BT = +rotated(B);
			auto S = trsm(filling::upper, diagonal::general, 1., rotated(AT), rotated(BT));
			BOOST_TEST( real(S[2][1]) == 0.147059 );
		}
	}
}
|
||||
|
||||
// Value-returning trsm where one of the operands is wrapped in hermitized().
// NOTE(review): the last two cases also assert on the const array B after the call — confirm the intent.
BOOST_AUTO_TEST_CASE(multi_blas_trsm_complex_nonsquare_default_diagonal_hermitized_gemm_check, *utf::tolerance(0.00001)) {
	using complex = std::complex<double>;
	complex const I{0, 1};

	using multi::blas::filling;
	using multi::blas::diagonal;

	multi::array<complex, 2> const A = {
		{ 1. + 4.*I,  3.,  4.- 10.*I},
		{ 0.,  7.- 3.*I,  1.},
		{ 0.,  0.,  8.- 2.*I}
	};

	{
		multi::array<complex, 2> const B = {
			{1. + 1.*I, 5. + 3.*I},
			{2. + 1.*I, 9. + 3.*I},
			{3. + 1.*I, 1. - 1.*I},
		};
		using multi::blas::hermitized;

		{  // hermitized coefficient matrix, lower filling
			auto S = trsm(filling::lower, diagonal::general, 1., hermitized(A), B);  // S = A⁻¹†.B, S† = B†.A⁻¹
			BOOST_TEST( real(S[2][1]) == 1.71608 );
		}
		{  // hermitized right-hand side; this 2x3 B shadows the outer 3x2 B
			multi::array<complex, 2> const B = {
				{1. + 1.*I, 2. + 1.*I, 3. + 1.*I},
				{5. + 3.*I, 9. + 3.*I, 1. - 1.*I}
			};
			auto S = *trsm(filling::upper, 1., A, hermitized(B));  // S = A⁻¹B†, S†=B.A⁻¹†, S=(B.A⁻¹)†, B <- S†, B <- B.A⁻¹†
			BOOST_TEST( imag(S[2][1]) == +0.147059 );
			BOOST_TEST( imag(B[1][2]) == -0.147059 );
		}
		{  // same as above with alpha = 2
			multi::array<complex, 2> const B = {
				{1. + 1.*I, 2. + 1.*I, 3. + 1.*I},
				{5. + 3.*I, 9. + 3.*I, 1. - 1.*I}
			};
			auto S = *trsm(filling::upper, 2., A, hermitized(B));  // S = A⁻¹B†, S†=B.A⁻¹†, S=(B.A⁻¹)†, B <- S†, B <- B.A⁻¹†
			BOOST_TEST( imag(S[2][1]) == +0.147059*2. );
			BOOST_TEST( imag(B[1][2]) == -0.147059*2. );
		}
	}
}
|
||||
|
||||
// In-place trsm on a mutable B passed through hermitized(); the result is read back from B itself.
BOOST_AUTO_TEST_CASE(multi_blas_trsm_complex_nonsquare_default_diagonal_hermitized_gemm_check_no_const, *utf::tolerance(0.00001)) {
	using complex = std::complex<double>;
	complex const I{0, 1};

	using multi::blas::trsm;
	using multi::blas::filling;
	using multi::blas::hermitized;

	multi::array<complex, 2> const A = {
		{ 1. + 4.*I,  3.,  4.- 10.*I},
		{ 0.,  7.- 3.*I,  1.},
		{ 0.,  0.,  8.- 2.*I}
	};
	multi::array<complex, 2> B = {
		{1. + 1.*I, 2. + 1.*I, 3. + 1.*I},
		{5. + 3.*I, 9. + 3.*I, 1. - 1.*I}
	};

	trsm(filling::upper, A, hermitized(B));  // B†←A⁻¹.B†, B←B.A⁻¹†, B←(A⁻¹.B†)†
	BOOST_TEST( imag(B[1][2]) == -0.147059 );
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
|
@ -1,283 +0,0 @@
|
|||
#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4-*-
|
||||
$CXX $0 -o $0x -lcudart -lboost_unit_test_framework -lboost_timer -ldl&&$0x&&rm $0x;exit
|
||||
#endif
|
||||
// © Alfredo A. Correa 2019-2020
|
||||
|
||||
#ifndef MULTI_ADAPTORS_CUDA_HPP
|
||||
#define MULTI_ADAPTORS_CUDA_HPP
|
||||
|
||||
#include "../memory/adaptors/cuda/allocator.hpp"
|
||||
#include "../memory/adaptors/cuda/managed/allocator.hpp"
|
||||
#include "../adaptors/cuda/algorithms/copy.hpp"
|
||||
|
||||
#include "../array.hpp"
|
||||
|
||||
namespace boost{
|
||||
namespace multi{
|
||||
namespace cuda{
|
||||
|
||||
template<class T>
|
||||
using allocator = multi::memory::cuda::allocator<T>;
|
||||
|
||||
template<class T> using ptr = multi::memory::cuda::ptr<T>;
|
||||
|
||||
template<class T, multi::dimensionality_type D>
|
||||
using array = multi::array<T, D, cuda::allocator<T>>;
|
||||
|
||||
template<class T, multi::dimensionality_type D>
|
||||
using array_ref = multi::array_ref<T, D, cuda::ptr<T>>;
|
||||
|
||||
template<class T, multi::dimensionality_type D>
|
||||
using static_array = multi::static_array<T, D, cuda::allocator<T>>;
|
||||
|
||||
// template<class A> auto raw_array_cast(A&& a)
|
||||
// ->decltype(static_array_cast<typename A::element_type, decltype(raw_pointer_cast(base(std::forward<A>(a))))>(std::forward<A>(a))){
|
||||
// return static_array_cast<typename A::element_type, decltype(raw_pointer_cast(base(std::forward<A>(a))))>(std::forward<A>(a));}
|
||||
|
||||
template<class A> auto raw_array_cast(A&& a)
|
||||
->decltype(std::forward<A>(a).template static_array_cast<typename A::element_type, decltype(raw_pointer_cast(base(std::forward<A>(a))))>()){
|
||||
return std::forward<A>(a).template static_array_cast<typename A::element_type, decltype(raw_pointer_cast(base(std::forward<A>(a))))>();}
|
||||
|
||||
namespace managed{
|
||||
template<class T>
|
||||
using allocator = multi::memory::cuda::managed::allocator<T>;
|
||||
|
||||
template<class T> using ptr = multi::memory::cuda::managed::ptr<T>;
|
||||
|
||||
template<class T, multi::dimensionality_type D>
|
||||
using array = multi::array<T, D, cuda::managed::allocator<T>>;
|
||||
|
||||
template<class T, multi::dimensionality_type D>
|
||||
using array_ref = multi::array<T, D, multi::memory::cuda::managed::ptr<T>>;
|
||||
|
||||
template<class T, multi::dimensionality_type D>
|
||||
using static_array = multi::array<T, D, multi::memory::cuda::managed::ptr<T>>;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
auto copy(const double* first, const double* last, boost::multi::array_iterator<double, 1, boost::multi::memory::cuda::managed::ptr<double, double*>, double&> d_first){
|
||||
return copy(
|
||||
boost::multi::array_iterator<double, 1, double const*, double const&>(first),
|
||||
boost::multi::array_iterator<double, 1, double const*, double const&>(last),
|
||||
d_first
|
||||
);
|
||||
}*/
|
||||
|
||||
}}
|
||||
|
||||
#if defined(__INCLUDE_LEVEL__) and not __INCLUDE_LEVEL__
|
||||
#define BOOST_TEST_MODULE "C++ Unit Tests for Multi CUDA adaptor"
|
||||
#define BOOST_TEST_DYN_LINK
|
||||
#include<boost/test/unit_test.hpp>
|
||||
#include<boost/timer/timer.hpp>
|
||||
|
||||
#include<chrono>
|
||||
#include<numeric>
|
||||
|
||||
// Passes `value` to an empty inline-asm statement as a read-write memory operand.
// The asm body is empty, so the object itself is left unchanged.
template <class T>
__attribute__((always_inline)) inline void DoNotOptimize(const T &value) {
	T &observed = const_cast<T &>(value);
	asm volatile("" : "+m"(observed));
}
|
||||
|
||||
// Scope timer: records the clock on construction and, on destruction, prints
// "<label>: <elapsed seconds> sec" to std::cerr.
struct watch : private std::chrono::high_resolution_clock{
	std::string label_;  // text printed in front of the elapsed time
	time_point start_;   // clock reading taken in the constructor

	watch(std::string label = "") : label_{label}, start_{now()} {}

	~watch(){
		std::chrono::duration<double> const elapsed = now() - start_;
		std::cerr<< label_ <<": "<< elapsed.count() <<" sec"<<std::endl;
	}
};
|
||||
|
||||
|
||||
namespace multi = boost::multi;
|
||||
namespace cuda = multi::cuda;
|
||||
namespace utf = boost::unit_test::framework;
|
||||
|
||||
// A 1D cuda::array constructed from a multi::array has the same extensions and compares equal to it.
BOOST_AUTO_TEST_CASE(multi_adaptors_cuda_construct_1d) {
	multi::array<double, 1> A(4, 99.);

	cuda::array<double, 1> Agpu{A};

	BOOST_REQUIRE( extensions(A) == extensions(Agpu) );
	BOOST_REQUIRE( Agpu == A );
}
|
||||
|
||||
// Assigns the full index range {0, 4} of a multi::array into the same range of a cuda::array.
BOOST_AUTO_TEST_CASE(multi_adaptors_cuda_copy_1d) {
	multi::array<double, 1> A(4, 99.);
	cuda::array<double, 1> Agpu(4);
	BOOST_REQUIRE( extensions(A) == extensions(Agpu) );

	Agpu({0, 4}) = A({0, 4});  // range-to-range assignment

	BOOST_REQUIRE( Agpu == A );
}
|
||||
|
||||
// A 2D cuda::array constructed from a multi::array compares equal to it,
// and a single element can be assigned from the cuda::array back into the multi::array.
BOOST_AUTO_TEST_CASE(multi_adaptors_cuda_construct_2d) {
	multi::array<double, 2> A({4, 6}, 99.);

	cuda::array<double, 2> Agpu{A};

	BOOST_REQUIRE( extensions(A) == extensions(Agpu) );
	BOOST_REQUIRE( Agpu == A );

	A[1][1] = Agpu[1][1];  // element-wise assignment from the cuda::array
}
|
||||
|
||||
// Copies a sub-block first (arrays still differ in column 0), then the whole array (arrays equal).
BOOST_AUTO_TEST_CASE(multi_adaptors_cuda_copy_2d) {
	multi::array<double, 2> A({4, 6});
	std::iota(A.data_elements(), A.data_elements() + A.num_elements(), 1.);  // 1., 2., 3., ...

	cuda::array<double, 2> Agpu({4, 6}, 99.);
	BOOST_REQUIRE( extensions(A) == extensions(Agpu) );

	Agpu({0, 4}, {1, 6}) = A({0, 4}, {1, 6});  // every row, columns 1..5 only
	BOOST_REQUIRE( Agpu != A );

	Agpu = A;
	BOOST_REQUIRE( Agpu == A );
}
|
||||
|
||||
// A 1D cuda::array can be initialized from a braced list of values.
BOOST_AUTO_TEST_CASE(multi_adaptors_cuda_1d_initializer_list) {
	cuda::array<double, 1> Bgpu = {1., 2., 3., 4.};

	BOOST_REQUIRE( Bgpu[1] == 2. );
}
|
||||
|
||||
// A 2D cuda::array can be initialized from nested braced lists (one inner list per row).
BOOST_AUTO_TEST_CASE(multi_adaptors_cuda_2d_initializer_list) {
	cuda::array<double, 2> Bgpu = {
		{1.,  2.,  3.,  4.},
		{5.,  6.,  7.,  8.},
		{9., 10., 11., 12.},
	};

	BOOST_REQUIRE( size(Bgpu) == 3 );
	BOOST_REQUIRE( Bgpu[1][1] == 6. );
}
|
||||
|
||||
// 3x4 variant of the sub-block copy followed by a whole-array copy.
BOOST_AUTO_TEST_CASE(multi_adaptors_cuda_2d_initializer_list_bis) {
	multi::array<double, 2> A({3, 4});
	std::iota(A.data_elements(), A.data_elements() + A.num_elements(), 1.);  // 1., 2., 3., ...

	cuda::array<double, 2> Agpu({3, 4}, 99.);
	BOOST_REQUIRE( extensions(A) == extensions(Agpu) );

	Agpu({0, 3}, {1, 4}) = A({0, 3}, {1, 4});  // every row, columns 1..3 only
	BOOST_REQUIRE( Agpu != A );

	Agpu = A;
	BOOST_REQUIRE( Agpu == A );
}
|
||||
|
||||
// Times copy construction against move construction of a 30x100x100x100 cuda::array<double>.
// Each scope owns a `watch`, which prints the elapsed time when the scope ends.
BOOST_AUTO_TEST_CASE(multi_adaptors_cuda_copy_vs_move) {
	cuda::array<double, 4> Agpu({30, 100, 100, 100}, 99.);

	{
		watch timer{utf::current_test_case().full_name()+" COPY"};
		cuda::array<double, 4> Agpu_cpy = Agpu;
		BOOST_REQUIRE( &Agpu_cpy[1][2][3][4] != &Agpu[1][2][3][4] );  // distinct storage
		BOOST_REQUIRE(  Agpu_cpy[1][2][3][4] ==  Agpu[1][2][3][4] );  // same value
	}
	{
		watch timer{utf::current_test_case().full_name()+" MOVE"};
		cuda::array<double, 4> Agpu_mov = std::move(Agpu);
		BOOST_REQUIRE( Agpu.empty() );  // the moved-from array is expected to be empty
		BOOST_REQUIRE( Agpu_mov.size() == 30 );
	}
}
|
||||
|
||||
// Times copy construction against move construction of a 30x100x100x100 cuda::array of std::complex<double>.
// Each scope owns a `watch`, which prints the elapsed time when the scope ends.
BOOST_AUTO_TEST_CASE(multi_adaptors_cuda_copy_vs_move_complex) {
	using complex = std::complex<double>;

	cuda::array<complex, 4> Agpu({30, 100, 100, 100}, 99.);

	{
		watch timer{utf::current_test_case().full_name()+" COPY"};
		cuda::array<complex, 4> Agpu_cpy = Agpu;
		BOOST_REQUIRE( &Agpu_cpy[1][2][3][4] != &Agpu[1][2][3][4] );  // distinct storage
		BOOST_REQUIRE(  Agpu_cpy[1][2][3][4] ==  Agpu[1][2][3][4] );  // same value
	}
	{
		watch timer{utf::current_test_case().full_name()+" MOVE"};
		cuda::array<complex, 4> Agpu_mov = std::move(Agpu);
		BOOST_REQUIRE( Agpu.empty() );  // the moved-from array is expected to be empty
		BOOST_REQUIRE( Agpu_mov.size() == 30 );
	}
}
|
||||
|
||||
// Construction-only checks for cuda::managed::array<double, 4>; nothing is asserted.
BOOST_AUTO_TEST_CASE(multi_adaptors_cuda_managed_double) {
	cuda::managed::array<double, 4> A({2, 3, 4, 5});      // extensions only
	cuda::managed::array<double, 4> B({2, 3, 4, 5}, 0.);  // extensions and fill value 0.
	cuda::managed::array<double, 4> C({2, 3, 4, 5}, 5.);  // extensions and fill value 5.
}
|
||||
|
||||
// Construction-only checks for a managed array whose element type is std::array<int, 3>.
BOOST_AUTO_TEST_CASE(multi_adaptors_cuda_managed_ai3) {
	using ai3 = std::array<int, 3>;

	cuda::managed::array<ai3, 4> A({2, 3, 4, 5});                     // default initialize elements
	cuda::managed::array<ai3, 4> B({2, 3, 4, 5}, ai3{});              // value initialize elements
	cuda::managed::array<ai3, 4> C({2, 3, 4, 5}, ai3{11, 22, 33});    // value initialize elements
}
|
||||
|
||||
// A row of a 2D cuda::array can initialize an owning 1D cuda::array.
BOOST_AUTO_TEST_CASE(multi_adaptor_cuda_decay) {
	cuda::array<double, 2> A = {
		{1., 2., 3., 4.},
		{5., 6., 7., 8.},
		{1., 2., 3., 4.}
	};

	cuda::array<double, 1> A1 = A[1];

//	cuda::array<complex, 2> A = {
//		{1. + 2.*I, 2. + 3.*I, 3. + 4.*I, 4. + 5.*I},
//		{5. + 2.*I, 6. + 3.*I, 7. + 4.*I, 8. + 5.*I},
//		{1. + 1.*I, 2. + 2.*I, 3. + 3.*I, 4. + 4.*I}
//	};
//	cuda::array<complex
}
|
||||
|
||||
#if 0
|
||||
// Copy-initializes a 2D cuda::array from a multi::array; nothing is asserted.
BOOST_AUTO_TEST_CASE(multi_adaptors_cuda_copy) {
	multi::array<double, 2> A({4, 5}, 99.);

	cuda::array<double, 2> Agpu = A;
}
|
||||
|
||||
// Mixed checks: conversions between multi::array, cuda::array and cuda::managed::array, the
// allocator type that get_allocator yields for several pointer/array types, and move assignment
// of a managed array.
BOOST_AUTO_TEST_CASE(multi_adaptors_cuda) {
	multi::array<double, 2> A({4, 5}, 99.);

	cuda::array<double, 2> Agpu = A;
	assert( Agpu == A );

	cuda::managed::array<double, 2> Amng = A;
	assert( Amng == Agpu );

	cuda::array_ref<double, 2> Rgpu(data_elements(Agpu), extensions(Agpu));

	{
		std::allocator<double> a = get_allocator(A);
	}

	{  // cuda::ptr -> cuda::allocator
		using multi::get_allocator;
		cuda::ptr<double> p;
		cuda::allocator<double> a = get_allocator(p);
		(void)a;
	}
	{  // cuda::managed::ptr -> cuda::managed::allocator
		using multi::get_allocator;
		cuda::managed::ptr<double> p;
		cuda::managed::allocator<double> a = get_allocator(p);
		(void)a;
	}
	{  // raw pointer -> std::allocator
		using multi::get_allocator;
		double* p = nullptr;
		std::allocator<double> a = get_allocator(p);
		(void)a;
	}
	{  // multi::array -> std::allocator
		multi::array<double, 2> arr;
		std::allocator<double> a = get_allocator(arr);
	}
	{  // cuda::array -> cuda::allocator
		cuda::array<double, 2> arr;
		cuda::allocator<double> a = get_allocator(arr);
		(void)a;
	}
	{
	//	cuda::array<double, 0> arr = 45.;
	//	BOOST_REQUIRE( arr() == 45. );
	}
	{
	//	cuda::managed::array<double, 0> arr = 45.;
	//	BOOST_REQUIRE( arr() == 45. );
	}
	{
		cuda::managed::array<double, 1> arr = {1.2, 3.4, 4.5};
	}
	{  // move assignment is expected to leave the source with size 0
		using complex = std::complex<double>;
		cuda::managed::array<complex, 2> a({1000, 1000}, 99.);
		BOOST_REQUIRE( size(a) == 1000 );

		cuda::managed::array<complex, 2> b;
		b = std::move(a);
		BOOST_REQUIRE( size(b) == 1000 );
		BOOST_REQUIRE( size(a) == 0 );
	}
}
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
@ -1,8 +0,0 @@
|
|||
# Default ignored files
|
||||
/shelf/
|
||||
/workspace.xml
|
||||
# Datasource local storage ignored files
|
||||
/dataSources/
|
||||
/dataSources.local.xml
|
||||
# Editor-based HTTP Client requests
|
||||
/httpRequests/
|
|
@ -1,8 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module type="CPP_MODULE" version="4">
|
||||
<component name="NewModuleRootManager">
|
||||
<content url="file://$MODULE_DIR$" />
|
||||
<orderEntry type="inheritedJdk" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
</module>
|
|
@ -1,8 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectModuleManager">
|
||||
<modules>
|
||||
<module fileurl="file://$PROJECT_DIR$/.idea/cuda.iml" filepath="$PROJECT_DIR$/.idea/cuda.iml" />
|
||||
</modules>
|
||||
</component>
|
||||
</project>
|
|
@ -1,6 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="VcsDirectoryMappings">
|
||||
<mapping directory="$PROJECT_DIR$/../.." vcs="Git" />
|
||||
</component>
|
||||
</project>
|
|
@ -1,20 +0,0 @@
|
|||
cmake_minimum_required(VERSION 3.11)
|
||||
project(boost-multi-adaptor-cuda VERSION 0.1 LANGUAGES CXX)
|
||||
|
||||
set(CMAKE_VERBOSE_MAKEFILE ON)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
set(CMAKE_CXX_EXTENSIONS OFF)
|
||||
|
||||
enable_language(CUDA)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -std=c++17 --extended-lambda --expt-relaxed-constexpr -Xcudafe \"--display_error_number --set_error_limit=2 --verbose_diagnostics --emit_warnings_as_errors --diag_suppress=implicit_return_from_non_void_function\"")
|
||||
|
||||
find_package(Boost REQUIRED COMPONENTS unit_test_framework)
|
||||
|
||||
enable_testing()
|
||||
list(APPEND CMAKE_CTEST_ARGUMENTS "--output-on-failure") # needs cmake 3.17
|
||||
|
||||
add_subdirectory(thrust/test)
|
||||
add_subdirectory(cublas/test)
|
||||
|
|
@ -1,235 +0,0 @@
|
|||
#ifdef COMPILATION_INSTRUCTIONS//-*-indent-tabs-mode: t; c-basic-offset: 4; tab-width: 4;-*-
|
||||
nvcc -D_TEST_MULTI_ADAPTORS_CUDA_ALGORITHMS_COPY -x cu $0 -o $0x -lboost_unit_test_framework -lboost_timer&&$0x&&
|
||||
clang++ -D_TEST_MULTI_ADAPTORS_CUDA_ALGORITHMS_COPY -x cuda --cuda-gpu-arch=sm_61 -std=c++14 $0 -o $0x -lcudart -lboost_unit_test_framework -lboost_timer&&$0x&&
|
||||
rm $0x; exit
|
||||
#endif
|
||||
|
||||
#ifndef MULTI_ADAPTORS_CUDA_ALGORITHMS_COPY_HPP
|
||||
#define MULTI_ADAPTORS_CUDA_ALGORITHMS_COPY_HPP
|
||||
|
||||
#include<cassert>
|
||||
#include<iostream>
|
||||
|
||||
#include "../../../adaptors/cuda.hpp"
|
||||
//#include "../algorithms/for_each.hpp"
|
||||
|
||||
//#include "/home/correaa/prj/alf/boost/iterator/zipper.hpp"
|
||||
|
||||
#ifndef HD
|
||||
#if defined(__CUDACC__)
|
||||
#define HD __host__ __device__
|
||||
#else
|
||||
#define HD
|
||||
#endif
|
||||
#endif
|
||||
|
||||
namespace boost{
namespace multi{namespace cuda{

#if 0
// Disabled (never compiled) copy overloads for 1D array iterators backed by memory::cuda::ptr.
// Both begin with assert(0), so they abort in builds with assertions enabled;
// presumably they are kept for reference only — confirm before re-enabling.

// Copies the range [f, l), whose iterators wrap memory::cuda::ptr, into `d`, which wraps a raw To*.
// Uses one cudaMemcpy when both strides are 1, otherwise cudaMemcpy2D with one element per row.
// Returns the destination iterator advanced by the number of copied elements.
template<typename From, typename To, typename = std::enable_if_t<std::is_trivially_assignable<To&, From>{}> >
array_iterator<To, 1, To*> copy(
	array_iterator<From, 1, memory::cuda::ptr<To>> f,
	array_iterator<From, 1, memory::cuda::ptr<To>> l,
	array_iterator<To, 1, To*> d
){
	assert(0);
	assert(f.stride() == l.stride()); static_assert(sizeof(From) == sizeof(To), "!");
	auto n = std::distance(f, l);
	if(f.stride()==1 and d.stride()==1){
		auto s = cudaMemcpy(d.data(), raw_pointer_cast(f.data()), n*sizeof(To), cudaMemcpyDeviceToHost); assert( s == cudaSuccess );
	}else{
		// strided case: each "row" of the 2D copy is a single element, the pitches are the strides in bytes
		auto s = cudaMemcpy2D(d.data(), d.stride()*sizeof(To), raw_pointer_cast(f.data()), f.stride()*sizeof(To), sizeof(To), n, cudaMemcpyDeviceToHost);
		assert( s == cudaSuccess );
	}
	return d + n;
}

// Same as above, but source and destination iterators both wrap memory::cuda::ptr,
// so both branches copy with cudaMemcpyDeviceToDevice.
// NOTE(review): the declared return type wraps a raw To* while `d` wraps memory::cuda::ptr<To2>;
// confirm that `d + n` converts to it before re-enabling this code.
template<typename From, typename From2, typename To, typename To2, typename = std::enable_if_t<std::is_trivially_assignable<To&, From>{}> >
array_iterator<To, 1, To*> copy(
	array_iterator<From, 1, memory::cuda::ptr<From2>> f,
	array_iterator<From, 1, memory::cuda::ptr<From2>> l,
	array_iterator<To  , 1, memory::cuda::ptr<To2> > d
){
	assert(0);
	assert(f.stride() == l.stride()); static_assert(sizeof(From) == sizeof(To), "!");
	auto n = std::distance(f, l);
	if(f.stride()==1 and d.stride()==1){
		// same copy kind as the strided branch below (this branch previously said DeviceToHost)
		auto s = cudaMemcpy(raw_pointer_cast(d.data()), raw_pointer_cast(f.data()), n*sizeof(To), cudaMemcpyDeviceToDevice); assert( s == cudaSuccess );
	}else{
		auto s = cudaMemcpy2D(raw_pointer_cast(d.data()), d.stride()*sizeof(To), raw_pointer_cast(f.data()), f.stride()*sizeof(To), sizeof(To), n, cudaMemcpyDeviceToDevice);
		assert( s == cudaSuccess );
	}
	return d + n;
}
#endif

}}
}
|
||||
|
||||
|
||||
#ifdef _TEST_MULTI_ADAPTORS_CUDA_ALGORITHMS_COPY
|
||||
#define BOOST_TEST_MODULE "C++ Unit Tests for Multi CUDA copy"
|
||||
#define BOOST_TEST_DYN_LINK
|
||||
#include<boost/test/unit_test.hpp>
|
||||
|
||||
#include "../../../adaptors/cuda.hpp"
|
||||
|
||||
#include <thrust/for_each.h>
|
||||
#include <thrust/execution_policy.h>
|
||||
|
||||
#include <boost/timer/timer.hpp>
|
||||
|
||||
#if __cpp_lib_parallel_algorithm >= 201603
|
||||
#include<execution>
|
||||
#endif
|
||||
|
||||
namespace multi = boost::multi;
|
||||
namespace cuda = multi::memory::cuda;
|
||||
|
||||
// Deleted function templates: any call to them is a compile error.
// NOTE(review): presumably debugging aids to make the compiler report a deduced type — confirm.
template<class T>
__device__ void WHAT(T&&) = delete;

template<class T>
__device__ void WHAT(int) = delete;

template<class T>
T&& what(T&&) = delete;
|
||||
|
||||
// Assigns between the A() and B() views of two cuda arrays with equal extensions; nothing is asserted.
BOOST_AUTO_TEST_CASE(copy_by_iterator) {
	// 198x23 array filled with std::rand values, built in an immediately-invoked lambda so it can be const
	auto const A_cpu = [] {
		multi::array<double, 2> r({198, 23});
		auto* const first = r.data_elements();
		std::generate(first, first + r.num_elements(), &std::rand);
		return r;
	}();

	multi::cuda::array<double, 2> A = A_cpu;
	multi::cuda::array<double, 2> B(extensions(A));

	B() = A();
//	BOOST_REQUIRE( A[13] == B[13] );
}
|
||||
|
||||
// Assigns one cuda array to another of equal extensions (whole-array assignment); nothing is asserted.
BOOST_AUTO_TEST_CASE(copy_by_pointer) {
	// 198x23 array filled with std::rand values, built in an immediately-invoked lambda so it can be const
	auto const A_cpu = [] {
		multi::array<double, 2> r({198, 23});
		auto* const first = r.data_elements();
		std::generate(first, first + r.num_elements(), &std::rand);
		return r;
	}();

	multi::cuda::array<double, 2> A = A_cpu;
	multi::cuda::array<double, 2> B(extensions(A));

	B = A;
//	BOOST_REQUIRE( A[13] == B[13] );
}
|
||||
|
||||
|
||||
// Times three ways of copying 1<<27 doubles between two cuda arrays:
// thrust::copy_n, unqualified copy_n and multi::adl::copy_n.
// Each scope owns a boost::timer::auto_cpu_timer constructed with the report format shown.
BOOST_AUTO_TEST_CASE(cuda_copy) {
	multi::cuda::array<double, 1> A(1<<27);
	CUDA_SLOW( A[10] = 99. );
	multi::cuda::array<double, 1> B(size(A));

	{  // thrust with the device execution policy
		boost::timer::auto_cpu_timer t{"thrust copy_n cuda::ptr %ws wall, %us user + %ss system = %ts CPU (%p%)\n"};
		thrust::copy_n(thrust::device, A.data_elements(), A.num_elements(), B.data_elements());
	}
	{  // unqualified call
		boost::timer::auto_cpu_timer t{"cuda copy_n cuda::ptr copy_n %ws wall, %us user + %ss system = %ts CPU (%p%)\n"};
		copy_n(A.data_elements(), A.num_elements(), B.data_elements());
	}
	{  // through multi::adl
		boost::timer::auto_cpu_timer t{"cuda copy_n cuda::ptr copy_n %ws wall, %us user + %ss system = %ts CPU (%p%)\n"};
		multi::adl::copy_n(A.data_elements(), A.num_elements(), B.data_elements());
	}
#if 0
	{
		boost::timer::auto_cpu_timer t{"cuda ptr copy_n %ws wall, %us user + %ss system = %ts CPU (%p%)\n"};
		/*multi::cuda::*/copy_n(A.data_elements(), A.num_elements(), B.data_elements());
	}
	{
		boost::timer::auto_cpu_timer t{"indirect cuda ptr copy_n %ws wall, %us user + %ss system = %ts CPU (%p%)\n"};
		B = A;
	}
	{
		boost::timer::auto_cpu_timer t{"indirect cuda ptr uninitialized_copy_n %ws wall, %us user + %ss system = %ts CPU (%p%)\n"};
		multi::cuda::array<double, 1> C = A;
		BOOST_REQUIRE( CUDA_SLOW( C[10] == 99. ) );
	}
	{
		boost::timer::auto_cpu_timer t{"indirect cuda ptr uninitialized_copy_n %ws wall, %us user + %ss system = %ts CPU (%p%)\n"};
		multi::cuda::array<double, 1> C = A;//();
		BOOST_REQUIRE( CUDA_SLOW( C[10] == 99. ) );
	}
	BOOST_REQUIRE( CUDA_SLOW( B[10] == 99. ) );
	CUDA_SLOW( B[10] = 10. );
	{
		boost::timer::auto_cpu_timer t{"thrust copy_n %ws wall, %us user + %ss system = %ts CPU (%p%)\n"};
		thrust::copy_n(thrust::device, begin(A), size(A), begin(B));
	}
	BOOST_REQUIRE( CUDA_SLOW( B[10] == 99. ) );
#endif

/*	multi::cuda::for_each_n(
		boost::iterators::zip(begin(A), begin(B)),
		size(A),
		[]__device__(auto&& e){
			std::get<1>(e) = std::get<0>(e);
			printf( "**** %f %f\n", static_cast<double const&>(std::get<0>(e)), static_cast<double const&>(std::get<1>(e)) );
		}
	);*/

//	auto l =
//	BOOST_REQUIRE( l == end(B) );
//	std::cout << B[8] << std::endl;
//	multi::cuda::array<double, 1> A(10, 99.);
//	BOOST_REQUIRE( CUDA_SLOW( A[5] == 99. ) );
//	int uno = 1.;
//	for_each(begin(A), end(A), [uno]__device__(auto&& e){e = uno;});
//	BOOST_REQUIRE( CUDA_SLOW( A[5] == 1. ) );
}
|
||||
|
||||
#if 0
|
||||
// Overwrites every element of a cuda array through for_each with a __device__ lambda.
BOOST_AUTO_TEST_CASE(cuda_for_each) {
	multi::cuda::array<double, 1> A(10, 99.);
	BOOST_REQUIRE( CUDA_SLOW( A[5] == 99. ) );

	int uno = 1.;  // captured by value in the lambda
	for_each(begin(A), end(A), [uno]__device__(auto&& e){e = uno;});

	BOOST_REQUIRE( CUDA_SLOW( A[5] == 1. ) );
}
|
||||
|
||||
// Times several for_each implementations filling a managed array of 1<<29 doubles.
// After each timed scope the element 10 places before the end is checked for the written value.
BOOST_AUTO_TEST_CASE(cuda_timing) {
	multi::cuda::managed::array<double, 1> A(1<<29); //std::cout << A.size()*8 << std::endl;

	{  // multi::cuda::for_each, first run
		boost::timer::auto_cpu_timer t{"cuda cold %ws wall, %us user + %ss system = %ts CPU (%p%)\n"};
		multi::cuda::for_each(begin(A), end(A), []__device__(auto&& e){e = 11.;});
	}
	BOOST_REQUIRE( CUDA_SLOW( A[size(A) - 10] == 11.) );

	{  // multi::cuda::for_each, second run
		boost::timer::auto_cpu_timer t{"cuda %ws wall, %us user + %ss system = %ts CPU (%p%)\n"};
		multi::cuda::for_each(begin(A), end(A), []__device__(auto&& e){e = 22.;});
	}
	BOOST_REQUIRE( CUDA_SLOW( A[size(A) - 10] == 22.) );

	{  // thrust::for_each with the device execution policy
		boost::timer::auto_cpu_timer t{"thrust %ws wall, %us user + %ss system = %ts CPU (%p%)\n"};
		thrust::for_each(thrust::device, begin(A), end(A), []__device__(auto&& e){e = 222.;});
	}
	BOOST_REQUIRE( CUDA_SLOW( A[size(A) - 10] == 222.) );

	{  // untimed std::for_each
		std::for_each(begin(A), end(A), [](auto&& e){e = 55.;});
	}
	BOOST_REQUIRE( CUDA_SLOW( A[size(A) - 10] == 55.) );

#if __cpp_lib_parallel_algorithm >= 201603
	{  // std::for_each with the par_unseq execution policy
		boost::timer::auto_cpu_timer t{"par  %ws wall, %us user + %ss system = %ts CPU (%p%)\n"};
		std::for_each(std::execution::par_unseq, begin(A), end(A), [](auto&& e){e = 33.;});
	}
	BOOST_REQUIRE( CUDA_SLOW( A[size(A) - 10] == 33.) );
#endif

	{  // timed std::for_each
		boost::timer::auto_cpu_timer t{"seq  %ws wall, %us user + %ss system = %ts CPU (%p%)\n"};
		std::for_each(begin(A), end(A), [](auto&& e){e = 55.;});
	}
	BOOST_REQUIRE( CUDA_SLOW( A[size(A) - 10] == 55.) );

	{  // multi::cuda::for_each again, after the std::for_each runs
		boost::timer::auto_cpu_timer t{"cuda cold %ws wall, %us user + %ss system = %ts CPU (%p%)\n"};
		multi::cuda::for_each(begin(A), end(A), []__device__(auto&& e){e = 66.;});
	}
	BOOST_REQUIRE( CUDA_SLOW( A[size(A) - 10] == 66.) );

	{  // and once more
		boost::timer::auto_cpu_timer t{"cuda %ws wall, %us user + %ss system = %ts CPU (%p%)\n"};
		multi::cuda::for_each(begin(A), end(A), []__device__(auto&& e){e = 77.;});
	}
	BOOST_REQUIRE( CUDA_SLOW( A[size(A) - 10] == 77.) );
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
|
@ -1,7 +0,0 @@
|
|||
// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*-
|
||||
// © Alfredo A. Correa 2021
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "./cublas/context.hpp"
|
||||
|
|
@ -1,22 +0,0 @@
|
|||
#ifndef MULTI_ADAPTORS_CUDA_CUBLAS_CALL_HPP
|
||||
#define MULTI_ADAPTORS_CUDA_CUBLAS_CALL_HPP
|
||||
|
||||
#include "../cublas/error.hpp"
|
||||
|
||||
#include<cuda_runtime.h> // cudaDeviceSynchronize
|
||||
|
||||
namespace boost{
|
||||
namespace multi::cuda::cublas{
|
||||
|
||||
template<auto Function, class... Args> // needs C++17
|
||||
void call(Args... args){
|
||||
auto e = static_cast<enum cublas::error>(Function(args...));
|
||||
if(e != cublas::error::success) throw std::system_error{e, "cannot call function "+ std::string{__PRETTY_FUNCTION__}};
|
||||
}
|
||||
|
||||
#define CUBLAS_(F) call<F>
|
||||
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
|
@ -1,212 +0,0 @@
|
|||
// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*-
|
||||
// © Alfredo A. Correa 2020-2021
|
||||
|
||||
#ifndef MULTI_ADAPTORS_CUDA_CUBLAS_CONTEXT_HPP
|
||||
#define MULTI_ADAPTORS_CUDA_CUBLAS_CONTEXT_HPP
|
||||
|
||||
#include "../../../config/MARK.hpp"
|
||||
#include "../../../adaptors/cuda/cublas/call.hpp"
|
||||
|
||||
#include "../../../adaptors/blas/traits.hpp"
|
||||
#include "../../../adaptors/blas/core.hpp"
|
||||
|
||||
#include "../../../memory/adaptors/cuda/ptr.hpp"
|
||||
#include "../../../memory/adaptors/cuda/managed/ptr.hpp"
|
||||
|
||||
#include <thrust/system/cuda/memory.h>
|
||||
|
||||
#include<mutex>
|
||||
|
||||
namespace boost{
|
||||
namespace multi::cuda::cublas{
|
||||
|
||||
class operation{
|
||||
cublasOperation_t impl_;
|
||||
public:
|
||||
operation(char trans) : impl_{[=]{
|
||||
switch(trans){
|
||||
case 'N': return CUBLAS_OP_N;
|
||||
case 'T': return CUBLAS_OP_T;
|
||||
case 'C': return CUBLAS_OP_C;
|
||||
default : assert(0);
|
||||
}
|
||||
return cublasOperation_t{};
|
||||
}()}{}
|
||||
operator cublasOperation_t() const{return impl_;}
|
||||
};
|
||||
|
||||
class side{
|
||||
cublasSideMode_t impl_;
|
||||
public:
|
||||
side(char trans) : impl_{[=]{
|
||||
switch(trans){
|
||||
case 'L': return CUBLAS_SIDE_LEFT;
|
||||
case 'R': return CUBLAS_SIDE_RIGHT;
|
||||
}
|
||||
assert(0); return cublasSideMode_t{};
|
||||
}()}{}
|
||||
operator cublasSideMode_t() const{return impl_;}
|
||||
};
|
||||
|
||||
class filling{
|
||||
cublasFillMode_t impl_;
|
||||
public:
|
||||
filling(char trans) : impl_{[=]{
|
||||
switch(trans){
|
||||
case 'L': return CUBLAS_FILL_MODE_LOWER;
|
||||
case 'U': return CUBLAS_FILL_MODE_UPPER;
|
||||
}
|
||||
assert(0); return cublasFillMode_t{};
|
||||
}()}{}
|
||||
operator cublasFillMode_t() const{return impl_;}
|
||||
};
|
||||
|
||||
class diagonal{
|
||||
cublasDiagType_t impl_;
|
||||
public:
|
||||
diagonal(char trans) : impl_{[=]{
|
||||
switch(trans){
|
||||
case 'N': return CUBLAS_DIAG_NON_UNIT;
|
||||
case 'U': return CUBLAS_DIAG_UNIT;
|
||||
}
|
||||
assert(0); return cublasDiagType_t{};
|
||||
}()}{}
|
||||
operator cublasDiagType_t() const{return impl_;}
|
||||
};
|
||||
|
||||
using blas::is_z;
|
||||
using blas::is_d;
|
||||
using std::is_assignable;
|
||||
using std::is_convertible_v;
|
||||
|
||||
class context : private std::unique_ptr<std::decay_t<decltype(*cublasHandle_t{})>, decltype(&cublasDestroy)>{
|
||||
using pimpl_t = std::unique_ptr<std::decay_t<decltype(*cublasHandle_t{})>, decltype(&cublasDestroy)>;
|
||||
cudaStream_t stream() const{cudaStream_t streamId; cublas::call<cublasGetStream>(this->get(), &streamId); return streamId;}
|
||||
template<auto Function, class... Args> // needs C++17
|
||||
void sync_call(Args... args){
|
||||
call<Function>(this->get(), args...);
|
||||
this->synchronize();
|
||||
}
|
||||
public:
|
||||
using pimpl_t::get;
|
||||
static context& get_instance(){
|
||||
thread_local context ctxt;
|
||||
return ctxt;
|
||||
};
|
||||
context() : pimpl_t{[]{cublasHandle_t h; cublasCreate(&h); return h;}(), &cublasDestroy}{}
|
||||
using ssize_t = int;
|
||||
static int version(){int ret; cublas::call<cublasGetVersion>(nullptr, &ret); return ret;}
|
||||
void synchronize(){
|
||||
// cudaError_t e = cudaDeviceSynchronize();
|
||||
cudaError_t e = cudaStreamSynchronize(stream());
|
||||
if(e != cudaSuccess) throw std::runtime_error{"cannot synchronize stream in cublas context"};
|
||||
}
|
||||
template<class ALPHA, class XP, class X = typename std::pointer_traits<XP>::element_type, class YP, class Y = typename std::pointer_traits<YP>::element_type,
|
||||
std::enable_if_t<is_d<X>{} and is_d<Y>{}, int> = 0
|
||||
// std::enable_if_t<is_d<X>{} and is_d<Y>{} and is_assignable<Y&, ALPHA{}*X{} + Y{}>{} and is_convertible_v<XP, thrust::cuda::pointer<X>> and is_convertible_v<YP, thrust::cuda::pointer<Y>>, int> = 0
|
||||
>
|
||||
void axpy(ssize_t n, ALPHA const* alpha, XP x, ssize_t incx, YP y, ssize_t incy){
|
||||
sync_call<cublasDaxpy>(
|
||||
n,
|
||||
(double const*)alpha,
|
||||
(double const*)raw_pointer_cast(x), incx,
|
||||
(double*)raw_pointer_cast(y), incy
|
||||
);
|
||||
}
|
||||
|
||||
template<class ALPHA, class AAP, class AA = typename std::pointer_traits<AAP>::element_type, class BBP, class BB = typename std::pointer_traits<BBP>::element_type, class BETA, class CCP, class CC = typename std::pointer_traits<CCP>::element_type,
|
||||
std::enable_if_t<
|
||||
is_z<AA>{} and is_z<BB>{} and is_z<CC>{} and is_z<ALPHA>{} and is_z<BETA>{} and is_assignable<CC&, decltype(ALPHA{}*AA{}*BB{})>{} and
|
||||
std::is_convertible_v<AAP, memory::cuda::ptr<AA>> and std::is_convertible_v<BBP, memory::cuda::ptr<BB>> and std::is_convertible_v<CCP, memory::cuda::ptr<CC>>
|
||||
,int> =0
|
||||
>
|
||||
void gemm(char transA, char transB, ssize_t m, ssize_t n, ssize_t k, ALPHA const* alpha, AAP aa, ssize_t lda, BBP bb, ssize_t ldb, BETA const* beta, CCP cc, ssize_t ldc){
|
||||
MULTI_MARK_SCOPE("cublasZgemm");
|
||||
sync_call<cublasZgemm>(cublas::operation{transA}, cublas::operation{transB}, m, n, k, (cuDoubleComplex const*)alpha, (cuDoubleComplex const*)raw_pointer_cast(aa), lda, (cuDoubleComplex const*)raw_pointer_cast(bb), ldb, (cuDoubleComplex const*)beta, (cuDoubleComplex*)raw_pointer_cast(cc), ldc);
|
||||
}
|
||||
template<class ALPHA, class AAP, class AA = typename std::pointer_traits<AAP>::element_type, class BBP, class BB = typename std::pointer_traits<BBP>::element_type, class BETA, class CCP, class CC = typename std::pointer_traits<CCP>::element_type,
|
||||
std::enable_if_t<
|
||||
is_d<AA>{} and is_d<BB>{} and is_d<CC>{} and is_assignable<CC&, decltype(ALPHA{}*AA{}*BB{})>{} and
|
||||
std::is_convertible_v<AAP, memory::cuda::ptr<AA>> and std::is_convertible_v<BBP, memory::cuda::ptr<BB>> and std::is_convertible_v<CCP, memory::cuda::ptr<CC>>
|
||||
,int> =0
|
||||
>
|
||||
void gemm(char transA, char transB, ssize_t m, ssize_t n, ssize_t k, ALPHA const* alpha, AAP aa, ssize_t lda, BBP bb, ssize_t ldb, BETA const* beta, CCP cc, ssize_t ldc){
|
||||
MULTI_MARK_SCOPE("cublasDgemm");
|
||||
sync_call<cublasDgemm>(cublas::operation{transA}, cublas::operation{transB}, m, n, k, (double const*)alpha, (double const*)raw_pointer_cast(aa), lda, (double const*)raw_pointer_cast(bb), ldb, (double const*)beta, (double*)raw_pointer_cast(cc), ldc);
|
||||
}
|
||||
template<class ALPHA, class AAP, class AA = typename pointer_traits<AAP>::element_type, class BBP, class BB = typename pointer_traits<BBP>::element_type,
|
||||
std::enable_if_t<
|
||||
is_z<AA>{} and is_z<BB>{} and is_assignable<BB&, decltype(AA{}*BB{}/ALPHA{})>{} and is_assignable<BB&, decltype(ALPHA{}*BB{}/AA{})>{} and
|
||||
is_convertible_v<AAP, memory::cuda::ptr<AA>> and is_convertible_v<BBP, memory::cuda::ptr<BB>>
|
||||
,int> =0
|
||||
>
|
||||
void trsm(char side, char ul, char transA, char diag, ssize_t m, ssize_t n, ALPHA alpha, AAP aa, ssize_t lda, BBP bb, ssize_t ldb){
|
||||
sync_call<cublasZtrsm>(cublas::side{side}, cublas::filling{ul}, cublas::operation{transA}, cublas::diagonal{diag}, m, n, (cuDoubleComplex const*)&alpha, (cuDoubleComplex const*)raw_pointer_cast(aa), lda, (cuDoubleComplex*)raw_pointer_cast(bb), ldb);
|
||||
}
|
||||
template<
|
||||
class XXP, class XX = typename std::pointer_traits<XXP>::element_type,
|
||||
class YYP, class YY = typename std::pointer_traits<YYP>::element_type,
|
||||
class RRP, class RR = typename std::pointer_traits<RRP>::element_type,
|
||||
std::enable_if_t<
|
||||
is_d<XX>{} and is_d<YY>{} and is_d<RR>{} and is_assignable<RR&, decltype(XX{}*YY{})>{} and
|
||||
is_convertible_v<XXP, memory::cuda::ptr<XX>> and is_convertible_v<YYP, memory::cuda::ptr<YY>> and is_convertible_v<RRP, RR*>
|
||||
, int> =0
|
||||
>
|
||||
void dot(int n, XXP xx, int incx, YYP yy, int incy, RRP rr){
|
||||
cublasPointerMode_t mode;
|
||||
auto s = cublasGetPointerMode(get(), &mode); assert( s == CUBLAS_STATUS_SUCCESS );
|
||||
assert( mode == CUBLAS_POINTER_MODE_HOST );
|
||||
sync_call<cublasDdot>(n, raw_pointer_cast(xx), incx, raw_pointer_cast(yy), incy, rr);
|
||||
}
|
||||
template<
|
||||
class XXP, class XX = typename std::pointer_traits<XXP>::element_type,
|
||||
class YYP, class YY = typename std::pointer_traits<YYP>::element_type,
|
||||
class RRP, class RR = typename std::pointer_traits<RRP>::element_type,
|
||||
std::enable_if_t<
|
||||
is_z<XX>{} and is_z<YY>{} and is_z<RR>{} and is_assignable<RR&, decltype(XX{}*YY{})>{} and
|
||||
is_convertible_v<XXP, memory::cuda::ptr<XX>> and is_convertible_v<YYP, memory::cuda::ptr<YY>> and is_convertible_v<RRP, RR*>
|
||||
, int> =0
|
||||
>
|
||||
void dotc(int n, XXP xx, int incx, YYP yy, int incy, RRP rr){
|
||||
cublasPointerMode_t mode;
|
||||
auto s = cublasGetPointerMode(get(), &mode); assert( s == CUBLAS_STATUS_SUCCESS );
|
||||
assert( mode == CUBLAS_POINTER_MODE_HOST );
|
||||
sync_call<cublasZdotc>(n, (cuDoubleComplex const*)raw_pointer_cast(xx), incx, (cuDoubleComplex const*)raw_pointer_cast(yy), incy, (cuDoubleComplex*)rr);
|
||||
}
|
||||
// template<class ALPHA, class AAP, class AA = typename pointer_traits<AAP>::element_type, class BETA, class CCP, class CC = typename pointer_traits<CCP>::element_type,
|
||||
// std::enable_if_t<
|
||||
// is_z<AA>{} and is_z<CC>{} and is_d<ALPHA>{} and is_d<BETA>{} and is_assignable<CC&, decltype(ALPHA{}*AA{}*AA{})>{} and
|
||||
// is_convertible_v<AAP, AA*> and is_convertible_v<CCP, CC*>
|
||||
// , int
|
||||
// > =0
|
||||
// >
|
||||
// void herk(char ul, char transA, ssize_t n, ssize_t k, ALPHA const* alpha, AAP aa, ssize_t lda, BETA const* beta, CCP cc, ssize_t ldc){
|
||||
// MULTI_MARK_SCOPE("cublasZherk");
|
||||
// cublas::call<cublasZherk>(this->get(), cublas::filling{ul}, cublas::operation{transA}, n, k, (double const*)&alpha, (cuDoubleComplex const*)raw_pointer_cast(aa), lda, (double const*)&beta, (cuDoubleComplex*)raw_pointer_cast(cc), ldc);
|
||||
// }
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
namespace boost::multi::blas{
|
||||
|
||||
template<> struct is_context<boost::multi::cuda::cublas::context > : std::true_type{};
|
||||
template<> struct is_context<boost::multi::cuda::cublas::context&> : std::true_type{};
|
||||
|
||||
template<class Ptr, class T = typename std::pointer_traits<Ptr>::element_type, std::enable_if_t<std::is_convertible<Ptr, multi::memory::cuda::ptr<T>>{}, int> =0>
|
||||
boost::multi::cuda::cublas::context* default_context_of(Ptr const&){
|
||||
namespace multi = boost::multi;
|
||||
return &multi::cuda::cublas::context::get_instance();
|
||||
}
|
||||
|
||||
template<class T>
|
||||
boost::multi::cuda::cublas::context* default_context_of(boost::multi::memory::cuda::managed::ptr<T> const&){
|
||||
namespace multi = boost::multi;
|
||||
return &multi::cuda::cublas::context::get_instance();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
@ -1,93 +0,0 @@
|
|||
#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*-
|
||||
$CXXX $CXXFLAGS $0 -o $0.$X `pkg-config --cflags --libs cudart-11.0 cublas-11.0 blas` -lboost_unit_test_framework&&$0.$X&&rm $0.$X;exit
|
||||
#endif
|
||||
// © Alfredo A. Correa 2020
|
||||
|
||||
#ifndef MULTI_ADAPTORS_CUDA_CUBLAS_ERROR_HPP
|
||||
#define MULTI_ADAPTORS_CUDA_CUBLAS_ERROR_HPP
|
||||
|
||||
#include<cublas_v2.h> // cublasStatus_t
|
||||
|
||||
#include<string>
|
||||
#include<system_error> // std::error_category
|
||||
#include<type_traits> // std::underlying_type
|
||||
|
||||
namespace boost{
|
||||
namespace multi::cuda::cublas{
|
||||
|
||||
enum class error : typename std::underlying_type<cublasStatus_t>::type{
|
||||
success = CUBLAS_STATUS_SUCCESS,
|
||||
not_initialized = CUBLAS_STATUS_NOT_INITIALIZED,
|
||||
allocation_failed = CUBLAS_STATUS_ALLOC_FAILED,
|
||||
invalid_value = CUBLAS_STATUS_INVALID_VALUE,
|
||||
architecture_mismatch = CUBLAS_STATUS_ARCH_MISMATCH,
|
||||
mapping_error = CUBLAS_STATUS_MAPPING_ERROR,
|
||||
execution_failed = CUBLAS_STATUS_EXECUTION_FAILED,
|
||||
internal_error = CUBLAS_STATUS_INTERNAL_ERROR,
|
||||
not_supported = CUBLAS_STATUS_NOT_SUPPORTED,
|
||||
license_error = CUBLAS_STATUS_LICENSE_ERROR
|
||||
};
|
||||
|
||||
std::string inline error_string(enum cublas::error err){ //https://stackoverflow.com/questions/13041399/equivalent-of-cudageterrorstring-for-cublas
|
||||
switch(err){
|
||||
case cublas::error::success : return "CUBLAS_STATUS_SUCCESS" ;
|
||||
case cublas::error::not_initialized : return "CUBLAS_STATUS_NOT_INITIALIZED" ;
|
||||
case cublas::error::allocation_failed : return "CUBLAS_STATUS_ALLOC_FAILED" ;
|
||||
case cublas::error::invalid_value : return "CUBLAS_STATUS_INVALID_VALUE" ;
|
||||
case cublas::error::architecture_mismatch: return "CUBLAS_STATUS_ARCH_MISMATCH" ;
|
||||
case cublas::error::mapping_error : return "CUBLAS_STATUS_MAPPING_ERROR" ;
|
||||
case cublas::error::execution_failed : return "CUBLAS_STATUS_EXECUTION_FAILED";
|
||||
case cublas::error::internal_error : return "CUBLAS_STATUS_INTERNAL_ERROR" ;
|
||||
case cublas::error::not_supported : return "CUBLAS_STATUS_NOT_SUPPORTED" ;
|
||||
case cublas::error::license_error : return "CUBLAS_STATUS_LICENSE_ERROR" ;
|
||||
}
|
||||
return "cublas status <unknown>";
|
||||
}
|
||||
|
||||
struct error_category : std::error_category{
|
||||
char const* name() const noexcept override{return "cublas wrapper";}
|
||||
std::string message(int err) const override{return error_string(static_cast<enum cublas::error>(err));}
|
||||
static error_category& instance(){static cublas::error_category instance; return instance;}
|
||||
};
|
||||
|
||||
/// Builds a `std::error_code` from a `cublas::error`, using the cuBLAS error category.
inline std::error_code make_error_code(cublas::error err) noexcept{
	auto const value = static_cast<int>(err);
	return {value, cublas::error_category::instance()};
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
namespace std{
|
||||
template<> struct is_error_code_enum<::boost::multi::cuda::cublas::error> : true_type{};
|
||||
}
|
||||
|
||||
#if not __INCLUDE_LEVEL__ // _TEST_MULTI_ADAPTORS_BLAS_CUDA
|
||||
|
||||
#define BOOST_TEST_MODULE "C++ Unit Tests for Multi cuBLAS"
|
||||
#define BOOST_TEST_DYN_LINK
|
||||
#include<boost/test/unit_test.hpp>
|
||||
|
||||
//#include "../../array.hpp"
|
||||
//#include "../../utility.hpp"
|
||||
|
||||
//#include "../../adaptors/cuda.hpp"
|
||||
//#include "../../adaptors/blas.hpp"
|
||||
//#include "../../adaptors/blas/cuda.hpp"
|
||||
|
||||
#include<cassert>
|
||||
|
||||
namespace multi = boost::multi;
|
||||
|
||||
|
||||
// Checks that a cuBLAS status can be wrapped in a std::error_code and thrown
// (and caught) as a std::system_error.
BOOST_AUTO_TEST_CASE(multi_cublas_error){
	namespace cublas = multi::cuda::cublas;
	std::error_code const code = cublas::make_error_code(cublas::error::not_initialized);
	BOOST_CHECK_THROW(
		throw (std::system_error{code, "error test"}),
		std::system_error
	);
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
|
@ -1,71 +0,0 @@
|
|||
cmake_minimum_required(VERSION 3.11)
|
||||
|
||||
set(CMAKE_VERBOSE_MAKEFILE ON)
|
||||
|
||||
project(boost-multi-adaptors-cuda-cublas-test VERSION 0.1 LANGUAGES CXX CUDA)
|
||||
|
||||
find_package(Boost REQUIRED COMPONENTS unit_test_framework)
|
||||
|
||||
find_package(BLAS REQUIRED)
|
||||
find_path(BLAS_INCLUDE_DIRS cblas.h
|
||||
/usr/include
|
||||
/usr/local/include
|
||||
$ENV{BLAS_HOME}/include)
|
||||
|
||||
link_libraries(${BLAS_LIBRARIES})
|
||||
# include_directories() is directory-scoped and takes neither a target nor a PRIVATE keyword
# (TEST_EXE is not even set at this point), so only the BLAS header directories are passed.
include_directories(${BLAS_INCLUDE_DIRS})
|
||||
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
set(CMAKE_CXX_EXTENSIONS OFF)
|
||||
|
||||
if(ENABLE_CUDA OR DEFINED CXXCUDA)
|
||||
enable_language(CUDA)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr -Xcudafe \"--display_error_number --diag_suppress=implicit_return_from_non_void_function\"")
|
||||
endif()
|
||||
|
||||
find_package(CUDA)
|
||||
|
||||
enable_testing()
|
||||
list(APPEND CMAKE_CTEST_ARGUMENTS "--output-on-failure") # needs cmake 3.17
|
||||
include(CTest)
|
||||
|
||||
include_directories(${CMAKE_BINARY_DIR})
|
||||
|
||||
#file(GLOB TEST_SRCS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.cpp)
|
||||
set(TEST_SRCS
|
||||
# herk.cu
|
||||
gemm.cu
|
||||
)
|
||||
|
||||
foreach(TEST_FILE ${TEST_SRCS})
|
||||
SET(TEST_EXE "${TEST_FILE}.x")
|
||||
add_executable (${TEST_EXE} ${TEST_FILE})
|
||||
if(ENABLE_CUDA OR DEFINED CXXCUDA)
|
||||
set_source_files_properties(${TEST_FILE} PROPERTIES LANGUAGE CUDA)
|
||||
target_compile_options (${TEST_EXE} PRIVATE -std=c++17)
|
||||
endif()
|
||||
# target_compile_features (${TEST_EXE} PUBLIC cxx_std_17)
|
||||
target_compile_definitions(${TEST_EXE} PRIVATE "BOOST_PP_VARIADICS")
|
||||
target_compile_definitions(${TEST_EXE} PRIVATE ${Boost_DEFINITIONS})
|
||||
target_include_directories(${TEST_EXE} PRIVATE ${Boost_INCLUDE_DIRS})
|
||||
target_include_directories(${TEST_EXE} PRIVATE ${CUDA_INCLUDE_DIRS})
|
||||
target_link_libraries (${TEST_EXE} PRIVATE ${Boost_LIBRARIES})
|
||||
target_link_directories (${TEST_EXE} PRIVATE ${Boost_LIBRARY_DIRS})
|
||||
target_link_libraries (${TEST_EXE} PRIVATE ${CUDA_LIBRARIES})
|
||||
target_link_directories (${TEST_EXE} PRIVATE ${CUDA_LIBRARY_DIRS})
|
||||
# if(NOT ENABLE_CUDA)
|
||||
# target_compile_options (${TEST_EXE} PRIVATE
|
||||
# $<$<CXX_COMPILER_ID:GNU>:
|
||||
# -Werror -Wall -Wextra -fno-common -Wpedantic -Wformat-truncation -fstack-usage>#-Wconversion
|
||||
# $<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>>:
|
||||
# -Werror -Wall -Wextra -fno-common -Wpedantic -Wmove>
|
||||
# $<$<CXX_COMPILER_ID:Intel>:
|
||||
# -Werror -Wall -Wextra -fno-common -wd161 -diag-disable=remark -Warray-bounds -Wchar-subscripts -Wcomment -Wenum-compare -Wformat -Wuninitialized -Wmaybe-uninitialized -Wmain -Wnarrowing -Wnonnull -Wparentheses -Wpointer-sign -Wreorder -Wno-return-type -Wsign-compare -Wsequence-point -Wtrigraphs -Wunused-function -Wunused-but-set-variable -Wunused-variable -Wwrite-strings -Werror -diag-error:3846
|
||||
# >
|
||||
# $<$<CXX_COMPILER_ID:MSVC>:
|
||||
# /W4>)
|
||||
# endif()
|
||||
add_test(NAME ${TEST_EXE} COMMAND ./${TEST_EXE})
|
||||
endforeach()
|
||||
|
|
@ -1,75 +0,0 @@
|
|||
// Module name shown by Boost.Test; this source holds the multi_cublas_gemm_* cases.
#define BOOST_TEST_MODULE "C++ Unit Tests for Multi CUBLAS gemm"
|
||||
#define BOOST_TEST_DYN_LINK
|
||||
#include<boost/test/unit_test.hpp>
|
||||
|
||||
//#include "../../../../adaptors/cuda.hpp" // multi::cuda ns
|
||||
#include "../../../../adaptors/blas/gemm.hpp"
|
||||
#include "../../../../adaptors/cuda/cublas.hpp"
|
||||
|
||||
#include "../../../adaptors/cuda/thrust.hpp"
|
||||
//#include "../../../complex.hpp"
|
||||
|
||||
#include<thrust/complex.h>
|
||||
|
||||
namespace multi = boost::multi;
|
||||
|
||||
BOOST_AUTO_TEST_CASE(multi_cublas_gemm_double){
|
||||
multi::array<double, 2> const a = {
|
||||
{ 1., 3., 4.},
|
||||
{ 9., 7., 1.}
|
||||
};
|
||||
// multi::thrust::cuda::array<double, 2> const a_gpu = a;
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(multi_cublas_gemm_complex){
|
||||
using complex = std::complex<double>; complex const I{0, 1};
|
||||
multi::array<complex, 2> const a = {
|
||||
{ 1. + 3.*I, 3.- 2.*I, 4.+ 1.*I},
|
||||
{ 9. + 1.*I, 7.- 8.*I, 1.- 3.*I}
|
||||
};
|
||||
// multi::thrust::cuda::array<complex, 2> const a_gpu = a;
|
||||
}
|
||||
|
||||
//BOOST_AUTO_TEST_CASE(multi_cublas_gemm_thrust_complex){
|
||||
// using complex = thrust::complex<double>; complex const I{0, 1};
|
||||
// multi::array<complex, 2> const a = {
|
||||
// { 1. + 3.*I, 3.- 2.*I, 4.+ 1.*I},
|
||||
// { 9. + 1.*I, 7.- 8.*I, 1.- 3.*I}
|
||||
// };
|
||||
//// multi::thrust::cuda::array<complex, 2> const a_gpu = a;
|
||||
//}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(multi_cublas_gemm_complex2){
|
||||
// using complex = std::complex<double>; complex const I{0, 1};
|
||||
// multi::array<complex, 2> const a = {
|
||||
// {1. + 2.*I, 5. + 2.*I},
|
||||
// {9. - 1.*I, 9. + 1.*I},
|
||||
// {1. + 1.*I, 2. + 2.*I}
|
||||
// };
|
||||
// multi::array<complex, 2> const b = {
|
||||
// { 11. - 2.*I, 5. + 2.*I},
|
||||
// { 7. - 3.*I, 2. + 1.*I},
|
||||
// { 8. - 1.*I, 1. + 1.*I}
|
||||
// };
|
||||
//// multi::thrust::cuda::array<complex, 2> const a_gpu = a;
|
||||
//// multi::thrust::cuda::array<complex, 2> const b_gpu = b;
|
||||
// namespace blas = multi::blas;
|
||||
// {
|
||||
// multi::array<complex, 2> c({3, 3}, 9999.);
|
||||
// // blas::gemm(1., a, blas::H(b), 0., c);
|
||||
|
||||
// // multi::thrust::cuda::array<complex, 2> const c_gpu;
|
||||
// // blas::gemm(1., a_gpu, b_gpu, c_gpu);
|
||||
// // BOOST_REQUIRE( c == c_gpu );
|
||||
// }
|
||||
// {
|
||||
// multi::array<complex, 2> c({3, 3}, 9999.);
|
||||
// blas::herk(1., blas::H(a), c);
|
||||
// BOOST_REQUIRE( c[2][1] == complex(41, +2) );
|
||||
// BOOST_REQUIRE( c[1][2] == complex(41, -2) );
|
||||
|
||||
// multi::array<complex, 2> const c_copy = blas::herk(1., blas::H(a));
|
||||
// BOOST_REQUIRE( c_copy == c );
|
||||
// }
|
||||
}
|
||||
|
|
@ -1,39 +0,0 @@
|
|||
#define BOOST_TEST_MODULE "C++ Unit Tests for Multi CUBLAS herk"
|
||||
#define BOOST_TEST_DYN_LINK
|
||||
#include<boost/test/unit_test.hpp>
|
||||
|
||||
#include "../../../../adaptors/cuda.hpp" // multi::cuda ns
|
||||
#include "../../../../adaptors/blas/herk.hpp"
|
||||
|
||||
|
||||
namespace multi = boost::multi;
|
||||
using complex = std::complex<double>;
|
||||
complex const I{0, 1};
|
||||
|
||||
BOOST_AUTO_TEST_CASE(multi_blas_herk){
|
||||
multi::array<complex, 2> const a = {
|
||||
{ 1. + 3.*I, 3.- 2.*I, 4.+ 1.*I},
|
||||
{ 9. + 1.*I, 7.- 8.*I, 1.- 3.*I}
|
||||
};
|
||||
multi::cuda::array<complex, 2> const a_gpu = a;
|
||||
namespace blas = multi::blas;
|
||||
{
|
||||
multi::array<complex, 2> c({2, 2}, 9999.);
|
||||
blas::herk(1., a, c);
|
||||
BOOST_REQUIRE( c[1][0] == complex(50., -49.) );
|
||||
BOOST_REQUIRE( c[0][1] == complex(50., +49.) );
|
||||
|
||||
multi::array<complex, 2> const c_copy = blas::herk(1., a);
|
||||
BOOST_REQUIRE( c == c_copy );
|
||||
}
|
||||
{
|
||||
multi::array<complex, 2> c({3, 3}, 9999.);
|
||||
blas::herk(1., blas::H(a), c);
|
||||
BOOST_REQUIRE( c[2][1] == complex(41, +2) );
|
||||
BOOST_REQUIRE( c[1][2] == complex(41, -2) );
|
||||
|
||||
multi::array<complex, 2> const c_copy = blas::herk(1., blas::H(a));
|
||||
BOOST_REQUIRE( c_copy == c );
|
||||
}
|
||||
}
|
||||
|
|
@ -1,66 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#include<driver_types.h> // cudaError_t
|
||||
#include<cuda_runtime_api.h> // cudaGetErrorString
|
||||
|
||||
#include<system_error>
|
||||
#include<type_traits> // underlying_type
|
||||
|
||||
namespace boost{
|
||||
namespace multi{
|
||||
namespace cuda{
|
||||
namespace runtime{
|
||||
|
||||
enum /*class*/ error : std::underlying_type<cudaError_t>::type{
|
||||
success = cudaSuccess, // = 0 The API call returned with no errors. In the case of query calls, this also means that the operation being queried is complete (see cudaEventQuery() and cudaStreamQuery()).
|
||||
missing_configuration = cudaErrorMissingConfiguration,
|
||||
// invalid_value /*invalid_argument*/ = cudaErrorInvalidValue, // = 1, This indicates that one or more of the parameters passed to the API call is not within an acceptable range of values.
|
||||
memory_allocation = cudaErrorMemoryAllocation, // = 2 // The API call failed because it was unable to allocate enough memory to perform the requested operation.
|
||||
initialization_error = cudaErrorInitializationError,
|
||||
lauch_failure = cudaErrorLaunchFailure,
|
||||
lauch_timeout = cudaErrorLaunchTimeout,
|
||||
lauch_out_of_resources = cudaErrorLaunchOutOfResources,
|
||||
invalid_device_function = cudaErrorInvalidDeviceFunction,
|
||||
invalid_configuration = cudaErrorInvalidConfiguration,
|
||||
invalid_device = cudaErrorInvalidDevice,
|
||||
invalid_value = cudaErrorInvalidValue, ///*invalid_argument*/ = cudaErrorInvalidValue, // = 1 This indicates that one or more of the parameters passed to the API call is not within an acceptable range of values.
|
||||
invalid_pitch_value = cudaErrorInvalidPitchValue,
|
||||
invalid_symbol = cudaErrorInvalidSymbol,
|
||||
unmap_buffer_object_failed = cudaErrorUnmapBufferObjectFailed,
|
||||
invalid_device_pointer = cudaErrorInvalidDevicePointer,
|
||||
invalid_texture = cudaErrorInvalidTexture,
|
||||
invalid_texture_binding = cudaErrorInvalidTextureBinding,
|
||||
invalid_channel_descriptor = cudaErrorInvalidChannelDescriptor,
|
||||
invalid_memcpy_direction = cudaErrorInvalidMemcpyDirection,
|
||||
invalud_filter_setting = cudaErrorInvalidFilterSetting,
|
||||
invalid_norm_setting = cudaErrorInvalidNormSetting,
|
||||
unknown = cudaErrorUnknown,
|
||||
invalid_resource_handle = cudaErrorInvalidResourceHandle,
|
||||
insuffient_driver = cudaErrorInsufficientDriver,
|
||||
no_device = cudaErrorNoDevice,
|
||||
set_on_active_process = cudaErrorSetOnActiveProcess,
|
||||
startup_failure = cudaErrorStartupFailure,
|
||||
invalid_ptx = cudaErrorInvalidPtx,
|
||||
no_kernel_image_for_device = cudaErrorNoKernelImageForDevice,
|
||||
jit_compiler_not_found = cudaErrorJitCompilerNotFound
|
||||
};
|
||||
|
||||
inline std::string string(enum error e){return cudaGetErrorString(static_cast<cudaError_t>(e));}
|
||||
|
||||
struct error_category : std::error_category{
|
||||
char const* name() const noexcept override{return "cuda wrapper";}
|
||||
std::string message(int e) const override{return string(static_cast<error>(e));}
|
||||
static error_category& instance(){
|
||||
static error_category instance;
|
||||
return instance;
|
||||
}
|
||||
};
|
||||
|
||||
/// Builds a `std::error_code` from a runtime `error`, using the CUDA error category.
inline std::error_code make_error_code(error err) noexcept{
	auto const value = static_cast<int>(err);
	return std::error_code(value, error_category::instance());
}
|
||||
|
||||
}}}}
|
||||
|
||||
namespace std{template<> struct is_error_code_enum<boost::multi::cuda::runtime::error> : true_type{};}
|
||||
|
|
@ -1,8 +0,0 @@
|
|||
#include "../../adaptors/thrust.hpp"
|
||||
|
||||
int main(){
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -1,95 +0,0 @@
|
|||
#ifdef COMPILATION_INSTRUCTIONS//-*-indent-tabs-mode: t; c-basic-offset: 4; tab-width: 4;-*-
|
||||
$CXX $0 -o $0x -lcudart -lboost_timer -lboost_unit_test_framework&&$0x&&rm $0x;exit
|
||||
#endif
|
||||
// © Alfredo A. Correa 2020
|
||||
#define BOOST_TEST_MODULE "C++ Unit Tests for Multi cuda adaptor"
|
||||
#define BOOST_TEST_DYN_LINK
|
||||
#include<boost/test/unit_test.hpp>
|
||||
namespace utf = boost::unit_test;
|
||||
#include <boost/timer/timer.hpp>
|
||||
|
||||
#include "../../../adaptors/cuda.hpp"
|
||||
|
||||
#include<complex>
|
||||
|
||||
namespace boost::multi::memory::cuda{
|
||||
template<class T1, class P1, class T2, class P2>
|
||||
void copy(array_iterator<T1, 1, boost::multi::memory::cuda::ptr<P1>>, array_iterator<T1, 1, boost::multi::memory::cuda::ptr<P1>>, array_iterator<T2, 1, boost::multi::memory::cuda::ptr<P2>>){
|
||||
assert(0);
|
||||
}
|
||||
//std::copy<boost::multi::array_iterator<double, 1, boost::multi::memory::cuda::ptr<const double, const double *>, boost::multi::memory::cuda::ref<const double> >, boost::multi::array_iterator<double, 1, boost::multi::memory::cuda::ptr<double, double *>, boost::multi::memory::cuda::ref<double> > >
|
||||
}
|
||||
|
||||
namespace multi = boost::multi;
|
||||
|
||||
BOOST_AUTO_TEST_CASE(cudart_double, *utf::tolerance(0.00001)*utf::timeout(10)){
|
||||
|
||||
auto const in = []{
|
||||
multi::array<double, 4> r({32, 90, 98, 96});
|
||||
std::generate(data_elements(r), data_elements(r)+num_elements(r), &std::rand);
|
||||
return r;
|
||||
}();
|
||||
std::cout<<"memory size "<< in.num_elements()*sizeof(decltype(in)::element)/1e6 <<" MB\n";
|
||||
|
||||
{
|
||||
boost::timer::auto_cpu_timer t{"%ws wall, CPU (%p%)\n"};
|
||||
multi::cuda::array<double, 4> const in_gpu = in;
|
||||
|
||||
multi::array<double, 4> const in_cpy = in_gpu;
|
||||
BOOST_REQUIRE( in == in_cpy );
|
||||
}
|
||||
{
|
||||
boost::timer::auto_cpu_timer t{"%ws wall, CPU (%p%)\n"};
|
||||
multi::cuda::array<double, 4> const in_gpu = in;
|
||||
}
|
||||
{
|
||||
multi::cuda::array<double, 4> const in_gpu = in;
|
||||
multi::cuda::array<double, 4> out_gpu = in;
|
||||
boost::timer::auto_cpu_timer t{"copy assign gpu____ %ws wall, CPU (%p%)\n"};
|
||||
out_gpu = in_gpu;
|
||||
auto c = static_cast<double>(out_gpu[1][2][3][4]); (void)c;
|
||||
|
||||
(out_gpu << 1) = (in_gpu << 1);
|
||||
}
|
||||
{
|
||||
multi::cuda::managed::array<double, 4> const in_mng = in;
|
||||
multi::cuda::managed::array<double, 4> out_mng = in;
|
||||
{
|
||||
boost::timer::auto_cpu_timer t{"copy assign mng____ %ws wall, CPU (%p%)\n"};
|
||||
out_mng = in_mng;
|
||||
auto c = static_cast<double>(out_mng[1][2][3][4]); (void)c;
|
||||
}
|
||||
{
|
||||
boost::timer::auto_cpu_timer t{"copy assign mng_hot %ws wall, CPU (%p%)\n"};
|
||||
out_mng = in_mng;
|
||||
auto c = static_cast<double>(out_mng[1][2][3][4]); (void)c;
|
||||
}
|
||||
{
|
||||
boost::timer::auto_cpu_timer t{"copy assign mng loop %ws wall, CPU (%p%)\n"};
|
||||
out_mng() = in_mng();
|
||||
auto c = static_cast<double>(out_mng[1][2][3][4]); (void)c;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(cudart_complex, *utf::tolerance(0.00001)*utf::timeout(10)){
|
||||
|
||||
using complex = std::complex<double>;
|
||||
|
||||
auto const in = []{
|
||||
multi::array<complex, 4> r({32, 90, 98, 96});
|
||||
std::generate(data_elements(r), data_elements(r)+num_elements(r), &std::rand);
|
||||
return r;
|
||||
}();
|
||||
std::cout<<"memory size "<< in.num_elements()*sizeof(decltype(in)::element)/1e6 <<" MB\n";
|
||||
|
||||
{
|
||||
boost::timer::auto_cpu_timer t{"%ws wall, CPU (%p%)\n"};
|
||||
multi::cuda::array<complex, 4> const in_gpu = in;
|
||||
}
|
||||
{
|
||||
boost::timer::auto_cpu_timer t{"%ws wall, CPU (%p%)\n"};
|
||||
multi::cuda::array<complex, 4> const in_gpu = in;
|
||||
}
|
||||
|
||||
}
|
|
@ -1,47 +0,0 @@
|
|||
// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*-
|
||||
// © Alfredo A. Correa 2021
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../../array.hpp"
|
||||
|
||||
#include "./thrust/cuda/managed.hpp"
|
||||
|
||||
#include <thrust/device_allocator.h>
|
||||
#include <thrust/system/cuda/memory.h> // ::thrust::cuda::allocator
|
||||
|
||||
|
||||
|
||||
namespace boost{
|
||||
namespace multi{
|
||||
namespace thrust{
|
||||
|
||||
template<class T, multi::dimensionality_type D> using device_array = multi::array<T, D, ::thrust::device_allocator<T>>;
|
||||
template<class T, multi::dimensionality_type D> using host_array = multi::array<T, D >;
|
||||
|
||||
namespace device{
|
||||
|
||||
template<class T, multi::dimensionality_type D> using array = device_array<T, D>;
|
||||
|
||||
}
|
||||
|
||||
namespace host{
|
||||
|
||||
template<class T, multi::dimensionality_type D> using array = host_array<T, D>;
|
||||
|
||||
}
|
||||
|
||||
namespace cuda{
|
||||
|
||||
template<class T, multi::dimensionality_type D> using array = multi::array<T, D, ::thrust::cuda::allocator<T>>;
|
||||
|
||||
namespace managed{
|
||||
|
||||
template<class T, multi::dimensionality_type D> using array = multi::array<T, D, boost::multi::thrust::cuda::managed::allocator<T>>;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}}}
|
||||
|
|
@ -1,15 +0,0 @@
|
|||
# Top-level build script for the thrust adaptor tests: declares a CXX+CUDA
# project, sets nvcc flags, enables CTest and adds the two test directories.
cmake_minimum_required(VERSION 3.11)
|
||||
project(boost-multi-adaptor-cuda-thrust-test VERSION 0.1 LANGUAGES CXX CUDA)
|
||||
|
||||
# Appends to the existing CUDA flags: relaxed constexpr, error numbers in
# diagnostics, and suppression of two specific cudafe diagnostics.
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr -Xcudafe \"--display_error_number --diag_suppress=implicit_return_from_non_void_function --diag_suppress=class_and_member_name_conflict\"")
|
||||
|
||||
enable_testing()
|
||||
|
||||
# Memory-check settings are defined before CTest is included.
find_program(MEMORYCHECK_COMMAND valgrind)
|
||||
set(MEMORYCHECK_COMMAND_OPTIONS "--leak-check=full --error-exitcode=1")
|
||||
include (CTest)
|
||||
|
||||
# Each subdirectory carries its own CMakeLists.txt that defines the tests.
add_subdirectory(cuda/test)
|
||||
|
||||
add_subdirectory(test)
|
||||
|
|
@ -1,144 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
|
||||
#include "../../../cuda/runtime/error.hpp"
|
||||
|
||||
#include <thrust/system/cuda/pointer.h>
|
||||
|
||||
#include<new> // bad_alloc
|
||||
#include<cassert>
|
||||
|
||||
namespace boost{
|
||||
namespace multi{
|
||||
|
||||
namespace thrust{
|
||||
namespace cuda{
|
||||
namespace managed{
|
||||
|
||||
template<class> class pointer;
|
||||
|
||||
template<class T>
|
||||
class reference : public ::thrust::cuda::reference<T>{
|
||||
using base_type = ::thrust::cuda::reference<T>;
|
||||
public:
|
||||
constexpr explicit reference(::thrust::cuda::reference<T> const& other) : base_type{other}{}
|
||||
constexpr explicit reference(T& other) : base_type{&other}{}
|
||||
constexpr operator T&()&&{return raw_reference_cast(static_cast<base_type&>(*this));}
|
||||
constexpr pointer<T> operator&(){return pointer<T>{base_type::operator&()};}
|
||||
using ::thrust::cuda::reference<T>::operator=;
|
||||
};
|
||||
|
||||
template<class T>
|
||||
class pointer{
|
||||
::thrust::cuda::pointer<T> impl_;
|
||||
public:
|
||||
constexpr explicit pointer(::thrust::cuda::pointer<T> const& other) : impl_{other}{}
|
||||
constexpr explicit pointer(T* other) : impl_(other){}
|
||||
|
||||
using difference_type = typename ::thrust::iterator_traits<::thrust::cuda::pointer<T>>::difference_type;
|
||||
using value_type = typename ::thrust::iterator_traits<::thrust::cuda::pointer<T>>::value_type;
|
||||
using pointer = pointer<T>; // -Xcudafe \"--diag_suppress=class_and_member_name_conflict\" //TODO
|
||||
using reference = managed::reference<T>;
|
||||
using iterator_category = typename ::thrust::iterator_traits<::thrust::cuda::pointer<T>>::iterator_category;
|
||||
|
||||
using element_type = T;
|
||||
|
||||
constexpr operator T*() const{return raw_pointer_cast(impl_);}
|
||||
constexpr operator ::thrust::cuda::pointer<T>() const{return impl_;}
|
||||
|
||||
constexpr pointer& operator++(){impl_.operator++(); return *this;}
|
||||
constexpr pointer& operator--(){impl_.operator--(); return *this;}
|
||||
constexpr auto operator++(int i){return pointer{impl_.operator++(i)};}
|
||||
constexpr auto operator--(int i){return pointer{impl_.operator--(i)};}
|
||||
constexpr pointer& operator+=(difference_type n){impl_.operator+=(n); return *this;}
|
||||
constexpr pointer& operator-=(difference_type n){impl_.operator-=(n); return *this;}
|
||||
constexpr pointer operator+(difference_type n) const{return pointer{impl_ + n};}
|
||||
constexpr pointer operator-(difference_type n) const{return pointer{impl_ - n};}
|
||||
|
||||
constexpr reference operator*() const{return reference{impl_.operator*()};}
|
||||
constexpr reference operator[](difference_type n){return *((*this)+n);}
|
||||
|
||||
friend auto raw_pointer_cast(pointer const& p){return raw_pointer_cast(p.impl_);}
|
||||
};
|
||||
|
||||
// Allocation-failure exception of this namespace; derives from std::bad_alloc so it
// can be caught as one. Thrown by allocator::allocate below.
struct bad_alloc : std::bad_alloc{};
|
||||
|
||||
template<class T = void>
|
||||
class allocator{// : cuda::allocator<T>{
|
||||
static_assert( std::is_same<T, std::decay_t<T>>{}, "!" );
|
||||
public:
|
||||
using value_type = T;
|
||||
using pointer = managed::pointer<T>;
|
||||
using size_type = ::size_t; // as specified by CudaMalloc
|
||||
pointer allocate(typename allocator::size_type n, const void* = 0){
|
||||
if(n == 0) return pointer{nullptr};
|
||||
T* p = nullptr;
|
||||
namespace cudart = boost::multi::cuda::runtime;
|
||||
auto e = static_cast<cudart::error>(cudaMallocManaged(&p, n*sizeof(T)));
|
||||
switch(e){
|
||||
case cudart::success : break;
|
||||
case cudart::memory_allocation: throw bad_alloc{};
|
||||
default: throw std::system_error{e, "cannot allocate "+std::to_string(n*sizeof(T))+" bytes in '"+__PRETTY_FUNCTION__+"'"};
|
||||
}
|
||||
auto ret = static_cast<pointer>(p);
|
||||
if(!ret) throw bad_alloc{};
|
||||
return ret;
|
||||
}
|
||||
void deallocate(pointer p, size_type){
|
||||
namespace cudart = boost::multi::cuda::runtime;
|
||||
auto e = static_cast<cudart::error>(cudaFree(raw_pointer_cast(p)));
|
||||
if(e!=cudart::success){
|
||||
throw std::system_error{e, std::string{"cannot "}+ __PRETTY_FUNCTION__};
|
||||
}
|
||||
}
|
||||
template<class P, class... Args>
|
||||
void construct(P p, Args&&... args){ // remove?
|
||||
::new(p.rp_) T(std::forward<Args>(args)...);
|
||||
}
|
||||
template<class P, class... Args>
|
||||
void construct(P* p, Args&&... args){ // remove?
|
||||
::new(p) T(std::forward<Args>(args)...);
|
||||
}
|
||||
template<class P> void destroy(P p){p.rp_->~T();} // remove?
|
||||
template<class P> void destroy(P* p){p->~T();} // remove?
|
||||
constexpr bool operator==(allocator<T> const&) const{return true;}
|
||||
constexpr bool operator!=(allocator<T> const&) const{return false;}
|
||||
|
||||
template<class InputIt, class ForwardIt>
|
||||
constexpr ForwardIt alloc_uninitialized_copy(InputIt first, InputIt last, ForwardIt d_first) const{
|
||||
return ForwardIt{adl_uninitialized_copy(first, last, d_first)};
|
||||
}
|
||||
template<class InputIt, class Size, class ForwardIt>
|
||||
constexpr ForwardIt alloc_uninitialized_copy_n(InputIt first, Size count, ForwardIt d_first) const{
|
||||
return ForwardIt{adl_uninitialized_copy_n(first, count, d_first)};
|
||||
}
|
||||
template<class ForwardIt, class Size>
|
||||
constexpr ForwardIt alloc_uninitialized_default_construct_n(ForwardIt first, Size n) const{
|
||||
return ForwardIt{adl_uninitialized_default_construct_n(first, n)};
|
||||
}
|
||||
template<class ForwardIt, class Size>
|
||||
constexpr ForwardIt alloc_destroy_n(ForwardIt first, Size n) const{return ForwardIt{destroy_n(first, n)};}
|
||||
};
|
||||
|
||||
}}}
|
||||
|
||||
}}
|
||||
|
||||
#if not __INCLUDE_LEVEL__
|
||||
|
||||
#include<memory>
|
||||
#include<iostream>
|
||||
#include "../../../../array.hpp"
|
||||
|
||||
namespace multi = boost::multi;
|
||||
namespace cuda = multi::memory::cuda;
|
||||
|
||||
int main(){
|
||||
|
||||
multi::array<double, 1, multi::memory::cuda::managed::allocator<double> > A(32);
|
||||
A[17] = 3.;
|
||||
assert( A[17] == 3. );
|
||||
|
||||
}
|
||||
#endif
|
||||
|
|
@ -1,42 +0,0 @@
|
|||
# Builds one test executable per *.cu file in this directory and registers it
# with CTest.
cmake_minimum_required(VERSION 3.16)

set(CMAKE_VERBOSE_MAKEFILE ON)

find_package(Boost REQUIRED COMPONENTS unit_test_framework)

include_directories(${Boost_INCLUDE_DIRS})

file(GLOB TEST_SRCS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.cu)
#set(TEST_SRCS
#	managed.cu
#)

foreach(TEST_FILE ${TEST_SRCS})
	# The executable/target is named after the source file without its extension.
	get_filename_component(TEST_EXE ${TEST_FILE} NAME_WE)
	add_executable            (${TEST_EXE} ${TEST_FILE})
	if(ENABLE_CUDA OR DEFINED CXXCUDA)
		set_source_files_properties(${TEST_FILE} PROPERTIES LANGUAGE CUDA)
		target_compile_options  (${TEST_EXE} PRIVATE -std=c++17 --expt-relaxed-constexpr)
	endif()
#	target_compile_features   (${TEST_EXE} PUBLIC cxx_std_17)
	target_compile_definitions(${TEST_EXE} PRIVATE "BOOST_PP_VARIADICS")
	target_compile_definitions(${TEST_EXE} PRIVATE ${Boost_DEFINITIONS})
#	target_include_directories(${TEST_EXE} PRIVATE ${Boost_INCLUDE_DIRS})
	target_link_libraries     (${TEST_EXE} PRIVATE ${Boost_LIBRARIES})
	target_link_directories   (${TEST_EXE} PRIVATE ${Boost_LIBRARY_DIRS})
#	if(NOT ENABLE_CUDA)
#		target_compile_options  (${TEST_EXE} PRIVATE
#			-Werror -Wall -Wextra -fno-common
#			$<$<CXX_COMPILER_ID:GNU>:
#				-Wpedantic -Wformat-truncation -fstack-usage>#-Wconversion
#			$<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>>:
#				-Wpedantic -Wmove>
#			$<$<CXX_COMPILER_ID:Intel>:
#				-wd161 -diag-disable=remark -Warray-bounds -Wchar-subscripts -Wcomment -Wenum-compare -Wformat -Wuninitialized -Wmaybe-uninitialized -Wmain -Wnarrowing -Wnonnull -Wparentheses -Wpointer-sign -Wreorder -Wno-return-type -Wsign-compare -Wsequence-point -Wtrigraphs -Wunused-function -Wunused-but-set-variable -Wunused-variable -Wwrite-strings -Werror -diag-error:3846
#			>
#			$<$<CXX_COMPILER_ID:MSVC>:
#				/W4>)
#	endif()
	# Pass the target name as the command: CMake substitutes the built executable's
	# location, so the test does not rely on "./" and the working directory.
	add_test(NAME ${TEST_EXE} COMMAND ${TEST_EXE})
endforeach()
|
||||
|
|
@ -1,61 +0,0 @@
|
|||
#define BOOST_TEST_MODULE "C++ Unit Tests for Multi CUDA thrust"
|
||||
#define BOOST_TEST_DYN_LINK
|
||||
#include<boost/test/unit_test.hpp>
|
||||
|
||||
#include "../../cuda/managed.hpp"
|
||||
|
||||
namespace multi = boost::multi;
|
||||
|
||||
// Stores 1.0 in the double that `p` points to (plain host pointer interface).
void set_one(double* p){
	p[0] = 1.;
}
|
||||
|
||||
// Stores 2.0 through a thrust CUDA pointer; the test below calls it with a
// managed::pointer to exercise the implicit conversion to thrust::cuda::pointer.
void set_two_gpu(thrust::cuda::pointer<double> p){
|
||||
*p = 2.;
|
||||
}
|
||||
|
||||
// Stores 3.0 in the double bound to the raw reference `p`.
void set_three_ref(double& p){
	double* const target = &p;
	*target = 3.;
}
|
||||
|
||||
// Accepts only pointer-like types whose value_type is double and that convert to
// thrust::cuda::pointer<double>; does nothing with the argument.
template<
	class Pointer,
	class V = typename std::iterator_traits<Pointer>::value_type,
	class = std::enable_if_t<
		std::is_same<V, double>::value and
		std::is_convertible<Pointer, thrust::cuda::pointer<V>>::value
	>
>
void some_fun(Pointer /*p*/){}
|
||||
|
||||
// Accepts only pointer-like types whose value_type is double and that convert to
// a raw double*; does nothing with the argument.
template<
	class Pointer,
	class V = typename std::iterator_traits<Pointer>::value_type,
	class = std::enable_if_t<
		std::is_same<V, double>::value and
		std::is_convertible<Pointer, V*>::value
	>
>
void some_other_fun(Pointer /*p*/){}
|
||||
|
||||
// Overload-priority tag: prio<N> derives from prio<N-1> (prio<0> from std::false_type),
// so a prio<N> argument prefers the overload taking the highest tag and can fall back
// to lower ones. Declared as a struct so the inheritance is public; with private
// inheritance the derived-to-base fallback conversion would be inaccessible.
template<int N> struct prio : std::conditional_t<N!=0, prio<N-1>, std::false_type>{};
|
||||
|
||||
template<class Pointer, class V = typename std::iterator_traits<Pointer>::value_type, std::enable_if_t<std::is_same<V, double>{} and std::is_convertible<Pointer, thrust::cuda::pointer<V>>{}, int> =0>
|
||||
int overload_aux(Pointer p, prio<0>){return 0;}
|
||||
|
||||
template<class Pointer, class V = typename std::iterator_traits<Pointer>::value_type, std::enable_if_t<std::is_same<V, double>{} and std::is_convertible<Pointer, V*>{}, int> =0>
|
||||
int overload_aux(Pointer p, prio<1>){return 1;}
|
||||
|
||||
template<class Pointer> int overload(Pointer p){return overload_aux(p, prio<1>{});}
|
||||
|
||||
// Exercises managed::pointer through every interface it is meant to plug into:
// direct indexing, raw pointer, thrust pointer, raw reference and SFINAE dispatch.
BOOST_AUTO_TEST_CASE(vector){

	namespace managed = multi::thrust::cuda::managed;

	managed::allocator<double> alloc;
	managed::pointer<double> p = alloc.allocate(100);

	// element access through the pointer itself
	p[17] = 3.;
	BOOST_TEST_REQUIRE( p[17] == 3. );

	// passed where a double* is expected
	set_one(p);
	BOOST_TEST_REQUIRE( p[0] == 1. );

	// passed where a thrust::cuda::pointer<double> is expected
	set_two_gpu(p);
	BOOST_TEST_REQUIRE( p[0] == 2. );

	// an element passed where a double& is expected
	set_three_ref( p[1] );
	BOOST_TEST_REQUIRE( p[1] == 3. );

	// constrained template accepts the pointer
	some_fun(p);

	// the prio<1> candidate (the one returning 1) is expected to be chosen
	BOOST_TEST_REQUIRE(overload(p) == 1);

	alloc.deallocate(p, 100);

}
|
||||
|
|
@ -1,48 +0,0 @@
|
|||
# Builds the listed thrust adaptor tests (one executable per source file) and
# registers each with CTest.
cmake_minimum_required(VERSION 3.11)
project(boost-multi-adaptor-cuda-thrust-test VERSION 0.1 LANGUAGES CXX CUDA)

set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr -Xcudafe \"--display_error_number --diag_suppress=implicit_return_from_non_void_function\"")

find_package(Boost REQUIRED COMPONENTS unit_test_framework)

enable_testing()

# Memory-check settings are defined before CTest is included.
find_program(MEMORYCHECK_COMMAND valgrind)
set(MEMORYCHECK_COMMAND_OPTIONS "--leak-check=full --error-exitcode=1")
include (CTest)

#file(GLOB TEST_SRCS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.cpp)
set(TEST_SRCS
	array.cu
	vector.cu
)

foreach(TEST_FILE ${TEST_SRCS})
	# The executable/target keeps the full source file name plus a ".x" suffix.
	SET(TEST_EXE "${TEST_FILE}.x")
	add_executable            (${TEST_EXE} ${TEST_FILE})
	if(ENABLE_CUDA OR DEFINED CXXCUDA)
		set_source_files_properties(${TEST_FILE} PROPERTIES LANGUAGE CUDA)
		target_compile_options  (${TEST_EXE} PRIVATE -std=c++17 --expt-relaxed-constexpr)
	endif()
#	target_compile_features   (${TEST_EXE} PUBLIC cxx_std_17)
	target_compile_definitions(${TEST_EXE} PRIVATE "BOOST_PP_VARIADICS")
	target_compile_definitions(${TEST_EXE} PRIVATE ${Boost_DEFINITIONS})
	target_include_directories(${TEST_EXE} PRIVATE ${Boost_INCLUDE_DIRS})
	target_link_libraries     (${TEST_EXE} PRIVATE ${Boost_LIBRARIES})
	target_link_directories   (${TEST_EXE} PRIVATE ${Boost_LIBRARY_DIRS})
#	if(NOT ENABLE_CUDA)
#		target_compile_options  (${TEST_EXE} PRIVATE
#			-Werror -Wall -Wextra -fno-common
#			$<$<CXX_COMPILER_ID:GNU>:
#				-Wpedantic -Wformat-truncation -fstack-usage>#-Wconversion
#			$<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>>:
#				-Wpedantic -Wmove>
#			$<$<CXX_COMPILER_ID:Intel>:
#				-wd161 -diag-disable=remark -Warray-bounds -Wchar-subscripts -Wcomment -Wenum-compare -Wformat -Wuninitialized -Wmaybe-uninitialized -Wmain -Wnarrowing -Wnonnull -Wparentheses -Wpointer-sign -Wreorder -Wno-return-type -Wsign-compare -Wsequence-point -Wtrigraphs -Wunused-function -Wunused-but-set-variable -Wunused-variable -Wwrite-strings -Werror -diag-error:3846
#			>
#			$<$<CXX_COMPILER_ID:MSVC>:
#				/W4>)
#	endif()
	# Pass the target name as the command: CMake substitutes the built executable's
	# location, so the test does not rely on "./" and the working directory.
	add_test(NAME ${TEST_EXE} COMMAND ${TEST_EXE})
endforeach()
|
||||
|
|
@ -1,87 +0,0 @@
|
|||
#define BOOST_TEST_MODULE "C++ Unit Tests for Multi CUDA thrust"
|
||||
#define BOOST_TEST_DYN_LINK
|
||||
#include<boost/test/unit_test.hpp>
|
||||
|
||||
#include "../../../../adaptors/cuda/thrust.hpp"
|
||||
|
||||
namespace multi = boost::multi;
|
||||
|
||||
template<class T> void what(T&&) = delete;
|
||||
|
||||
// Checks element access on a thrust-CUDA-allocated multi::array and four ways of
// copying the elements of a host array into it (thrust/std, copy/uninitialized_copy).
BOOST_AUTO_TEST_CASE(array){

	// element-wise write and read-back
	{
		multi::thrust::cuda::array<double, 2> C({2, 3});

		C[0][0] = 0. ;
		C[1][1] = 11.;

		BOOST_TEST_REQUIRE( C[1][1] == 11. );
	}

	{
		multi::array<double, 2> const H = {
			{00., 01., 02.},
			{10., 11., 12.},
		};

		BOOST_TEST_REQUIRE( H[1][1] == 11. );

		{
			multi::thrust::cuda::array<double, 2> C(H.extensions());
			BOOST_REQUIRE( C.num_elements() == H.num_elements() );

			thrust::copy_n(H.data_elements(), H.num_elements(), C.data_elements());
			BOOST_TEST_REQUIRE( C[1][1] == 11. );
			BOOST_REQUIRE( C == H );
		}
		{
			multi::thrust::cuda::array<double, 2> C(H.extensions());
			BOOST_REQUIRE( C.num_elements() == H.num_elements() );

			std::copy_n(H.data_elements(), H.num_elements(), C.data_elements());
			BOOST_TEST_REQUIRE( C[1][1] == 11. );
			BOOST_REQUIRE( C == H );
		}
		{
			multi::thrust::cuda::array<double, 2> C(H.extensions());
			BOOST_REQUIRE( C.num_elements() == H.num_elements() );

			std::uninitialized_copy_n(H.data_elements(), H.num_elements(), C.data_elements());
			BOOST_TEST_REQUIRE( C[1][1] == 11. );
			BOOST_REQUIRE( C == H );
		}
		{
			multi::thrust::cuda::array<double, 2> C(H.extensions());
			BOOST_REQUIRE( C.num_elements() == H.num_elements() );

			// The call to the deleted helper `what(...)` that used to sit here made this
			// test ill-formed (calling a deleted function is a compile error); removed.
			thrust::uninitialized_copy_n(H.data_elements(), H.num_elements(), C.data_elements());
			BOOST_TEST_REQUIRE( C[1][1] == 11. );
			BOOST_REQUIRE( C == H );
		}
	//	{
	//		multi::thrust::cuda::array<double, 2> C(H.extensions());
	//		BOOST_REQUIRE( C.extensions() == H.extensions() );
	//		thrust::copy_n(H.begin(), H.size(), C.begin());
	//		BOOST_REQUIRE( C == H );
	//	}
	//	{
	//		multi::thrust::cuda::array<double, 2> C(H.extensions());
	//		BOOST_REQUIRE( C.extensions() == H.extensions() );
	//		std::copy_n(H.begin(), H.size(), C.begin());
	//		BOOST_REQUIRE( C == H );
	//	}
	//	{
	//		multi::thrust::cuda::array<double, 2> C(H.extensions());
	//		C = H;
	//		BOOST_REQUIRE( C == H );
	//	}
	//	{
	//		multi::thrust::cuda::array<double, 2> C = H;
	//		BOOST_REQUIRE( C == H );
	//	}
	}

}
|
||||
|
|
@ -1,13 +0,0 @@
|
|||
#include <thrust/device_vector.h>
|
||||
|
||||
// Minimal program: allocates ten ints with std::allocator, writes the first one and
// returns that value plus one as the exit status.
int main(){
//	thrust::device_vector<int> D(5);
//	assert( D.size() == 5 );

//	cudaDeviceSynchronize();
	std::allocator<int> alloc;
	int* p = alloc.allocate(10);
	p[0] = 2;
	int const result = p[0] + 1;
	alloc.deallocate(p, 10);  // give the storage back; it was previously leaked
	return result;
}
|
||||
|
Binary file not shown.
|
@ -1,43 +0,0 @@
|
|||
#define BOOST_TEST_MODULE "C++ Unit Tests for Multi CUDA thrust"
|
||||
#define BOOST_TEST_DYN_LINK
|
||||
#include<boost/test/unit_test.hpp>
|
||||
|
||||
#include <thrust/host_vector.h>
|
||||
#include <thrust/device_vector.h>
|
||||
|
||||
// Basic host_vector / device_vector round trip using thrust only.
BOOST_AUTO_TEST_CASE(vector){
	// host-side vector holding four ints
	thrust::host_vector<int> H(4);

	// fill the four elements one by one
	H[0] = 14;
	H[1] = 20;
	H[2] = 38;
	H[3] = 46;

	// size and one element are checked
	BOOST_TEST_REQUIRE( H.size() == 4 );
	BOOST_TEST_REQUIRE( H[2] == 38 );

	// shrink to the first two elements
	H.resize(2);

	BOOST_REQUIRE( H.size() == 2 );

	// construct a device_vector from the host_vector
	thrust::device_vector<int> D(H);

//	f(D.data());

	// overwrite both device elements
	D[0] = 99;
	D[1] = 88;

	// the vector's data pointer is usable as a thrust::cuda::pointer
	thrust::cuda::pointer<int> p = D.data();
	BOOST_REQUIRE( p[0] == 99 );

	BOOST_TEST_REQUIRE( D[1] == 88 );
}
|
||||
|
|
@ -1,739 +0,0 @@
|
|||
#ifdef COMPILATION// -*-indent-tabs-mode: t; c-basic-offset: 4; tab-width: 4;-*-
|
||||
$CXX $0 -o $0x -lcudart -lcufft `pkg-config --libs fftw3` -lboost_unit_test_framework&&$0x&&rm $0x;exit
|
||||
#endif
|
||||
// © Alfredo A. Correa 2020
|
||||
|
||||
#ifndef MULTI_ADAPTORS_CUFFTW_HPP
|
||||
#define MULTI_ADAPTORS_CUFFTW_HPP
|
||||
|
||||
#include "../adaptors/../utility.hpp"
|
||||
#include "../adaptors/../array.hpp"
|
||||
#include "../adaptors/../config/NODISCARD.hpp"
|
||||
|
||||
#include "../adaptors/cuda.hpp"
|
||||
|
||||
#include<numeric>
|
||||
|
||||
#include<tuple> // std::apply
|
||||
#include<array>
|
||||
|
||||
#include<vector>
|
||||
|
||||
#include "../complex.hpp"
|
||||
|
||||
//#include<execution>
|
||||
#include<future>
|
||||
#include<cufft.h>
|
||||
|
||||
namespace boost{
|
||||
namespace multi{
|
||||
namespace memory{
|
||||
namespace cuda{
|
||||
|
||||
#if 0
|
||||
template<class T1, class T1const, class T2, class T2const>
|
||||
auto copy(
|
||||
array_iterator<T1, 1, managed::ptr<T1const>> first,
|
||||
array_iterator<T1, 1, managed::ptr<T1const>> last,
|
||||
array_iterator<T2, 1, managed::ptr<T2const>> d_first
|
||||
){
|
||||
assert(first.stride() == last.stride());
|
||||
auto s = cudaMemcpy2D(raw_pointer_cast(d_first.data()), d_first.stride()*sizeof(T2), raw_pointer_cast(first.data()), first.stride()*sizeof(T2), sizeof(T2), last - first, cudaMemcpyDefault);
|
||||
switch(s){
|
||||
case cudaSuccess: break;
|
||||
case cudaErrorInvalidValue: assert(0);
|
||||
case cudaErrorInvalidPitchValue: assert(0);
|
||||
case cudaErrorInvalidDevicePointer: assert(0);
|
||||
case cudaErrorInvalidMemcpyDirection: assert(0);
|
||||
default: assert(0); // unknown error
|
||||
}
|
||||
return d_first + (last - first);
|
||||
}
|
||||
#endif
|
||||
|
||||
}}}}
|
||||
|
||||
namespace boost{
|
||||
namespace multi{
|
||||
namespace cufft{
|
||||
|
||||
// Thin wrapper around the integer direction flag handed to cuFFT.
// Converts implicitly from and to int.
class sign{
	int impl_ = 0;  // default-constructed sign holds 0 instead of an indeterminate value
public:
	sign() = default;
	constexpr sign(int i) : impl_{i}{}
	constexpr operator int() const{return impl_;}
};
|
||||
|
||||
// Named direction values: the two cuFFT direction macros plus `none` (0).
constexpr sign forward{CUFFT_FORWARD};
|
||||
constexpr sign none{0};
|
||||
constexpr sign backward{CUFFT_INVERSE};
|
||||
|
||||
// The three values must be pairwise distinct (compared through the conversion to int).
static_assert(forward != none and none != backward and backward != forward, "!");
|
||||
|
||||
class plan{
|
||||
using complex_type = cufftDoubleComplex;
|
||||
complex_type const* idata_ = nullptr;
|
||||
complex_type* odata_ = nullptr;
|
||||
int direction_ = 0;
|
||||
cufftHandle h_;
|
||||
plan() = default;
|
||||
plan(plan const&) = delete;
|
||||
plan(plan&& other) :
|
||||
idata_{std::exchange(other.idata_, nullptr)},
|
||||
odata_{std::exchange(other.odata_, nullptr)},
|
||||
direction_{std::exchange(other.direction_, 0)},
|
||||
h_{std::exchange(other.h_, {})}
|
||||
{} // needed in <=C++14 for return
|
||||
void ExecZ2Z(complex_type const* idata, complex_type* odata, int direction) const{
|
||||
++tl_execute_count;
|
||||
// assert(idata_ and odata_);
|
||||
// assert(direction_!=0);
|
||||
cufftResult r = ::cufftExecZ2Z(h_, const_cast<complex_type*>(idata), odata, direction);
|
||||
switch(r){
|
||||
case CUFFT_SUCCESS : break;// "cuFFT successfully executed the FFT plan."
|
||||
case CUFFT_INVALID_PLAN : throw std::runtime_error{"The plan parameter is not a valid handle."};
|
||||
// case CUFFT_ALLOC_FAILED : throw std::runtime_error{"CUFFT failed to allocate GPU memory."};
|
||||
// case CUFFT_INVALID_TYPE : throw std::runtime_error{"The user requests an unsupported type."};
|
||||
case CUFFT_INVALID_VALUE : throw std::runtime_error{"At least one of the parameters idata, odata, and direction is not valid."};
|
||||
case CUFFT_INTERNAL_ERROR : throw std::runtime_error{"Used for all internal driver errors."};
|
||||
case CUFFT_EXEC_FAILED : throw std::runtime_error{"CUFFT failed to execute an FFT on the GPU."};
|
||||
case CUFFT_SETUP_FAILED : throw std::runtime_error{"The cuFFT library failed to initialize."};
|
||||
// case CUFFT_INVALID_SIZE : throw std::runtime_error{"The user specifies an unsupported FFT size."};
|
||||
// case CUFFT_UNALIGNED_DATA : throw std::runtime_error{"Unaligned data."};
|
||||
// case CUFFT_INCOMPLETE_PARAMETER_LIST: throw std::runtime_error{"Incomplete parameter list."};
|
||||
// case CUFFT_INVALID_DEVICE : throw std::runtime_error{"Invalid device."};
|
||||
// case CUFFT_PARSE_ERROR : throw std::runtime_error{"Parse error."};
|
||||
// case CUFFT_NO_WORKSPACE : throw std::runtime_error{"No workspace."};
|
||||
// case CUFFT_NOT_IMPLEMENTED: throw std::runtime_error{"Not implemented."};
|
||||
// case CUFFT_LICENSE_ERROR : throw std::runtime_error{"License error."};
|
||||
// case CUFFT_NOT_SUPPORTED : throw std::runtime_error{"CUFFT_NOT_SUPPORTED"};
|
||||
default : throw std::runtime_error{"cufftExecZ2Z unknown error"};
|
||||
}
|
||||
// if(cudaDeviceSynchronize() != cudaSuccess) throw std::runtime_error{"Cuda error: Failed to synchronize"};
|
||||
}
|
||||
void swap(plan& other){
|
||||
using std::swap;
|
||||
swap(idata_, other.idata_);
|
||||
swap(odata_, other.odata_);
|
||||
swap(direction_, other.direction_);
|
||||
swap(h_, other.h_);
|
||||
}
|
||||
public:
|
||||
thread_local static int tl_execute_count;
|
||||
plan& operator=(plan other){swap(other); return *this;}
|
||||
void operator()() const{ExecZ2Z(idata_, odata_, direction_);}
|
||||
template<class I, class O>
|
||||
O&& execute_dft(I&& i, O&& o, int direction) const{
|
||||
ExecZ2Z(
|
||||
const_cast<complex_type*>(reinterpret_cast<complex_type const*>(base(i))),
|
||||
const_cast<complex_type*>(reinterpret_cast<complex_type const*>(base(o))),
|
||||
direction
|
||||
);
|
||||
return std::forward<O>(o);
|
||||
}
|
||||
template<class I, class O>
|
||||
void execute_dft(I&& i, O&& o) const{execute_dft(std::forward<I>(i), std::forward<O>(o), direction_);}
|
||||
~plan(){if(h_) cufftDestroy(h_);}
|
||||
using size_type = int;
|
||||
using ssize_type = int;
|
||||
|
||||
template<class I, class O, //std::enable_if_t<(I::dimensionality < 4), int> =0,
|
||||
dimensionality_type D = I::dimensionality,
|
||||
typename = decltype(raw_pointer_cast(base(std::declval<I const&>())), reinterpret_cast<complex_type* >(raw_pointer_cast(base(std::declval<O&>()))))
|
||||
>
|
||||
plan(I const& i, O&& o, sign s) :
|
||||
idata_{ reinterpret_cast<complex_type const*>(raw_pointer_cast(base(i))) },
|
||||
odata_{const_cast<complex_type*>(reinterpret_cast<complex_type* >(raw_pointer_cast(base(o))))},
|
||||
direction_{s}
|
||||
{
|
||||
assert( I::dimensionality < 4 );
|
||||
assert( CUFFT_FORWARD == s or CUFFT_INVERSE == s or s == 0 );
|
||||
assert( sizes(i) == sizes(o) );
|
||||
|
||||
// using std::experimental::apply;// using std::experimental::make_array;
|
||||
auto ion = std::apply([](auto... t){return std::array< size_type, D>{static_cast< size_type>(t)...};}, sizes (i));
|
||||
auto istrides = std::apply([](auto... t){return std::array<ssize_type, D>{static_cast<ssize_type>(t)...};}, strides(i));
|
||||
auto ostrides = std::apply([](auto... t){return std::array<ssize_type, D>{static_cast<ssize_type>(t)...};}, strides(o));
|
||||
|
||||
std::array<std::tuple<int, int, int>, I::dimensionality> ssn;
|
||||
for(std::size_t i = 0; i != ssn.size(); ++i) ssn[i] = std::make_tuple(istrides[i], ostrides[i], ion[i]);
|
||||
std::sort(ssn.begin(), ssn.end(), std::greater<>{});
|
||||
|
||||
for(std::size_t i = 0; i != ssn.size(); ++i){
|
||||
istrides[i] = std::get<0>(ssn[i]);
|
||||
ostrides[i] = std::get<1>(ssn[i]);
|
||||
ion[i] = std::get<2>(ssn[i]);
|
||||
}// = std::tuple<int, int, int>(istrides[i], ostrides[i], ion[i]);
|
||||
|
||||
int istride = istrides.back();
|
||||
auto inembed = istrides; inembed.fill(0);
|
||||
int ostride = ostrides.back();
|
||||
auto onembed = ostrides; onembed.fill(0);
|
||||
for(std::size_t i = 1; i != onembed.size(); ++i){
|
||||
assert(ostrides[i-1] >= ostrides[i]); // otherwise ordering is incompatible
|
||||
assert(ostrides[i-1]%ostrides[i]==0);
|
||||
onembed[i]=ostrides[i-1]/ostrides[i]; // assert( onembed[i] <= ion[i] );
|
||||
assert(istrides[i-1]%istrides[i]==0);
|
||||
inembed[i]=istrides[i-1]/istrides[i]; // assert( inembed[i] <= ion[i] );
|
||||
}
|
||||
|
||||
direction_ = s;
|
||||
idata_ = reinterpret_cast<complex_type const*>(raw_pointer_cast(base(i))) ;
|
||||
odata_ = const_cast<complex_type*>(reinterpret_cast<complex_type* >(raw_pointer_cast(base(o))));
|
||||
|
||||
switch(::cufftPlanMany(
|
||||
/*cufftHandle *plan*/ &h_,
|
||||
/*int rank*/ ion.size(),
|
||||
/*int *n*/ ion.data(), // /*NX*/ last - first,
|
||||
/*int *inembed*/ inembed.data(),
|
||||
/*int istride*/ istride,
|
||||
/*int idist*/ 1, //stride(first),
|
||||
/*int *onembed*/ onembed.data(),
|
||||
/*int ostride*/ ostride,
|
||||
/*int odist*/ 1, //stride(d_first),
|
||||
/*cufftType type*/ CUFFT_Z2Z,
|
||||
/*int batch*/ 1 //BATCH
|
||||
)){
|
||||
case CUFFT_SUCCESS : break;// "cuFFT successfully executed the FFT plan."
|
||||
case CUFFT_ALLOC_FAILED : throw std::runtime_error{"CUFFT failed to allocate GPU memory."};
|
||||
case CUFFT_INVALID_VALUE : throw std::runtime_error{"At least one of the parameters idata, odata, and direction is not valid."};
|
||||
case CUFFT_INTERNAL_ERROR : throw std::runtime_error{"Used for all internal driver errors."};
|
||||
case CUFFT_SETUP_FAILED : throw std::runtime_error{"The cuFFT library failed to initialize."};
|
||||
case CUFFT_INVALID_SIZE : throw std::runtime_error{"The user specifies an unsupported FFT size."};
|
||||
default : throw std::runtime_error{"cufftPlanMany unknown error"};
|
||||
}
|
||||
}
|
||||
#ifndef __INTEL_COMPILER
|
||||
template<class It1, class It2, dimensionality_type D = decltype(*It1{})::dimensionality>
|
||||
static auto many(It1 first, It1 last, It2 d_first, int sign = 0, unsigned = 0)
|
||||
->std::decay_t<decltype(const_cast<complex_type*>(reinterpret_cast<complex_type*>(raw_pointer_cast(base(d_first)))), std::declval<plan>())>
|
||||
#else
|
||||
template<class It1, class It2,
|
||||
dimensionality_type D = decltype(*It1{})::dimensionality,
|
||||
typename TT = decltype(const_cast<complex_type*>(reinterpret_cast<complex_type*>(It2{}.base().raw_pointer_cast())))
|
||||
>
|
||||
static auto many(It1 first, It1 last, It2 d_first, int sign = 0, unsigned = 0)
|
||||
#endif
|
||||
{
|
||||
assert( CUFFT_FORWARD == sign or CUFFT_INVERSE == sign or sign == 0 );
|
||||
assert(sizes(*first)==sizes(*d_first));
|
||||
auto ion = std::apply([](auto... t){return std::array< size_type, D>{static_cast< size_type>(t)...};}, sizes (* first));
|
||||
|
||||
assert(strides(*first) == strides(*last));
|
||||
auto istrides = std::apply([](auto... t){return std::array<ssize_type, D>{static_cast<ssize_type>(t)...};}, strides(* first));
|
||||
auto ostrides = std::apply([](auto... t){return std::array<ssize_type, D>{static_cast<ssize_type>(t)...};}, strides(*d_first));
|
||||
|
||||
std::array<std::tuple<int, int, int>, std::decay_t<decltype(*It1{})>::dimensionality> ssn;
|
||||
for(std::size_t i = 0; i != ssn.size(); ++i) ssn[i] = std::make_tuple(istrides[i], ostrides[i], ion[i]);
|
||||
std::sort(ssn.begin(), ssn.end(), std::greater<>{});
|
||||
|
||||
for(std::size_t i = 0; i != ssn.size(); ++i){
|
||||
istrides[i] = std::get<0>(ssn[i]);
|
||||
ostrides[i] = std::get<1>(ssn[i]);
|
||||
ion[i] = std::get<2>(ssn[i]);
|
||||
}
|
||||
|
||||
int istride = istrides.back();
|
||||
auto inembed = istrides; inembed.fill(0);
|
||||
int ostride = ostrides.back();
|
||||
auto onembed = ostrides; onembed.fill(0);
|
||||
for(std::size_t i = 1; i != onembed.size(); ++i){
|
||||
assert(ostrides[i-1] >= ostrides[i]); // otherwise ordering is incompatible
|
||||
assert(ostrides[i-1]%ostrides[i]==0);
|
||||
onembed[i]=ostrides[i-1]/ostrides[i]; // assert( onembed[i] <= ion[i] );
|
||||
assert(istrides[i-1]%istrides[i]==0);
|
||||
inembed[i]=istrides[i-1]/istrides[i]; // assert( inembed[i] <= ion[i] );
|
||||
}
|
||||
|
||||
plan ret;
|
||||
ret.direction_ = sign;
|
||||
ret.idata_ = reinterpret_cast<complex_type const*>( first.base().raw_pointer_cast()) ;
|
||||
ret.odata_ = const_cast<complex_type*>(reinterpret_cast<complex_type* >(d_first.base().raw_pointer_cast()));
|
||||
|
||||
switch(::cufftPlanMany(
|
||||
/*cufftHandle *plan*/ &ret.h_,
|
||||
/*int rank*/ ion.size(),
|
||||
/*int *n*/ ion.data(), // /*NX*/ last - first,
|
||||
/*int *inembed*/ inembed.data(),
|
||||
/*int istride*/ istride,
|
||||
/*int idist*/ stride(first),
|
||||
/*int *onembed*/ onembed.data(),
|
||||
/*int ostride*/ ostride,
|
||||
/*int odist*/ stride(d_first),
|
||||
/*cufftType type*/ CUFFT_Z2Z,
|
||||
/*int batch*/ last - first //BATCH
|
||||
)){
|
||||
case CUFFT_SUCCESS : break;// "cuFFT successfully executed the FFT plan."
|
||||
// case CUFFT_INVALID_PLAN : throw std::runtime_error{"The plan parameter is not a valid handle."};
|
||||
case CUFFT_ALLOC_FAILED : throw std::runtime_error{"CUFFT failed to allocate GPU memory."};
|
||||
// case CUFFT_INVALID_TYPE : throw std::runtime_error{"The user requests an unsupported type."};
|
||||
case CUFFT_INVALID_VALUE : throw std::runtime_error{"At least one of the parameters idata, odata, and direction is not valid."};
|
||||
case CUFFT_INTERNAL_ERROR : throw std::runtime_error{"Used for all internal driver errors."};
|
||||
// case CUFFT_EXEC_FAILED : throw std::runtime_error{"CUFFT failed to execute an FFT on the GPU."};
|
||||
case CUFFT_SETUP_FAILED : throw std::runtime_error{"The cuFFT library failed to initialize."};
|
||||
case CUFFT_INVALID_SIZE : throw std::runtime_error{"The user specifies an unsupported FFT size."};
|
||||
// case CUFFT_UNALIGNED_DATA : throw std::runtime_error{"Unaligned data."};
|
||||
// case CUFFT_INCOMPLETE_PARAMETER_LIST: throw std::runtime_error{"Incomplete parameter list."};
|
||||
// case CUFFT_INVALID_DEVICE : throw std::runtime_error{"Invalid device."};
|
||||
// case CUFFT_PARSE_ERROR : throw std::runtime_error{"Parse error."};
|
||||
// case CUFFT_NO_WORKSPACE : throw std::runtime_error{"No workspace."};
|
||||
// case CUFFT_NOT_IMPLEMENTED: throw std::runtime_error{"Not implemented."};
|
||||
// case CUFFT_LICENSE_ERROR : throw std::runtime_error{"License error."};
|
||||
// case CUFFT_NOT_SUPPORTED : throw std::runtime_error{"CUFFT_NOT_SUPPORTED"};
|
||||
default : throw std::runtime_error{"cufftPlanMany unknown error"};
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
};
|
||||
|
||||
// Out-of-class definition of the thread_local member plan::tl_execute_count; each thread's copy starts at 0.
thread_local int plan::tl_execute_count = 0;
|
||||
|
||||
template<typename In, class Out>
|
||||
auto dft(In const& i, Out&& o, int s)
|
||||
->decltype(cufft::plan{i, o, s}(), std::forward<Out>(o)){
|
||||
return cufft::plan{i, o, s}(), std::forward<Out>(o);}
|
||||
|
||||
template<typename In, typename R = multi::array<typename In::element_type, In::dimensionality, decltype(get_allocator(std::declval<In>()))>>
|
||||
NODISCARD("when first argument is const")
|
||||
R dft(In const& i, int s){
|
||||
static_assert(std::is_trivially_default_constructible<typename In::element_type>{}, "!");
|
||||
R ret(extensions(i), get_allocator(i));
|
||||
cufft::dft(i, ret, s);
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifndef __INTEL_COMPILER
|
||||
template<typename It1, typename It2>
|
||||
auto many_dft(It1 first, It1 last, It2 d_first, sign s)
|
||||
->decltype(plan::many(first, last, d_first, s)(), d_first + (last - first)){
|
||||
return plan::many(first, last, d_first, s)(), d_first + (last - first);}
|
||||
#else
|
||||
template<typename It1, typename It2>
|
||||
auto many_dft(It1 first, It1 last, It2 d_first, sign s)
|
||||
->decltype(plan::many(first, last, d_first, s)(), d_first + (last - first)){
|
||||
return plan::many(first, last, d_first, s)(), d_first + (last - first);}
|
||||
#endif
|
||||
|
||||
template<typename In, class Out, std::size_t D = In::dimensionality, std::enable_if_t<(D==1), int> = 0>
|
||||
Out&& dft(std::array<bool, D> which, In const& i, Out&& o, int s){
|
||||
if(which[0]) return cufft::dft(i, std::forward<Out>(o), s);
|
||||
else return std::forward<Out>(std::forward<Out>(o) = i);
|
||||
}
|
||||
|
||||
// Helper for array_tail: builds a std::array one element shorter than `t`,
// holding the elements at positions Ns+1 (i.e. shifted past the first one).
template <class Array, std::size_t... Ns>
constexpr auto array_tail_impl(Array const& t, std::index_sequence<Ns...>){
	using element   = typename Array::value_type;
	using tail_type = std::array<element, std::tuple_size<Array>::value - 1>;
	return tail_type{std::get<Ns + 1>(t)...};
}
|
||||
|
||||
// Returns a copy of `t` without its first element (result has one element less).
template<class Array>
constexpr auto array_tail(Array const& t)
->decltype(array_tail_impl(t, std::make_index_sequence<std::tuple_size<Array>{} - 1>())){
	using tail_indices = std::make_index_sequence<std::tuple_size<Array>{} - 1>;
	return array_tail_impl(t, tail_indices{});
}
|
||||
|
||||
template<typename In, class Out, std::size_t D = In::dimensionality, std::enable_if_t<(D>1), int> = 0>
|
||||
auto dft(std::array<bool, D> which, In const& i, Out&& o, int s)
|
||||
->decltype(many_dft(i.begin(), i.end(), o.begin(), s),std::forward<Out>(o))
|
||||
{
|
||||
assert(extension(i) == extension(o));
|
||||
auto ff = std::find(begin(which)+1, end(which), false);
|
||||
if(which[0] == true){
|
||||
if(ff==end(which)) cufft::dft(i, std::forward<Out>(o), s);
|
||||
else{
|
||||
auto n = ff - which.begin();
|
||||
std::rotate(begin(which), ff, end(which));
|
||||
dft(which, i<<n, o<<n, s);
|
||||
}
|
||||
}else if(which[0]==false){
|
||||
if(D==1 or std::none_of(begin(which)+1, end(which), [](auto e){return e;})){
|
||||
if(base(o) != base(i)) std::forward<Out>(o) = i;
|
||||
else if(o.layout() != i.layout()) std::forward<Out>(o) = +i;
|
||||
}
|
||||
else if(ff==end(which)) many_dft(i.begin(), i.end(), o.begin(), s);
|
||||
else{
|
||||
std::array<bool, D-1> tail = array_tail(which);
|
||||
if(which[1] == false and i.is_flattable() and o.is_flattable()) cufft::dft(tail, i.flatted(), o.flatted(), s);
|
||||
else{
|
||||
auto d_min = 0; auto n_min = size(i);
|
||||
for(auto d = 0; d != D - 1; ++d){
|
||||
if((size(i<<d) < n_min) and (tail[d]==false)){
|
||||
n_min = size(i<<d);
|
||||
d_min = d;
|
||||
}
|
||||
}
|
||||
if( d_min!=0 ){
|
||||
std::rotate(which.begin(), which.begin()+d_min, which.end());
|
||||
dft(which, i<<d_min, o<<d_min, s);
|
||||
}else
|
||||
{
|
||||
if(base(i) == base(o) and i.layout() != o.layout()){
|
||||
auto const tmp = +i;
|
||||
for(auto idx : extension(i)) cufft::dft(tail, tmp[idx], o[idx], s);
|
||||
}else for(auto idx : extension(i)) cufft::dft(tail, i[idx], o[idx], s);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return std::forward<Out>(o);
|
||||
}
|
||||
|
||||
template<typename In, std::size_t D = In::dimensionality>
|
||||
NODISCARD("when passing a const argument")
|
||||
auto dft(std::array<bool, D> which, In const& i, int sign)->std::decay_t<decltype(
|
||||
dft(which, i, typename In::decay_type(extensions(i), get_allocator(i)), sign))>{return
|
||||
dft(which, i, typename In::decay_type(extensions(i), get_allocator(i)), sign);}
|
||||
|
||||
// In-place selective transform: uses `i` both as input and as output of the
// out-of-place overload, then returns it with its original value category.
template<typename In, std::size_t D = In::dimensionality>
auto dft(std::array<bool, D> which, In&& i, int sign)
->decltype(dft(which, i, i, sign), std::forward<In>(i)){
	dft(which, i, i, sign);
	return std::forward<In>(i);
}
|
||||
|
||||
//template<typename... A> auto dft_forward(A&&... a)
|
||||
//->decltype(cufft::dft(std::forward<A>(a)..., cufft::forward)){
|
||||
// return cufft::dft(std::forward<A>(a)..., cufft::forward);}
|
||||
|
||||
// Forward-direction shorthand: cufft::dft(arr, a, cufft::forward) for a const input.
template<typename Array, typename A>
NODISCARD("when passing a const argument")
auto dft_forward(Array arr, A const& a)
->decltype(cufft::dft(arr, a, cufft::forward))
{
	return cufft::dft(arr, a, cufft::forward);
}
|
||||
|
||||
// Forward transform of an expiring cuda array: the array is passed to cufft::dft
// as an lvalue and then moved into the returned value.
template<typename Array, dimensionality_type D>
NODISCARD("when passing a const argument")
auto dft_forward(Array arr, multi::cuda::array<std::complex<double>, D>&& a)
->decltype(cufft::dft(arr, a, cufft::forward), multi::cuda::array<std::complex<double>, D>{})
{
	cufft::dft(arr, a, cufft::forward);
	return std::move(a);
}
|
||||
|
||||
// Forward-direction shorthand: cufft::dft(a, cufft::forward).
template<typename A>
NODISCARD("when passing a const argument")
auto dft_forward(A const& a)
->decltype(cufft::dft(a, cufft::forward))
{
	return cufft::dft(a, cufft::forward);
}
|
||||
|
||||
template<typename... A> auto dft_backward(A&&... a)
|
||||
->decltype(cufft::dft(std::forward<A>(a)..., cufft::backward)){
|
||||
return cufft::dft(std::forward<A>(a)..., cufft::backward);}
|
||||
|
||||
// Backward-direction shorthand: cufft::dft(arr, a, cufft::backward) for a const input.
template<typename Array, typename A>
NODISCARD("when passing a const argument")
auto dft_backward(Array arr, A const& a)
->decltype(cufft::dft(arr, a, cufft::backward))
{
	return cufft::dft(arr, a, cufft::backward);
}
|
||||
|
||||
// Backward-direction shorthand: cufft::dft(a, cufft::backward).
template<typename A>
NODISCARD("when passing a const argument")
auto dft_backward(A const& a)
->decltype(cufft::dft(a, cufft::backward))
{
	return cufft::dft(a, cufft::backward);
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
}}
|
||||
|
||||
|
||||
#if not __INCLUDE_LEVEL__ // TEST BELOW
|
||||
|
||||
#define BOOST_TEST_MODULE "C++ Unit Tests for Multi cuFFT adaptor"
|
||||
#define BOOST_TEST_DYN_LINK
|
||||
#include<boost/test/unit_test.hpp>
|
||||
|
||||
#include <boost/timer/timer.hpp>
|
||||
|
||||
#include "../adaptors/cuda.hpp"
|
||||
#include "../adaptors/fftw.hpp"
|
||||
#include "../adaptors/cufft.hpp"
|
||||
|
||||
//#include "../adaptors/fft.hpp"
|
||||
|
||||
#include<complex>
|
||||
#include<thrust/complex.h>
|
||||
#include "../complex.hpp"
|
||||
|
||||
#include<cuda_runtime.h> // cudaDeviceSynchronize
|
||||
|
||||
#include<iostream>
|
||||
|
||||
namespace multi = boost::multi;
|
||||
using complex = std::complex<double>;
|
||||
namespace utf = boost::unit_test;
|
||||
|
||||
|
||||
// Optimization barrier: the empty asm statement declares `value` as a
// read-write memory operand, so the compiler has to treat it as used and
// possibly modified. The object itself is not changed.
template <class T>
__attribute__((always_inline)) inline void DoNotOptimize(const T &value) {
	T& writable = const_cast<T &>(value);
	asm volatile("" : "+m"(writable));
}
|
||||
|
||||
struct watch : private std::chrono::high_resolution_clock{
|
||||
std::string label_; time_point start_;
|
||||
watch(std::string label ="") : label_{label}, start_{}{
|
||||
cudaDeviceSynchronize();
|
||||
start_ = now();
|
||||
}
|
||||
~watch(){
|
||||
cudaDeviceSynchronize();
|
||||
auto const count = std::chrono::duration<double>(now() - start_).count();
|
||||
std::cerr<< label_<<": "<< count <<" sec"<<std::endl;
|
||||
}
|
||||
};
|
||||
|
||||
// Imaginary unit (real part 0, imaginary part 1), used to spell the complex literals of the test data.
constexpr complex I{0, 1};
|
||||
|
||||
#if 1
|
||||
|
||||
// Compares 2D forward transforms computed by cuFFT (device and managed memory)
// against an FFTW reference computed on the host.
BOOST_AUTO_TEST_CASE(cufft_2D, *boost::unit_test::tolerance(0.0001)){

	// host input and FFTW reference result
	multi::array<complex, 2> const in_cpu = {
		{  1. + 2.*I,  9. - 1.*I, 2. +  4.*I},
		{  3. + 3.*I,  7. - 4.*I, 1. +  9.*I},
		{  4. + 1.*I,  5. + 3.*I, 2. +  4.*I},
		{  3. - 1.*I,  8. + 7.*I, 2. +  1.*I},
		{ 31. - 1.*I, 18. + 7.*I, 2. + 10.*I}
	};
	multi::array<complex, 2> fw_cpu(extensions(in_cpu));
	multi::fftw::dft(in_cpu, fw_cpu, multi::fftw::forward);

	// device memory, out-of-place into an existing array
	multi::cuda::array<complex, 2> const in_gpu = in_cpu;
	multi::cuda::array<complex, 2> fw_gpu(extensions(in_gpu));
	multi::cufft::dft(in_gpu, fw_gpu, multi::cufft::forward);
	BOOST_TEST( imag(static_cast<complex>(fw_gpu[3][2]) - fw_cpu[3][2]) == 0. );

	// device memory, result returned as a new array
	auto fw2_gpu = multi::cufft::dft(in_gpu, multi::cufft::forward);
	BOOST_TEST( imag(static_cast<complex>(fw2_gpu[3][1]) - fw_cpu[3][1]) == 0. );

	// managed memory, out-of-place into an existing array
	multi::cuda::managed::array<complex, 2> const in_mng = in_cpu;
	multi::cuda::managed::array<complex, 2> fw_mng(extensions(in_gpu));
	multi::cufft::dft(in_mng, fw_mng, multi::cufft::forward);
	BOOST_TEST( imag(fw_mng[3][2] - fw_cpu[3][2]) == 0. );

	// managed memory handed to the FFTW adaptor
	auto fw2_mng = multi::fftw::dft(in_mng, multi::fftw::forward);
	BOOST_TEST( imag(fw2_mng[3][1] - fw_cpu[3][1]) == 0. );

}
|
||||
|
||||
// Runs a 300x300x300 forward transform on host (FFTW), device and managed
// memory (cuFFT) and checks that the output no longer holds its fill value 99.
// The timing lines are kept as comments with the figures once measured.
BOOST_AUTO_TEST_CASE(cufft_3D_timing, *boost::unit_test::tolerance(0.0001)){

	auto x = std::make_tuple(300, 300, 300);
	{
		multi::array<complex, 3> const in_cpu(x, 10.);
		BOOST_ASSERT( in_cpu.num_elements()*sizeof(complex) < 2e9 );
		multi::array<complex, 3> fw_cpu(extensions(in_cpu), 99.);
		{
		//	boost::timer::auto_cpu_timer t; // 1.041691s wall, 1.030000s user + 0.000000s system = 1.030000s CPU (98.9%)
			multi::fftw::dft(in_cpu, fw_cpu, multi::fftw::forward);
			BOOST_TEST( fw_cpu[8][9][10] != 99. );
		}
	}
	{
		multi::cuda::array<complex, 3> const in_gpu(x, 10.);
		multi::cuda::array<complex, 3> fw_gpu(extensions(in_gpu), 99.);
		{
		//	boost::timer::auto_cpu_timer t; // 0.208237s wall, 0.200000s user + 0.010000s system = 0.210000s CPU (100.8%)
			// use the cuFFT direction constant for the cuFFT call, as the other cuFFT calls do
			multi::cufft::dft(in_gpu, fw_gpu, multi::cufft::forward);

			BOOST_TEST( static_cast<complex>(fw_gpu[8][9][10]) != 99. );
		}
	}
	{
		multi::cuda::managed::array<complex, 3> const in_gpu(x, 10.);
		multi::cuda::managed::array<complex, 3> fw_gpu(extensions(in_gpu), 99.);
		{
		//	boost::timer::auto_cpu_timer t; // 0.208237s wall, 0.200000s user + 0.010000s system = 0.210000s CPU (100.8%)
			multi::cufft::dft(in_gpu, fw_gpu, multi::cufft::forward);
		//	BOOST_TEST( fw_gpu[8][9][10].operator complex() != 99. );
		}
		{
			// second run over the same managed arrays
		//	boost::timer::auto_cpu_timer t; // 0.208237s wall, 0.200000s user + 0.010000s system = 0.210000s CPU (100.8%)
			multi::cufft::dft(in_gpu, fw_gpu, multi::cufft::forward);
		//	BOOST_TEST( fw_gpu[8][9][10].operator complex() != 99. );
		}
	}
}
|
||||
|
||||
// For several selections of transformed dimensions of a 4D array, runs the
// FFTW and cuFFT adaptors in their different calling forms (out-of-place,
// in-place, moved-from input, newly allocated result), prints the time each one
// takes through `watch`, and compares one element against the FFTW result.
BOOST_AUTO_TEST_CASE(cufft_combinations, *utf::tolerance(0.00001)){

	// random host input
	auto const in = []{
		multi::array<complex, 4> random_data({32, 90, 98, 96});
		std::generate(random_data.data_elements(), random_data.data_elements() + random_data.num_elements(),
			[]{
				auto const re = std::rand()*1./RAND_MAX;
				auto const im = std::rand()*1./RAND_MAX;
				return complex{re, im};
			}
		);
		return random_data;
	}();
	std::clog<<"memory size "<< in.num_elements()*sizeof(complex)/1e6 <<" MB\n";

	multi::cuda::array<complex, 4> const in_gpu = in;
	multi::cuda::managed::array<complex, 4> const in_mng = in;

	using std::clog;
	std::vector<std::array<bool, 4>> const selections = {
		{false, true , true , true },
		{false, true , true , false},
		{true , false, false, false},
		{true , true , false, false},
		{false, false, true , false},
		{false, false, false, false},
	};
	for(auto which : selections){
		std::clog<<"case ";
		copy(begin(which), end(which), std::ostream_iterator<bool>{std::clog,", "});
		std::clog<<std::endl;

		// host: out-of-place result `out` is the reference for every other check
		multi::array<complex, 4> out = in;
		multi::array<complex, 4> in_rw = in;
		[&, _ = watch{"cpu_opl "}]{
			multi::fftw::dft_forward(which, in, out);
		}();
		[&, _ = watch{"cpu_ipl "}]{
			multi::fftw::dft(which, in_rw, multi::fftw::forward);
			BOOST_TEST( abs( static_cast<multi::complex<double>>(in_rw[5][4][3][1]) - multi::complex<double>(out[5][4][3][1]) ) == 0. );
		}();
		{
			multi::array<complex, 4> in_rw2 = in;
			[&, _ = watch{"cpu_mov "}]{
				multi::array<complex, 4> const out_mov = multi::fftw::dft_forward(which, std::move(in_rw2));
			//	what(out_mov);
				BOOST_TEST( abs( static_cast<multi::complex<double>>(out_mov[5][4][3][1]) - multi::complex<double>(out[5][4][3][1]) ) == 0. );
				BOOST_REQUIRE( is_empty(in_rw2) );
				BOOST_REQUIRE( extensions(out_mov) == extensions(in) );
			}();
		}

		[&, _ = watch{"cpu_new "}]{
			auto const out_cpy = multi::fftw::dft_forward(which, in);
			BOOST_TEST( abs( static_cast<multi::complex<double>>(out_cpy[5][4][3][1]) - multi::complex<double>(out[5][4][3][1]) ) == 0. );
		}();

		// device memory
		multi::cuda::array<complex, 4> out_gpu(extensions(in_gpu));
		[&, _ = watch{"gpu_opl "}]{
			multi::cufft::dft(which, in_gpu , out_gpu, multi::cufft::forward);
			BOOST_TEST( abs( static_cast<complex>(out_gpu[5][4][3][1]) - out[5][4][3][1] ) == 0. );
		}();
		{
			multi::cuda::array<complex, 4> in_rw_gpu = in_gpu;
			[&, _ = watch{"gpu_ipl "}]{
				multi::cufft::dft(which, in_rw_gpu, multi::cufft::forward);
				BOOST_TEST( abs( static_cast<complex>(in_rw_gpu[5][4][3][1]) - out[5][4][3][1] ) == 0. );
			}();
		}
		{
			multi::cuda::array<complex, 4> in_rw_gpu = in_gpu;
			[&, _ = watch{"gpu_mov "}]{
				multi::cuda::array<complex, 4> const out_mov = multi::cufft::dft_forward(which, std::move(in_rw_gpu));
			//	BOOST_REQUIRE( in_rw_gpu.empty() );
			//	BOOST_TEST( abs( static_cast<complex>(out_mov[5][4][3][1]) - out[5][4][3][1] ) == 0. );
			}();
		}
		{
			multi::cuda::array<complex, 4> in_rw_gpu = in_gpu;
			[&, _ = watch{"gpu_mov "}]{
				multi::cuda::array<complex, 4> out_mov = std::move(in_rw_gpu);
				multi::cufft::dft(which, out_mov, multi::cufft::forward);
			//	BOOST_REQUIRE( in_rw_gpu.empty() );
			//	BOOST_TEST( abs( static_cast<complex>(out_mov[5][4][3][1]) - out[5][4][3][1] ) == 0. );
			}();
		}
		cudaDeviceSynchronize();
		[&, _ = watch{"gpu_new "}]{
			multi::cuda::array<complex, 4> const out_cpy = multi::cufft::dft(which, in_gpu, multi::cufft::forward);
		}();

		// managed memory: first ("cld") and repeated ("hot") run, then a newly allocated result
		multi::cuda::managed::array<complex, 4> out_mng(extensions(in_mng));
		[&, _ = watch{"mng_cld "}]{
			multi::cufft::dft(which, in_mng, out_mng, multi::cufft::forward);
			BOOST_TEST( abs( out_mng[5][4][3][1] - out[5][4][3][1] ) == 0. );
		}();
		[&, _ = watch{"mng_hot "}]{
			multi::cufft::dft(which, in_mng , out_mng, multi::cufft::forward);
			BOOST_TEST( abs( out_mng[5][4][3][1] - out[5][4][3][1] ) == 0. );
		}();
		[&, _ = watch{"mng_new "}]{
			auto const out_mng = multi::cufft::dft(which, in_mng, multi::cufft::forward);
			BOOST_TEST( abs( out_mng[5][4][3][1] - out[5][4][3][1] ) == 0. );
		}();
	}
	std::clog<<std::endl;

}
|
||||
|
||||
// Prepares random 4D data on the host and on the device for a batched
// transform. The transform and its check are currently compiled out (#if 0),
// so only the allocations and the host-to-device copy are exercised.
BOOST_AUTO_TEST_CASE(cufft_many_3D, *utf::tolerance(0.00001) ){

	auto const in_cpu = []{
		multi::array<complex, 4> random_data({45, 18, 32, 16});
		std::generate(
			random_data.data_elements(), random_data.data_elements() + random_data.num_elements(),
			[]{
				auto const re = std::rand()*1./RAND_MAX;
				auto const im = std::rand()*1./RAND_MAX;
				return complex{re, im};
			}
		);
		return random_data;
	}();

	multi::cuda::array<complex, 4> const in = in_cpu;
	multi::cuda::array<complex, 4> out(extensions(in));

#if 0
	multi::cufft::many_dft(begin(unrotated(in)), end(unrotated(in)), begin(unrotated(out)), +1);

	multi::array<complex, 4> out_cpu(extensions(in));
	multi::fft::many_dft(begin(unrotated(in_cpu)), end(unrotated(in_cpu)), begin(unrotated(out_cpu)), +1);

	BOOST_TEST( imag( static_cast<complex>(out[5][4][3][2]) - out_cpu[5][4][3][2]) == 0. );
#endif
}
|
||||
|
||||
#if 0
|
||||
|
||||
// (Disabled by the enclosing #if 0.) Batched transform over the rotated view
// `x<<1` of a random 10x10x10 array, computed on the host and on the device,
// followed by a comparison of one element.
BOOST_AUTO_TEST_CASE(cufft_4D, *utf::tolerance(0.00001)){
	auto const in = []{
		multi::array<complex, 3> random_data({10, 10, 10});
		std::generate(random_data.data_elements(), random_data.data_elements() + random_data.num_elements(),
			[]{
				auto const re = std::rand()*1./RAND_MAX;
				auto const im = std::rand()*1./RAND_MAX;
				return complex{re, im};
			}
		);
		return random_data;
	}();

	// host reference
	multi::array<complex, 3> out(extensions(in));
//	multi::fftw::dft({true, false, true}, in, out, multi::fftw::forward);
	multi::fft::many_dft(begin(in<<1), end(in<<1), begin(out<<1), multi::fftw::forward);

	// device computation
	multi::cuda::array<complex, 3> in_gpu = in;
	multi::cuda::array<complex, 3> out_gpu(extensions(in));

//	multi::cufft::dft({true, false, true}, in_gpu, out_gpu, multi::fft::forward);//multi::cufft::forward);
	multi::cufft::many_dft(begin(in_gpu<<1), end(in_gpu<<1), begin(out_gpu<<1), multi::fftw::forward);
	BOOST_TEST( imag( static_cast<complex>(out_gpu[5][4][3]) - out[5][4][3]) == 0. );
}
|
||||
|
||||
|
||||
|
||||
//BOOST_AUTO_TEST_CASE(cu
|
||||
|
||||
#if 0
|
||||
// (Disabled by the enclosing #if 0.) Transforms the first two dimensions of a
// 10x10x10 array holding a single 99 with FFTW and prints the result; the cuFFT
// counterpart is compiled out by the inner #if 0.
BOOST_AUTO_TEST_CASE(cufft_4D){
	auto const in = []{
		multi::array<complex, 3> ret({10, 10, 10});
		ret[2][3][4] = 99.;
		return ret;
	}();
	multi::array<complex, 3> out(extensions(in));

	multi::fftw::dft({true, true, false}, in, out, multi::fftw::forward);

//	auto fwd = multi::fftw::dft({true, true, true, true}, in, out, multi::fftw::forward);
//	BOOST_REQUIRE(in[2][3][4][5] == 99.);
	std::cout << out[9][1][2] << std::endl;
	// `std::cout` needs the scope operator; a single colon would declare a label named `std`
	for(auto i = 0; i != out.num_elements(); ++i) std::cout << (out.data_elements()[i]) <<' ';

#if 0
	multi::cuda::array<complex, 3> in_gpu = in;//[]{
//		multi::cuda::array<complex, 4> ret({10, 10, 10, 10});
//		ret[2][3][4][5] = 99.;
//		return ret;
//	}();
	multi::cuda::array<complex, 3> out_gpu(extensions(in));
	multi::cufft::dft({true, true, false}, in_gpu, out_gpu, multi::cufft::forward);

	std::cout << out_gpu[5][4][3].operator complex() << std::endl;

//	multi::cufft::dft({true, true, true, true}, in_gpu, out_gpu, multi::cufft::forward);
//	multi::cufft::dft({true, true, true, true}, in_gpu, out_gpu, multi::cufft::forward);
#endif

}
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
|
@ -1,131 +0,0 @@
|
|||
#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*-
|
||||
$CXX $0 -o $0x -lcudart -lcufft `pkg-config --libs fftw3` -lboost_timer -lboost_unit_test_framework&&$0x&&rm $0x;exit
|
||||
#endif
|
||||
// © Alfredo A. Correa 2020
|
||||
|
||||
#ifndef MULTI_ADAPTORS_FFT_HPP
|
||||
#define MULTI_ADAPTORS_FFT_HPP
|
||||
|
||||
#include "../adaptors/fftw.hpp"
|
||||
#include "../adaptors/cufft.hpp"
|
||||
|
||||
namespace boost{
|
||||
namespace multi{
|
||||
namespace fft{
|
||||
|
||||
// Direction constant for forward transforms; takes its value from fftw::forward.
static constexpr int forward = fftw::forward;//FFTW_FORWARD;
|
||||
// Third direction value, 0; presumably stands for "no transform" (TODO confirm where it is consumed).
static constexpr int none = 0;
|
||||
// Direction constant for backward transforms; takes its value from fftw::backward.
static constexpr int backward = fftw::backward;//FFTW_BACKWARD;
|
||||
|
||||
static_assert( forward != none and none != backward and backward != forward, "!");
|
||||
|
||||
// Overload-ranking tag: priority<I> derives from priority<I-1>, and priority<0> derives from std::true_type, so an argument priority<N>{} matches a priority<N> parameter exactly and lower ones through base conversions.
template<std::size_t I> struct priority : priority<I-1>{};
template<> struct priority<0> : std::true_type{};
|
||||
|
||||
// Lower-ranked candidate (tag priority<0>): forwards all arguments to fftw::dft.
template<class... Args> auto dft_aux_(priority<0>, Args&&... args) DECLRETURN( fftw::dft(std::forward<Args>(args)...))
|
||||
// Higher-ranked candidate (tag priority<1>): forwards all arguments to cufft::dft. NOTE(review): falling back to the priority<0> overload assumes DECLRETURN makes this one SFINAE-friendly -- confirm.
template<class... Args> auto dft_aux_(priority<1>, Args&&... args) DECLRETURN(cufft ::dft(std::forward<Args>(args)...))
|
||||
// Backend-neutral entry point: calls dft_aux_ with the tag priority<1>{} followed by the forwarded arguments.
template<class... Args> auto dft(Args&&... args) DECLRETURN(dft_aux_(priority<1>{}, std::forward<Args>(args)...))
|
||||
|
||||
// Same dispatch with an explicit per-dimension selection `which`, typed std::array<bool, N> with N the dimensionality of In; presumably spelled out so that a braced list can be passed for `which`.
template<class In, class... Args> auto dft(std::array<bool, std::decay_t<In>::dimensionality> which, In&& in, Args&&... args) DECLRETURN(dft_aux_(priority<1>{}, which, std::forward<In>(in), std::forward<Args>(args)...))
|
||||
|
||||
// Lower-ranked candidate (tag priority<0>): forwards all arguments to fftw::many_dft.
template<class... Args> auto many_dft_aux_(priority<0>, Args&&... args) DECLRETURN( fftw::many_dft(std::forward<Args>(args)...))
|
||||
// Higher-ranked candidate (tag priority<1>): forwards all arguments to cufft::many_dft.
template<class... Args> auto many_dft_aux_(priority<1>, Args&&... args) DECLRETURN(cufft ::many_dft(std::forward<Args>(args)...))
|
||||
// Backend-neutral batched transform: calls many_dft_aux_ with the tag priority<1>{} followed by the forwarded arguments.
template<class... Args> auto many_dft(Args&&... args) DECLRETURN(many_dft_aux_(priority<1>{}, std::forward<Args>(args)...))
|
||||
|
||||
// Lower-ranked candidate (tag priority<0>): forwards all arguments to fftw::dft_forward.
template<class... Args> auto dft_forward_aux_(priority<0>, Args&&... args) DECLRETURN( fftw::dft_forward(std::forward<Args>(args)...))
|
||||
// Higher-ranked candidate (tag priority<1>): forwards all arguments to cufft::dft_forward.
template<class... Args> auto dft_forward_aux_(priority<1>, Args&&... args) DECLRETURN(cufft ::dft_forward(std::forward<Args>(args)...))
|
||||
// Backend-neutral forward transform: calls dft_forward_aux_ with the tag priority<1>{} followed by the forwarded arguments.
template<class... Args> auto dft_forward(Args&&... args) DECLRETURN(dft_forward_aux_(priority<1>{}, std::forward<Args>(args)...))
|
||||
// Forward transform with an explicit per-dimension selection `which` (std::array<bool, N>, N the dimensionality of In); same dispatch through dft_forward_aux_.
template<class In, class... Args> auto dft_forward(std::array<bool, std::decay_t<In>::dimensionality> which, In&& in, Args&&... args) DECLRETURN(dft_forward_aux_(priority<1>{}, which, std::forward<In>(in), std::forward<Args>(args)...))
|
||||
|
||||
// Lower-ranked candidate (tag priority<0>): forwards all arguments to fftw::dft_backward.
template<class... Args> auto dft_backward_aux_(priority<0>, Args&&... args) DECLRETURN( fftw::dft_backward(std::forward<Args>(args)...))
|
||||
// Higher-ranked candidate (tag priority<1>): forwards all arguments to cufft::dft_backward.
template<class... Args> auto dft_backward_aux_(priority<1>, Args&&... args) DECLRETURN(cufft ::dft_backward(std::forward<Args>(args)...))
|
||||
// Backend-neutral backward transform: calls dft_backward_aux_ with the tag priority<1>{} followed by the forwarded arguments.
template<class... Args> auto dft_backward(Args&&... args) DECLRETURN(dft_backward_aux_(priority<1>{}, std::forward<Args>(args)...))
|
||||
// Backward transform with an explicit per-dimension selection `which` (std::array<bool, N>, N the dimensionality of In); same dispatch through dft_backward_aux_.
template<class In, class... Args> auto dft_backward(std::array<bool, std::decay_t<In>::dimensionality> which, In&& in, Args&&... args) DECLRETURN(dft_backward_aux_(priority<1>{}, which, std::forward<In>(in), std::forward<Args>(args)...))
|
||||
|
||||
}}}
|
||||
|
||||
#if not __INCLUDE_LEVEL__
|
||||
|
||||
#define BOOST_TEST_MODULE "C++ Unit Tests for Multi FFT adaptor"
|
||||
#define BOOST_TEST_DYN_LINK
|
||||
#include<boost/test/unit_test.hpp>
|
||||
|
||||
#include <boost/timer/timer.hpp>
|
||||
#include <boost/config.hpp>
|
||||
|
||||
namespace utf = boost::unit_test;
|
||||
|
||||
using complex = std::complex<double>;
|
||||
namespace multi = boost::multi;
|
||||
|
||||
using std::cout;
|
||||
|
||||
// Runs multi::fft::dft on host, device and managed arrays for several selections
// of transformed dimensions, printing timings and comparing one element of the
// device/managed results against the host result.
BOOST_AUTO_TEST_CASE(fft_combinations, *utf::tolerance(0.00001)){
	// banner: thread count, platform, compiler, build date
	cout<< "# threads is " << multi::fftw::plan::with_nthreads() <<"\n";
	cout<<"=========================================================\n";
	cout<< BOOST_PLATFORM <<' '<< BOOST_COMPILER <<' '<< __DATE__<<'\n';

	// random host input
	auto const in = []{
		multi::array<complex, 4> random_data({32, 90, 98, 96});
		std::generate(random_data.data_elements(), random_data.data_elements() + random_data.num_elements(),
			[]{
				auto const re = std::rand()/1./RAND_MAX;
				auto const im = std::rand()/1./RAND_MAX;
				return complex{re, im};
			}
		);
		return random_data;
	}();
	std::cout<<"memory size "<< in.num_elements()*sizeof(complex)/1e6 <<" MB\n";

	multi::cuda::array<complex, 4> const in_gpu = in;
	multi::cuda::managed::array<complex, 4> const in_mng = in;

	std::vector<std::array<bool, 4>> cases = {
		{false, true , true , true },
		{false, true , true , false},
		{true , false, false, false},
		{true , true , false, false},
		{false, false, true , false},
		{false, false, false, false},
	};

	for(auto which : cases){
		cout<<"case: "<<std::boolalpha;
		copy(begin(which), end(which), std::ostream_iterator<bool>{cout,", "});
		cout<<"\n";

		// host: first and repeated ("hot") run
		multi::array<complex, 4> out(extensions(in));
		{
			cout<<"flops "<< multi::fftw::plan(which, in, out, multi::fft::forward).flops() <<"\n";
			boost::timer::auto_cpu_timer t{"cpu____ %ws wall, CPU (%p%)\n"};
			multi::fft::dft(which, in, out, multi::fft::forward);
		}
		{
			boost::timer::auto_cpu_timer t{"cpu_hot %ws wall, CPU (%p%)\n"};
			multi::fft::dft(which, in, out, multi::fft::forward);
		}

		// device memory: first ("cld") and repeated ("hot") run
		multi::cuda::array<complex, 4> out_gpu(extensions(in_gpu));
		{
			boost::timer::auto_cpu_timer t{"gpu_cld %ws wall, CPU (%p%)\n"};
			multi::fft::dft(which, in_gpu , out_gpu , multi::fft::forward);
			BOOST_TEST( abs( static_cast<complex>(out_gpu[5][4][3][1]) - out[5][4][3][1] ) == 0. );
		}
		{
			boost::timer::auto_cpu_timer t{"gpu_hot %ws wall, CPU (%p%)\n"};
			multi::fft::dft(which, in_gpu , out_gpu , multi::fft::forward);
		//	BOOST_TEST( abs( static_cast<complex>(out_gpu[5][4][3][1]) - out[5][4][3][1] ) == 0. );
		}

		// managed memory: synchronize before the host reads the result
		multi::cuda::managed::array<complex, 4> out_mng(extensions(in_mng));
		{
			boost::timer::auto_cpu_timer t{"mng_cld %ws wall, CPU (%p%)\n"};
			multi::fft::dft(which, in_mng , out_mng , multi::fft::forward);
			cudaDeviceSynchronize();
			BOOST_TEST( abs( out_mng[5][4][3][1] - out[5][4][3][1] ) == 0. );
		}
		{
		///	boost::timer::auto_cpu_timer t{"mng_hot %ws wall, CPU (%p%)\n"};
			multi::fft::dft(which, in_mng() , out_mng() , multi::fft::forward);
			cudaDeviceSynchronize();
			BOOST_TEST( abs( out_mng[5][4][3][1] - out[5][4][3][1] ) == 0. );
		}
	}

}
|
||||
#endif
|
||||
#endif
|
||||
|
|
@ -1,934 +0,0 @@
|
|||
#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4-*-
|
||||
$CXXX $CXXFLAGS $0 -o $0x$OXX `pkg-config --cflags --libs fftw3 cuda-11.0` -lboost_timer -lboost_unit_test_framework&&$0x$OXX&&rm $0x$OXX;exit
|
||||
#endif
|
||||
// © Alfredo A. Correa 2018-2020
|
||||
|
||||
#ifndef MULTI_ADAPTORS_FFTW_HPP
|
||||
#define MULTI_ADAPTORS_FFTW_HPP
|
||||
|
||||
#include "../adaptors/../array.hpp"
|
||||
#include "../adaptors/../config/NODISCARD.hpp"
|
||||
|
||||
#include<algorithm> // sort
|
||||
#include<complex>
|
||||
#include<numeric> // accumulate
|
||||
|
||||
#if HAVE_FFTW3_THREADS
|
||||
#include <thread>
|
||||
#endif
|
||||
|
||||
#include<fftw3.h> // external fftw3 library
|
||||
|
||||
namespace boost{
|
||||
namespace multi{
|
||||
|
||||
namespace fftw{
|
||||
// template<class T> auto alignment_of(T* p){return ::fftw_alignment_of((double*)p);}
|
||||
#if __cpp_lib_as_const >= 201510
|
||||
using std::as_const;
|
||||
#else
|
||||
// Stand-in for std::as_const on library versions without it: returns `t` as a reference to const.
template<class T>
constexpr auto as_const(T& t) noexcept -> std::add_const_t<T>& {
	return t;
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#if 0
|
||||
template<typename Size>
|
||||
auto fftw_plan_dft_1d(
|
||||
Size N,
|
||||
std::complex<double> const* in, std::complex<double>* out, int sign,
|
||||
unsigned flags = FFTW_ESTIMATE
|
||||
){
|
||||
#ifndef NDEBUG
|
||||
auto check = in[N/3]; // check that const data will not been overwritten
|
||||
#endif
|
||||
assert( fftw::alignment_of(in) == fftw::alignment_of(out) );
|
||||
auto ret=::fftw_plan_dft_1d(N, (fftw_complex*)in, (fftw_complex*)out, sign, flags | FFTW_PRESERVE_INPUT );
|
||||
assert(check == in[N/3]); // check that const data has not been overwritten
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<typename Size>
|
||||
auto fftw_plan_dft_1d(
|
||||
Size N,
|
||||
std::complex<double>* in, std::complex<double>* out, int sign,
|
||||
unsigned flags = FFTW_ESTIMATE
|
||||
){
|
||||
assert( fftw::alignment_of(in) == fftw::alignment_of(out) );
|
||||
return ::fftw_plan_dft_1d(N, (fftw_complex*)in, (fftw_complex*)out, sign, flags);
|
||||
}
|
||||
|
||||
template<typename Size>
|
||||
auto fftw_plan_dft_2d(
|
||||
Size N1, Size N2,
|
||||
std::complex<double> const* in, std::complex<double>* out, int sign,
|
||||
unsigned flags = FFTW_ESTIMATE
|
||||
){
|
||||
assert( fftw::alignment_of(in) == fftw::alignment_of(out) );
|
||||
#ifndef NDEBUG
|
||||
auto check = in[N1*N2/3]; // check that const data will not been overwritten
|
||||
#endif
|
||||
auto ret = ::fftw_plan_dft_2d(N1, N2, (fftw_complex*)in, (fftw_complex*)out, sign, flags | FFTW_PRESERVE_INPUT);
|
||||
assert( check == in[N1*N2/3] ); // check that const data has not been overwritten
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<typename Size>
|
||||
auto fftw_plan_dft_2d(
|
||||
Size N1, Size N2,
|
||||
std::complex<double>* in, std::complex<double>* out, int sign,
|
||||
unsigned flags = FFTW_ESTIMATE
|
||||
){
|
||||
assert(fftw_alignment_of((double*)in) == fftw_alignment_of((double*)out));
|
||||
return ::fftw_plan_dft_2d(N1, N2, (fftw_complex*)in, (fftw_complex*)out, sign, flags);
|
||||
}
|
||||
|
||||
template<typename Size>
|
||||
auto fftw_plan_dft_3d(
|
||||
Size N1, Size N2, Size N3,
|
||||
std::complex<double>* in, std::complex<double>* out, int sign,
|
||||
unsigned flags = FFTW_ESTIMATE
|
||||
){
|
||||
assert(fftw_alignment_of((double*)in) == fftw_alignment_of((double*)out));
|
||||
return ::fftw_plan_dft_3d(N1, N2, N3, (fftw_complex*)in, (fftw_complex*)out, sign, flags);
|
||||
}
|
||||
template<typename Size>
|
||||
auto fftw_plan_dft_3d(
|
||||
Size N1, Size N2, Size N3,
|
||||
std::complex<double> const* in, std::complex<double>* out, int sign,
|
||||
unsigned flags = FFTW_ESTIMATE
|
||||
){
|
||||
assert( flags & FFTW_PRESERVE_INPUT );
|
||||
assert(fftw_alignment_of((double*)in) == fftw_alignment_of((double*)out));
|
||||
return ::fftw_plan_dft_3d(N1, N2, N3, (fftw_complex*)in, (fftw_complex*)out, sign, flags | FFTW_PRESERVE_INPUT);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
template<typename Rank>
|
||||
auto fftw_plan_dft(
|
||||
Rank r, int* ns,
|
||||
std::complex<double>* in, std::complex<double>* out,
|
||||
int sign, unsigned flags = FFTW_ESTIMATE
|
||||
){
|
||||
assert(fftw_alignment_of((double*)in) == fftw_alignment_of((double*)out));
|
||||
return ::fftw_plan_dft(r, ns, (fftw_complex*)in, (fftw_complex*)out, sign, flags);
|
||||
}
|
||||
template<typename RankType>
|
||||
auto fftw_plan_dft(
|
||||
RankType r, int* ns,
|
||||
std::complex<double> const* in, std::complex<double>* out,
|
||||
int sign, unsigned flags = FFTW_ESTIMATE | FFTW_PRESERVE_INPUT
|
||||
){
|
||||
assert( flags & FFTW_PRESERVE_INPUT );
|
||||
assert(fftw::alignment_of(in) == fftw::alignment_of(out));
|
||||
#ifndef NDEBUG
|
||||
size_t ne = 1; for(RankType i = 0; i != r; ++i) ne*=ns[i];
|
||||
auto check = in[ne/3]; // check that const data will not been overwritten
|
||||
#endif
|
||||
auto ret=::fftw_plan_dft(r, ns, (fftw_complex*)in, (fftw_complex*)out, sign, flags);
|
||||
assert(check == in[ne/3]); // check that const data has not been overwritten
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if 0 // disabled: fixed-rank planners taking array-like arguments
/// 1D planner over compact arrays of equal size.
template<typename In, typename Out>
auto fftw_plan_dft_1d(
	In&& in, Out&& out, int sign, unsigned flags = FFTW_ESTIMATE
){
	static_assert(in.dimensionality == 1, "!");
	assert(size(in) == size(out));
	assert( in.is_compact() );
	assert( out.is_compact() );
	return multi::fftw_plan_dft_1d(size(in), data_elements(in), data_elements(out), sign, flags);
}

/// 2D planner over compact arrays of equal sizes.
template<class In, class Out>
auto fftw_plan_dft_2d(
	In&& in, Out&& out, int sign, unsigned flags = FFTW_ESTIMATE
){
	static_assert(in.dimensionality == 2, "!");
	assert(in.sizes() == out.sizes());
	assert( in.is_compact() );
	assert( out.is_compact() );
	return multi::fftw_plan_dft_2d(
		sizes(in)[0], sizes(in)[1],
		data_elements(in), data_elements(out), sign, flags
	);
}

/// 3D planner over compact arrays of equal sizes.
template<class In, class Out>
auto fftw_plan_dft_3d(
	In&& in, Out&& out, int sign, unsigned flags = FFTW_ESTIMATE
){
	static_assert(in.dimensionality == 3, "!");
	assert(in.sizes() == out.sizes());
	assert( in.is_compact() );
	assert( out.is_compact() );
	return multi::fftw_plan_dft_3d(
		sizes(in)[0], sizes(in)[1], sizes(in)[2],
		data(in), data(out),
		sign, flags
	);
}
#endif
|
||||
|
||||
template<class T, class Tpl> constexpr auto to_array(Tpl const& t){
|
||||
return detail::to_array_impl<T>(t, std::make_index_sequence<std::tuple_size<Tpl>{}>{});
|
||||
}
|
||||
|
||||
#if(__cpp_if_constexpr>=201606)
//https://stackoverflow.com/a/35110453/225186
/// Returns a copy of its argument; used inside `noexcept(...)` by logic_assert
/// to probe whether the condition can be treated as a compile-time constant.
template<class T>
constexpr std::remove_reference_t<T> _constx(T&& t){
	return t;
}
// Checks C with static_assert when the probe succeeds, otherwise with a runtime assert.
#define logic_assert(C, M) \
if constexpr(noexcept(_constx(C))) static_assert((C), M); else assert((C)&& M);
#else
// Without `if constexpr` only the runtime check is available.
#define logic_assert(ConditioN, MessagE) assert(ConditioN && MessagE);
#endif
|
||||
|
||||
template<typename It1, class It2, std::enable_if_t<std::is_pointer<decltype(base(It2{}))>{} or std::is_convertible<decltype(base(It2{})), std::complex<double>*>{}, int> = 0
|
||||
>
|
||||
auto fftw_plan_many_dft(It1 first, It1 last, It2 d_first, int sign, unsigned flags = FFTW_ESTIMATE)
|
||||
->decltype(reinterpret_cast<fftw_complex*>(/*static_cast<std::complex<double>*>*/(base(d_first))), fftw_plan{}){
|
||||
|
||||
static_assert( sizeof(*base( first)) == sizeof(real(*base( first))) + sizeof(imag(*base( first))) and sizeof(*base( first)) == sizeof(fftw_complex),
|
||||
"input must have complex pod layout" );
|
||||
static_assert( sizeof(*base(d_first)) == sizeof(real(*base(d_first))) + sizeof(imag(*base(d_first))) and sizeof(*base(d_first)) == sizeof(fftw_complex),
|
||||
"output must have complex pod layout");
|
||||
|
||||
assert(sizes(*first)==sizes(*d_first));
|
||||
auto ion = to_array<int>(sizes(*first));
|
||||
|
||||
assert(strides(*first) == strides(*last));
|
||||
auto istrides = to_array<int>(strides(*first));
|
||||
auto ostrides = to_array<int>(strides(*d_first));
|
||||
|
||||
std::array<std::array<int, 3>, std::decay_t<decltype(*It1{})>::rank::value> ssn;
|
||||
for(std::size_t i = 0; i != ssn.size(); ++i) ssn[i] = {istrides[i], ostrides[i], ion[i]};
|
||||
std::sort(ssn.begin(), ssn.end(), std::greater<>{});
|
||||
|
||||
for(std::size_t i = 0; i != ssn.size(); ++i){
|
||||
istrides[i] = std::get<0>(ssn[i]);
|
||||
ostrides[i] = std::get<1>(ssn[i]);
|
||||
ion[i] = std::get<2>(ssn[i]);
|
||||
}
|
||||
|
||||
int istride = istrides.back();
|
||||
auto inembed = istrides; inembed.fill(0);
|
||||
int ostride = ostrides.back();
|
||||
auto onembed = ostrides; onembed.fill(0);
|
||||
for(std::size_t i = 1; i != onembed.size(); ++i){
|
||||
assert(ostrides[i-1] >= ostrides[i]); // otherwise ordering is incompatible
|
||||
assert(ostrides[i-1]%ostrides[i]==0);
|
||||
onembed[i]=ostrides[i-1]/ostrides[i]; // assert( onembed[i] <= ion[i] );
|
||||
assert(istrides[i-1]%istrides[i]==0);
|
||||
inembed[i]=istrides[i-1]/istrides[i]; // assert( inembed[i] <= ion[i] );
|
||||
}
|
||||
|
||||
auto ret = ::fftw_plan_many_dft(
|
||||
/*int rank*/ ion.size(),
|
||||
/*const int* n*/ ion.data(),
|
||||
/*int howmany*/ last - first,
|
||||
/*fftw_complex * in */ reinterpret_cast<fftw_complex*>(const_cast<std::complex<double>*>(static_cast<std::complex<double> const*>(base(first)))),
|
||||
/*const int *inembed*/ inembed.data(),
|
||||
/*int*/ istride,
|
||||
/*int idist*/ stride(first),
|
||||
/*fftw_complex * out */ reinterpret_cast<fftw_complex*>(static_cast<std::complex<double>*>(base(d_first))),
|
||||
/*const int *onembed*/ onembed.data(),
|
||||
/*int*/ ostride,
|
||||
/*int odist*/ stride(d_first),
|
||||
/*int*/ sign, /*unsigned*/ flags
|
||||
);
|
||||
assert(ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<
|
||||
class In, class Out, dimensionality_type D = std::decay_t<In>::dimensionality,
|
||||
class=std::enable_if_t<D==std::decay_t<Out>::dimensionality>,
|
||||
class=decltype(reinterpret_cast<fftw_complex*>(/*static_cast<std::complex<double> *>*/(base(std::declval<Out&>()))))
|
||||
>
|
||||
fftw_plan fftw_plan_dft(std::array<bool, +D> which, In&& in, Out&& out, int sign, unsigned flags = FFTW_ESTIMATE){
|
||||
static_assert( sizeof(*base(in )) == sizeof((*base(in )).real()) + sizeof((*base(in)).imag()) and sizeof(*base(in)) == sizeof(fftw_complex),
|
||||
"input must have complex pod layout" );
|
||||
static_assert( sizeof(*base(out)) == sizeof((*base(out)).real()) + sizeof((*base(in)).imag()) and sizeof(*base(out)) == sizeof(fftw_complex),
|
||||
"output must have complex pod layout" );
|
||||
|
||||
using multi::sizes;
|
||||
assert(sizes(in) == sizes(out));
|
||||
|
||||
using multi::strides;
|
||||
auto ion = to_array<ptrdiff_t>(in.sizes());
|
||||
auto istrides = to_array<ptrdiff_t>(in.strides());
|
||||
auto ostrides = to_array<ptrdiff_t>(out.strides());
|
||||
|
||||
std::array<fftw_iodim64, D> dims ;
|
||||
auto l_dims = dims.begin();
|
||||
|
||||
std::array<fftw_iodim64, D> howmany;
|
||||
auto l_howmany = howmany.begin();
|
||||
|
||||
for(int i=0; i!=D; ++i) *(which[i]?l_dims:l_howmany)++ = {ion[i], istrides[i], ostrides[i]};
|
||||
|
||||
assert( D == l_dims - dims.begin() + l_howmany - howmany.begin() );
|
||||
assert(in.base()); assert(out.base()); assert( in.extensions() == out.extensions() );
|
||||
assert( (sign == -1) or (sign == +1) );
|
||||
fftw_plan ret = fftw_plan_guru64_dft(
|
||||
/*int rank*/ l_dims - dims.begin(),
|
||||
/*const fftw_iodim64 *dims*/ dims.data(),
|
||||
/*int howmany_rank*/ l_howmany - howmany.begin(),
|
||||
/*const fftw_iodim *howmany_dims*/ howmany.data(), //nullptr, //howmany_dims.data(), //;//nullptr,
|
||||
/*fftw_complex *in*/ const_cast<fftw_complex*>(reinterpret_cast<fftw_complex const*>(/*static_cast<std::complex<double> const *>*/(in.base()))),
|
||||
/*fftw_complex *out*/ reinterpret_cast<fftw_complex*>(/*static_cast<std::complex<double> *>*/(out.base())),
|
||||
sign, flags// | FFTW_ESTIMATE
|
||||
);
|
||||
assert(ret &&"fftw lib returned a null plan, if you are using MKL check the limitations of their fftw interface");
|
||||
//https://software.intel.com/content/www/us/en/develop/documentation/mkl-developer-reference-c/top/appendix-d-fftw-interface-to-intel-math-kernel-library/fftw3-interface-to-intel-math-kernel-library/using-fftw3-wrappers.html
|
||||
return ret;
|
||||
}
|
||||
|
||||
/// Casts `f` to `To`; only participates in overload resolution when `From`
/// is implicitly convertible to `To`.
template<class To, class From, std::enable_if_t<std::is_convertible<From, To>{},int> =0>
To implicit_cast(From&& f){
	return static_cast<To>(f);
}
|
||||
|
||||
/// Plans a DFT over all `D` dimensions of `in` into `out` through the guru64 interface.
///
/// When `s` is zero the transform rank passed to FFTW is 0.
/// @return the FFTW plan (asserted non-null).
template<class In, class Out, dimensionality_type D = In::dimensionality, typename = decltype(reinterpret_cast<fftw_complex*>(implicit_cast<std::complex<double>*>(base(std::declval<Out&>()))))>
auto fftw_plan_dft(In const& in, Out&& out, int s, unsigned flags = FFTW_ESTIMATE){
	static_assert( D == std::decay_t<Out>::dimensionality , "!");
	using multi::sizes;
	using multi::strides;
	assert(sizes(in) == sizes(out));

	auto const extents   = to_array<ptrdiff_t>(sizes(in));
	auto const in_steps  = to_array<ptrdiff_t>(strides(in));
	auto const out_steps = to_array<ptrdiff_t>(strides(out));

	// {extent, input stride, output stride} for every dimension
	std::array<fftw_iodim64, D> dims;
	for(int d = 0; d != D; ++d){
		dims[d] = fftw_iodim64{extents[d], in_steps[d], out_steps[d]};
	}

	auto* const source = const_cast<fftw_complex*>(reinterpret_cast<fftw_complex const*>(static_cast<std::complex<double> const*>(base(in))));
	auto* const target = reinterpret_cast<fftw_complex*>(implicit_cast<std::complex<double>*>(base(out)));

	auto ret = fftw_plan_guru64_dft(
		/*int rank*/ s?D:0,
		/*const fftw_iodim64 *dims*/ dims.data(),
		/*int howmany_rank*/ 0,
		/*const fftw_iodim *howmany_dims*/ nullptr, //howmany_dims.data(), //;//nullptr,
		/*fftw_complex *in*/ source,
		/*fftw_complex *out*/ target,
		s, flags
	);
	assert(ret);
	return ret;
}
|
||||
|
||||
namespace fftw{
|
||||
|
||||
// `inline` because this header may be included from several translation
// units; a plain definition would be emitted once per unit and clash at link time.
#if HAVE_FFTW3_THREADS
/// Calls fftw_init_threads and asserts that it reported success.
inline void initialize_threads(){int good = fftw_init_threads(); assert(good); (void)good;}
#else
/// No-op when the library is built without FFTW thread support.
inline void initialize_threads(){}
#endif
|
||||
|
||||
/// Forwards to fftw_cleanup. `inline` so that including this header from
/// several translation units does not produce duplicate definitions.
inline void cleanup(){fftw_cleanup();}

/// Scope guard whose destructor calls cleanup().
struct environment{
	~environment(){cleanup();}
};
|
||||
|
||||
class plan{
|
||||
plan() : impl_{nullptr, &fftw_destroy_plan}{}
|
||||
std::unique_ptr<std::remove_pointer_t<fftw_plan>, decltype(&fftw_destroy_plan)> impl_;
|
||||
public:
|
||||
plan(plan const&) = delete;//default;
|
||||
plan(plan&&) = default;
|
||||
template<typename... As,
|
||||
typename = decltype(fftw_plan_dft(std::declval<As&&>()...))
|
||||
> plan(As&&... as) : impl_{fftw_plan_dft(std::forward<As>(as)...), &fftw_destroy_plan}{
|
||||
assert(impl_);
|
||||
}
|
||||
template<typename... As>
|
||||
static auto many(As&&... as)
|
||||
->std::decay_t<decltype(fftw_plan_many_dft(std::forward<As>(as)...) , std::declval<plan>())>
|
||||
{
|
||||
plan r; r.impl_.reset(fftw_plan_many_dft(std::forward<As>(as)...)); return r; // this produces a compilation error in icc++17
|
||||
}
|
||||
|
||||
private:
|
||||
void execute() const{fftw_execute(impl_.get());}
|
||||
template<class I, class O>
|
||||
void execute_dft(I&& i, O&& o) const{
|
||||
::fftw_execute_dft(impl_.get(), const_cast<fftw_complex*>(reinterpret_cast<fftw_complex const*>(static_cast<std::complex<double> const*>(base(i)))), reinterpret_cast<fftw_complex*>(static_cast<std::complex<double>*>(base(o))));
|
||||
}
|
||||
template<class I, class O> void execute(I&& i, O&& o) const{execute_dft(std::forward<I>(i), std::forward<O>(o));}
|
||||
friend void execute(plan const& p){p.execute();}
|
||||
public:
|
||||
plan& operator=(plan&&) = default;
|
||||
plan& operator=(plan const&) = delete;//default;
|
||||
void operator()() const{execute();} // http://www.fftw.org/fftw3_doc/Thread-safety.html#Thread-safety
|
||||
template<class I, class O> void operator()(I&& i, O&& o) const{return execute(std::forward<I>(i), std::forward<O>(o));}
|
||||
double cost() const{return fftw_cost(impl_.get());}
|
||||
auto flops() const{
|
||||
struct{double add; double mul; double fma; operator double() const{return add + mul + 2*fma;}} r;
|
||||
fftw_flops(impl_.get(), &r.add, &r.mul, &r.fma);
|
||||
return r;
|
||||
}
|
||||
//std::string string_print() const{
|
||||
// return std::unique_ptr<char>{fftw_sprint_plan(impl_.get())}.get();
|
||||
//}
|
||||
//friend std::ostream& operator<<(std::ostream& os, plan const& p){return os<<p.string_print()<<'\n';}
|
||||
#if HAVE_FFTW3_THREADS
|
||||
public:
|
||||
static void make_thread_safe(){
|
||||
fftw_make_planner_thread_safe(); // needs linking to -lfftw3_threads, requires FFTW-3.3.6 or greater
|
||||
is_thread_safe_ = true;
|
||||
}
|
||||
static int with_nthreads(int n){fftw_plan_with_nthreads(n); nthreads_ = n; return n;}
|
||||
static int with_nthreads(){
|
||||
int n=std::thread::hardware_concurrency(); return with_nthreads(n?n:2);
|
||||
}
|
||||
static bool is_thread_safe(){return is_thread_safe_;}
|
||||
static bool nthreads(){return nthreads_;}
|
||||
private:
|
||||
static bool is_thread_safe_;
|
||||
static int nthreads_;
|
||||
static bool initialized_threads_;
|
||||
#else
|
||||
static constexpr bool is_thread_safe(){return false;}
|
||||
static constexpr bool nthreads(){return 1;}
|
||||
static constexpr int with_nthreads(){return 1;}
|
||||
#endif
|
||||
};
|
||||
|
||||
#if HAVE_FFTW3_THREADS
// Static state of `plan`; the initializers run the one-time FFTW threading setup.
// NOTE(review): make_thread_safe() runs before initialize_threads() here — confirm FFTW accepts that order.
bool plan::is_thread_safe_ = (
	plan::make_thread_safe(), true
);
int plan::nthreads_ = (
	initialize_threads(), with_nthreads()
);
#endif
|
||||
|
||||
using sign = int;
|
||||
constexpr sign forward = FFTW_FORWARD;
|
||||
constexpr sign none = 0;
|
||||
constexpr sign backward = FFTW_BACKWARD;
|
||||
|
||||
static_assert( forward != none and none != backward and backward != forward, "!");
|
||||
|
||||
enum strategy: decltype(FFTW_ESTIMATE){ estimate = FFTW_ESTIMATE, measure = FFTW_MEASURE };
|
||||
|
||||
template<class In, class Out>
|
||||
auto dft(In const& i, Out&& o, int s)
|
||||
->decltype(fftw::plan{i, o, s}(), std::forward<Out>(o)){
|
||||
return fftw::plan{i, o, s}(), std::forward<Out>(o);}
|
||||
|
||||
using std::decay_t;
|
||||
|
||||
template<class In, class Out, std::size_t D=In::dimensionality>
|
||||
auto dft(std::array<bool, +D> which, In const& i, Out&& o, sign s)
|
||||
->decltype(plan{which, i, o, s}(), std::forward<Out>(o)){
|
||||
return plan{which, i, o, s}(), std::forward<Out>(o);}
|
||||
|
||||
template<typename In, class Out, dimensionality_type D=In::dimensionality, dimensionality_type=std::decay_t<Out>::dimensionality>
|
||||
auto dft(std::array<sign, +D> w, In const& i, Out&& o){
|
||||
std::array<bool, D> fwd, /*non,*/ bwd;
|
||||
|
||||
std::transform(begin(w), end(w), begin(fwd), [](auto e){return e==FFTW_FORWARD;});
|
||||
dft(fwd, i, o, fftw::forward);
|
||||
|
||||
std::transform(begin(w), end(w), begin(bwd), [](auto e){return e==FFTW_BACKWARD;});
|
||||
if(std::accumulate(begin(bwd), end(bwd), false)) dft(bwd, o, o, FFTW_BACKWARD);
|
||||
|
||||
return std::forward<Out>(o);
|
||||
}
|
||||
|
||||
template<typename It1, typename It2>
|
||||
auto many_dft(It1 first, It1 last, It2 d_first, int sign)
|
||||
->decltype(plan::many(first, last, d_first, sign)(), d_first + (last - first)){
|
||||
return plan::many(first, last, d_first, sign)(), d_first + (last - first);}
|
||||
|
||||
template<typename In, class R=typename In::decay_type>
|
||||
NODISCARD("when first argument is const")
|
||||
auto dft(In const& i, sign s)
|
||||
->std::decay_t<decltype(dft(i, R(extensions(i), get_allocator(i)), s))>{
|
||||
return dft(i, R(extensions(i), get_allocator(i)), s);}
|
||||
|
||||
template<typename T, dimensionality_type D, class... Args>
|
||||
decltype(auto) rotate(multi::array<T, D, Args...>& i, int = 1){
|
||||
multi::array_ref<T, D, typename multi::array<T, D, Args...>::element_ptr> before(data_elements(i), extensions(i));
|
||||
i.reshape(extensions(rotated(before) ));
|
||||
fftw::dft(before, i, fftw::none);
|
||||
return i;
|
||||
}
|
||||
|
||||
template<typename In, dimensionality_type D = In::dimensionality, class R=typename In::decay_type>
|
||||
NODISCARD("when first argument is const")
|
||||
auto dft(std::array<bool, +D> which, In const& i, sign s)
|
||||
->std::decay_t<decltype(fftw::dft(which, i, R(extensions(i), get_allocator(i)), s))>{
|
||||
return fftw::dft(which, i, R(extensions(i), get_allocator(i)), s);}
|
||||
|
||||
template<typename In, multi::dimensionality_type D = std::decay_t<In>::dimensionality>
|
||||
auto dft(std::array<bool, +D> which, In&& i, sign s)
|
||||
->decltype(dft(which, i, i, s), std::forward<In>(i)){
|
||||
return dft(which, i, i, s), std::forward<In>(i);}
|
||||
|
||||
/// Deleted: a dimension selection without a direction is rejected at compile time.
template<
	typename In,
	std::size_t D = In::dimensionality,
	class R = typename In::decay_type
>
void dft(std::array<bool, +D> which, In const& i) = delete;
|
||||
|
||||
template<dimensionality_type Rank /*not deduced*/, typename In, class R=typename In::decay_type>
|
||||
NODISCARD("when second argument is const")
|
||||
R dft(In const& i, sign s){
|
||||
static_assert( Rank <= In::dimensionality, "!" );
|
||||
return dft<Rank>(i, R(extensions(i), get_allocator(i)), s);
|
||||
}
|
||||
|
||||
template<typename... A> auto dft_forward(A&&... a)
|
||||
->decltype(fftw::dft(std::forward<A>(a)..., fftw::forward)){
|
||||
return fftw::dft(std::forward<A>(a)..., fftw::forward);}
|
||||
|
||||
template<typename BoolArray, typename A>
|
||||
NODISCARD("when input argument is read only")
|
||||
auto dft_forward(BoolArray which, A const& a)
|
||||
->decltype(fftw::dft(which, a, fftw::forward)){
|
||||
return fftw::dft(which, a, fftw::forward);}
|
||||
|
||||
template<class A, multi::dimensionality_type D = A::dimensionality>
|
||||
NODISCARD("when input argument is read only")
|
||||
auto dft_forward(std::array<bool, +D> which, A const& a)
|
||||
->decltype(fftw::dft(which, a, fftw::forward)){
|
||||
return fftw::dft(which, a, fftw::forward);}
|
||||
|
||||
template<class A, class O, multi::dimensionality_type D = A::dimensionality>
|
||||
auto dft_forward(std::array<bool, +D> which, A const& a, O&& o)
|
||||
->decltype(fftw::dft(which, a, std::forward<O>(o), fftw::forward)){
|
||||
return fftw::dft(which, a, std::forward<O>(o), fftw::forward);}
|
||||
|
||||
template<typename A>
|
||||
NODISCARD("when input argument is read only")
|
||||
auto dft_forward(A const& a)
|
||||
->decltype(fftw::dft(a, fftw::forward)){
|
||||
return fftw::dft(a, fftw::forward);}
|
||||
|
||||
template<typename... A> auto dft_backward(A&&... a)
|
||||
->decltype(dft(std::forward<A>(a)..., fftw::backward)){
|
||||
return dft(std::forward<A>(a)..., fftw::backward);}
|
||||
|
||||
/// Transforms `i` onto itself with direction `s` and returns it forwarded.
template<class In>
In&& dft_inplace(In&& i, sign s){
	int const direction = static_cast<int>(s);
	fftw::plan{i, i, direction}();//(i, i);
	return std::forward<In>(i);
}
|
||||
|
||||
template<class In, class Out, dimensionality_type D = In::dimensionality>
|
||||
auto copy(In const& i, Out&& o)
|
||||
->decltype(dft(std::array<bool, D>{}, i, std::forward<Out>(o), fftw::forward)){
|
||||
return dft(std::array<bool, D>{}, i, std::forward<Out>(o), fftw::forward);}
|
||||
|
||||
template<typename In, class R=typename In::decay_type>
|
||||
NODISCARD("when argument is const")
|
||||
R copy(In const& i)
|
||||
{//->decltype(copy(i, R(extensions(i), get_allocator(i))), R()){
|
||||
return copy(i, R(extensions(i), get_allocator(i)));}
|
||||
|
||||
template<typename In, class R=typename std::decay_t<In>::decay_type>
|
||||
auto move(In&& in){
|
||||
if(in.is_compact()){
|
||||
multi::array_ref<typename In::element, In::dimensionality, typename In::element_ptr> ref(
|
||||
in.base(), extensions(in)
|
||||
);
|
||||
copy(in, ref);
|
||||
return R(
|
||||
multi::array_ref<typename In::element, In::dimensionality_type, std::move_iterator<typename In::element_ptr>>(std::make_move_iterator(in.mbase()), ((in.mbase()=0), extensions(ref)))
|
||||
);
|
||||
}else return copy(std::forward<In>(in));
|
||||
}
|
||||
|
||||
template<typename T, dimensionality_type D, class P, class R=typename multi::array<T, D>>
|
||||
R copy(multi::basic_array<T, D, multi::move_ptr<T, P>>&& a){
|
||||
if(a.is_compact()){
|
||||
return
|
||||
fftw::copy(
|
||||
a.template static_array_cast<T, T*>(),
|
||||
multi::array_ref<T, D, T*>(a.base().base(), a.extensions())
|
||||
).template static_array_cast<T, multi::move_ptr<T>>()
|
||||
;
|
||||
}else return fftw::copy(a.template static_array_cast<T, P>());
|
||||
}
|
||||
|
||||
template<class Array>
|
||||
auto transpose(Array& a)
|
||||
->decltype(fftw::copy(transposed(a), a.reshape(extensions(layout(a).transpose())))){
|
||||
multi::array_ref<typename Array::element, Array::dimensionality, typename Array::element_ptr> r(a.base(), extensions(a));
|
||||
return fftw::copy(r.transposed(), a.reshape(layout(a).transpose().extensions()));
|
||||
}
|
||||
|
||||
|
||||
#if 0
// TODO investigate why this doesn't work as expected
/// Generic rotation written in the same way as `transpose` (disabled).
template<class Array>
auto rotate(Array& a)
->decltype(fftw::copy(rotated(a), a.reshape(extensions(layout(a).transpose())))){
	using view_type = multi::array_ref<typename Array::element, Array::dimensionality, typename Array::element_ptr>;
	view_type r(a.base(), extensions(a));
	auto&& ro = r.rotated();
	return fftw::copy(ro, a.reshape(layout(a).rotate().extensions()));
}
#endif
|
||||
|
||||
}}}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#if not __INCLUDE_LEVEL__
|
||||
|
||||
#define BOOST_TEST_MODULE "C++ Unit Tests for Multi FFTW adaptor"
|
||||
#define BOOST_TEST_DYN_LINK
|
||||
#include<boost/test/unit_test.hpp>
|
||||
|
||||
#include "../array.hpp"
|
||||
#include "../adaptors/../complex.hpp"
|
||||
|
||||
#include<chrono>
|
||||
#include<random>
|
||||
|
||||
#include<thrust/complex.h>
|
||||
|
||||
namespace{
|
||||
|
||||
namespace multi = boost::multi;
|
||||
namespace fftw = multi::fftw;
|
||||
|
||||
using complex = std::complex<double>; MAYBE_UNUSED complex const I{0, 1};
|
||||
|
||||
template<class M> auto power(M const& m)->decltype(std::norm(m)){return std::norm(m);}
|
||||
|
||||
template<class M, DELETE((M::dimensionality < 1))> double power(M const& m){return accumulate(begin(m), end(m), 0., [](auto const& a, auto const& b){return a + power(b);});}
|
||||
|
||||
struct sum_power{
|
||||
template<class A, class B> auto operator()(A const& a, B const& b) const{return a+power(b);}
|
||||
};
|
||||
|
||||
MAYBE_UNUSED constexpr int N = 16;
|
||||
}
|
||||
|
||||
/// Scope timer: stores the construction time and, on destruction, writes
/// "<label>: <seconds> sec" to std::cerr.
struct watch : private std::chrono::high_resolution_clock{
	std::string label_;
	time_point  start_;

	watch(std::string label ="") : label_{label}, start_{now()}{}

	~watch(){
		std::chrono::duration<double> const elapsed = now() - start_;
		std::cerr<< label_ <<": "<< elapsed.count() <<" sec"<<std::endl;
	}
};
|
||||
|
||||
/// Fills a scalar, or recursively every element of a range, with values drawn
/// from a standard normal distribution.
template<class T> struct randomizer{
	// range overload: visits each element, which is again a range or a T
	template<class M> void operator()(M&& m) const{
		for(auto&& item : m){(*this)(item);}
	}
	void operator()(T& e) const{
		static std::random_device seed_source;
		static std::mt19937 engine{seed_source()};
		static std::normal_distribution<T> normal;
		e = normal(engine);
	}
};

/// Complex specialization: real and imaginary parts are drawn independently.
template<class T> struct randomizer<std::complex<T>>{
	template<class M> void operator()(M&& m) const{
		for(auto&& item : m){(*this)(item);}
	}
	void operator()(std::complex<T>& e) const{
		static std::random_device seed_source;
		static std::mt19937 engine{seed_source()};
		static std::normal_distribution<T> normal;
		e = std::complex<T>(normal(engine), normal(engine));
	}
};
|
||||
|
||||
struct fftw_fixture : fftw::environment{
|
||||
void setup(){}
|
||||
void teardown(){}//fftw_cleanup();}
|
||||
};
|
||||
|
||||
BOOST_TEST_GLOBAL_FIXTURE( fftw_fixture );
|
||||
|
||||
// An out-of-place 3D transform must leave its input untouched.
BOOST_AUTO_TEST_CASE(fftw_3D){
	using complex = std::complex<double>; //TODO make it work with thrust
	multi::array<complex, 3> in({10, 10, 10});
	in[2][3][4] = 99.;

	auto fwd = fftw::dft(in, fftw::forward);

	BOOST_REQUIRE(in[2][3][4] == 99.);
}
|
||||
|
||||
// Forward and backward 1D transforms of a const array; the input must stay intact.
BOOST_AUTO_TEST_CASE(fftw_1D_const){
	multi::array<complex, 1> const in = {1. + 2.*I, 2. + 3. *I, 4. + 5.*I, 5. + 6.*I};

	auto fwd = multi::fftw::dft(in, fftw::forward); // Fourier[in, FourierParameters -> {1, -1}]
	BOOST_REQUIRE( size(fwd) == size(in) );
	BOOST_REQUIRE( fwd[2] == -2. - 2.*I );
	BOOST_REQUIRE( in[1]  == +2. + 3.*I );

	// The inverse direction is exercised here (this call used to repeat the forward one).
	// For a size-4 transform element 2 is sum_n (-1)^n in[n] in both directions,
	// so the expected value is unchanged.
	auto bwd = multi::fftw::dft(in, fftw::backward); // InverseFourier[in, FourierParameters -> {-1, -1}]
	BOOST_REQUIRE( bwd[2] == -2. - 2.*I );
}
|
||||
|
||||
// With no dimension selected the transform degenerates into a copy.
BOOST_AUTO_TEST_CASE(fftw_2D_identity_2, *boost::unit_test::tolerance(0.0001)){
	multi::array<complex, 2> const in = {
		{  1. + 2.*I,  9. - 1.*I, 2. +  4.*I},
		{  3. + 3.*I,  7. - 4.*I, 1. +  9.*I},
		{  4. + 1.*I,  5. + 3.*I, 2. +  4.*I},
		{  3. - 1.*I,  8. + 7.*I, 2. +  1.*I},
		{ 31. - 1.*I, 18. + 7.*I, 2. + 10.*I}
	};
	multi::array<complex, 2> out(extensions(in));

	fftw::dft({false, false}, in, out, fftw::forward); // out = in;

	BOOST_REQUIRE( power(in) == power(out) );
	BOOST_REQUIRE( out == in );
}
|
||||
|
||||
// An empty selection `{}` returns a copy of the input.
BOOST_AUTO_TEST_CASE(fftw_2D_identity, *boost::unit_test::tolerance(0.0001)){
	multi::array<complex, 2> const in = {
		{  1. + 2.*I,  9. - 1.*I, 2. +  4.*I},
		{  3. + 3.*I,  7. - 4.*I, 1. +  9.*I},
		{  4. + 1.*I,  5. + 3.*I, 2. +  4.*I},
		{  3. - 1.*I,  8. + 7.*I, 2. +  1.*I},
		{ 31. - 1.*I, 18. + 7.*I, 2. + 10.*I}
	};

	auto fwd = fftw::dft({}, in, fftw::forward);

	BOOST_REQUIRE( fwd == in );
}
|
||||
|
||||
// Full 2D forward transform checked against a reference value, plus
// agreement between transforming a row view and an equal 1D array.
BOOST_AUTO_TEST_CASE(fftw_2D, *boost::unit_test::tolerance(0.0001)){
	multi::array<complex, 2> const in = {
		{  1. + 2.*I,  9. - 1.*I, 2. +  4.*I},
		{  3. + 3.*I,  7. - 4.*I, 1. +  9.*I},
		{  4. + 1.*I,  5. + 3.*I, 2. +  4.*I},
		{  3. - 1.*I,  8. + 7.*I, 2. +  1.*I},
		{ 31. - 1.*I, 18. + 7.*I, 2. + 10.*I}
	};

	namespace fftw = multi::fftw;
	auto fwd = fftw::dft_forward(in);
	BOOST_TEST_REQUIRE( fwd[3][1].real() == -19.0455  ); // Fourier[in, FourierParameters -> {1, -1}][[4]][[2]]
	BOOST_TEST_REQUIRE( fwd[3][1].imag() == - 2.22717 );

	// same values as the first row of `in`
	multi::array<complex, 1> const in0 = {1. + 2.*I, 9. - 1.*I, 2. + 4.*I};

	auto b = multi::fftw::dft_forward(in0);
	auto a = multi::fftw::dft_forward(in[0]);
	BOOST_REQUIRE( fftw::dft_forward(in[0]) == fftw::dft_forward(in0) );
}
|
||||
|
||||
// Transforming a rotated view must agree with rotating the transformed array.
BOOST_AUTO_TEST_CASE(fftw_2D_rotated, *boost::unit_test::tolerance(0.0001)){
	using multi::array;
	using multi::fftw::dft_forward;

	array<complex, 2> const in = {
		{  1. + 2.*I,  9. - 1.*I, 2. +  4.*I},
		{  3. + 3.*I,  7. - 4.*I, 1. +  9.*I},
		{  4. + 1.*I,  5. + 3.*I, 2. +  4.*I},
		{  3. - 1.*I,  8. + 7.*I, 2. +  1.*I},
		{ 31. - 1.*I, 18. + 7.*I, 2. + 10.*I}
	};

	auto fwd = dft_forward(in);

	// the first column of `in` written out as a 1D array
	BOOST_REQUIRE(
		dft_forward(rotated(in)[0])
			== dft_forward(array<complex, 1>{1.+2.*I, 3.+3.*I, 4. + 1.*I,  3. - 1.*I, 31. - 1.*I})
	);
	BOOST_REQUIRE( dft_forward(rotated(in)) == rotated(fwd) );
}
|
||||
|
||||
// Several ways of transforming only one dimension of a 2D array must agree
// with the 1D transform of the corresponding row or column.
BOOST_AUTO_TEST_CASE(fftw_2D_many, *boost::unit_test::tolerance(0.0001)){
	multi::array<complex, 2> const in = {
		{  1. + 2.*I,  9. - 1.*I, 2. +  4.*I},
		{  3. + 3.*I,  7. - 4.*I, 1. +  9.*I},
		{  4. + 1.*I,  5. + 3.*I, 2. +  4.*I},
		{  3. - 1.*I,  8. + 7.*I, 2. +  1.*I},
		{ 31. - 1.*I, 18. + 7.*I, 2. + 10.*I}
	};
	multi::array<complex, 2> out(extensions(in));

	using multi::fftw::dft_forward;

	// per-dimension directions: only the last dimension is transformed
	fftw::dft({fftw::none, fftw::forward}, in, out);
	BOOST_REQUIRE( dft_forward(in[0]) == out[0] );

	// boolean selection applied to rotated views
	fftw::dft({false, true}, rotated(in), rotated(out), fftw::forward);
	BOOST_REQUIRE( dft_forward(rotated(in)[0]) == rotated(out)[0] );

	// nothing selected: plain copy
	fftw::dft_forward({false, false}, rotated(in), rotated(out));
	BOOST_REQUIRE( in == out );

	// iterator-based "many" interface, one transform per row
	fftw::many_dft(begin(in), end(in), begin(out), fftw::forward);
	BOOST_REQUIRE( dft_forward(in[0]) == out[0] );
}
|
||||
|
||||
// Transforming the last dimension of a 2D array equals transforming each row.
BOOST_AUTO_TEST_CASE(fftw_many1_from_2){
	multi::array<complex, 2> in({3, 10});
	randomizer<complex>{}(in);

	multi::array<complex, 2> out({3, 10});
	fftw::dft({false, true}, in, out, fftw::forward);

	multi::array<complex, 2> out2({3, 10});
	for(int i = 0; i!=size(in); ++i){
		fftw::dft(in[i], out2[i], fftw::forward);
	}

	BOOST_REQUIRE(out2 == out);
}
|
||||
|
||||
// Transforming the last two dimensions of a 3D array equals a 2D transform of each slab.
BOOST_AUTO_TEST_CASE(fftw_many2_from_3){
	multi::array<complex, 3> in({3, 5, 6});
	randomizer<complex>{}(in);

	multi::array<complex, 3> out({3, 5, 6});
	fftw::dft({false, true, true}, in, out, FFTW_FORWARD);

	multi::array<complex, 3> out2({3, 5, 6});
	for(int i = 0; i!=size(in); ++i){
		fftw::dft(in[i], out2[i], FFTW_FORWARD);
	}

	BOOST_REQUIRE(out2 == out);
}
|
||||
|
||||
// Selecting every dimension equals the plain full transform.
BOOST_AUTO_TEST_CASE(fftw_many2_from_2){
	multi::array<complex, 2> in({5, 6});
	randomizer<complex>{}(in);

	multi::array<complex, 2> out({5, 6});
	fftw::dft({true, true}, in, out, FFTW_FORWARD);

	multi::array<complex, 2> out2({5, 6});
	fftw::dft(in, out2, FFTW_FORWARD);

	BOOST_REQUIRE(out2 == out);
}
|
||||
|
||||
// A full 4D out-of-place transform must leave its const input untouched.
BOOST_AUTO_TEST_CASE(fftw_4D){
	multi::array<complex, 4> const in = []{
		multi::array<complex, 4> in({10, 10, 10, 10});
		in[2][3][4][5] = 99.;
		return in;
	}();

	auto fwd = fftw::dft({true, true, true, true}, in, fftw::forward);

	BOOST_REQUIRE(in[2][3][4][5] == 99.);
}
|
||||
|
||||
// Transforming the first three dimensions of a 4D array must match the
// iterator-based "many" interface applied to the unrotated views.
BOOST_AUTO_TEST_CASE(fftw_4D_many){
	auto const in = []{
		multi::array<complex, 4> in({97, 95, 101, 10}, 0.);
		in[2][3][4][5] = 99.;
		return in;
	}();

	auto fwd = fftw::dft({true, true, true, false}, in, fftw::forward);
	BOOST_REQUIRE( in[2][3][4][5] == 99. );

	multi::array<complex, 4> out(extensions(in));
	fftw::many_dft(begin(unrotated(in)), end(unrotated(in)), begin(unrotated(out)), fftw::forward);
	BOOST_REQUIRE( out == fwd );
}
|
||||
|
||||
// The "many" interface over the `<<1` views must match the selection {true, false, true}.
BOOST_AUTO_TEST_CASE(cufft_many_2D){
	auto const in = []{
		multi::array<complex, 3> ret({10, 10, 10});
		auto* const first = ret.data_elements();
		std::generate(first, first + ret.num_elements(),
			[](){return complex{std::rand()*1./RAND_MAX, std::rand()*1./RAND_MAX};}
		);
		return ret;
	}();

	multi::array<complex, 3> out(extensions(in));
	fftw::many_dft((in<<1).begin(), (in<<1).end(), (out<<1).begin(), fftw::forward);

	multi::array<complex, 3> out2(extensions(in));
	fftw::dft({true, false, true}, in, out2, fftw::forward);

	BOOST_REQUIRE( out == out2 );
}
|
||||
|
||||
// A 5D out-of-place transform must leave its input untouched.
BOOST_AUTO_TEST_CASE(fftw_5D){
	multi::array<complex, 5> in({4, 5, 6, 7, 8});
	in[2][3][4][5][6] = 99.;

	auto fwd = fftw::dft(in, fftw::forward);

	BOOST_REQUIRE(in[2][3][4][5][6] == 99.);
}
|
||||
|
||||
// Power check for a raw 1D plan: power(in) versus power(out)/N.
// NOTE(review): the difference is compared without std::abs, so only one sign of the error is bounded.
BOOST_AUTO_TEST_CASE(fftw_1D_power){
	multi::array<complex, 1> in(N, 0.);
	assert( size(in) == N );
	std::iota(begin(in), end(in), 1.);

	multi::array<complex, 1> out(extensions(in));
	static_assert(dimensionality(in)==dimensionality(out), "!");

	// plan, run and release by hand through the C API
	auto p = multi::fftw_plan_dft(in, out, fftw::forward, FFTW_PRESERVE_INPUT);
	fftw_execute(p);
	fftw_destroy_plan(p);

	BOOST_REQUIRE( (power(in) - power(out)/num_elements(out)) < 1e-17 );
}
|
||||
|
||||
// Power check for a raw 2D plan.
// NOTE(review): the difference is compared without std::abs, so only one sign of the error is bounded.
BOOST_AUTO_TEST_CASE(fftw_2D_power){
	multi::array<complex, 2> in({N, N});
	std::iota(data_elements(in), data_elements(in) + num_elements(in), 1.2);

	multi::array<complex, 2> out(extensions(in));

	auto p = multi::fftw_plan_dft(in, out, fftw::forward, FFTW_PRESERVE_INPUT);
	fftw_execute(p);
	fftw_destroy_plan(p);

	BOOST_REQUIRE( power(in) - power(out)/num_elements(out) < 1e-12 );
}
|
||||
|
||||
// Same power check, with the plan owned by fftw::plan.
BOOST_AUTO_TEST_CASE(fftw_2D_power_plan){
	multi::array<complex, 2> in({16, 16});
	std::iota(data_elements(in), data_elements(in) + num_elements(in), 1.2);

	multi::array<complex, 2> out(extensions(in));

	fftw::plan const p{in, out, fftw::forward, FFTW_PRESERVE_INPUT};
	p(); //execute(p); //p.execute();

	BOOST_REQUIRE( power(in) - power(out)/num_elements(out) < 1e-8 );
}
|
||||
|
||||
// Same power check through the one-shot dft(in, out, sign) call.
BOOST_AUTO_TEST_CASE(fftw_2D_power_dft){
	multi::array<complex, 2> in({16, 16});
	std::iota(data_elements(in), data_elements(in) + num_elements(in), 1.2);

	multi::array<complex, 2> out(extensions(in));
	fftw::dft(in, out, fftw::forward);

	BOOST_REQUIRE( power(in) - power(out)/num_elements(out) < 1e-8 );
}
|
||||
|
||||
// Same power check with the output returned by value.
BOOST_AUTO_TEST_CASE(fftw_2D_power_dft_out){
	multi::array<complex, 2> in({16, 16});
	std::iota(data_elements(in), data_elements(in) + num_elements(in), 1.2);

	auto out = fftw::dft(in, fftw::forward);

	BOOST_REQUIRE( power(in) - power(out)/num_elements(out) < 1e-8 );
}
|
||||
|
||||
// Value-returning multi::fftw::dft(in, sign).
// NOTE(review): the body matches fftw_2D_power_dft_out statement for statement;
// the "default" in the name is not reflected in the call — confirm what was meant.
// NOTE(review): one-sided comparison (no absolute value) — confirm intent.
BOOST_AUTO_TEST_CASE(fftw_2D_power_dft_out_default){
	multi::array<complex, 2> in({16, 16});
	auto const first = data_elements(in);
	std::iota(first, first + num_elements(in), 1.2);

	auto out = multi::fftw::dft(in, fftw::forward);

	auto const power_in  = power(in);
	auto const power_out = power(out)/num_elements(out);
	BOOST_REQUIRE( power_in - power_out < 1e-8 );
}
|
||||
|
||||
// 4x4x4 out-of-place transform; the two power values must agree within 1e-10
// in absolute value.
BOOST_AUTO_TEST_CASE(fftw_3D_power){
	multi::array<complex, 3> in({4, 4, 4});
	auto const first = in.data_elements();
	std::iota(first, first + in.num_elements(), 1.2);

	multi::array<complex, 3> out = fftw::dft(in, fftw::forward);

	auto const mismatch = power(in) - power(out)/num_elements(out);
	BOOST_REQUIRE( std::abs(mismatch) < 1e-10 );
}
|
||||
|
||||
// 4x4x4 in-place transform: the power recorded before the transform must match
// power(io)/num_elements(io) afterwards within 1e-10.
// The difference is wrapped in std::abs (as in fftw_3D_power, which uses the same
// data and tolerance); without it any negative difference passed the check.
BOOST_AUTO_TEST_CASE(fftw_3D_power_in_place){
	multi::array<complex, 3> io({4, 4, 4});
	std::iota(io.data_elements(), io.data_elements() + io.num_elements(), 1.2);

	auto const powerin = power(io); // must be taken before io is overwritten

	fftw::dft_inplace(io, fftw::forward);

	BOOST_REQUIRE( std::abs(powerin - power(io)/num_elements(io)) < 1e-10 );
}
|
||||
|
||||
// In-place transform applied through a temporary array_ref that views the
// storage of `io`; the result is then read back through `io` itself.
// The difference is wrapped in std::abs (as in fftw_3D_power); without it any
// negative difference passed the check.
BOOST_AUTO_TEST_CASE(fftw_3D_power_in_place_over_ref_inplace){
	multi::array<complex, 3> io({4, 4, 4});
	std::iota(io.data_elements(), io.data_elements() + io.num_elements(), 1.2);

	auto const powerin = power(io); // must be taken before io is overwritten

//	fftw::dft_inplace(multi::array_ref<complex, 3>(io.data(), io.extensions()), fftw::forward);
	fftw::dft_inplace(multi::array_ref<complex, 3>(data_elements(io), extensions(io)), fftw::forward);

	BOOST_REQUIRE( std::abs(powerin - power(io)/num_elements(io)) < 1e-10 );
}
|
||||
|
||||
// Out-of-place transform where both sides are views: the input is read through
// an array_cref over `in`, and the result is assigned through an array_ref over `out`.
// The difference is wrapped in std::abs (as in fftw_3D_power); without it any
// negative difference passed the check.
BOOST_AUTO_TEST_CASE(fftw_3D_power_out_of_place_over_ref){
	multi::array<complex, 3> in({4, 4, 4});
	std::iota(data_elements(in), data_elements(in)+num_elements(in), 1.2);

	multi::array<complex, 3> out({4, 4, 4});

	multi::array_ref<complex, 3>(data_elements(out), extensions(out)) =
		fftw::dft(multi::array_cref<complex, 3>(data_elements(in), extensions(in)), fftw::forward);

	BOOST_REQUIRE( std::abs(power(in) - power(out)/num_elements(out)) < 1e-10 );
}
|
||||
|
||||
// Transform of an array returned by value from a lambda (i.e. a temporary).
// The lambda records the input power as a side effect because the input is no
// longer reachable once dft() has consumed it.
BOOST_AUTO_TEST_CASE(fftw_3D_power_out_of_place_over_temporary){
	double powerin;
	auto make_input = [&](){
		multi::array<complex, 3> fresh({4, 4, 4});
		auto const first = data_elements(fresh);
		std::iota(first, first + num_elements(fresh), 1.2);
		powerin = power(fresh);
		return fresh;
	};

	auto out = fftw::dft(make_input(), fftw::forward);

	auto const mismatch = powerin - power(out)/num_elements(out);
	BOOST_REQUIRE( std::abs(mismatch) < 1e-10 );
}
|
||||
|
||||
// Copies a 2x2 array onto its own rotated view with multi::fftw::copy and
// checks that the element that started at [1][0] is afterwards found at [0][1].
BOOST_AUTO_TEST_CASE(fftw_2D_transposition_square_inplace){
	multi::array<complex, 2> in = {
		{11., 12.},
		{21., 22.}
	};
	BOOST_REQUIRE( in[1][0] == 21. );

	multi::fftw::copy(in, rotated(in));

	complex const moved = in[0][1];
	BOOST_TEST( moved.real() == 21. );
	BOOST_TEST( moved.imag() ==  0. );
}
|
||||
|
||||
// 4D array of sizes 50x100x137x1 passed to dft with the per-dimension selector
// {0, 1, 1, 0}. The output power is divided by the sizes of dimensions 1 and 2
// only before being compared with the input power.
BOOST_AUTO_TEST_CASE(fftw_4D_inq_poisson){
	multi::array<complex, 4> const in = []{
		multi::array<complex, 4> filled({50, 100, 137, 1});
		auto const first = data_elements(filled);
		std::iota(first, first + num_elements(filled), 1.2);
		return filled;
	}();

	multi::array<complex, 4> out(extensions(in));
	multi::fftw::dft({0, 1, 1, 0}, in, out);

	auto const shape      = sizes(out);
	auto const power_out  = power(out)/std::get<1>(shape)/std::get<2>(shape);
	BOOST_TEST( power(in) == power_out , boost::test_tools::tolerance(1e-10) );
}
|
||||
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
|
@ -1,21 +0,0 @@
|
|||
cmake_minimum_required(VERSION 3.11)

set(CMAKE_VERBOSE_MAKEFILE ON)
# Make the bundled Find modules (e.g. FindFFTW.cmake) visible to find_package.
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake")

project(boost-multi-adaptors-fftw VERSION 0.1 LANGUAGES CXX)

find_package(FFTW REQUIRED COMPONENTS DOUBLE_LIB)
include_directories(${FFTW_INCLUDE_DIRS})
link_libraries(${FFTW_LIBRARIES})

# NOTE(review): BLAS_LIBRARIES is not set anywhere in this file; it is only
# non-empty when a parent project or the command line provides it.
link_libraries(${BLAS_LIBRARIES})

# Default to C++17, but honour a standard chosen by the caller
# (e.g. `cmake -DCMAKE_CXX_STANDARD=20 ..`); an unconditional set() would
# shadow the cache entry and silently ignore that request.
if(NOT DEFINED CMAKE_CXX_STANDARD)
	set(CMAKE_CXX_STANDARD 17)
endif()
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

include_directories(${CMAKE_BINARY_DIR})

add_subdirectory(test)
|
||||
|
|
@ -1,68 +0,0 @@
|
|||
|
||||
# ==================================================================================================
|
||||
# This file is part of the CodeVault project. The project is licensed under Apache Version 2.0.
|
||||
# CodeVault is part of the EU-project PRACE-4IP (WP7.3.C).
|
||||
#
|
||||
# Author(s):
|
||||
# Cedric Nugteren <cedric.nugteren@surfsara.nl>
|
||||
#
|
||||
# ==================================================================================================
|
||||
#
|
||||
# Defines the following variables:
|
||||
# FFTW_FOUND Boolean holding whether or not the FFTW3 library was found
|
||||
# FFTW_INCLUDE_DIRS The FFTW3 include directory
|
||||
# FFTW_LIBRARIES The FFTW3 library
|
||||
#
|
||||
# In case FFTW3 is not installed in the default directory, set the FFTW_ROOT variable to point to
|
||||
# the root of FFTW3, such that 'fftw3.h' can be found in $FFTW_ROOT/include. This can either be done
|
||||
# using an environmental variable (e.g. export FFTW_ROOT=/path/to/fftw3) or using a CMake variable
|
||||
# (e.g. cmake -DFFTW_ROOT=/path/to/fftw3 ..).
|
||||
#
|
||||
# ==================================================================================================
|
||||
|
||||
# Sets the possible install locations
|
||||
# Search locations: user-provided roots (CMake variable, then environment
# variable) are passed as HINTS; the conventional system prefixes as PATHS.
set(FFTW_HINTS ${FFTW_ROOT} $ENV{FFTW_ROOT})
set(FFTW_PATHS /usr /usr/local)

# Directory containing fftw3.h
find_path(FFTW_INCLUDE_DIRS
	NAMES fftw3.h
	HINTS ${FFTW_HINTS}
	PATHS ${FFTW_PATHS}
	PATH_SUFFIXES include api inc include/x86_64 include/x64
	DOC "FFTW3 include header fftw3.h"
)

# The double-precision library itself
find_library(FFTW_LIBRARIES
	NAMES fftw3
	HINTS ${FFTW_HINTS}
	PATHS ${FFTW_PATHS}
	PATH_SUFFIXES lib lib64 lib/x86_64 lib/x64 lib/x86 lib/Win32
	DOC "FFTW3 library"
)

# Keep both cache entries out of the default GUI view.
mark_as_advanced(FFTW_INCLUDE_DIRS FFTW_LIBRARIES)

# Tell the user which half is missing and how to point CMake at it.
if(NOT FFTW_INCLUDE_DIRS)
	message(STATUS "Could NOT find 'fftw3.h', install FFTW3 or set FFTW_ROOT")
endif()
if(NOT FFTW_LIBRARIES)
	message(STATUS "Could NOT find the FFTW3 library, install it or set FFTW_ROOT")
endif()

# Sets FFTW_FOUND and handles the REQUIRED / QUIET arguments of find_package.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(FFTW DEFAULT_MSG FFTW_INCLUDE_DIRS FFTW_LIBRARIES)
|
||||
|
||||
# ==================================================================================================
|
||||
|
|
@ -1,174 +0,0 @@
|
|||
#if COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*-
|
||||
$CXXX $CXXFLAGS $0 -o $0x -lfftw3 -lfftw3_mpi&&$0x&&rm $0x;exit
|
||||
#endif
|
||||
// © Alfredo A. Correa 2020
|
||||
// apt-get install libfftw3-mpi-dev
|
||||
// compile with: mpicc simple_mpi_example.c -Wl,-rpath=/usr/local/lib -lfftw3_mpi -lfftw3 -o simple_mpi_example */
|
||||
|
||||
#ifndef MULTI_ADAPTOR_FFTW_MEMORY_HPP
|
||||
#define MULTI_ADAPTOR_FFTW_MEMORY_HPP
|
||||
|
||||
#include <fftw3.h>
|
||||
|
||||
#include "../../config/NODISCARD.hpp"
|
||||
|
||||
#include<cassert>
#include<complex>
#include<cstddef>
#include<limits>
#include<memory>
#include<new>
#include<type_traits>
|
||||
|
||||
namespace boost{
|
||||
namespace multi{
|
||||
namespace fftw{
|
||||
|
||||
template<class T>
|
||||
class allocator{
|
||||
public:
|
||||
using value_type = T;
|
||||
|
||||
#if 1
|
||||
using pointer = value_type*;
|
||||
using const_pointer = typename std::pointer_traits<pointer>::template
|
||||
rebind<value_type const>;
|
||||
using void_pointer = typename std::pointer_traits<pointer>::template
|
||||
rebind<void>;
|
||||
using const_void_pointer = typename std::pointer_traits<pointer>::template
|
||||
rebind<const void>;
|
||||
using difference_type = typename std::pointer_traits<pointer>::difference_type;
|
||||
using size_type = std::make_unsigned_t<difference_type>;
|
||||
|
||||
template <class U> struct rebind {typedef allocator<U> other;};
|
||||
#endif
|
||||
|
||||
allocator() noexcept {} // not required, unless used
|
||||
template <class U> allocator(allocator<U> const&) noexcept {}
|
||||
|
||||
NODISCARD("to avoid memory leak")
|
||||
value_type* allocate(std::size_t n) const{return static_cast<value_type*>(fftw_malloc(sizeof(T)*n));}
|
||||
|
||||
// value_type* // Use pointer if pointer is not a value_type*
|
||||
// allocate(std::size_t n){return static_cast<value_type*>(::operator new (n*sizeof(value_type)));}
|
||||
|
||||
void deallocate(value_type* p, std::size_t){fftw_free(p);}
|
||||
// void deallocate(value_type* p, std::size_t) noexcept // Use pointer if pointer is not a value_type*
|
||||
// {::operator delete(p);}
|
||||
|
||||
static int alignment_of(value_type* p){return fftw_alignment_of((double*)p);}
|
||||
|
||||
#if 1
|
||||
value_type* allocate(std::size_t n, const_void_pointer){return allocate(n);}
|
||||
|
||||
template <class U, class ...Args>
|
||||
void construct(U* p, Args&& ...args){::new(p) U(std::forward<Args>(args)...);}
|
||||
|
||||
template <class U> void destroy(U* p) noexcept{p->~U();}
|
||||
|
||||
std::size_t max_size() const noexcept{return std::numeric_limits<size_type>::max();}
|
||||
|
||||
allocator select_on_container_copy_construction() const{return *this;}
|
||||
|
||||
using propagate_on_container_copy_assignment = std::false_type;
|
||||
using propagate_on_container_move_assignment = std::false_type;
|
||||
using propagate_on_container_swap = std::false_type;
|
||||
using is_always_equal = std::is_empty<allocator>;
|
||||
#endif
|
||||
};
|
||||
|
||||
template <class T, class U>
|
||||
bool operator==(allocator<T> const&, allocator<U> const&) noexcept{return true;}
|
||||
|
||||
template <class T, class U>
|
||||
bool operator!=(allocator<T> const& x, allocator<U> const& y) noexcept{
|
||||
return !(x == y);
|
||||
}
|
||||
|
||||
#if 0
|
||||
template<typename T>
|
||||
struct allocator{
|
||||
using value_type = T;
|
||||
using pointer = value_type*;
|
||||
using size_type = std::size_t;
|
||||
using difference_type = std::ptrdiff_t;
|
||||
using propagate_on_container_move_assignment = std::true_type;
|
||||
// NODISCARD("to avoid memory leak")
|
||||
pointer allocate(size_type n) const{return static_cast<pointer>(fftw_malloc(sizeof(T)*n));}
|
||||
void deallocate(pointer data, size_type){fftw_free(data);}
|
||||
};
|
||||
#endif
|
||||
|
||||
//template<> allocator<std::complex<double>>::pointer allocator<std::complex<double>>::allocate(size_type n){return reinterpret_cast<std::complex<double>*>(fftw_alloc_complex(n));}
|
||||
//template<> allocator< double >::pointer allocator< double >::allocate(size_type n){return fftw_alloc_real(n) ;}
|
||||
|
||||
#if 0
|
||||
template<>
|
||||
struct allocator<std::complex<double>>{
|
||||
using value_type = std::complex<double>;
|
||||
using pointer = value_type*;
|
||||
using size_type = std::size_t;
|
||||
using difference_type = std::ptrdiff_t;
|
||||
using propagate_on_container_move_assignment = std::true_type;
|
||||
NODISCARD("to avoid memory leak")
|
||||
pointer allocate(size_type n){return reinterpret_cast<std::complex<double>*>(fftw_alloc_complex(n));}
|
||||
void deallocate(pointer data, size_type){fftw_free(data);}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct allocator<double>{
|
||||
using value_type = double;
|
||||
using pointer = value_type*;
|
||||
using size_type = std::size_t;
|
||||
using difference_type = std::ptrdiff_t;
|
||||
using propagate_on_container_move_assignment = std::true_type;
|
||||
NODISCARD("to avoid memory leak")
|
||||
pointer allocate(size_type n){return fftw_alloc_real(n);}
|
||||
void deallocate(pointer data, size_type){fftw_free(data);}
|
||||
};
|
||||
#endif
|
||||
|
||||
}}}
|
||||
|
||||
|
||||
#if 0//__NVCC__
|
||||
namespace std{
|
||||
|
||||
template<class T> struct allocator_traits<boost::multi::fftw::allocator<T>> : std::allocator_traits<std::allocator<T>>{
|
||||
using base = std::allocator_traits<std::allocator<T>>;
|
||||
template<class U> using rebind_alloc = boost::multi::fftw::allocator<U>;
|
||||
template<class A>
|
||||
static auto allocate(A& a, typename base::size_type n){return a.allocate(n);}
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
#if 0 //def __NVCC__
|
||||
namespace std{
|
||||
template<class T> struct allocator_traits<boost::multi::fftw::allocator<T>> : std::allocator_traits<std::allocator<T>>{
|
||||
template<class U> using rebind_alloc = boost::multi::fftw::allocator<U>;
|
||||
};
|
||||
}
|
||||
#endif
|
||||
|
||||
#if not __INCLUDE_LEVEL__
|
||||
|
||||
#include "../../array.hpp"
|
||||
|
||||
#include<vector>
|
||||
|
||||
namespace multi = boost::multi;
|
||||
|
||||
int main(){
|
||||
{
|
||||
std::vector<double, multi::fftw::allocator<double>> v(100);
|
||||
multi::array<double, 2> arr({10, 20});
|
||||
}
|
||||
{
|
||||
std::vector<std::complex<double>, multi::fftw::allocator<std::complex<double>>> v(100);
|
||||
multi::array<std::complex<double>, 2> arr({10, 20});
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
|
@ -1,207 +0,0 @@
|
|||
#if COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*-
|
||||
ln -sf $0 $0.cpp;mpicxx -g -I$HOME/prj/alf $0.cpp -o $0x -lfftw3 -lfftw3_mpi&&time mpirun -n 4 $0x&&rm $0x $0.cpp;exit
|
||||
#ln -sf $0 $0.cpp;mpicxx -g -I$HOME/prj/alf $0.cpp -o $0x -lfftw3 -lfftw3_mpi&&time mpirun -n 4 valgrind --leak-check=full --track-origins=yes --show-leak-kinds=all --suppressions=$HOME/prj/alf/boost/mpi3/test/communicator_main.cpp.openmpi.supp --error-exitcode=1 $0x&&rm $0x $0.cpp;exit
|
||||
#endif
|
||||
// © Alfredo A. Correa 2020
|
||||
// apt-get install libfftw3-mpi-dev
|
||||
// compile with: mpicc simple_mpi_example.c -Wl,-rpath=/usr/local/lib -lfftw3_mpi -lfftw3 -o simple_mpi_example */
|
||||
|
||||
#include "../../array.hpp"
|
||||
#include "../../config/NODISCARD.hpp"
|
||||
|
||||
#include<boost/mpi3/communicator.hpp>
|
||||
#include<boost/mpi3/environment.hpp>
|
||||
|
||||
#include "../fftw.hpp"
|
||||
|
||||
#include <fftw3-mpi.h>
|
||||
|
||||
namespace boost{
|
||||
namespace multi{
|
||||
namespace fftw{
|
||||
|
||||
template<typename T>
|
||||
struct allocator : std::allocator<T>{
|
||||
template <typename U> struct rebind{using other = fftw::allocator<U>;};
|
||||
NODISCARD("to avoid memory leak")
|
||||
T* allocate(std::size_t n){ return static_cast<T*>(fftw_malloc(sizeof(T)*n));}
|
||||
void deallocate(T* data, std::size_t){fftw_free(data);}
|
||||
};
|
||||
|
||||
namespace mpi{
|
||||
|
||||
struct environment{
|
||||
environment(){fftw_mpi_init();}
|
||||
~environment(){fftw_mpi_cleanup();}
|
||||
};
|
||||
|
||||
template<class T, multi::dimensionality_type D, class Alloc = fftw::allocator<T>>
|
||||
struct array;
|
||||
|
||||
namespace bmpi3 = boost::mpi3;
|
||||
|
||||
template<class T, class Alloc>
|
||||
struct array<T, multi::dimensionality_type{2}, Alloc>{
|
||||
using element_type = T;
|
||||
|
||||
mutable bmpi3::communicator comm_;
|
||||
Alloc alloc_;
|
||||
|
||||
typename std::allocator_traits<Alloc>::size_type local_count_;
|
||||
array_ptr<T, 2, typename std::allocator_traits<Alloc>::pointer> local_ptr_;
|
||||
ptrdiff_t n0_;
|
||||
|
||||
static std::pair<typename std::allocator_traits<Alloc>::size_type, multi::extensions_type_<2>>
|
||||
local_2d(multi::extensions_type_<2> ext, boost::mpi3::communicator const& comm){
|
||||
ptrdiff_t local_n0, local_0_start;
|
||||
auto count = fftw_mpi_local_size_2d(std::get<0>(ext).size(), std::get<1>(ext).size(), comm.get(), &local_n0, &local_0_start);
|
||||
assert( count >= local_n0*std::get<1>(ext).size() );
|
||||
return {count, {{local_0_start, local_0_start + local_n0}, std::get<1>(ext)}};
|
||||
}
|
||||
static auto local_count_2d(multi::extensions_type_<2> ext, boost::mpi3::communicator const& comm){
|
||||
return local_2d(ext, comm).first;
|
||||
}
|
||||
static auto local_extension_2d(multi::extensions_type_<2> ext, boost::mpi3::communicator const& comm){
|
||||
return local_2d(ext, comm).second;
|
||||
}
|
||||
array(multi::extensions_type_<2> ext, bmpi3::communicator comm = mpi3::environment::self(), Alloc alloc = {}) :
|
||||
comm_{std::move(comm)},
|
||||
alloc_{alloc},
|
||||
local_count_{local_count_2d(ext, comm_)},
|
||||
local_ptr_ {alloc_.allocate(local_count_), local_extension_2d(ext, comm_)},
|
||||
n0_{multi::layout_t<2>(ext).size()}
|
||||
{
|
||||
if(not std::is_trivially_default_constructible<element_type>{})
|
||||
adl_alloc_uninitialized_default_construct_n(alloc_, local_ptr_->base(), local_ptr_->num_elements());
|
||||
}
|
||||
bmpi3::communicator& comm() const&{return comm_;}
|
||||
array(array const& other) :
|
||||
comm_ {other.comm_},
|
||||
alloc_ {other.alloc_},
|
||||
local_count_{other.local_count_},
|
||||
local_ptr_ {alloc_.allocate(local_count_), local_extension_2d(other.extensions(), comm_)},
|
||||
n0_{multi::layout_t<2>(other.extensions()).size()}
|
||||
{
|
||||
local_cutout() = other.local_cutout();
|
||||
}
|
||||
array(array&& other) :
|
||||
comm_ {std::move(other.comm_)},
|
||||
alloc_ {std::move(other.alloc_)},
|
||||
local_count_{std::exchange(other.local_count_, 0)},
|
||||
local_ptr_ {std::exchange(other.local_ptr_, nullptr)},
|
||||
n0_{multi::layout_t<2>(other.extensions()).size()}
|
||||
{}
|
||||
explicit array(multi::array<T, 2> const& other, bmpi3::communicator comm = mpi3::environment::self(), Alloc alloc = {}) :
|
||||
array(other.extensions(), comm, alloc)
|
||||
{
|
||||
local_cutout() = other.stenciled(std::get<0>(local_cutout().extensions()), std::get<1>(local_cutout().extensions()));
|
||||
}
|
||||
bool empty() const{return extensions().num_elements();}
|
||||
array_ref <T, 2> local_cutout() &{return *local_ptr_;}
|
||||
array_cref<T, 2> local_cutout() const&{return *local_ptr_;}
|
||||
ptrdiff_t local_count() const&{return local_count_;}
|
||||
multi::extensions_type_<2> extensions() const&{return {n0_, std::get<1>(local_cutout().extensions())};}
|
||||
ptrdiff_t num_elements() const&{return multi::layout_t<2>(extensions()).num_elements();}
|
||||
operator multi::array<T, 2>() const&{ static_assert( std::is_trivially_copy_assignable<T>{}, "!" );
|
||||
multi::array<T, 2> ret(extensions(), alloc_);
|
||||
comm_.all_gatherv_n(local_cutout().data_elements(), local_cutout().num_elements(), ret.data_elements());
|
||||
return ret;
|
||||
}
|
||||
array& operator=(multi::array<T, 2> const& other) &{
|
||||
if(other.extensions() == extensions()) local_cutout() = other.stenciled(std::get<0>(local_cutout().extensions()), std::get<1>(local_cutout().extensions()));
|
||||
else{
|
||||
array tmp{other};
|
||||
std::swap(*this, tmp);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
bool operator==(multi::array<T, 2> const& other) const&{
|
||||
if(other.extensions() != extensions()) return false;
|
||||
return comm_&=(local_cutout() == other.stenciled(std::get<0>(local_cutout().extensions()), std::get<1>(local_cutout().extensions())));
|
||||
}
|
||||
friend bool operator==(multi::array<T, 2> const& other, array const& self){
|
||||
return self.operator==(other);
|
||||
}
|
||||
bool operator==(array<T, 2> const& other) const&{assert(comm_==other.comm_);
|
||||
return comm_&=(local_cutout() == other.local_cutout());
|
||||
}
|
||||
array& operator=(array const& other)&{
|
||||
if(other.extensions() == this->extensions() and other.comm_ == other.comm_)
|
||||
local_cutout() = other.local_cutout();
|
||||
else assert(0);
|
||||
return *this;
|
||||
}
|
||||
~array() noexcept{alloc_.deallocate(local_cutout().data_elements(), local_count_);}
|
||||
};
|
||||
|
||||
/// Distributed 2D DFT of `A` into `B` with sign `s`; returns `B`.
/// Both arrays must have the same global extensions and communicator.
/// Declared `inline` because this is a non-template function defined in a
/// header; without it, including the header from two translation units
/// produces duplicate definitions.
inline array<std::complex<double>, 2>& dft(array<std::complex<double>, 2> const& A, array<std::complex<double>, 2>& B, fftw::sign s){
	assert( A.extensions() == B.extensions() );
	assert( A.comm() == B.comm() );

	// The planner interface takes a non-const input pointer even though A is
	// only read here; the const removal is spelled out instead of hidden in a C cast.
	auto* const in  = reinterpret_cast<fftw_complex*>(const_cast<std::complex<double>*>(A.local_cutout().data_elements()));
	auto* const out = reinterpret_cast<fftw_complex*>(B.local_cutout().data_elements());

	fftw_plan p = fftw_mpi_plan_dft_2d(
		std::get<0>(A.extensions()).size(), std::get<1>(A.extensions()).size(),
		in, out,
		A.comm().get(),
		s, FFTW_ESTIMATE
	);
	assert( p ); // executing a null plan would crash
	fftw_execute(p);
	fftw_destroy_plan(p);
	return B;
}
|
||||
|
||||
/// Forward transform of `A` into `B`; shorthand for dft(A, B, fftw::forward).
/// `inline` because it is a non-template function defined in a header.
inline array<std::complex<double>, 2>& dft_forward(array<std::complex<double>, 2> const& A, array<std::complex<double>, 2>& B){
	return dft(A, B, fftw::forward);
}
|
||||
|
||||
/// Forward transform of `A` returned as a new array with the same global
/// extensions. The result is constructed with the array's default communicator
/// argument, exactly as before.
/// `inline` because it is a non-template function defined in a header.
inline array<std::complex<double>, 2> dft_forward(array<std::complex<double>,2> const& A){
	array<std::complex<double>, 2> ret(A.extensions());
	dft_forward(A, ret);
	return ret;
}
|
||||
|
||||
}}}}
|
||||
|
||||
#if not __INCLUDE_LEVEL__
|
||||
|
||||
#include<boost/mpi3/main.hpp>
|
||||
#include<boost/mpi3/environment.hpp>
|
||||
#include<boost/mpi3/ostream.hpp>
|
||||
#include "../fftw.hpp"
|
||||
|
||||
namespace mpi3 = boost::mpi3;
|
||||
namespace multi = boost::multi;
|
||||
|
||||
// Demo: fill a distributed 41x321 array, gather a replicated copy, transform
// both, and print every local element whose difference is not below 1e-12.
int mpi3::main(int, char*[], mpi3::communicator world){
	multi::fftw::mpi::environment fenv;

	multi::fftw::mpi::array<std::complex<double>, 2> A({41, 321}, world);

	mpi3::ostream os{world};
	{
		auto const global = A.extensions();
		os<< "global sizes" << std::get<0>(global) <<'x'<< std::get<1>(global) <<' '<< A.num_elements() <<std::endl;
	}
	os<< A.local_cutout().extension() <<'x'<< std::get<1>(A.local_cutout().extensions()) <<"\t#="<< A.local_cutout().num_elements() <<" allocated "<< A.local_count() <<std::endl;

	{
		// every element is a unit-modulus complex number derived from its indices
		auto const local = A.local_cutout().extensions();
		for(auto i : std::get<0>(local)){
			for(auto j : std::get<1>(local)){
				std::complex<double> const z(i + j, i + 2*j + 1);
				A.local_cutout()[i][j] = z/std::abs(z);
			}
		}
	}

	multi::array<std::complex<double>, 2> A2 = A;
	assert( A2 == A );

	using multi::fftw::dft_forward;

	dft_forward(A , A );
	dft_forward(A2, A2);

	{
		auto const local = A.local_cutout().extensions();
		for(auto i : std::get<0>(local)){
			for(auto j : std::get<1>(local)){
				auto const diff = A.local_cutout()[i][j] - A2[i][j];
				if(not( std::abs(diff) < 1e-12 )){
					std::cout << diff <<' '<< std::abs(diff) << std::endl;
				}
			}
		}
	}
	return 0;
}
|
||||
#endif
|
||||
|
||||
|
|
@ -1,157 +0,0 @@
|
|||
#if COMPILATION_INSTRUCTIONS
|
||||
#mpicxx -I$HOME/prj/alf $0 -g -o $0x -lfftw3 -lfftw3_mpi &&mpirun -n 4 valgrind $0x;exit
|
||||
$CXXX $CXXFLAGS -O2 -g `mpicxx -showme:compile|sed 's/-pthread/ /g'` -I$HOME/prj/alf $0 -o $0x `mpicxx -showme:link|sed 's/-pthread/ /g'` -lfftw3 -lfftw3_mpi -lboost_timer&&mpirun -n 4 $0x;exit
|
||||
#endif
|
||||
|
||||
#ifndef MULTI_FFTW_MPI_DISTRIBUTION_HPP
|
||||
#define MULTI_FFTW_MPI_DISTRIBUTION_HPP
|
||||
|
||||
#include <fftw3-mpi.h>
|
||||
|
||||
#include<boost/mpi3/communicator.hpp>
|
||||
|
||||
#include "../../../array_ref.hpp"
|
||||
|
||||
#include <experimental/tuple>
|
||||
|
||||
namespace boost{
|
||||
namespace multi{
|
||||
namespace fftw{
|
||||
namespace mpi{
|
||||
|
||||
namespace bmpi3 = boost::mpi3;
|
||||
|
||||
using difference_type = std::ptrdiff_t;
|
||||
|
||||
template<std::ptrdiff_t ElementSize>
|
||||
class many{
|
||||
public:
|
||||
using difference_type = std::ptrdiff_t;
|
||||
private:
|
||||
difference_type local_count_;
|
||||
difference_type local_n0_;
|
||||
difference_type local_0_start_;
|
||||
static auto sizes(boost::multi::extensions_type_<2> const& ext){
|
||||
using std::experimental::apply;
|
||||
return apply([](auto... e){return std::array<difference_type, 2>{e.size()...};}, ext);
|
||||
}
|
||||
public:
|
||||
many(extensions_type_<2> const& ext, bmpi3::communicator const& comm, difference_type block0 = FFTW_MPI_DEFAULT_BLOCK)
|
||||
: local_count_{
|
||||
std::max(
|
||||
difference_type(
|
||||
fftw_mpi_local_size_many(
|
||||
2, sizes(ext).data(), ElementSize/sizeof(double),
|
||||
block0, comm.get(),
|
||||
&local_n0_, &local_0_start_
|
||||
)*sizeof(double)/ElementSize
|
||||
),
|
||||
difference_type(1)
|
||||
)
|
||||
}
|
||||
{
|
||||
static_assert( ElementSize%sizeof(double) == 0 , "!" );
|
||||
}
|
||||
difference_type local_count() const{return local_count_ + 100;}
|
||||
multi::iextension local_extension_0() const{return {local_0_start_, local_0_start_ + local_n0_};}
|
||||
multi::iextension local_extension() const{return local_extension_0();}
|
||||
bool operator==(many const& other) const{
|
||||
return std::tie(this->local_count_, this->local_n0_, this->local_0_start_)
|
||||
== std::tie(other.local_count_, other.local_n0_, other.local_0_start_);
|
||||
}
|
||||
bool operator!=(many const& other) const{return not operator==(other);}
|
||||
};
|
||||
|
||||
template<std::ptrdiff_t ElementSize>
|
||||
class many_transposed{
|
||||
public:
|
||||
using difference_type = std::ptrdiff_t;
|
||||
private:
|
||||
difference_type local_count_;
|
||||
difference_type local_n0_;
|
||||
difference_type local_0_start_;
|
||||
difference_type local_n1_;
|
||||
difference_type local_1_start_;
|
||||
static auto sizes(boost::multi::extensions_type_<2> const& ext){
|
||||
using std::experimental::apply;
|
||||
return apply([](auto... e){return std::array<difference_type, 2>{e.size()...};}, ext);
|
||||
}
|
||||
public:
|
||||
static_assert(ElementSize%sizeof(double)==0, "!");
|
||||
many_transposed(
|
||||
extensions_type_<2> const& ext, boost::mpi3::communicator const& comm,
|
||||
difference_type block0 = FFTW_MPI_DEFAULT_BLOCK, difference_type block1 = FFTW_MPI_DEFAULT_BLOCK
|
||||
) :
|
||||
local_count_{
|
||||
std::max(
|
||||
difference_type(
|
||||
fftw_mpi_local_size_many_transposed(
|
||||
2, sizes(ext).data(), ElementSize/sizeof(double),
|
||||
block0, block1, comm.get(),
|
||||
&local_n0_, &local_0_start_,
|
||||
&local_n1_, &local_1_start_
|
||||
)*sizeof(double)/ElementSize
|
||||
),
|
||||
difference_type(1)
|
||||
)
|
||||
}{
|
||||
static_assert( ElementSize%sizeof(double) == 0 , "!");
|
||||
// FFTW_MPI_DEFAULT_BLOCK = (size + comm.size - 1)/comm.size
|
||||
assert( local_count() >= local_extension0().size()*local_extension1().size() );
|
||||
// assert( block0*comm.size() >= std::get<0>(ext).size() or block0 == FFTW_MPI_DEFAULT_BLOCK );
|
||||
}
|
||||
difference_type local_count() const{return local_count_ + 100;}
|
||||
multi::iextension local_extension0() const{return {local_0_start_, local_0_start_ + local_n0_};}
|
||||
multi::iextension local_extension1() const{return {local_1_start_, local_1_start_ + local_n1_};}
|
||||
bool operator==(many_transposed const& other) const{
|
||||
return std::tie(this->local_count_, this->local_n0_, this->local_0_start_, this->local_n1_, this->local_1_start_)
|
||||
== std::tie(other.local_count_, other.local_n0_, other.local_0_start_, other.local_n1_, other.local_1_start_);
|
||||
}
|
||||
bool operator!=(many_transposed const& other) const{return not operator==(other);}
|
||||
};
|
||||
|
||||
}}}}
|
||||
|
||||
#if not __INCLUDE_LEVEL__
|
||||
|
||||
#include<boost/mpi3/main_environment.hpp>
|
||||
#include<boost/mpi3/ostream.hpp>
|
||||
|
||||
#include "../../fftw/mpi/environment.hpp"
|
||||
|
||||
namespace bmpi3 = boost::mpi3;
|
||||
namespace multi = boost::multi;
|
||||
namespace mpi = multi::fftw::mpi;
|
||||
|
||||
int bmpi3::main(int, char*[], mpi3::environment& env){
|
||||
multi::fftw::mpi::environment fenv(env);
|
||||
auto world = env.world();
|
||||
|
||||
mpi3::ostream os{world};
|
||||
|
||||
using std::endl;
|
||||
{
|
||||
os<< "forced distribution "<<endl;
|
||||
mpi::many_transposed<sizeof(double)> dist({12, 43}, world, (12+world.size()-1)/world.size());//533/world.size());
|
||||
|
||||
os<< "local element count "<< dist.local_count() <<endl;
|
||||
os<< "local rows "<< dist.local_extension0().size() <<endl;
|
||||
os<< "local extension "<< dist.local_extension0() <<endl;
|
||||
}
|
||||
{
|
||||
os<< "automatic distribution "<<std::endl;
|
||||
mpi::many_transposed<sizeof(double)> dist({12, 43}, world);//533/world.size());
|
||||
|
||||
os<< "local element count "<< dist.local_count() <<endl;
|
||||
os<< "local rows "<< dist.local_extension0().size() <<endl;
|
||||
os<< "local extension "<< dist.local_extension0() <<endl;
|
||||
}
|
||||
mpi::many_transposed<sizeof(double)> forced({12, 43}, world);
|
||||
mpi::many_transposed<sizeof(double)> automa({12, 43}, world);
|
||||
assert( forced == automa );
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
|
@ -1,44 +0,0 @@
|
|||
#if COMPILATION_INSTRUCTIONS
|
||||
#mpicxx -I$HOME/prj/alf $0 -g -o $0x -lfftw3 -lfftw3_mpi &&mpirun -n 2 valgrind $0x;exit
|
||||
$CXXX $CXXFLAGS -O2 -g `mpicxx -showme:compile|sed 's/-pthread/ /g'` -I$HOME/prj/alf $0 -o $0x `mpicxx -showme:link|sed 's/-pthread/ /g'` -lfftw3 -lfftw3_mpi -lboost_timer&&mpirun -n 2 $0x;exit
|
||||
#endif
|
||||
|
||||
#ifndef MULTI_FFTW_MPI_ENVIRONMENT_HPP
|
||||
#define MULTI_FFTW_MPI_ENVIRONMENT_HPP
|
||||
|
||||
#include <fftw3-mpi.h>
|
||||
|
||||
#include<boost/mpi3/communicator.hpp>
|
||||
|
||||
#include "../../../array_ref.hpp"
|
||||
|
||||
#include <experimental/tuple>
|
||||
|
||||
namespace boost{
|
||||
namespace multi{
|
||||
namespace fftw{
|
||||
namespace mpi{
|
||||
|
||||
namespace bmpi3 = boost::mpi3;
|
||||
|
||||
struct environment{
|
||||
environment(bmpi3::environment&){fftw_mpi_init();}
|
||||
~environment(){fftw_mpi_cleanup();}
|
||||
};
|
||||
|
||||
}}}}
|
||||
|
||||
#if not __INCLUDE_LEVEL__
|
||||
|
||||
#include<boost/mpi3/main_environment.hpp>
|
||||
|
||||
namespace bmpi3 = boost::mpi3;
|
||||
namespace multi = boost::multi;
|
||||
|
||||
// Smoke test: the FFTW-MPI environment is created and destroyed inside a
// running mpi3 environment.
int bmpi3::main(int, char*[], mpi3::environment& env){
	{
		multi::fftw::mpi::environment fenv(env);
	}
	return 0;
}
|
||||
#endif
|
||||
#endif
|
||||
|
|
@ -1,566 +0,0 @@
|
|||
#if COMPILATION// -*- indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil; -*-
|
||||
$CXXX $CXXFLAGS `mpicxx -showme:compile|sed 's/-pthread/ /g'` -I$HOME/prj/alf $0 -o $0x `mpicxx -showme:link|sed 's/-pthread/ /g'` -lfftw3 -lfftw3_mpi&&mpirun -n 4 $0x&&rm $0x;exit
|
||||
#ln -sf $0 $0.cpp;mpicxx -g -I$HOME/prj/alf $0.cpp -o $0x -lfftw3 -lfftw3_mpi&&mpirun -n 4 valgrind --leak-check=full --track-origins=yes --show-leak-kinds=all --suppressions=$HOME/prj/alf/boost/mpi3/test/communicator_main.cpp.openmpi.supp --error-exitcode=1 $0x&&rm $0x $0.cpp;exit
|
||||
#endif
|
||||
// © Alfredo A. Correa 2020
|
||||
// apt-get install libfftw3-mpi-dev
|
||||
// compile with: mpicc simple_mpi_example.c -Wl,-rpath=/usr/local/lib -lfftw3_mpi -lfftw3 -o simple_mpi_example */
|
||||
|
||||
#ifndef MULTI_ADAPTOR_FFTW_MPI_SCATTERED_ARRAY_HPP
|
||||
#define MULTI_ADAPTOR_FFTW_MPI_SCATTERED_ARRAY_HPP
|
||||
|
||||
#include "../mpi/distribution.hpp"
|
||||
#include "boost/mpi3/process.hpp"
|
||||
|
||||
namespace boost{
|
||||
namespace multi{
|
||||
namespace fftw{
|
||||
namespace mpi{
|
||||
|
||||
namespace bmpi3 = boost::mpi3;
|
||||
|
||||
template<class T, multi::dimensionality_type D, class Alloc = std::allocator<T>> // cannot use fftw::allocator<T> as default because it produces error in nvcc: `template<class _Tp> using __pointer = typename _Tp::pointer’ is protected within this context`
|
||||
class scattered_array;
|
||||
|
||||
template<class T, multi::dimensionality_type D, class Alloc = std::allocator<T>> // cannot use fftw::allocator<T> as default because it produces error in nvcc: `template<class _Tp> using __pointer = typename _Tp::pointer’ is protected within this context`
|
||||
class gathered_array;
|
||||
|
||||
template<class T, class Alloc>
|
||||
struct array{
|
||||
using local_distrubution_type = many_transposed<sizeof(T)>;
|
||||
using local_allocator_type = Alloc;
|
||||
using local_pointer_type = typename std::allocator_traits<local_allocator_type>::pointer;
|
||||
protected:
|
||||
local_distrubution_type local_distrubution_;
|
||||
local_allocator_type alloc_;
|
||||
local_pointer_type local_data_;
|
||||
multi::iextension first_ext_;
|
||||
multi::iextension second_ext_;
|
||||
public:
|
||||
array(
|
||||
multi::extensions_type_<2> exts, bmpi3::communicator comm,
|
||||
difference_type block0 = FFTW_MPI_DEFAULT_BLOCK, difference_type block1 = FFTW_MPI_DEFAULT_BLOCK,
|
||||
Alloc alloc = {}
|
||||
) :
|
||||
local_distrubution_{exts, comm, block0, block1},
|
||||
alloc_{alloc},
|
||||
local_data_{alloc_.allocate(local_distrubution_.local_count())},
|
||||
first_ext_{std::get<0>(exts)},
|
||||
second_ext_{std::get<1>(exts)}
|
||||
{}
|
||||
~array() noexcept{alloc_.deallocate(local_data_, local_distrubution_.local_count());}
|
||||
auto local_cutout() &{return array_ref <T, 2, local_pointer_type>(local_data_, local_distrubution_.local_extension0()*local_distrubution_.local_extension1());}//.rotated();}
|
||||
auto local_cutout() const&{return array_cref<T, 2, local_pointer_type>(local_data_, local_distrubution_.local_extension0()*local_distrubution_.local_extension1());}//.rotated();}
|
||||
};
|
||||
|
||||
template<class T, class Alloc>
|
||||
class gathered_array<T, 2, Alloc> : public array<T, Alloc>{
|
||||
bmpi3::communicator comm_;
|
||||
public:
|
||||
gathered_array(multi::extensions_type_<2> exts, bmpi3::communicator comm, Alloc alloc = {}) :
|
||||
array<T, Alloc>{exts, comm, std::get<0>(exts).size(), std::get<1>(exts).size(), alloc},
|
||||
comm_{std::move(comm)}
|
||||
{}
|
||||
scattered_array<T, 2, Alloc> scatter() const{
|
||||
scattered_array<T, 2, Alloc> other({this->first_ext_, this->second_ext_}, comm_);
|
||||
auto p = fftw_mpi_plan_many_transpose(
|
||||
this->second_ext_.size(), this->first_ext_.size(),
|
||||
sizeof(T)/sizeof(double),
|
||||
this->second_ext_.size(), FFTW_MPI_DEFAULT_BLOCK,
|
||||
reinterpret_cast<double*>(const_cast<T*>(this->local_cutout().base())),
|
||||
reinterpret_cast<double*>( other.local_cutout().base() ),
|
||||
comm_.get(), FFTW_ESTIMATE | FFTW_MPI_TRANSPOSED_IN
|
||||
);
|
||||
fftw_execute(p);
|
||||
fftw_destroy_plan(p);
|
||||
return other;
|
||||
}
|
||||
};
|
||||
|
||||
template<class T, class Alloc>
|
||||
class scattered_array<T, 2, Alloc>{
|
||||
public:
|
||||
using local_distrubution_type = many<sizeof(T)>;
|
||||
using local_allocator_type = Alloc;
|
||||
using local_pointer_type = typename std::allocator_traits<local_allocator_type>::pointer;
|
||||
private:
|
||||
local_distrubution_type local_distribution_;
|
||||
local_allocator_type alloc_;
|
||||
local_pointer_type local_data_;
|
||||
multi::iextension first_ext_;
|
||||
multi::iextension second_ext_;
|
||||
mutable bmpi3::communicator comm_;
|
||||
public:
|
||||
scattered_array(multi::extensions_type_<2> exts, bmpi3::communicator comm, Alloc alloc = {}) :
|
||||
local_distribution_{exts, comm},
|
||||
alloc_{alloc},
|
||||
local_data_{alloc_.allocate(local_distribution_.local_count())},
|
||||
first_ext_{std::get<0>(exts)},
|
||||
second_ext_{std::get<1>(exts)},
|
||||
comm_{std::move(comm)}
|
||||
{}
|
||||
~scattered_array() noexcept{alloc_.deallocate(local_data_, local_distribution_.local_count());}
|
||||
|
||||
array_ref <T, 2, local_pointer_type> local_cutout() &{return array_ref <T, 2, local_pointer_type>(local_data_, local_distribution_.local_extension_0()*second_ext_);}
|
||||
array_cref<T, 2, local_pointer_type> local_cutout() const&{return array_cref<T, 2, local_pointer_type>(local_data_, local_distribution_.local_extension_0()*second_ext_);}
|
||||
|
||||
mpi::gathered_array<T, 2> gather() const{
|
||||
mpi::gathered_array<T, 2> other({first_ext_, second_ext_}, comm_);
|
||||
auto p = fftw_mpi_plan_many_transpose(
|
||||
first_ext_.size(), second_ext_.size(),
|
||||
// std::get<0>(this->extensions()).size(), std::get<1>(this->extensions()).size(),
|
||||
sizeof(T)/sizeof(double),
|
||||
FFTW_MPI_DEFAULT_BLOCK, second_ext_.size(), //this->size(),
|
||||
reinterpret_cast<double*>(const_cast<T*>(this->local_cutout().base())),
|
||||
reinterpret_cast<double*>( other.local_cutout().base() ),
|
||||
comm_.get(), FFTW_ESTIMATE | FFTW_MPI_TRANSPOSED_OUT
|
||||
);
|
||||
fftw_execute(p);
|
||||
fftw_destroy_plan(p);
|
||||
return other;
|
||||
}
|
||||
bool operator==(scattered_array const& other) const{return comm_&=(local_cutout() == other.local_cutout());}
|
||||
bool operator!=(scattered_array const& other) const{return not operator==(other);}
|
||||
};
|
||||
|
||||
}}}}
|
||||
|
||||
#if 0
|
||||
template<class T, class Alloc>
|
||||
class scattered_array<T, multi::dimensionality_type{2}, Alloc>{
|
||||
public:
|
||||
using local_allocator_type = Alloc;
|
||||
using local_pointer_t = typename std::allocator_traits<local_allocator_type>::pointer;
|
||||
private:
|
||||
using local_distrubution_type = distribution<sizeof(T)>;
|
||||
using layout_type = layout_t<T, 2>;
|
||||
Alloc alloc_ ;
|
||||
local_pointer_t local_data_; // typename boost::multi::array_ptr<T, 2, local_pointer_t> local_ptr_;
|
||||
public:
|
||||
scattered_array(multi::extensions_type_<2> ext, bmpi3::communicator comm = mpi3::environment::self(), Alloc alloc = {}) :
|
||||
layout_t<T, 2>(ext, comm),
|
||||
alloc_{alloc},
|
||||
local_data_{std::allocator_traits<Alloc>::allocate(alloc_, scattered_array::local_count())}//,
|
||||
{
|
||||
if(not std::is_trivially_default_constructible<typename scattered_array::element_type>{})
|
||||
adl_alloc_uninitialized_default_construct_n(alloc_, local_cutout().data_elements()/*local_ptr_->base()*/, local_cutout().num_elements());//local_ptr_->num_elements());
|
||||
}
|
||||
scattered_array(scattered_array const& other) :
|
||||
layout_t<T, 2> {other},
|
||||
alloc_ {other.alloc_},
|
||||
local_data_ {std::allocator_traits<Alloc>::allocate(alloc_, layout_type::local_count())}
|
||||
{
|
||||
scoped_barrier(other.comm());
|
||||
local_cutout() = other.local_cutout();
|
||||
/*
|
||||
auto p1 = fftw_mpi_plan_many_transpose(
|
||||
std::get<0>(this->extensions()).size(), std::get<1>(this->extensions()).size(), sizeof(T)/sizeof(double),
|
||||
other.block(), this->block(),
|
||||
reinterpret_cast<double*>(const_cast<T*>(other.local_cutout().data_elements())),
|
||||
reinterpret_cast<double*>( this->local_cutout().data_elements() ),
|
||||
this->comm().get(), FFTW_ESTIMATE
|
||||
);
|
||||
auto p2 = fftw_mpi_plan_many_transpose(
|
||||
std::get<1>(this->extensions()).size(), std::get<0>(this->extensions()).size(), sizeof(T)/sizeof(double),
|
||||
other.block(), this->block(),
|
||||
reinterpret_cast<double*>( this->local_cutout().data_elements()),
|
||||
reinterpret_cast<double*>( this->local_cutout().data_elements()),
|
||||
this->comm().get(), FFTW_ESTIMATE
|
||||
);
|
||||
fftw_execute(p1);
|
||||
fftw_execute(p2);
|
||||
fftw_destroy_plan(p2);
|
||||
fftw_destroy_plan(p1);
|
||||
*/
|
||||
}
|
||||
scattered_array(scattered_array&& other) : // intel calls this function to return from a function
|
||||
layout_type{std::exchange(static_cast<layout_type&>(other), layout_type(multi::extensions_type_<2>{}, other.comm()))},
|
||||
alloc_ {std::move(other.alloc_)},
|
||||
local_data_{other.local_data_}
|
||||
{
|
||||
assert(not other.extensions());
|
||||
assert(other.local_count() == 0 );
|
||||
}
|
||||
|
||||
friend std::ostream& operator<<(std::ostream& os, scattered_array const& self){
|
||||
for(int r = 0; r != self.comm().size(); ++r){
|
||||
if(self.comm().rank() == r){
|
||||
if(auto x = self.local_cutout().extensions())
|
||||
for(auto i : std::get<0>(x)){
|
||||
for(auto j : std::get<1>(x))
|
||||
os<< self.local_cutout()[i][j] <<' ';
|
||||
os<<std::endl;
|
||||
}
|
||||
}
|
||||
self.comm().barrier();
|
||||
}
|
||||
return os;
|
||||
}
|
||||
|
||||
array_ref <T, 2, local_pointer_t> local_cutout() &//{return *local_ptr_;}
|
||||
{return array_ref <T, 2, local_pointer_t>(local_data_, this->local_extensions());}
|
||||
array_cref<T, 2, local_pointer_t> local_cutout() const&//{return *local_ptr_;}
|
||||
{return array_cref<T, 2, local_pointer_t>(local_data_, this->local_extensions());}
|
||||
|
||||
local_pointer_t local_data(){return local_data_;}
|
||||
typename std::pointer_traits<local_pointer_t>::template rebind<T const> local_data() const{return local_data_;}
|
||||
|
||||
auto extensions() const{return this->global_extensions();}
|
||||
|
||||
operator multi::array<T, 2>() const&{
|
||||
static_assert( std::is_trivially_copy_assignable<T>{}, "!" );
|
||||
multi::array<T, 2> ret(this->global_extensions(), 1., alloc_);
|
||||
this->comm().all_gatherv_n(local_data_, local_cutout().num_elements(), ret.data_elements());
|
||||
return ret;
|
||||
}
|
||||
|
||||
mpi::gathered_array<T, 2> gather() const{
|
||||
mpi::gathered_array<T, 2> other(this->extensions(), this->comm());
|
||||
this->comm_.gatherv_n(local_cutout().data_elements(), local_cutout().num_elements(), other.data_elements());
|
||||
static_assert( std::is_trivially_copy_assignable<T>{} and sizeof(T)%sizeof(double)==0, "!");
|
||||
|
||||
/* {
|
||||
fftw_plan p = fftw_mpi_plan_many_transpose(
|
||||
std::get<0>(this->extensions()).size(), std::get<1>(this->extensions()).size(), sizeof(T)/sizeof(double),
|
||||
this->block(), std::get<0>(this->extensions()).size(),
|
||||
reinterpret_cast<double*>(const_cast<T*>(local_cutout().data_elements())),
|
||||
reinterpret_cast<double*>(ret.data_elements()),
|
||||
this->comm().get(), FFTW_ESTIMATE | FFTW_MPI_TRANSPOSED_IN | FFTW_MPI_TRANSPOSED_OUT
|
||||
);
|
||||
fftw_execute(p);
|
||||
fftw_destroy_plan(p);
|
||||
}*/
|
||||
|
||||
auto p1 = fftw_mpi_plan_many_transpose(
|
||||
std::get<0>(this->extensions()).size(), std::get<1>(this->extensions()).size(),
|
||||
sizeof(T)/sizeof(double),
|
||||
FFTW_MPI_DEFAULT_BLOCK, this->size(),
|
||||
reinterpret_cast<double*>(const_cast<T*>(this->local_cutout().data_elements())),
|
||||
reinterpret_cast<double*>( other.data_elements() ),
|
||||
this->comm().get(), FFTW_ESTIMATE
|
||||
);
|
||||
|
||||
auto p2 = fftw_mpi_plan_many_transpose(
|
||||
std::get<1>(this->extensions()).size(), std::get<0>(this->extensions()).size(),
|
||||
sizeof(T)/sizeof(double),
|
||||
other.block(), other.block(),
|
||||
reinterpret_cast<double*>( other.data_elements()),
|
||||
reinterpret_cast<double*>( other.data_elements()),
|
||||
this->comm().get(), FFTW_ESTIMATE
|
||||
);
|
||||
fftw_execute(p1);
|
||||
fftw_execute(p2);
|
||||
fftw_destroy_plan(p2);
|
||||
fftw_destroy_plan(p1);
|
||||
|
||||
return other;
|
||||
}
|
||||
|
||||
explicit scattered_array(multi::array<T, 2> const& other, bmpi3::communicator comm = mpi3::environment::self(), Alloc alloc = {}) :
|
||||
scattered_array(other.extensions(), comm, alloc)
|
||||
{
|
||||
local_cutout() = other.stenciled(std::get<0>(local_cutout().extensions()), std::get<1>(local_cutout().extensions()));
|
||||
}
|
||||
// bool operator==(array<T, 2> const& other) const&{assert(comm()==other.comm());
|
||||
// return comm()&=(local_cutout() == other.local_cutout());
|
||||
// }
|
||||
// bool operator!=(array<T, 2> const& other) const&{return not(*this==other);}
|
||||
ptrdiff_t num_elements() const&{return multi::layout_t<2>(extensions()).num_elements();}
|
||||
layout_type layout() const{return *this;}
|
||||
~scattered_array() noexcept{if(this->local_count()) alloc_.deallocate(local_data_, this->local_count());}
|
||||
|
||||
scattered_array& operator=(scattered_array const& other)&{
|
||||
assert(this->comm() == other.comm());
|
||||
if(this->extensions() == other.extensions()){
|
||||
fftw_plan p = fftw_mpi_plan_many_transpose(
|
||||
std::get<0>(this->extensions()).size(), std::get<1>(this->extensions()).size(), sizeof(T)/sizeof(double),
|
||||
other.block(), this->block(),
|
||||
reinterpret_cast<double*>(const_cast<T*>(other.local_cutout().data_elements())),
|
||||
reinterpret_cast<double*>( this->local_cutout().data_elements() ),
|
||||
this->comm().get(), FFTW_ESTIMATE
|
||||
);
|
||||
fftw_execute(p);
|
||||
fftw_destroy_plan(p);
|
||||
}else assert(0);
|
||||
return *this;
|
||||
}
|
||||
#if 0
|
||||
private:
|
||||
typename std::allocator_traits<Alloc>::size_type
|
||||
local_count_2d (multi::extensions_type_<2> ext){return local_2d(ext).first; }
|
||||
auto local_extension_2d(multi::extensions_type_<2> ext){return local_2d(ext).second;}
|
||||
public:
|
||||
Alloc get_allocator() const{return alloc_;}
|
||||
array(bmpi3::communicator comm = mpi3::environment::self(), Alloc alloc = {}) :
|
||||
comm_{std::move(comm)},
|
||||
alloc_{alloc},
|
||||
local_count_{local_count_2d(multi::extensions_type_<2>{})},
|
||||
local_ptr_ {alloc_.allocate(local_count_), local_extension_2d(multi::extensions_type_<2>{})},
|
||||
n0_{multi::layout_t<2>(multi::extensions_type_<2>{}).size()}
|
||||
{}
|
||||
// True when the global extensions contain no elements.
// (The original returned the element count converted to bool, i.e. the opposite.)
bool empty() const{return extensions().num_elements() == 0;}
|
||||
array_ref <T, 2> local_cutout() &{return *local_ptr_;}
|
||||
array_cref<T, 2> local_cutout() const&{return *local_ptr_;}
|
||||
ptrdiff_t local_count() const&{return local_count_;}
|
||||
auto local_data() const&{return local_cutout().data_elements();}
|
||||
multi::extensions_type_<2> extensions() const&{return {n0_, std::get<1>(local_cutout().extensions())};}
|
||||
friend auto extensions(array const& self){return self.extensions();}
|
||||
|
||||
array& operator=(multi::array<T, 2> const& other) &{
|
||||
if(other.extensions() == extensions()) local_cutout() = other.stenciled(std::get<0>(local_cutout().extensions()), std::get<1>(local_cutout().extensions()));
|
||||
else{
|
||||
array tmp{other};
|
||||
std::swap(*this, tmp);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
template<class Array, class=std::enable_if_t<not std::is_same<Array, multi::array<T, 2>>{}> >
|
||||
array& operator=(Array const& other) &{
|
||||
assert( other.extensions() == this->extensions() );
|
||||
|
||||
static_assert( std::is_trivially_assignable<T&, T>{}, "!" );
|
||||
static_assert( sizeof(T)%sizeof(double)==0, "!" );
|
||||
|
||||
auto options = FFTW_ESTIMATE;
|
||||
if(other.layout_.is_transposed){
|
||||
options |= FFTW_MPI_TRANSPOSED_IN;
|
||||
n0_ = std::get<1>(other.extensions()).size();
|
||||
}
|
||||
|
||||
fftw_plan p = fftw_mpi_plan_many_transpose(
|
||||
std::get<0>(extensions()).size(), std::get<1>(extensions()).size(), sizeof(T)/sizeof(double),
|
||||
FFTW_MPI_DEFAULT_BLOCK, other.layout_.block,
|
||||
reinterpret_cast<double*>(const_cast<T*>(other.local_cutout().base())),
|
||||
reinterpret_cast<double*>(this->local_cutout().data_elements()),
|
||||
this->comm_.get(), options
|
||||
);
|
||||
fftw_execute(p);
|
||||
fftw_destroy_plan(p);
|
||||
|
||||
local_ptr_ = array_ptr<T, 2, local_pointer_t>{this->local_cutout().data_elements(), local_extension_2d(other.extensions())};
|
||||
return *this;
|
||||
}
|
||||
bool operator==(multi::array<T, 2> const& other) const&{
|
||||
if(other.extensions() != extensions()) return false;
|
||||
return comm_&=(local_cutout() == other.stenciled(std::get<0>(local_cutout().extensions()), std::get<1>(local_cutout().extensions())));
|
||||
}
|
||||
friend bool operator==(multi::array<T, 2> const& other, array const& self){
|
||||
return self.operator==(other);
|
||||
}
|
||||
bool operator==(array<T, 2> const& other) const&{assert(comm_==other.comm_);
|
||||
return comm_&=(local_cutout() == other.local_cutout());
|
||||
}
|
||||
// Copy-assigns the local block from `other`. Both arrays must have the same
// global extensions and live on the same communicator; anything else is
// treated as a programming error (assert).
// The original compared `other.comm_` with itself, so the communicator check
// could never fail.
array& operator=(array const& other)&{
	if(other.extensions() == this->extensions() and this->comm_ == other.comm_)
		local_cutout() = other.local_cutout();
	else assert(0);
	return *this;
}
|
||||
basic_array<T, typename std::pointer_traits<local_pointer_t>::template rebind<T const>> transposed() const{
|
||||
return basic_array<T, typename std::pointer_traits<local_pointer_t>::template rebind<T const>>{
|
||||
layout_t{n0_, true, FFTW_MPI_DEFAULT_BLOCK}, this->local_cutout().layout().transpose(), this->local_cutout().data_elements()
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
boost::multi::fftw::mpi::scattered_array<std::complex<double>, 2>& dft(
|
||||
boost::multi::fftw::mpi::scattered_array<std::complex<double>, 2> const& A,
|
||||
boost::multi::fftw::mpi::scattered_array<std::complex<double>, 2> & B,
|
||||
fftw::sign /*s*/
|
||||
){
|
||||
(void)A;
|
||||
// assert( A.extensions() == B.extensions() );
|
||||
// assert( A.comm() == B.comm() );
|
||||
#if 0
|
||||
fftw_plan p = fftw_mpi_plan_dft_2d(
|
||||
std::get<0>(A.extensions()).size(), std::get<1>(A.extensions()).size(),
|
||||
(fftw_complex *)A.local_cutout().data_elements(), (fftw_complex *)B.local_cutout().data_elements(),
|
||||
A.comm().get(),
|
||||
s, FFTW_ESTIMATE
|
||||
);
|
||||
fftw_execute(p);
|
||||
fftw_destroy_plan(p);
|
||||
#endif
|
||||
return B;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
array_transposed<std::complex<double>, 2>& dft(
|
||||
array<std::complex<double>, 2> const& A,
|
||||
array_transposed<std::complex<double>, 2>& B,
|
||||
fftw::sign s
|
||||
){
|
||||
// http://www.fftw.org/fftw3_doc/MPI-Plan-Creation.html
|
||||
// assert( A.extensions() == B.extensions() );
|
||||
assert( A.comm() == B.comm() );
|
||||
fftw_plan p = fftw_mpi_plan_dft_2d(
|
||||
std::get<0>(A.extensions()).size(), std::get<1>(A.extensions()).size(),
|
||||
(fftw_complex *)A.local_cutout().data_elements(), (fftw_complex *)B.local_cutout().data_elements(),
|
||||
A.comm().get(),
|
||||
s, FFTW_ESTIMATE | FFTW_MPI_TRANSPOSED_OUT
|
||||
);
|
||||
fftw_execute(p);
|
||||
fftw_destroy_plan(p);
|
||||
return B;
|
||||
}
|
||||
|
||||
array<std::complex<double>, 2>& dft_forward(array<std::complex<double>, 2> const& A, array<std::complex<double>, 2>& B){
|
||||
return dft(A, B, fftw::forward);
|
||||
}
|
||||
|
||||
array<std::complex<double>, 2> dft_forward(array<std::complex<double>,2> const& A){
|
||||
array<std::complex<double>, 2> ret(A.extensions()); dft_forward(A, ret); return ret;
|
||||
}
|
||||
|
||||
|
||||
|
||||
}}}}
|
||||
#endif
|
||||
|
||||
#if not __INCLUDE_LEVEL__
|
||||
|
||||
#include<boost/mpi3/main_environment.hpp>
|
||||
#include<boost/mpi3/ostream.hpp>
|
||||
|
||||
|
||||
#include "../../fftw/mpi/environment.hpp"
|
||||
|
||||
namespace mpi3 = boost::mpi3;
|
||||
namespace multi = boost::multi;
|
||||
namespace fftw = multi::fftw;
|
||||
namespace mpi = fftw::mpi;
|
||||
|
||||
int mpi3::main(int, char*[], mpi3::environment& menv){
|
||||
mpi::environment fenv(menv);
|
||||
|
||||
auto world = menv.world();
|
||||
mpi3::ostream os{world};
|
||||
|
||||
using T = std::complex<double>;
|
||||
|
||||
mpi::scattered_array<T, 2> S({14, 19}, world);
|
||||
|
||||
using std::get;
|
||||
if(auto x = extensions(S.local_cutout()))
|
||||
for(auto i : get<0>(x))
|
||||
for(auto j : get<1>(x))
|
||||
S.local_cutout()[i][j] = T(i, j);//std::complex<double>(i + j, i + 2*j + 1)/std::abs(std::complex<double>(i + j, i + 2*j + 1));
|
||||
|
||||
mpi::gathered_array<T, 2> G = S.gather();
|
||||
G.local_cutout();
|
||||
|
||||
assert( G.extensions() == {14, 19} );
|
||||
if(world.rank() == 0){
|
||||
assert( G.extensions() == {14, 19} );
|
||||
assert( G.local_cutout().extensions() == {14, 19} );
|
||||
}
|
||||
if(world.rank() != 0){
|
||||
assert( G.extensions() == {14, 19} );
|
||||
assert( G.local_cutout().extensions() == {0, 0} );
|
||||
}
|
||||
|
||||
multi::array<T, 2> A = S.gather();
|
||||
if(world.rank() == 0) assert( A.extensions() == {14, 19} );
|
||||
if(world.rank() != 0) assert( A.empty() );
|
||||
|
||||
world.barrier();
|
||||
if(world.root()){
|
||||
std::cout<<"-------------\n";
|
||||
if(auto x = extensions(G.local_cutout()))
|
||||
for(auto i : get<0>(x)){
|
||||
for(auto j : get<1>(x))
|
||||
std::cout<< G.local_cutout()[i][j] <<'\t';
|
||||
std::cout<<std::endl;
|
||||
}
|
||||
}else assert(G.local_cutout().empty());
|
||||
|
||||
mpi::scattered_array<T, 2> S2 = G.scatter();
|
||||
|
||||
assert( S2 == S );
|
||||
|
||||
|
||||
mpi::gathered_array<T, 2> G2 = S2.gather();
|
||||
|
||||
if(world.root()){
|
||||
std::cout<<"-------------\n";
|
||||
if(auto x = extensions(G2.local_cutout()))
|
||||
for(auto i : get<0>(x)){
|
||||
for(auto j : get<1>(x))
|
||||
std::cout<< G2.local_cutout()[i][j] <<'\t';
|
||||
std::cout<<std::endl;
|
||||
}
|
||||
assert( G2.local_cutout() == G.local_cutout() );
|
||||
}else assert(G2.local_cutout().empty());
|
||||
|
||||
// assert( S == S2 );
|
||||
|
||||
// if(not world.root()) assert( G.local_cutout().empty() );
|
||||
|
||||
// mpi::gathered_array<double, 2> G({8, 15}, world);
|
||||
|
||||
/*
|
||||
auto const A = [&]{
|
||||
os<<"global sizes"<< std::get<0>(A.extensions()) <<'x'<< std::get<1>(A.extensions()) <<' '<< A.num_elements() <<std::endl;
|
||||
os<< A.local_cutout().extension() <<'x'<< std::get<1>(A.local_cutout().extensions()) <<"\t#="<< A.local_cutout().num_elements() <<" allocated "<< A.local_count() <<std::endl;
|
||||
if(auto x = A.local_cutout().extensions())
|
||||
for(auto i : std::get<0>(x))
|
||||
for(auto j : std::get<1>(x))
|
||||
A.local_cutout()[i][j] = i + j;//std::complex<double>(i + j, i + 2*j + 1)/std::abs(std::complex<double>(i + j, i + 2*j + 1));
|
||||
return A;
|
||||
}();
|
||||
*/
|
||||
/*
|
||||
multi::fftw::mpi::scattered_array<std::complex<double>, 2> B(A.extensions(), world);
|
||||
|
||||
multi::array<std::complex<double>, 2> A2 = A;
|
||||
assert( A2 == A );
|
||||
|
||||
using multi::fftw::dft_forward;
|
||||
*/
|
||||
#if 0
|
||||
dft_forward(A , B );
|
||||
dft_forward(A2, A2);
|
||||
|
||||
{
|
||||
auto x = B.local_cutout().extensions();
|
||||
for(auto i : std::get<0>(x))
|
||||
for(auto j : std::get<1>(x))
|
||||
if(not( std::abs(B.local_cutout()[i][j] - A2[i][j]) < 1e-12 )){
|
||||
std::cout<< B.local_cutout()[i][j] - A2[i][j] <<' '<< std::abs(B.local_cutout()[i][j] - A2[i][j]) <<'\n';
|
||||
}
|
||||
}
|
||||
|
||||
multi::fftw::mpi::array_transposed<std::complex<double>, 2> AT(A.extensions(), world);
|
||||
os<< "global sizes" << std::get<0>(AT.extensions()) <<'x'<< std::get<1>(AT.extensions()) <<' '<< AT.num_elements() <<std::endl;
|
||||
os<< AT.local_cutout().extension() <<'x'<< std::get<1>(AT.local_cutout().extensions()) <<"\t#="<< AT.local_cutout().num_elements() <<" allocated "<< AT.local_count() <<std::endl;
|
||||
|
||||
dft(A, AT, multi::fftw::forward);
|
||||
|
||||
if(world.rank() == 0){
|
||||
if(auto x = B.local_cutout().extensions()){
|
||||
for(auto i : std::get<0>(x)){
|
||||
for(auto j : std::get<1>(x))
|
||||
std::cout<< B.local_cutout()[i][j] <<' ';
|
||||
std::cout<<'\n';
|
||||
}
|
||||
}
|
||||
|
||||
if(auto x = AT.local_cutout().extensions()){
|
||||
for(auto i : std::get<0>(x)){
|
||||
for(auto j : std::get<1>(x))
|
||||
std::cout<< AT.local_cutout()[i][j] <<' ';
|
||||
std::cout<<'\n';
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
|
@ -1,40 +0,0 @@
|
|||
#if COMPILATION_INSTRUCTIONS
|
||||
mpic++ -I$HOME/prj/alf $0 -o $0x -lfftw3 -lfftw3_mpi&&time mpirun -n 4 $0x&&rm $0x;exit
|
||||
#endif
|
||||
|
||||
#include "../../../fftw/mpi.hpp"
|
||||
|
||||
#include<boost/mpi3/main.hpp>
|
||||
#include<boost/mpi3/environment.hpp>
|
||||
#include<boost/mpi3/ostream.hpp>
|
||||
#include "../../../fftw.hpp"
|
||||
|
||||
namespace mpi3 = boost::mpi3;
|
||||
namespace multi = boost::multi;
|
||||
|
||||
// Test driver: fills a 41x321 distributed complex array, copies it into a
// plain multi::array ("world replicas" in the original comment), runs an
// in-place forward DFT on both, and prints the absolute difference for every
// local element where the two results are not within 1e-8 of each other.
int mpi3::main(int, char*[], mpi3::communicator world){
	using complex = std::complex<double>;

	multi::fftw::mpi::environment fenv;

	multi::fftw::mpi::array<complex, 2> G({41, 321}, world);

	// Initialise only the part of G owned by this process.
	if(auto local_exts = G.local_cutout().extensions()){
		for(auto row : std::get<0>(local_exts)){
			for(auto col : std::get<1>(local_exts)){
				G.local_cutout()[row][col] = complex(row + col, row + 2*col);
			}
		}
	}

	multi::array<complex, 2> L = G; // world replicas
	assert( L == G );

	using multi::fftw::dft_forward;

	dft_forward(L, L); // dft in replicas
	dft_forward(G, G);

	// Report every local element whose distributed result deviates from the replica.
	if(auto local_exts = G.local_cutout().extensions()){
		for(auto row : std::get<0>(local_exts)){
			for(auto col : std::get<1>(local_exts)){
				auto const deviation = std::abs(G.local_cutout()[row][col] - L[row][col]);
				if(not(deviation < 1e-8)){
					std::cout<< deviation << std::endl;
				}
			}
		}
	}

	return 0;
}
|
||||
|
|
@ -1,88 +0,0 @@
|
|||
# -*-indent-tabs-mode:nil;c-basic-offset:2;tab-width:4;autowrap:nil;-*-
|
||||
#[=[Multi Test suite can be run like this:
|
||||
mkdir -p build
|
||||
cd build
|
||||
cmake .. [-DENABLE_CUDA=1]
|
||||
make -j
|
||||
ctest -j --output-on-failure [-T memcheck]
|
||||
exit
|
||||
#]=]
|
||||
# target_link_directories(), used for every test target below, was added in
# CMake 3.13; with 3.11/3.12 configuration fails with an unknown-command error.
cmake_minimum_required(VERSION 3.13)
|
||||
|
||||
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake")
|
||||
|
||||
set(CMAKE_VERBOSE_MAKEFILE ON)
|
||||
|
||||
project(boost-multi-adaptors-fftw-test VERSION 0.1 LANGUAGES CXX)
|
||||
|
||||
find_package(Boost REQUIRED COMPONENTS unit_test_framework timer)
|
||||
|
||||
find_package(FFTW REQUIRED COMPONENTS DOUBLE_LIB)
|
||||
include_directories(${FFTW_INCLUDE_DIRS})
|
||||
link_libraries(${FFTW_LIBRARIES})
|
||||
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
set(CMAKE_CXX_EXTENSIONS OFF)
|
||||
|
||||
if(ENABLE_CUDA OR DEFINED CXXCUDA)
|
||||
enable_language(CUDA)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr -Xcudafe \"--diag_suppress=implicit_return_from_non_void_function\"")
|
||||
endif()
|
||||
|
||||
find_package(CUDA QUIET)
|
||||
|
||||
if (CUDA_FOUND)
|
||||
message("CUDA found")
|
||||
include_directories(${CUDA_INCLUDE_DIRS})
|
||||
else()
|
||||
message("CUDA not found")
|
||||
endif()
|
||||
|
||||
enable_testing()
|
||||
list(APPEND CMAKE_CTEST_ARGUMENTS "--output-on-failure") # needs cmake 3.17
|
||||
include(CTest)
|
||||
|
||||
#configure_file("config.hpp.in" ${CMAKE_BINARY_DIR}/config.hpp)
|
||||
|
||||
include_directories(${CMAKE_BINARY_DIR})
|
||||
|
||||
#file(GLOB TEST_SRCS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.cpp)
|
||||
set(TEST_SRCS
|
||||
combinations.cpp
|
||||
# copy.cpp
|
||||
core.cpp
|
||||
# thrust.cpp
|
||||
transpose.cpp
|
||||
transpose_square.cpp
|
||||
)
|
||||
|
||||
foreach(TEST_FILE ${TEST_SRCS})
|
||||
SET(TEST_EXE "${TEST_FILE}.x")
|
||||
add_executable (${TEST_EXE} ${TEST_FILE})
|
||||
if(ENABLE_CUDA OR DEFINED CXXCUDA)
|
||||
set_source_files_properties(${TEST_FILE} PROPERTIES LANGUAGE CUDA)
|
||||
target_compile_options (${TEST_EXE} PRIVATE -std=c++17)
|
||||
endif()
|
||||
# target_compile_features (${TEST_EXE} PUBLIC cxx_std_17)
|
||||
target_compile_definitions(${TEST_EXE} PRIVATE "BOOST_PP_VARIADICS")
|
||||
target_compile_definitions(${TEST_EXE} PRIVATE ${Boost_DEFINITIONS})
|
||||
target_include_directories(${TEST_EXE} PRIVATE ${Boost_INCLUDE_DIRS})
|
||||
target_link_libraries (${TEST_EXE} PRIVATE ${Boost_LIBRARIES})
|
||||
target_link_directories (${TEST_EXE} PRIVATE ${Boost_LIBRARY_DIRS})
|
||||
if(NOT ENABLE_CUDA)
|
||||
target_compile_options (${TEST_EXE} PRIVATE
|
||||
-Werror -Wall -Wextra -fno-common
|
||||
$<$<CXX_COMPILER_ID:GNU>:
|
||||
-Wpedantic -Wformat-truncation -fstack-usage>#-Wconversion
|
||||
$<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>>:
|
||||
-Wpedantic -Wmove>
|
||||
$<$<CXX_COMPILER_ID:Intel>:
|
||||
-wd161 -diag-disable=remark -Warray-bounds -Wchar-subscripts -Wcomment -Wenum-compare -Wformat -Wuninitialized -Wmaybe-uninitialized -Wmain -Wnarrowing -Wnonnull -Wparentheses -Wpointer-sign -Wreorder -Wno-return-type -Wsign-compare -Wsequence-point -Wtrigraphs -Wunused-function -Wunused-but-set-variable -Wunused-variable -Wwrite-strings -Werror -diag-error:3846
|
||||
>
|
||||
$<$<CXX_COMPILER_ID:MSVC>:
|
||||
/W4>)
|
||||
endif()
|
||||
add_test(NAME ${TEST_EXE} COMMAND ./${TEST_EXE})
|
||||
endforeach()
|
||||
|
|
@ -1,68 +0,0 @@
|
|||
|
||||
# ==================================================================================================
|
||||
# This file is part of the CodeVault project. The project is licensed under Apache Version 2.0.
|
||||
# CodeVault is part of the EU-project PRACE-4IP (WP7.3.C).
|
||||
#
|
||||
# Author(s):
|
||||
# Cedric Nugteren <cedric.nugteren@surfsara.nl>
|
||||
#
|
||||
# ==================================================================================================
|
||||
#
|
||||
# Defines the following variables:
|
||||
# FFTW_FOUND Boolean holding whether or not the FFTW3 library was found
|
||||
# FFTW_INCLUDE_DIRS The FFTW3 include directory
|
||||
# FFTW_LIBRARIES The FFTW3 library
|
||||
#
|
||||
# In case FFTW3 is not installed in the default directory, set the FFTW_ROOT variable to point to
|
||||
# the root of FFTW3, such that 'fftw3.h' can be found in $FFTW_ROOT/include. This can either be done
|
||||
# using an environmental variable (e.g. export FFTW_ROOT=/path/to/fftw3) or using a CMake variable
|
||||
# (e.g. cmake -DFFTW_ROOT=/path/to/fftw3 ..).
|
||||
#
|
||||
# ==================================================================================================
|
||||
|
||||
# Sets the possible install locations
|
||||
set(FFTW_HINTS
|
||||
${FFTW_ROOT}
|
||||
$ENV{FFTW_ROOT}
|
||||
)
|
||||
set(FFTW_PATHS
|
||||
/usr
|
||||
/usr/local
|
||||
)
|
||||
|
||||
# Finds the include directories
|
||||
find_path(FFTW_INCLUDE_DIRS
|
||||
NAMES fftw3.h
|
||||
HINTS ${FFTW_HINTS}
|
||||
PATH_SUFFIXES include api inc include/x86_64 include/x64
|
||||
PATHS ${FFTW_PATHS}
|
||||
DOC "FFTW3 include header fftw3.h"
|
||||
)
|
||||
mark_as_advanced(FFTW_INCLUDE_DIRS)
|
||||
|
||||
# Finds the library
|
||||
find_library(FFTW_LIBRARIES
|
||||
NAMES fftw3
|
||||
HINTS ${FFTW_HINTS}
|
||||
PATH_SUFFIXES lib lib64 lib/x86_64 lib/x64 lib/x86 lib/Win32
|
||||
PATHS ${FFTW_PATHS}
|
||||
DOC "FFTW3 library"
|
||||
)
|
||||
mark_as_advanced(FFTW_LIBRARIES)
|
||||
|
||||
# ==================================================================================================
|
||||
|
||||
# Notification messages
|
||||
if(NOT FFTW_INCLUDE_DIRS)
|
||||
message(STATUS "Could NOT find 'fftw3.h', install FFTW3 or set FFTW_ROOT")
|
||||
endif()
|
||||
if(NOT FFTW_LIBRARIES)
|
||||
message(STATUS "Could NOT find the FFTW3 library, install it or set FFTW_ROOT")
|
||||
endif()
|
||||
|
||||
# Determines whether or not FFTW3 was found
|
||||
include(FindPackageHandleStandardArgs)
|
||||
find_package_handle_standard_args(FFTW DEFAULT_MSG FFTW_INCLUDE_DIRS FFTW_LIBRARIES)
|
||||
|
||||
# ==================================================================================================
|
||||
|
|
@ -1,142 +0,0 @@
|
|||
#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4-*-
|
||||
$CXX $0 -o $0x -lfftw3 -lboost_unit_test_framework -lboost_timer&&$0x&&rm $0x;exit
|
||||
#endif
|
||||
// © Alfredo A. Correa 2020
|
||||
|
||||
#define BOOST_TEST_MODULE "C++ Unit Tests for Multi FFTW adaptor (cpu) with thrust complex"
|
||||
#define BOOST_TEST_DYN_LINK
|
||||
#include<boost/test/unit_test.hpp>
|
||||
|
||||
#include <boost/timer/timer.hpp>
|
||||
|
||||
#include "../../fftw.hpp"
|
||||
|
||||
#include<complex>
|
||||
#include<chrono>
|
||||
|
||||
#include<thrust/complex.h>
|
||||
|
||||
namespace multi = boost::multi;
|
||||
|
||||
namespace utf = boost::unit_test::framework;
|
||||
|
||||
// Scope timer: records the clock value at construction and, on destruction,
// writes "<label>: <elapsed seconds> sec" to std::cerr.
struct watch : private std::chrono::high_resolution_clock{
	std::string label_;  // text printed in front of the elapsed time
	time_point  start_;  // clock reading taken in the constructor

	watch(std::string label = "") : label_(label), start_(now()){}

	~watch(){
		std::chrono::duration<double> const elapsed = now() - start_;
		std::cerr<< label_ <<": "<< elapsed.count() <<" sec"<<std::endl;
	}
};
|
||||
|
||||
// Times the forward DFT of one 4D array for several per-dimension flag sets,
// going through each entry point in turn: out-of-place, pre-planned,
// in-place, allocating and moving. The allocating and moving results are
// compared against the out-of-place result at one sample element.
BOOST_AUTO_TEST_CASE(fft_combinations, *boost::unit_test::tolerance(0.00001)){

	using complex = std::complex<double>;
	using array4d = multi::array<complex, 4>;

	// Read-only input; every element gets std::rand-based real and imaginary parts.
	auto const in = []{
		array4d ret({32, 90, 98, 96});
		auto const random_complex = []{
			double const re = std::rand()*1./RAND_MAX;
			double const im = std::rand()*1./RAND_MAX;
			return complex{re, im};
		};
		std::generate(ret.data_elements(), ret.data_elements() + ret.num_elements(), random_complex);
		return ret;
	}();
	std::cout<<"memory size "<< in.num_elements()*sizeof(complex)/1e6 <<" MB\n";

	// One flag per array dimension, handed to the dft functions as their first argument.
	std::vector<std::array<bool, 4>> cases = {
		{false, true , true , true },
		{false, true , true , false},
		{true , false, false, false},
		{true , true , false, false},
		{false, false, true , false},
		{false, false, false, false},
	};

	using std::cout;
	for(auto which : cases){
		cout<<"case ";
		std::copy(which.begin(), which.end(), std::ostream_iterator<bool>{cout,", "});
		cout<<"\n";

		array4d out = in;  // reference result, written by the out-of-place calls below
		{  // out-of-place, planning included in the timing
			boost::timer::auto_cpu_timer t{"cpu_oplac %ws wall, CPU (%p%)\n"};
			multi::fftw::dft_forward(which, in, out);
		}
		{  // out-of-place, plan built before the timer starts
			multi::fftw::plan p(which, in, out, multi::fftw::forward);
			boost::timer::auto_cpu_timer t{"cpu_oplac planned %ws wall, CPU (%p%)\n"};
			p();
		}
		{  // in-place on a writable copy
			auto in_rw = in;
			boost::timer::auto_cpu_timer t{"cpu_iplac %ws wall, CPU (%p%)\n"};
			multi::fftw::dft_forward(which, in_rw);
		//	BOOST_TEST( abs( in_rw[5][4][3][1] - out[5][4][3][1] ) == 0. );
		}
		{  // in-place, plan built before the timer starts
			auto in_rw = in;
			multi::fftw::plan p(which, in_rw, in_rw, multi::fftw::forward);
			boost::timer::auto_cpu_timer t{"cpu_iplac planned %ws wall, CPU (%p%)\n"};
			p();
		//	BOOST_TEST( abs( in_rw[5][4][3][1] - out[5][4][3][1] ) == 0. );
		}
		{  // same as the previous block; the FFTW_MEASURE flag is commented out
			auto in_rw = in;
			multi::fftw::plan p(which, in_rw, in_rw, multi::fftw::forward);// | FFTW_MEASURE);
			boost::timer::auto_cpu_timer t{"cpu_iplac planned measured %ws wall, CPU (%p%)\n"};
			p();
		//	BOOST_TEST( abs( in_rw[5][4][3][1] - out[5][4][3][1] ) == 0. );
		}
		{  // result returned by value
			boost::timer::auto_cpu_timer t{"cpu_alloc %ws wall, CPU (%p%)\n"};
			auto out_cpy = multi::fftw::dft_forward(which, in);
			BOOST_TEST( abs( out_cpy[5][4][3][1] - out[5][4][3][1] ) == 0. );
		}
		{  // input handed over with std::move; it is required to be empty afterwards
			auto in_rw = in;
			boost::timer::auto_cpu_timer t{"cpu_move %ws wall, CPU (%p%)\n"};
			auto out_cpy = multi::fftw::dft_forward(which, std::move(in_rw));
			BOOST_REQUIRE( in_rw.empty() );
			BOOST_TEST( abs( out_cpy[5][4][3][1] - out[5][4][3][1] ) == 0. );
		}
	}
}
|
||||
|
||||
// Disabled benchmark: times repeated in-place and out-of-place forward DFTs
// of a 64x128x128x128 array. Each timing is reported by a `watch` whose
// lifetime is limited to one braced scope.
BOOST_AUTO_TEST_CASE(fftw_4D_power_benchmark, *boost::unit_test::disabled() ){
	using complex = std::complex<double>;
	namespace fftw = multi::fftw;

	auto x = multi::array<complex, 4>::extensions_type({64, 128, 128, 128});
	multi::array<complex, 4> in(x);
	std::iota(in.data_elements(), in.data_elements() + in.num_elements(), 1.2);

	BOOST_REQUIRE( in[0][0][0][0] == 1.2 );
	std::array<bool, 4> c = {false, true, true, true};

	// Two in-place passes.
	{
		watch const timer{utf::current_test_case().full_name()+" inplace FTTT"};
		fftw::dft(c, in, fftw::forward);
	}
	{
		watch const timer{utf::current_test_case().full_name()+" inplace FTTT"};
		fftw::dft(c, in, fftw::forward);
	}
	auto in0000 = in[0][0][0][0];
	BOOST_REQUIRE( in0000 != 1.2 );  // the in-place transforms must have changed the data

	// Three out-of-place passes into a preallocated output.
	multi::array<complex, 4> out(x);
	{
		watch const timer{utf::current_test_case().full_name()+" outofplace FTTT"};
		fftw::dft(c, in, out, fftw::forward);
	}
	{
		watch const timer{utf::current_test_case().full_name()+" outofplace FTTT"};
		fftw::dft(c, in, out, fftw::forward);
	}
	{
		watch const timer{utf::current_test_case().full_name()+" outofplace FTTT"};
		fftw::dft(c, in, out, fftw::forward);
	}

	// Two out-of-place passes where the output allocation is part of the timing.
	{
		watch const timer{utf::current_test_case().full_name()+" outofplace+alloc FTTT"};
		multi::array<complex, 4> out2(x);
		fftw::dft(c, in, out2, fftw::forward);
	}
	{
		watch const timer{utf::current_test_case().full_name()+" outofplace+alloc FTTT"};
		multi::array<complex, 4> out2(x);
		fftw::dft(c, in, out2, fftw::forward);
	}
	BOOST_REQUIRE( in0000 == in[0][0][0][0] );  // out-of-place calls must not touch the input
}
|
||||
|
|
@ -1,153 +0,0 @@
|
|||
#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4-*-
|
||||
$CXXX $CXXFLAGS -O3 $0 -o $0x -DHAVE_FFTW3_THREADS -lfftw3 -lfftw3_threads -lboost_unit_test_framework -lboost_timer&&$0x&&rm $0x;exit
|
||||
#endif
|
||||
// © Alfredo A. Correa 2020
|
||||
|
||||
#define BOOST_TEST_MODULE "C++ Unit Tests for Multi FFTW copy"
|
||||
#define BOOST_TEST_DYN_LINK
|
||||
#include<boost/test/unit_test.hpp>
|
||||
#include<boost/timer/timer.hpp>
|
||||
|
||||
#include "../../fftw.hpp"
|
||||
|
||||
#include<complex>
|
||||
|
||||
namespace multi = boost::multi;
|
||||
|
||||
// Compares fftw::copy with plain assignment for rotated/unrotated 4D arrays,
// in out-of-place, aliased (same array on both sides) and moved-from forms.
// Every variant must end with dst[1][2][3][4] == in[2][3][4][1].
BOOST_AUTO_TEST_CASE(fftw_copy){

	using complex = std::complex<double>;
	using array4d = multi::array<complex, 4>;

	auto const in = []{
		array4d ret({96, 96, 96, 96});
		auto const random_complex = []{
			double const re = std::rand()*1./RAND_MAX;
			double const im = std::rand()*1./RAND_MAX;
			return complex{re, im};
		};
		std::generate(ret.data_elements(), ret.data_elements() + ret.num_elements(), random_complex);
		return ret;
	}();
	std::cout<<"memory size "<< in.num_elements()*sizeof(complex)/1e6 <<" MB\n";

	// ---- fftw::copy between distinct arrays ----
	{
		array4d dst(extensions(in), 0.);
		{
			boost::timer::auto_cpu_timer t{"fftw_copy in-inorder %ws wall, CPU (%p%)\n"};
			multi::fftw::copy(in, rotated(dst));
		}
		BOOST_REQUIRE( dst[1][2][3][4] == in[2][3][4][1] );
		BOOST_REQUIRE( rotated(dst) == in );
	}
	{
		array4d dst(extensions(in), 0.);
		{
			boost::timer::auto_cpu_timer t{"fftw_copy out-inorder %ws wall, CPU (%p%)\n"};
			multi::fftw::copy(unrotated(in), dst);
		}
		BOOST_REQUIRE( dst[1][2][3][4] == in[2][3][4][1] );
		BOOST_REQUIRE( rotated(dst) == in );
		BOOST_REQUIRE( dst == unrotated(in) );
	}

	// ---- plain assignment between distinct arrays ----
	{
		array4d dst(extensions(in), 0.);
		{
			boost::timer::auto_cpu_timer t{"assignment in-inorder %ws wall, CPU (%p%)\n"};
			rotated(dst) = in;
		}
		BOOST_REQUIRE( dst[1][2][3][4] == in[2][3][4][1] );
	}
	{
		array4d dst(extensions(in), 0.);
		{
			boost::timer::auto_cpu_timer t{"assignment out-inorder %ws wall, CPU (%p%)\n"};
			dst = unrotated(in);
		}
		BOOST_REQUIRE( dst[1][2][3][4] == in[2][3][4][1] );
	}

	// ---- assignment where source and destination are the same array ----
	{
		array4d dst = in;
		{
			boost::timer::auto_cpu_timer t{"assignment inplace out-inorder %ws wall, CPU (%p%)\n"};
			dst = unrotated(dst);
		}
		BOOST_REQUIRE( dst[1][2][3][4] == in[2][3][4][1] );
		BOOST_REQUIRE( dst == unrotated(in) );
	}
	{
		array4d dst = in;
		{
			boost::timer::auto_cpu_timer t{"assignment inplace in-inorder %ws wall, CPU (%p%)\n"};
			rotated(dst) = dst;
		}
		BOOST_REQUIRE( dst[1][2][3][4] == in[2][3][4][1] );
	//	BOOST_REQUIRE( rotated(dst) == in );
	}

	// ---- same, but the source is first duplicated into a temporary ----
	{
		array4d dst = in;
		{
			boost::timer::auto_cpu_timer t{"assignment inplace with copy out-inorder %ws wall, CPU (%p%)\n"};
			dst = unrotated(multi::array<complex, 4>{dst});
		}
		BOOST_REQUIRE( dst[1][2][3][4] == in[2][3][4][1] );
		BOOST_REQUIRE( dst == unrotated(in) );
	}
	{
		array4d dst = in;
		{
			boost::timer::auto_cpu_timer t{"assignment inplace with copy in-inorder %ws wall, CPU (%p%)\n"};
			rotated(dst) = multi::array<complex, 4>{dst};
		}
		BOOST_REQUIRE( dst[1][2][3][4] == in[2][3][4][1] );
		BOOST_REQUIRE( dst == unrotated(in) );
	}

	// ---- fftw::copy where source and destination are the same array ----
	{
		array4d dst = in;
		{
			boost::timer::auto_cpu_timer t{"fftw copy inplace in-inorder %ws wall, CPU (%p%)\n"};
			multi::fftw::copy(dst, rotated(dst));
		}
		BOOST_REQUIRE( dst[1][2][3][4] == in[2][3][4][1] );
		BOOST_REQUIRE( dst == unrotated(in) );
	}
	{
		array4d dst = in;
		{
			boost::timer::auto_cpu_timer t{"fftw copy inplace out-inorder %ws wall, CPU (%p%)\n"};
			multi::fftw::copy(unrotated(dst), dst);
		}
		BOOST_REQUIRE( dst[1][2][3][4] == in[2][3][4][1] );
		BOOST_REQUIRE( dst == unrotated(in) );
	}

	// ---- fftw::copy from a moved array: the result must reuse the original buffer ----
	{
		array4d dst = in;
		auto base = dst.data_elements();
		{
			boost::timer::auto_cpu_timer t{"fftw move construct inplace in-inorder %ws wall, CPU (%p%)\n"};
			array4d taken = multi::fftw::copy( dst.move().unrotated() );
			BOOST_REQUIRE( dst.empty() );
			BOOST_REQUIRE( base == taken.data_elements() );
			BOOST_TEST( taken[1][2][3][4].real() == in[2][3][4][1].real() );
		}
	}
	{
		array4d dst = in;
		auto base = dst.data_elements();
		array4d taken;
		{
			boost::timer::auto_cpu_timer t{"fftw move assign inplace in-inorder %ws wall, CPU (%p%)\n"};
			taken = multi::fftw::copy( dst.move().unrotated() );
			BOOST_REQUIRE( dst.empty() );
			BOOST_REQUIRE( base == taken.data_elements() );
			BOOST_TEST( taken[1][2][3][4].real() == in[2][3][4][1].real() );
		}
	}
	{
		array4d dst = in;
		auto base = dst.data_elements();
		{
			boost::timer::auto_cpu_timer t{"fftw move self-assign inplace in-inorder %ws wall, CPU (%p%)\n"};
			dst = multi::fftw::copy( dst.move().unrotated() );
			BOOST_REQUIRE( base == dst.data_elements() );
			BOOST_TEST( dst[1][2][3][4].real() == in[2][3][4][1].real() );
		}
	}

}
|
||||
|
|
@ -1,374 +0,0 @@
|
|||
// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4-*-
|
||||
// © Alfredo A. Correa 2020-2021
|
||||
|
||||
#define BOOST_TEST_MODULE "C++ Unit Tests for Multi FFTW transpose"
|
||||
#define BOOST_TEST_DYN_LINK
|
||||
#include<boost/test/unit_test.hpp>
|
||||
|
||||
#include "../../../array.hpp"
|
||||
#include "../../../adaptors/../complex.hpp"
|
||||
|
||||
#include "../../../adaptors/fftw.hpp"
|
||||
|
||||
#include<chrono>
|
||||
#include<random>
|
||||
|
||||
//#include<thrust/complex.h> // TODO make lib work with thrust complex
|
||||
|
||||
namespace{
|
||||
|
||||
namespace multi = boost::multi;
|
||||
namespace fftw = multi::fftw;
|
||||
|
||||
using complex = std::complex<double>; MAYBE_UNUSED complex const I{0, 1};
|
||||
|
||||
template<class M> auto power(M const& m)->decltype(std::norm(m)){return std::norm(m);}
|
||||
|
||||
template<class M, DELETE((M::rank_v < 1))> double power(M const& m){return accumulate(begin(m), end(m), 0., [](auto const& a, auto const& b){return a + power(b);});}
|
||||
|
||||
struct sum_power{
|
||||
template<class A, class B> auto operator()(A const& a, B const& b) const{return a+power(b);}
|
||||
};
|
||||
|
||||
MAYBE_UNUSED constexpr int N = 16;
|
||||
}
|
||||
|
||||
/// Scope timer: stores the clock reading taken at construction and, when
/// destroyed, writes "<label>: <elapsed seconds> sec" to std::cerr.
struct watch : private std::chrono::high_resolution_clock {
	std::string label_;  ///< text printed in front of the elapsed time
	time_point  start_;  ///< clock reading taken by the constructor

	/// @param label prefix used in the report (empty by default)
	watch(std::string label = "") : label_{label}, start_{now()} {}

	~watch() {
		std::chrono::duration<double> const elapsed = now() - start_;
		std::cerr << label_ << ": " << elapsed.count() << " sec" << std::endl;
	}
};
|
||||
|
||||
/// Overwrites a value of type T, or every element of a (possibly nested)
/// range of them, with draws from a default-constructed
/// std::normal_distribution<T>. The engine is a function-local static
/// std::mt19937 seeded once from std::random_device.
template<class T> struct randomizer{
	/// Range overload: applies this functor to each element of `m`.
	template<class M> void operator()(M&& m) const{
		for(auto&& elem : m){ (*this)(elem); }
	}
	/// Scalar overload: replaces `e` with one draw.
	void operator()(T& e) const{
		static std::random_device seed_source;
		static std::mt19937 engine{seed_source()};
		static std::normal_distribution<T> gauss;
		e = gauss(engine);
	}
};

/// Complex specialization: real and imaginary parts are separate draws from
/// a default-constructed std::normal_distribution<T>.
template<class T> struct randomizer<std::complex<T>>{
	/// Range overload: applies this functor to each element of `m`.
	template<class M> void operator()(M&& m) const{
		for(auto&& elem : m){ (*this)(elem); }
	}
	/// Scalar overload: replaces `e` with a complex number built from two draws.
	void operator()(std::complex<T>& e) const{
		static std::random_device seed_source;
		static std::mt19937 engine{seed_source()};
		static std::normal_distribution<T> gauss;
		e = std::complex<T>(gauss(engine), gauss(engine));
	}
};
|
||||
|
||||
struct fftw_fixture : fftw::environment{
|
||||
void setup(){}
|
||||
void teardown(){}//fftw_cleanup();}
|
||||
};
|
||||
|
||||
BOOST_TEST_GLOBAL_FIXTURE( fftw_fixture );
|
||||
|
||||
// The value-returning 3D dft must leave its input array unchanged.
BOOST_AUTO_TEST_CASE(fftw_3D){
	using complex = std::complex<double>;  // TODO make it work with thrust

	multi::array<complex, 3> in({10, 10, 10});
	in[2][3][4] = 99.;  // marker element, checked again after the transform

	auto fwd = multi::fftw::dft(in, fftw::forward);

	BOOST_REQUIRE(in[2][3][4] == 99.);
}
|
||||
|
||||
// 1D transform of a const array: checks the result size, one forward
// coefficient, that the input is unchanged, and one backward coefficient.
BOOST_AUTO_TEST_CASE(fftw_1D_const){
	multi::array<complex, 1> const in = {1. + 2.*I, 2. + 3. *I, 4. + 5.*I, 5. + 6.*I};

	auto fwd = multi::fftw::dft(in, fftw::forward); // Fourier[in, FourierParameters -> {1, -1}]
	BOOST_REQUIRE( size(fwd) == size(in) );
	BOOST_REQUIRE( fwd[2] == -2. - 2.*I );
	BOOST_REQUIRE( in[1] == +2. + 3.*I );

	// The backward (unnormalized inverse) transform is what the Mathematica
	// expression below denotes. For a length-4 input, coefficient 2 weights
	// element k by (-1)^k for either sign, so the expected value equals the
	// forward one: (1+2i) - (2+3i) + (4+5i) - (5+6i) = -2 - 2i.
	auto bwd = multi::fftw::dft(in, fftw::backward); // InverseFourier[in, FourierParameters -> {-1, -1}]
	BOOST_REQUIRE( bwd[2] == -2. - 2.*I );
}
|
||||
|
||||
// With every dimension flag false, the out-of-place dft must behave as a plain copy.
BOOST_AUTO_TEST_CASE(fftw_2D_identity_2, *boost::unit_test::tolerance(0.0001)){
	multi::array<complex, 2> const in = {
		{  1. + 2.*I,  9. - 1.*I, 2. +  4.*I},
		{  3. + 3.*I,  7. - 4.*I, 1. +  9.*I},
		{  4. + 1.*I,  5. + 3.*I, 2. +  4.*I},
		{  3. - 1.*I,  8. + 7.*I, 2. +  1.*I},
		{ 31. - 1.*I, 18. + 7.*I, 2. + 10.*I}
	};

	multi::array<complex, 2> out(extensions(in));
	multi::fftw::dft({false, false}, in, out, fftw::forward);  // out = in;

	BOOST_REQUIRE( power(in) == power(out) );
	BOOST_REQUIRE( out == in );
}
|
||||
|
||||
// With an empty flag list `{}`, the value-returning dft must give back its input.
BOOST_AUTO_TEST_CASE(fftw_2D_identity, *boost::unit_test::tolerance(0.0001)){
	multi::array<complex, 2> const in = {
		{  1. + 2.*I,  9. - 1.*I, 2. +  4.*I},
		{  3. + 3.*I,  7. - 4.*I, 1. +  9.*I},
		{  4. + 1.*I,  5. + 3.*I, 2. +  4.*I},
		{  3. - 1.*I,  8. + 7.*I, 2. +  1.*I},
		{ 31. - 1.*I, 18. + 7.*I, 2. + 10.*I}
	};

	auto fwd = multi::fftw::dft({}, in, fftw::forward);

	BOOST_REQUIRE( fwd == in );
}
|
||||
|
||||
// Full 2D forward transform checked against a reference coefficient, plus a
// check that transforming row 0 of the matrix equals transforming a 1D array
// holding the same values.
BOOST_AUTO_TEST_CASE(fftw_2D, *boost::unit_test::tolerance(0.0001)){
	multi::array<complex, 2> const in = {
		{  1. + 2.*I,  9. - 1.*I, 2. +  4.*I},
		{  3. + 3.*I,  7. - 4.*I, 1. +  9.*I},
		{  4. + 1.*I,  5. + 3.*I, 2. +  4.*I},
		{  3. - 1.*I,  8. + 7.*I, 2. +  1.*I},
		{ 31. - 1.*I, 18. + 7.*I, 2. + 10.*I}
	};

	namespace fftw = multi::fftw;
	auto fwd = fftw::dft_forward(in);
	BOOST_TEST_REQUIRE( fwd[3][1].real() == -19.0455  ); // Fourier[in, FourierParameters -> {1, -1}][[4]][[2]]
	BOOST_TEST_REQUIRE( fwd[3][1].imag() == - 2.22717 );

	multi::array<complex, 1> const in0 = {1. + 2.*I, 9. - 1.*I, 2. + 4.*I};

	// Each transform is computed once and the stored results are compared
	// (previously both were computed, left unused, and then recomputed
	// inside the assertion).
	auto const from_copy = fftw::dft_forward(in0);
	auto const from_row  = fftw::dft_forward(in[0]);
	BOOST_REQUIRE( from_row == from_copy );
}
|
||||
|
||||
// The forward transform must commute with rotated(): transforming the
// rotated view equals rotating the transform, and the first element of the
// rotated view transforms like the matrix's first column.
BOOST_AUTO_TEST_CASE(fftw_2D_rotated, *boost::unit_test::tolerance(0.0001)){
	using multi::array;
	using multi::fftw::dft_forward;

	array<complex, 2> const in = {
		{  1. + 2.*I,  9. - 1.*I, 2. +  4.*I},
		{  3. + 3.*I,  7. - 4.*I, 1. +  9.*I},
		{  4. + 1.*I,  5. + 3.*I, 2. +  4.*I},
		{  3. - 1.*I,  8. + 7.*I, 2. +  1.*I},
		{ 31. - 1.*I, 18. + 7.*I, 2. + 10.*I}
	};

	auto fwd = dft_forward(in);

	// rotated(in)[0] holds the first entry of each row above.
	BOOST_REQUIRE(
		dft_forward(rotated(in)[0]) ==
		dft_forward(array<complex, 1>{1.+2.*I, 3.+3.*I, 4. + 1.*I, 3. - 1.*I, 31. - 1.*I})
	);
	BOOST_REQUIRE( dft_forward(rotated(in)) == rotated(fwd) );
}
|
||||
|
||||
// Batched 1D transforms over a 2D array, requested four ways: per-dimension
// direction list, bool flags on rotated views, all-false flags (plain copy),
// and many_dft over an iterator range.
BOOST_AUTO_TEST_CASE(fftw_2D_many, *boost::unit_test::tolerance(0.0001)){
	using multi::fftw::dft_forward;

	multi::array<complex, 2> const in = {
		{  1. + 2.*I,  9. - 1.*I, 2. +  4.*I},
		{  3. + 3.*I,  7. - 4.*I, 1. +  9.*I},
		{  4. + 1.*I,  5. + 3.*I, 2. +  4.*I},
		{  3. - 1.*I,  8. + 7.*I, 2. +  1.*I},
		{ 31. - 1.*I, 18. + 7.*I, 2. + 10.*I}
	};
	multi::array<complex, 2> out(extensions(in));

	// Transform along the second dimension only: row 0 must match its own 1D transform.
	multi::fftw::dft({fftw::none, fftw::forward}, in, out);
	BOOST_REQUIRE( dft_forward(in[0]) == out[0] );

	// Same request expressed on the rotated views.
	multi::fftw::dft({false, true}, rotated(in), rotated(out), fftw::forward);
	BOOST_REQUIRE( dft_forward(rotated(in)[0]) == rotated(out)[0] );

	// No dimension selected: the output must equal the input.
	multi::fftw::dft_forward({false, false}, rotated(in), rotated(out));
	BOOST_REQUIRE( in == out );

	// Iterator-range interface: one transform per element of [begin, end).
	multi::fftw::many_dft(in.begin(), in.end(), out.begin(), fftw::forward);
	BOOST_REQUIRE( dft_forward(in[0]) == out[0] );
}
|
||||
|
||||
//BOOST_AUTO_TEST_CASE(fftw_many1_from_2){
|
||||
// multi::array<complex, 2> in({3, 10}); randomizer<complex>{}(in);
|
||||
// multi::array<complex, 2> out({3, 10});
|
||||
// fftw::dft({false, true}, in, out, fftw::forward);
|
||||
|
||||
// multi::array<complex, 2> out2({3, 10});
|
||||
// for(int i = 0; i!=size(in); ++i)
|
||||
// fftw::dft(in[i], out2[i], fftw::forward);
|
||||
|
||||
// BOOST_REQUIRE(out2 == out);
|
||||
//}
|
||||
|
||||
//BOOST_AUTO_TEST_CASE(fftw_many2_from_3){
|
||||
// multi::array<complex, 3> in({3, 5, 6}); randomizer<complex>{}(in);
|
||||
// multi::array<complex, 3> out({3, 5, 6});
|
||||
// fftw::dft({false, true, true}, in, out, FFTW_FORWARD);
|
||||
|
||||
// multi::array<complex, 3> out2({3, 5, 6});
|
||||
// for(int i = 0; i!=size(in); ++i)
|
||||
// fftw::dft(in[i], out2[i], FFTW_FORWARD);
|
||||
|
||||
// BOOST_REQUIRE(out2 == out);
|
||||
//}
|
||||
|
||||
//BOOST_AUTO_TEST_CASE(fftw_many2_from_2){
|
||||
// multi::array<complex, 2> in({5, 6}); randomizer<complex>{}(in);
|
||||
// multi::array<complex, 2> out({5, 6});
|
||||
// fftw::dft({true, true}, in, out, FFTW_FORWARD);
|
||||
|
||||
// multi::array<complex, 2> out2({5, 6});
|
||||
// fftw::dft(in, out2, FFTW_FORWARD);
|
||||
// BOOST_REQUIRE(out2 == out);
|
||||
//}
|
||||
|
||||
//BOOST_AUTO_TEST_CASE(fftw_4D){
|
||||
// multi::array<complex, 4> const in = []{
|
||||
// multi::array<complex, 4> in({10, 10, 10, 10}); in[2][3][4][5] = 99.; return in;
|
||||
// }();
|
||||
// auto fwd = multi::fftw::dft({true, true, true, true}, in, fftw::forward);
|
||||
// BOOST_REQUIRE(in[2][3][4][5] == 99.);
|
||||
//}
|
||||
|
||||
//BOOST_AUTO_TEST_CASE(fftw_4D_many){
|
||||
|
||||
// auto const in = []{
|
||||
// multi::array<complex, 4> in({97, 95, 101, 10}, 0.);
|
||||
// in[2][3][4][5] = 99.; return in;
|
||||
// }();
|
||||
// auto fwd = multi::fftw::dft({true, true, true, false}, in, fftw::forward);
|
||||
// BOOST_REQUIRE( in[2][3][4][5] == 99. );
|
||||
|
||||
// multi::array<complex, 4> out(extensions(in));
|
||||
// multi::fftw::many_dft(begin(unrotated(in)), end(unrotated(in)), begin(unrotated(out)), fftw::forward);
|
||||
// BOOST_REQUIRE( out == fwd );
|
||||
|
||||
//}
|
||||
|
||||
//BOOST_AUTO_TEST_CASE(cufft_many_2D){
|
||||
// auto const in = []{
|
||||
// multi::array<complex, 3> ret({10, 10, 10});
|
||||
// std::generate(ret.data_elements(), ret.data_elements() + ret.num_elements(),
|
||||
// [](){return complex{std::rand()*1./RAND_MAX, std::rand()*1./RAND_MAX};}
|
||||
// );
|
||||
// return ret;
|
||||
// }();
|
||||
// multi::array<complex, 3> out(extensions(in));
|
||||
// multi::fftw::many_dft((in<<1).begin(), (in<<1).end(), (out<<1).begin(), multi::fftw::forward);
|
||||
|
||||
// multi::array<complex, 3> out2(extensions(in));
|
||||
// multi::fftw::dft({true, false, true}, in, out2, multi::fftw::forward);
|
||||
// BOOST_REQUIRE( out == out2 );
|
||||
//}
|
||||
|
||||
//BOOST_AUTO_TEST_CASE(fftw_5D){
|
||||
// multi::array<complex, 5> in({4, 5, 6, 7, 8});
|
||||
// in[2][3][4][5][6] = 99.;
|
||||
// auto fwd = multi::fftw::dft(in, fftw::forward);
|
||||
// BOOST_REQUIRE(in[2][3][4][5][6] == 99.);
|
||||
//}
|
||||
|
||||
//BOOST_AUTO_TEST_CASE(fftw_1D_power){
|
||||
// multi::array<complex, 1> in(N, 0.); assert( size(in) == N );
|
||||
// std::iota(begin(in), end(in), 1.);
|
||||
// multi::array<complex, 1> out(extensions(in));
|
||||
// static_assert( in.dimensionality() == out.dimensionality(), "!");
|
||||
// auto p = multi::fftw_plan_dft(in, out, fftw::forward, FFTW_PRESERVE_INPUT);
|
||||
// fftw_execute(p);
|
||||
// fftw_destroy_plan(p);
|
||||
// BOOST_REQUIRE( (power(in) - power(out)/num_elements(out)) < 1e-17 );
|
||||
//}
|
||||
|
||||
//BOOST_AUTO_TEST_CASE(fftw_2D_power){
|
||||
// multi::array<complex, 2> in({N, N});
|
||||
// std::iota(in.data_elements(), in.data_elements() + in.num_elements(), 1.2);
|
||||
// multi::array<complex, 2> out(in.extensions());
|
||||
// auto p = multi::fftw_plan_dft(in, out, fftw::forward, FFTW_PRESERVE_INPUT);
|
||||
// fftw_execute(p); fftw_destroy_plan(p);
|
||||
// BOOST_REQUIRE( power(in) - power(out)/num_elements(out) < 1e-12 );
|
||||
//}
|
||||
|
||||
//BOOST_AUTO_TEST_CASE(fftw_2D_power_plan){
|
||||
// multi::array<complex, 2> in({16, 16});
|
||||
// std::iota(in.data_elements(), in.data_elements() + in.num_elements(), 1.2);
|
||||
// multi::array<complex, 2> out(in.extensions());
|
||||
// multi::fftw::plan const p{in, out, fftw::forward, FFTW_PRESERVE_INPUT};
|
||||
// p(); //execute(p); //p.execute();
|
||||
// BOOST_REQUIRE( power(in) - power(out)/num_elements(out) < 1e-8 );
|
||||
//}
|
||||
|
||||
//BOOST_AUTO_TEST_CASE(fftw_2D_power_dft){
|
||||
// multi::array<complex, 2> in({16, 16});
|
||||
// std::iota(data_elements(in), data_elements(in) + num_elements(in), 1.2);
|
||||
// multi::array<complex, 2> out(extensions(in));
|
||||
// multi::fftw::dft(in, out, fftw::forward);
|
||||
// BOOST_REQUIRE( power(in) - power(out)/num_elements(out) < 1e-8 );
|
||||
//}
|
||||
|
||||
//BOOST_AUTO_TEST_CASE(fftw_2D_power_dft_out){
|
||||
// multi::array<complex, 2> in({16, 16}); std::iota(data_elements(in), data_elements(in) + num_elements(in), 1.2);
|
||||
// auto out = multi::fftw::dft(in, fftw::forward);
|
||||
// BOOST_REQUIRE( power(in) - power(out)/num_elements(out) < 1e-8 );
|
||||
//}
|
||||
|
||||
//BOOST_AUTO_TEST_CASE(fftw_2D_power_dft_out_default){
|
||||
// multi::array<complex, 2> in({16, 16}); std::iota(data_elements(in), data_elements(in) + num_elements(in), 1.2);
|
||||
// auto out = multi::fftw::dft(in, fftw::forward);
|
||||
// BOOST_REQUIRE( power(in) - power(out)/num_elements(out) < 1e-8 );
|
||||
//}
|
||||
|
||||
//BOOST_AUTO_TEST_CASE(fftw_3D_power){
|
||||
// multi::array<complex, 3> in({4, 4, 4}); std::iota(in.data_elements(), in.data_elements() + in.num_elements(), 1.2);
|
||||
// multi::array<complex, 3> out = fftw::dft(in, fftw::forward);
|
||||
// BOOST_REQUIRE( std::abs(power(in) - power(out)/num_elements(out)) < 1e-10 );
|
||||
//}
|
||||
|
||||
//BOOST_AUTO_TEST_CASE(fftw_3D_power_in_place){
|
||||
// multi::array<complex, 3> io({4, 4, 4}); std::iota(io.data_elements(), io.data_elements() + io.num_elements(), 1.2);
|
||||
// auto powerin = power(io);
|
||||
// fftw::dft_inplace(io, fftw::forward);
|
||||
// BOOST_REQUIRE( powerin - power(io)/num_elements(io) < 1e-10 );
|
||||
//}
|
||||
|
||||
//BOOST_AUTO_TEST_CASE(fftw_3D_power_in_place_over_ref_inplace){
|
||||
// multi::array<complex, 3> io({4, 4, 4}); std::iota(io.data_elements(), io.data_elements() + io.num_elements(), 1.2);
|
||||
// auto powerin = power(io);
|
||||
//// fftw::dft_inplace(multi::array_ref<complex, 3>(io.data(), io.extensions()), fftw::forward);
|
||||
//// fftw::dft_inplace(multi::array_ref<complex, 3>(data_elements(io), extensions(io)), fftw::forward);
|
||||
// fftw::dft_inplace(io(), fftw::forward);
|
||||
// BOOST_REQUIRE( powerin - power(io)/num_elements(io) < 1e-10 );
|
||||
//}
|
||||
|
||||
//BOOST_AUTO_TEST_CASE(fftw_3D_power_out_of_place_over_ref){
|
||||
// multi::array<complex, 3> in({4, 4, 4});
|
||||
// std::iota(in.data_elements(), in.data_elements()+in.num_elements(), 1.2);
|
||||
// multi::array<complex, 3> out({4, 4, 4});
|
||||
// out() = fftw::dft(in.protect(), fftw::forward);
|
||||
// BOOST_REQUIRE( power(in) - power(out)/num_elements(out) < 1e-10 );
|
||||
//}
|
||||
|
||||
//BOOST_AUTO_TEST_CASE(fftw_3D_power_out_of_place_over_temporary){
|
||||
// double powerin;
|
||||
// auto f = [&](){
|
||||
// multi::array<complex, 3> in({4, 4, 4});
|
||||
// std::iota(data_elements(in), data_elements(in)+num_elements(in), 1.2);
|
||||
// powerin = power(in);
|
||||
// return in;
|
||||
// };
|
||||
// auto out = fftw::dft(f(), fftw::forward);
|
||||
// BOOST_REQUIRE( std::abs(powerin - power(out)/num_elements(out)) < 1e-10 );
|
||||
//}
|
||||
|
||||
//BOOST_AUTO_TEST_CASE(fftw_2D_transposition_square_inplace){
|
||||
// multi::array<complex, 2> in = {
|
||||
// {11., 12.},
|
||||
// {21., 22.}
|
||||
// };
|
||||
// BOOST_REQUIRE( in[1][0] == 21. );
|
||||
|
||||
// multi::fftw::copy(in, rotated(in));
|
||||
// BOOST_TEST( in[0][1].real() == 21. );
|
||||
// BOOST_TEST( in[0][1].imag() == 0. );
|
||||
//}
|
||||
|
||||
//BOOST_AUTO_TEST_CASE(fftw_4D_inq_poisson){
|
||||
|
||||
// multi::array<complex, 4> const in = []{
|
||||
// multi::array<complex, 4> in({50, 100, 137, 1});
|
||||
// std::iota(data_elements(in), data_elements(in)+num_elements(in), 1.2);
|
||||
// return in;
|
||||
// }();
|
||||
//
|
||||
// multi::array<complex, 4> out(extensions(in));
|
||||
// multi::fftw::dft({0, 1, 1, 0}, in, out);
|
||||
|
||||
// BOOST_TEST( power(in) == power(out)/std::get<1>(sizes(out))/std::get<2>(sizes(out)) , boost::test_tools::tolerance(1e-10) );
|
||||
|
||||
//}
|
||||
|
|
@ -1,37 +0,0 @@
|
|||
#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4-*-
|
||||
$CXX $0 -o $0x -lfftw3 -lboost_unit_test_framework -ftemplate-backtrace-limit=0&&$0x&&rm $0x;exit
|
||||
#endif
|
||||
// © Alfredo A. Correa 2020
|
||||
|
||||
#define BOOST_TEST_MODULE "C++ Unit Tests for Multi FFTW adaptor (cpu) with thrust complex"
|
||||
#define BOOST_TEST_DYN_LINK
|
||||
#include<boost/test/unit_test.hpp>
|
||||
|
||||
#include "../../fftw.hpp"
|
||||
|
||||
#include<complex>
|
||||
#include <thrust/complex.h>
|
||||
|
||||
namespace multi = boost::multi;
|
||||
|
||||
// Forward 2D transform of a thrust::complex array, computed once from `in`
// and once from a copy of it; both results must be equal.
// NOTE(review): `complex` is thrust::complex<double> here, so `in` and `in_t`
// have the same element type and the comparison is thrust-vs-thrust. Confirm
// whether `in` was meant to use std::complex<double>.
BOOST_AUTO_TEST_CASE(fftw_2D_identity){

	using complex = thrust::complex<double>;
	complex const I{0, 1};

	multi::array<complex, 2> const in = {
		{  1. + 2.*I,  9. - 1.*I, 2. +  4.*I},
		{  3. + 3.*I,  7. - 4.*I, 1. +  9.*I},
		{  4. + 1.*I,  5. + 3.*I, 2. +  4.*I},
		{  3. - 1.*I,  8. + 7.*I, 2. +  1.*I},
		{ 31. - 1.*I, 18. + 7.*I, 2. + 10.*I}
	};
	auto fwd = multi::fftw::dft({true, true}, in, multi::fftw::forward);

	multi::array<thrust::complex<double>, 2> const in_t = in;
	auto fwd_t = multi::fftw::dft({true, true}, in_t, multi::fftw::forward);

	BOOST_REQUIRE( fwd == fwd_t );

}
|
||||
|
|
@ -1,120 +0,0 @@
|
|||
#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4-*-
|
||||
$CXXX $CXXFLAGS -O3 $0 -o $0x -DHAVE_FFTW3_THREADS -lfftw3 -lfftw3_threads -lboost_unit_test_framework -lboost_timer&&$0x&&rm $0x;exit
|
||||
#endif
|
||||
// © Alfredo A. Correa 2020
|
||||
|
||||
#define BOOST_TEST_MODULE "C++ Unit Tests for Multi FFTW transpose"
|
||||
#define BOOST_TEST_DYN_LINK
|
||||
#include<boost/test/unit_test.hpp>
|
||||
#include<boost/timer/timer.hpp>
|
||||
|
||||
#include "../../fftw.hpp"
|
||||
|
||||
#include<complex>
|
||||
|
||||
namespace multi = boost::multi;
|
||||
|
||||
// Times several ways of transposing a 10137 x 9973 complex matrix: through
// an auxiliary array, fftw::transpose, and fftw::copy of a moved, transposed
// array, with the plan thread count set to 1, 2, 3 and 4. The fftw variants
// must keep the original data pointer.
BOOST_AUTO_TEST_CASE(fftw_transpose){

//	multi::fftw::initialize_threads();
	multi::fftw::plan::with_nthreads(1);

	using complex = std::complex<double>;
	using matrix  = multi::array<complex, 2>;

	auto const in = []{
		matrix ret({10137, 9973});
		auto const random_complex = []{
			double const re = std::rand()*1./RAND_MAX;
			double const im = std::rand()*1./RAND_MAX;
			return complex{re, im};
		};
		std::generate(ret.data_elements(), ret.data_elements() + ret.num_elements(), random_complex);
		std::cout<<"memory size "<< ret.num_elements()*sizeof(complex)/1e6 <<" MB\n";
		return ret;
	}();

	// Move a copy of `in` through fftw::copy into a NEW array.
	auto const transpose_into_new = [&](char const* timer_format){
		matrix out = in;
		auto p = out.data_elements();
		{
			boost::timer::auto_cpu_timer t{timer_format};
			matrix out2 = multi::fftw::copy( ~move(out) );
			BOOST_REQUIRE( out2.data_elements() == p );
			BOOST_REQUIRE( out2[35][79] == in[79][35] );
		}
	};

	// Move a copy of `in` through fftw::copy and assign the result back to itself.
	auto const transpose_into_self = [&](char const* timer_format){
		matrix out = in;
		auto p = out.data_elements();
		{
			boost::timer::auto_cpu_timer t{timer_format};
			out = multi::fftw::copy( ~move(out) );
			BOOST_REQUIRE( out.data_elements() == p );
			BOOST_REQUIRE( out[35][79] == in[79][35] );
		}
	};

	{  // baseline: transpose into an auxiliary array, then move it back
		matrix out = in;
		{
			boost::timer::auto_cpu_timer t{"transposition with aux %ws wall, CPU (%p%)\n"};
			matrix aux = ~out;
			out = std::move(aux);
			BOOST_REQUIRE( out[35][79] == in[79][35] );
		}
	}

	transpose_into_new("fftw trans mve 1 thread %ws wall, CPU (%p%)\n");

	{  // dedicated transpose function
		matrix out = in;
		auto p = out.data_elements();
		{
			boost::timer::auto_cpu_timer t{"fftw transpose fun thread %ws wall, CPU (%p%)\n"};
			multi::fftw::transpose( out );
			BOOST_REQUIRE( out.data_elements() == p );
			BOOST_REQUIRE( out[35][79] == in[79][35] );
		}
		BOOST_REQUIRE( out == ~in );
	}

	transpose_into_self("fftw transpose 1 thread %ws wall, CPU (%p%)\n");

	multi::fftw::plan::with_nthreads(2);
	transpose_into_new ("fftw trans mve 2 thread %ws wall, CPU (%p%)\n");
	transpose_into_self("fftw transpose 2 threads %ws wall, CPU (%p%)\n");

	multi::fftw::plan::with_nthreads(3);
	transpose_into_self("fftw transpose 3 threads %ws wall, CPU (%p%)\n");

	multi::fftw::plan::with_nthreads(4);
	transpose_into_self("fftw transpose 4 threads %ws wall, CPU (%p%)\n");
}
|
||||
|
|
@ -1,114 +0,0 @@
|
|||
#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4-*-
|
||||
$CXXX $CXXFLAGS -Ofast $0 -o $0x -DHAVE_FFTW3_THREADS -lfftw3 -lfftw3_threads -lboost_unit_test_framework -lboost_timer&&$0x&&rm $0x;exit
|
||||
#endif
|
||||
// © Alfredo A. Correa 2020
|
||||
|
||||
#define BOOST_TEST_MODULE "C++ Unit Tests for Multi FFTW transpose"
|
||||
#define BOOST_TEST_DYN_LINK
|
||||
#include<boost/test/unit_test.hpp>
|
||||
#include<boost/timer/timer.hpp>
|
||||
|
||||
#include "../../fftw.hpp"
|
||||
|
||||
#include<complex>
|
||||
|
||||
namespace multi = boost::multi;
|
||||
|
||||
using complex = std::complex<double>; complex const I{0, 1};
|
||||
|
||||
// Times several ways of transposing a large square complex matrix and checks,
// for each of them, that the result equals the transposed view of the input.
// Every variant starts from a fresh copy of the same input `in`.
BOOST_AUTO_TEST_CASE(fftw_transpose){

	multi::fftw::initialize_threads();

	// 8192 x 8192 input filled with std::rand()-based values; built once and
	// only read afterwards.
	auto const in = []{
		multi::array<complex, 2> ret({8192, 8192});
		std::generate(ret.data_elements(), ret.data_elements() + ret.num_elements(),
			[](){return std::rand()*1./RAND_MAX + std::rand()*1./RAND_MAX*I;}
		);
		std::cout<<"memory size "<< ret.num_elements()*sizeof(complex)/1e6 <<" MB\n";
		return ret;
	}();

	multi::fftw::plan::with_nthreads(1);
	{
		// Variant 1: multi::fftw::transpose applied directly to `out`.
		multi::array<complex, 2> out = in;
		auto const p = out.data_elements();
		{
			// The label differs from variant 2 so that the two timings can be
			// told apart in the output (they used to print the same text).
			boost::timer::auto_cpu_timer t{"fftw transpose 1 thread %ws wall, CPU (%p%)\n"};
			multi::fftw::transpose( out );
			BOOST_REQUIRE( out.data_elements() == p );  // element storage pointer unchanged
			BOOST_REQUIRE( out[35][79] == in[79][35] );
		}
		BOOST_REQUIRE( out == ~in );
	}
	{
		// Variant 2: assign the fftw copy of the transposed, moved-from array
		// back into `out`.
		multi::array<complex, 2> out = in;
		auto const p = out.data_elements();
		{
			boost::timer::auto_cpu_timer t{"fftw trans mve 1 thread %ws wall, CPU (%p%)\n"};
			out = multi::fftw::copy( transposed( move(out) ) );
			BOOST_REQUIRE( out.data_elements() == p );  // element storage pointer unchanged
			BOOST_REQUIRE( out[35][79] == in[79][35] );
		}
		BOOST_REQUIRE( out == ~in );
	}

	multi::fftw::plan::with_nthreads(2);
	{
		// Same as variant 2 but written with operator~ and run after
		// requesting 2 threads.
		multi::array<complex, 2> out = in;
		auto const p = out.data_elements();
		{
			boost::timer::auto_cpu_timer t{"fftw trans mve 2 thread %ws wall, CPU (%p%)\n"};
			out = multi::fftw::copy( ~move(out) );
			BOOST_REQUIRE( out.data_elements() == p );
			BOOST_REQUIRE( out[35][79] == in[79][35] );
		}
		BOOST_REQUIRE( out == ~in );
	}

	multi::fftw::plan::with_nthreads(4);
	{
		// As above, after requesting 4 threads.
		multi::array<complex, 2> out = in;
		auto const p = out.data_elements();
		{
			boost::timer::auto_cpu_timer t{"fftw trans mve 4 thread %ws wall, CPU (%p%)\n"};
			out = multi::fftw::copy( ~move(out) );
			BOOST_REQUIRE( out.data_elements() == p );
			BOOST_REQUIRE( out[35][79] == in[79][35] );
		}
		BOOST_REQUIRE( out == ~in );
	}
	{
		// Baseline: transpose through a second array of the same extensions,
		// then move it into `out`.
		multi::array<complex, 2> out = in;
		multi::array<complex, 2> aux(extensions(out));
		{
			boost::timer::auto_cpu_timer t{"auxiliary copy %ws wall, CPU (%p%)\n"};
			aux = ~out;
			out = std::move(aux);
			BOOST_REQUIRE( out[35][79] == in[79][35] );
		}
		BOOST_REQUIRE( out == ~in );
	}
	{
		// Baseline: element swaps, visiting the pairs with j < i.
		multi::array<complex, 2> out = in;
		{
			boost::timer::auto_cpu_timer t{"transposition with loop %ws wall, CPU (%p%)\n"};
			for(auto i: extension(out)){
				for(auto j = 0L; j != i; ++j){
					std::swap(out[i][j], out[j][i]);
				}
			}
			BOOST_REQUIRE( out[35][79] == in[79][35] );
		}
		BOOST_REQUIRE( out == ~in );
	}
	{
		// Baseline: element swaps, visiting the pairs with j > i.
		multi::array<complex, 2> out = in;
		{
			boost::timer::auto_cpu_timer t{"transposition with loop 2 %ws wall, CPU (%p%)\n"};
			for(auto i = 0L; i != out.size(); ++i){
				for(auto j = i + 1; j != out.size(); ++j){
					std::swap(out[i][j], out[j][i]);
				}
			}
			BOOST_REQUIRE( out[35][79] == in[79][35] );
		}
		BOOST_REQUIRE( out == ~in );
	}
}
|
||||
|
|
@ -1,4 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#include "lapack/getrf.hpp"
|
||||
|
|
@ -1,228 +0,0 @@
|
|||
#ifdef COMPILATION_INSTRUCTIONS
|
||||
(echo "#include\""$0"\"" > $0x.cpp) && clang++ `#-DNDEBUG` -O3 -std=c++14 -Wall -Wextra -Wpedantic -Wfatal-errors -D_TEST_MULTI_ADAPTORS_LAPACK_CORE -DADD_ $0x.cpp -o $0x.x -lblas -llapack && time $0x.x $@ && rm -f $0x.x $0x.cpp; exit
|
||||
#endif
|
||||
// Alfredo A. Correa 2019 ©
|
||||
|
||||
#ifndef MULTI_ADAPTORS_LAPACK_CORE_HPP
|
||||
#define MULTI_ADAPTORS_LAPACK_CORE_HPP
|
||||
|
||||
//#include<iostream>
|
||||
#include<algorithm> // std::max
#include<cassert>
#include<complex>
#include<stdexcept> // std::logic_error

//#include <cblas/cblas.h>
#include<lapacke.h>
|
||||
|
||||
// Single-letter shorthands for the scalar types (and void) used by the
// prototype/wrapper generator macros of this header. `c` and `z` are written
// in terms of `s` and `d`. All five are #undef'd further down in this header.
#define s float
|
||||
#define d double
|
||||
#define c std::complex<s>
|
||||
#define z std::complex<d>
|
||||
#define v void
|
||||
|
||||
// Integer and character arguments are declared as const references in the
// prototype macros below.
#define INT int
|
||||
#define INTEGER INT const&
|
||||
|
||||
//#define N INTEGER n
|
||||
#define CHARACTER char const&
|
||||
#define UPLO CHARACTER
|
||||
#define JOBZ CHARACTER
|
||||
// LAPACK(name) appends a trailing underscore to `name`.
#define LAPACK(NamE) NamE##_
|
||||
#define LWORK INTEGER lwork
|
||||
#define LIWORK INTEGER liwork
|
||||
#define IWORK int*
|
||||
|
||||
// Prototype generators for <T>potrf_, <T>syev_, <T>syevd_ and <T>heev_.
// Within this header they are only referenced from commented-out lines.
// N, LDA and INFO are plain parameter names here, not macros.
#define xPOTRF(T) v LAPACK(T##potrf)(UPLO, int const& N, T*, int const& LDA, int& INFO)
|
||||
#define xSYEV(T) v LAPACK(T##syev) (JOBZ, UPLO, int const& N, T*, int const& LDA, T*, T*, LWORK, int& INFO)
|
||||
#define xSYEVD(T) v LAPACK(T##syevd)(JOBZ, UPLO, int const& N, T*, int const& LDA, T*, T*, LWORK, IWORK, LIWORK, int& INFO)
|
||||
// NOTE(review): this has the same parameter list as xSYEV; the reference
// LAPACK ?heev routines appear to take an additional RWORK argument and a
// real-valued W - confirm before enabling this prototype.
#define xHEEV(T) v LAPACK(T##heev) (JOBZ, UPLO, int const& N, T*, int const& LDA, T*, T*, LWORK, int& INFO)
|
||||
|
||||
// Fortran-flavoured keywords used to spell the xGETRF / xGETRS prototypes.
#define subroutine void
|
||||
#define integer int const&
|
||||
#define integer_out int&
|
||||
#define integer_ptr int*
|
||||
#define integer_cptr int const*
|
||||
#define character char const&
|
||||
|
||||
// http://www.netlib.org/lapack/explore-html/dd/d9a/group__double_g_ecomputational_ga0019443faea08275ca60a734d0593e60.html
|
||||
// Prototype generator for the LAPACK routine <T>getrf (declared as T##getrf_).
// The argument notes are condensed from the LAPACK reference text.
#define xGETRF(T) \
subroutine T##getrf_( \
	integer     M,    /* number of rows of the matrix A, M >= 0                  */ \
	integer     N,    /* number of columns of the matrix A, N >= 0               */ \
	T*          A,    /* in:  the M-by-N matrix to be factored                   */ \
	                  /* out: the factors L and U from the factorization         */ \
	integer     LDA,  /* leading dimension of the array A, LDA >= max(1,M)       */ \
	integer_ptr IPIV, /* pivot indices; for 1 <= i <= min(M,N), row i of the     */ \
	                  /* matrix was interchanged with row IPIV(i)                */ \
	integer_out INFO  /* = 0: successful exit                                    */ \
	                  /* < 0: if INFO = -i, the i-th argument had an illegal     */ \
	                  /*      value                                              */ \
	                  /* > 0: if INFO = i, U(i,i) is exactly zero; the           */ \
	                  /*      factorization has been completed, but the factor U */ \
	                  /*      is exactly singular, and division by zero will     */ \
	                  /*      occur if it is used to solve a system of equations */ \
)
|
||||
|
||||
// http://www.netlib.org/lapack/explore-html/d8/ddc/group__real_g_ecomputational_gaa00bcf4d83a118cb6f0b6619d6ffaa24.html
|
||||
// Prototype generator for the LAPACK routine <T>getrs (declared as T##getrs_),
// which solves a linear system using the LU factors computed by <T>getrf.
// INFO is an output of the routine, so it is declared with `integer_out`
// (a mutable reference), matching xGETRF.
#define xGETRS(T) \
subroutine T##getrs_( \
	character    TRANS, /* form of the system of equations:                       */ \
	                    /*  = 'N': A    * X = B (no transpose)                    */ \
	                    /*  = 'T': A**T * X = B (transpose)                       */ \
	                    /*  = 'C': A**H * X = B (conjugate transpose; the same    */ \
	                    /*         as 'T' when T is a real type)                  */ \
	integer      N,     /* order of the matrix A, N >= 0                          */ \
	integer      NRHS,  /* number of right hand sides, i.e. the number of columns */ \
	                    /* of the matrix B, NRHS >= 0                             */ \
	T const*     A,     /* the factors L and U from the factorization A = P*L*U   */ \
	                    /* as computed by <T>getrf                                */ \
	integer      LDA,   /* leading dimension of the array A, LDA >= max(1,N)      */ \
	integer_cptr IPIV,  /* pivot indices from <T>getrf; for 1<=i<=N, row i of the */ \
	                    /* matrix was interchanged with row IPIV(i)               */ \
	T*           B,     /* in:  the right hand side matrix B                      */ \
	                    /* out: the solution matrix X                             */ \
	integer      LDB,   /* leading dimension of the array B, LDB >= max(1,N)      */ \
	integer_out  INFO   /* = 0: successful exit                                   */ \
	                    /* < 0: if INFO = -i, the i-th argument had an illegal    */ \
	                    /*      value                                             */ \
)
|
||||
|
||||
// TODO // http://www.netlib.org/lapack/explore-html/d7/d3b/group__double_g_esolve_ga5ee879032a8365897c3ba91e3dc8d512.html
|
||||
|
||||
|
||||
extern "C"{
|
||||
//xGETRF(s) ; xGETRF(d) ; xGETRF(c) ; xGETRF(z) ;
|
||||
//xGETRS(s) ; xGETRS(d) ; xGETRS(c) ; xGETRS(z) ;
|
||||
}
|
||||
|
||||
namespace core{
|
||||
// http://www.netlib.org/lapack/explore-html/da/d30/a18643_ga5b625680e6251feb29e386193914981c.html
|
||||
|
||||
int getrf(lapack_int m, lapack_int n, double* A, lapack_int lda, int* ipiv){
|
||||
assert( m >= 0 );
|
||||
assert( n >= 0 );
|
||||
assert( lda >= std::max(lapack_int{1}, m) );
|
||||
int info;
|
||||
dgetrf_(&m, &n, A, &lda, ipiv, &info);
|
||||
assert(info >= 0);
|
||||
return info;
|
||||
}
|
||||
|
||||
void getrs(char trans, lapack_int const n, lapack_int const nrhs, double const* A, lapack_int const lda, int const* ipiv, double* B, lapack_int const ldb){
|
||||
assert( trans == 'T' or trans == 'N' or trans == 'C' );
|
||||
assert( n >= 0 );
|
||||
assert( nrhs >= 0 );
|
||||
assert( lda >= std::max(1, n) );
|
||||
int info;
|
||||
dgetrs_(&trans, &n, &nrhs, A, &lda, ipiv, B, &ldb, &info);
|
||||
switch(info){
|
||||
case -1: throw std::logic_error{"transa ≠ 'N', 'T', or 'C'"};
|
||||
case -2: throw std::logic_error{"n < 0" };
|
||||
case -3: throw std::logic_error{"nrhs < 0" };
|
||||
case -4: throw std::logic_error{"n > lda" };
|
||||
case -5: throw std::logic_error{"lda ≤ 0" };
|
||||
case -6: throw std::logic_error{"n > ldb" };
|
||||
case -7: throw std::logic_error{"ldb ≤ 0" };
|
||||
case -8: throw std::logic_error{"error!" };
|
||||
}
|
||||
assert(info == 0 );
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
namespace lapack{
|
||||
|
||||
struct context{
|
||||
template<class... Args> static auto getrf(Args&&... args)->decltype(core::getrf(args...)){return core::getrf(args...);}
|
||||
template<class... Args> static auto getrs(Args&&... args)->decltype(core::getrs(args...)){return core::getrs(args...);}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
extern "C"{
|
||||
//xPOTRF(s) ; xPOTRF(d) ;
|
||||
//xPOTRF(c) ; xPOTRF(z) ;
|
||||
|
||||
//xSYEV(s) ; xSYEV(d) ;
|
||||
//xSYEVD(s) ; xSYEVD(d) ;
|
||||
// xHEEV(c) ; xHEEV(z) ;
|
||||
}
|
||||
|
||||
// Remove helper macros defined earlier in this header.
// NOTE(review): integer_out, integer_ptr and integer_cptr are defined above
// but are not #undef'd anywhere in this header - confirm whether they are
// meant to stay visible to includers.
#undef subroutine
|
||||
#undef integer
|
||||
#undef character
|
||||
|
||||
// NOTE(review): INFO, N and LDA are never #define'd in this header (they only
// appear as parameter names), so those three #undef lines have no effect here.
#undef JOBZ
|
||||
#undef UPLO
|
||||
#undef INFO
|
||||
#undef CHARACTER
|
||||
#undef N
|
||||
#undef LDA
|
||||
|
||||
#undef INTEGER
|
||||
#undef INT
|
||||
|
||||
|
||||
#define xpotrf(T) template<class S> v potrf(char uplo, S n, T *x, S incx, int& info){LAPACK(T##potrf)(uplo, n, x, incx, info);}
|
||||
|
||||
namespace core{
|
||||
xpotrf(s) xpotrf(d)
|
||||
xpotrf(c) xpotrf(z)
|
||||
}
|
||||
|
||||
// http://www.netlib.org/lapack/explore-html/d2/d8a/group__double_s_yeigen_ga442c43fca5493590f8f26cf42fed4044.html
|
||||
#define xsyev(T) template<class S> v syev(char jobz, char uplo, S n, T* a, S lda, T* w, T* work, S lwork, int& info){LAPACK(T##syev)(jobz, uplo, n, a, lda, w, work, lwork, info);}
|
||||
// http://www.netlib.org/lapack/explore-html/d2/d8a/group__double_s_yeigen_ga77dfa610458b6c9bd7db52533bfd53a1.html
|
||||
#define xsyevd(T) template<class S> v syevd(char jobz, char uplo, S n, T* a, S lda, T* w, T* work, S lwork, int* iwork, S liwork, int& info){ \
|
||||
if(n <= 1 ){assert(lwork >= 1 ); assert(liwork >=1 );} \
|
||||
if(jobz == 'N' and n > 1){assert(lwork >= 2*n+1 ); assert(liwork >= 1 );} \
|
||||
if(jobz == 'V' and n > 1){assert(lwork >= 1 + 6*n + 2*n*n); assert(liwork >= 3 + 5*n);} \
|
||||
LAPACK(T##syevd)(jobz, uplo, n, a, lda, w, work, lwork, iwork, liwork, info); \
|
||||
}
|
||||
#define xheev(T) template<class S> v heev(char jobz, char uplo, S n, T* a, S lda, T* w, T* work, S lwork, int& info){LAPACK(T##heev)(jobz, uplo, n, a, lda, w, work, lwork, info);}
|
||||
|
||||
namespace core{
|
||||
xsyev (s) xsyev (d)
|
||||
xsyevd(s) xsyevd(d)
|
||||
xheev(c) xheev(z)
|
||||
}
|
||||
|
||||
#undef s
|
||||
#undef d
|
||||
#undef c
|
||||
#undef z
|
||||
#undef v
|
||||
|
||||
#define TRANS const char& trans
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#if _TEST_MULTI_ADAPTORS_LAPACK_CORE
|
||||
|
||||
#include "../../array.hpp"
|
||||
#include "../../utility.hpp"
|
||||
|
||||
#include<iostream>
|
||||
#include<numeric>
|
||||
#include<vector>
|
||||
|
||||
namespace multi = boost::multi;
|
||||
using std::cout;
|
||||
|
||||
int main(){
|
||||
using core::potrf;
|
||||
|
||||
std::vector<double> v = {
|
||||
2., 1.,
|
||||
1., 2.
|
||||
};
|
||||
cout
|
||||
<< v[0] <<'\t'<< v[1] <<'\n'
|
||||
<< v[2] <<'\t'<< v[3] <<'\n' << std::endl
|
||||
;
|
||||
int info;
|
||||
potrf('U', 2, v.data(), 2, info);
|
||||
cout << "error " << info << std::endl;
|
||||
cout
|
||||
<< v[0] <<'\t'<< v[1] <<'\n'
|
||||
<< v[2] <<'\t'<< v[3] <<'\n'
|
||||
;
|
||||
cout << std::endl;
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue