qiskit/Cargo.lock

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

564 lines
14 KiB
Plaintext
Raw Normal View History

# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "ahash"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47"
dependencies = [
"getrandom",
"once_cell",
"version_check",
]
[[package]]
name = "autocfg"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "bitflags"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "crossbeam-channel"
version = "0.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c02a4d71819009c192cf4872265391563fd6a84c81ff2c0f2a7026ca4c1d85c"
dependencies = [
"cfg-if",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-deque"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6455c0ca19f0d2fbf751b908d5c55c1f5cbc65e03c4225427254b46890bdde1e"
dependencies = [
"cfg-if",
"crossbeam-epoch",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-epoch"
version = "0.9.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07db9d94cbd326813772c968ccd25999e5f8ae22f4f8d1b11effa37ef6ce281d"
dependencies = [
"autocfg",
"cfg-if",
"crossbeam-utils",
"memoffset",
"once_cell",
"scopeguard",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ff1f980957787286a554052d03c7aee98d99cc32e09f6d45f0a814133c87978"
dependencies = [
"cfg-if",
"once_cell",
]
[[package]]
name = "either"
version = "1.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457"
[[package]]
name = "getrandom"
version = "0.2.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4eb1a864a501629691edf6c15a593b7a51eebaa1e8468e9ddc623de7c9b58ec6"
dependencies = [
"cfg-if",
"libc",
"wasi",
]
[[package]]
name = "hashbrown"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db0d4cf898abf0081f964436dc980e96670a0f36863e4b83aaacdb65c9d7ccc3"
dependencies = [
"ahash",
"rayon",
]
[[package]]
name = "hermit-abi"
version = "0.1.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
dependencies = [
"libc",
]
Reimplement DenseLayout as a Parallel Algorithm in Rust (#7740) * Reimplement DenseLayout as a Parallel Algorithm in Rust This commit reimplements the core of the DenseLayout transpiler pass in Rust and to run in multiple threads. Previously this algorithm used scipy sparse matrices to create a CSR sparse matrix representation of the coupling graph and iterate over that to find a densely connected subgraph in the coupling graph. This performs and scales well for modest sized circuits and coupling graphs. But as the size of the coupling graphs and circuits grows running the algorithm by iterating over the sparse matrix in Python starts to hit a limit. The underlying traversal can be efficiently executed in parallel using rust as the algorithm iterates over the coupling graph in BFS order for each node to try and find the best subgraph. We can do the BFS traversals in parallel and then iteratively compare the results in parallel until the best is found. This greatly speeds up the execution of the pass, for example running on a 1081 qubit quantum volume circuit on 1081 qubit heavy hexagon coupling graph takes ~134 seconds with the previous iteration and ~0.144 seconds after this commit (on my local workstation with 32 physical cores and 64 logical cores, scaling likely won't be as good on smaller systems). The tradeoff here comes in slightly increased memory consumption as to have a shared representation of the adjacency matrix (and the error) between Python and Rust we use numpy arrays as they can be passed by reference between the languages. In practice this will not matter much until the graphs get truly large (e.g. to represent a 10000 qubit adjacency matrix and error matrix would require ~1.6 GB of memory) and if it does become an issue (either for memory or runtime performance) we can add a shared compressed sparse matrix representation to Qiskit for use in both Python in Rust. * Fix measurement error calculation * Fix lint * Compute adjacency matrix at init instead of run() * Fix test failures This commit fixes the 4 remaining test failures. The results from the rust version of the pass were correct but different than the results from the Python version. This is because the parallel reduce() was comparing in a different order that was returning a different subgraph. This commit reverses the arg order to correct this so the behavior should be identical to the previous implementation * Fix error and matrix building The error matrix building was not working because it was comparing a list of qubits to a tuple. This was used prior to the rust rewrite so we probably were not actually checking noise properties prior to this commit. * Add release notes * Add rust docstring * Update tests for fixed noise awareness In an earlier commit we fixed the noise awareness of the laoyout pass, doing this had a side effect of changing a test that was looking explicitly for the layout found by the pass. Since the pass is now correctly using error rates the best layout found is now different. This commit updates the tests to account for this. * Fix dag qubit count to include registerless bits * Update docs and fix release note typos Co-authored-by: Kevin Hartman <kevin@hart.mn> * Reduce overhead of subgraph creation This commit reduces the overhead of the internal loop over the bfs sorted nodes to create the subgraph by making 2 changes. First, instead of passing around the full bfs sorted list everywhere this commit truncates it to just the nodes we're going to use. We never use any qubits in the bfs sort > num_qubits so we can just truncate the Vec and not have to worry about limiting to the first num_qubits elements. The second change made is that instead of traversing the bfs nodes to check if a node is in the subgraph we create an intermediate set to do a constant time lookup for the membership check. * Truncate bfs sort instead of truncating result This commit adjusts the truncation logic around the bfs sort. In the previous commit we truncated the bfs result to just the first n qubits (where n is the number of qubits in the circuit) after performing a full traversal and generating the full list. Instead of doing that we can truncate the search when we've found n qubits already and save ourselves the extra work of continuing to traverse the adjacency matrix. * Simplify subgraph creation iterator using filter_map() * Only run multithreaded outside of parallel context This commit adds the env variable based switching to prevent us from running dense layout in parallel when we're already running under parallel_map to prevent overloading the system. Co-authored-by: Kevin Hartman <kevin@hart.mn>
2022-03-11 06:47:34 +08:00
[[package]]
name = "indexmap"
version = "1.9.1"
Reimplement DenseLayout as a Parallel Algorithm in Rust (#7740) * Reimplement DenseLayout as a Parallel Algorithm in Rust This commit reimplements the core of the DenseLayout transpiler pass in Rust and to run in multiple threads. Previously this algorithm used scipy sparse matrices to create a CSR sparse matrix representation of the coupling graph and iterate over that to find a densely connected subgraph in the coupling graph. This performs and scales well for modest sized circuits and coupling graphs. But as the size of the coupling graphs and circuits grows running the algorithm by iterating over the sparse matrix in Python starts to hit a limit. The underlying traversal can be efficiently executed in parallel using rust as the algorithm iterates over the coupling graph in BFS order for each node to try and find the best subgraph. We can do the BFS traversals in parallel and then iteratively compare the results in parallel until the best is found. This greatly speeds up the execution of the pass, for example running on a 1081 qubit quantum volume circuit on 1081 qubit heavy hexagon coupling graph takes ~134 seconds with the previous iteration and ~0.144 seconds after this commit (on my local workstation with 32 physical cores and 64 logical cores, scaling likely won't be as good on smaller systems). The tradeoff here comes in slightly increased memory consumption as to have a shared representation of the adjacency matrix (and the error) between Python and Rust we use numpy arrays as they can be passed by reference between the languages. In practice this will not matter much until the graphs get truly large (e.g. to represent a 10000 qubit adjacency matrix and error matrix would require ~1.6 GB of memory) and if it does become an issue (either for memory or runtime performance) we can add a shared compressed sparse matrix representation to Qiskit for use in both Python in Rust. * Fix measurement error calculation * Fix lint * Compute adjacency matrix at init instead of run() * Fix test failures This commit fixes the 4 remaining test failures. The results from the rust version of the pass were correct but different than the results from the Python version. This is because the parallel reduce() was comparing in a different order that was returning a different subgraph. This commit reverses the arg order to correct this so the behavior should be identical to the previous implementation * Fix error and matrix building The error matrix building was not working because it was comparing a list of qubits to a tuple. This was used prior to the rust rewrite so we probably were not actually checking noise properties prior to this commit. * Add release notes * Add rust docstring * Update tests for fixed noise awareness In an earlier commit we fixed the noise awareness of the laoyout pass, doing this had a side effect of changing a test that was looking explicitly for the layout found by the pass. Since the pass is now correctly using error rates the best layout found is now different. This commit updates the tests to account for this. * Fix dag qubit count to include registerless bits * Update docs and fix release note typos Co-authored-by: Kevin Hartman <kevin@hart.mn> * Reduce overhead of subgraph creation This commit reduces the overhead of the internal loop over the bfs sorted nodes to create the subgraph by making 2 changes. First, instead of passing around the full bfs sorted list everywhere this commit truncates it to just the nodes we're going to use. We never use any qubits in the bfs sort > num_qubits so we can just truncate the Vec and not have to worry about limiting to the first num_qubits elements. The second change made is that instead of traversing the bfs nodes to check if a node is in the subgraph we create an intermediate set to do a constant time lookup for the membership check. * Truncate bfs sort instead of truncating result This commit adjusts the truncation logic around the bfs sort. In the previous commit we truncated the bfs result to just the first n qubits (where n is the number of qubits in the circuit) after performing a full traversal and generating the full list. Instead of doing that we can truncate the search when we've found n qubits already and save ourselves the extra work of continuing to traverse the adjacency matrix. * Simplify subgraph creation iterator using filter_map() * Only run multithreaded outside of parallel context This commit adds the env variable based switching to prevent us from running dense layout in parallel when we're already running under parallel_map to prevent overloading the system. Co-authored-by: Kevin Hartman <kevin@hart.mn>
2022-03-11 06:47:34 +08:00
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "10a35a97730320ffe8e2d410b5d3b69279b98d2c14bdb8b70ea89ecf7888d41e"
Reimplement DenseLayout as a Parallel Algorithm in Rust (#7740) * Reimplement DenseLayout as a Parallel Algorithm in Rust This commit reimplements the core of the DenseLayout transpiler pass in Rust and to run in multiple threads. Previously this algorithm used scipy sparse matrices to create a CSR sparse matrix representation of the coupling graph and iterate over that to find a densely connected subgraph in the coupling graph. This performs and scales well for modest sized circuits and coupling graphs. But as the size of the coupling graphs and circuits grows running the algorithm by iterating over the sparse matrix in Python starts to hit a limit. The underlying traversal can be efficiently executed in parallel using rust as the algorithm iterates over the coupling graph in BFS order for each node to try and find the best subgraph. We can do the BFS traversals in parallel and then iteratively compare the results in parallel until the best is found. This greatly speeds up the execution of the pass, for example running on a 1081 qubit quantum volume circuit on 1081 qubit heavy hexagon coupling graph takes ~134 seconds with the previous iteration and ~0.144 seconds after this commit (on my local workstation with 32 physical cores and 64 logical cores, scaling likely won't be as good on smaller systems). The tradeoff here comes in slightly increased memory consumption as to have a shared representation of the adjacency matrix (and the error) between Python and Rust we use numpy arrays as they can be passed by reference between the languages. In practice this will not matter much until the graphs get truly large (e.g. to represent a 10000 qubit adjacency matrix and error matrix would require ~1.6 GB of memory) and if it does become an issue (either for memory or runtime performance) we can add a shared compressed sparse matrix representation to Qiskit for use in both Python in Rust. * Fix measurement error calculation * Fix lint * Compute adjacency matrix at init instead of run() * Fix test failures This commit fixes the 4 remaining test failures. The results from the rust version of the pass were correct but different than the results from the Python version. This is because the parallel reduce() was comparing in a different order that was returning a different subgraph. This commit reverses the arg order to correct this so the behavior should be identical to the previous implementation * Fix error and matrix building The error matrix building was not working because it was comparing a list of qubits to a tuple. This was used prior to the rust rewrite so we probably were not actually checking noise properties prior to this commit. * Add release notes * Add rust docstring * Update tests for fixed noise awareness In an earlier commit we fixed the noise awareness of the laoyout pass, doing this had a side effect of changing a test that was looking explicitly for the layout found by the pass. Since the pass is now correctly using error rates the best layout found is now different. This commit updates the tests to account for this. * Fix dag qubit count to include registerless bits * Update docs and fix release note typos Co-authored-by: Kevin Hartman <kevin@hart.mn> * Reduce overhead of subgraph creation This commit reduces the overhead of the internal loop over the bfs sorted nodes to create the subgraph by making 2 changes. First, instead of passing around the full bfs sorted list everywhere this commit truncates it to just the nodes we're going to use. We never use any qubits in the bfs sort > num_qubits so we can just truncate the Vec and not have to worry about limiting to the first num_qubits elements. The second change made is that instead of traversing the bfs nodes to check if a node is in the subgraph we create an intermediate set to do a constant time lookup for the membership check. * Truncate bfs sort instead of truncating result This commit adjusts the truncation logic around the bfs sort. In the previous commit we truncated the bfs result to just the first n qubits (where n is the number of qubits in the circuit) after performing a full traversal and generating the full list. Instead of doing that we can truncate the search when we've found n qubits already and save ourselves the extra work of continuing to traverse the adjacency matrix. * Simplify subgraph creation iterator using filter_map() * Only run multithreaded outside of parallel context This commit adds the env variable based switching to prevent us from running dense layout in parallel when we're already running under parallel_map to prevent overloading the system. Co-authored-by: Kevin Hartman <kevin@hart.mn>
2022-03-11 06:47:34 +08:00
dependencies = [
"autocfg",
"hashbrown",
Reimplement DenseLayout as a Parallel Algorithm in Rust (#7740) * Reimplement DenseLayout as a Parallel Algorithm in Rust This commit reimplements the core of the DenseLayout transpiler pass in Rust and to run in multiple threads. Previously this algorithm used scipy sparse matrices to create a CSR sparse matrix representation of the coupling graph and iterate over that to find a densely connected subgraph in the coupling graph. This performs and scales well for modest sized circuits and coupling graphs. But as the size of the coupling graphs and circuits grows running the algorithm by iterating over the sparse matrix in Python starts to hit a limit. The underlying traversal can be efficiently executed in parallel using rust as the algorithm iterates over the coupling graph in BFS order for each node to try and find the best subgraph. We can do the BFS traversals in parallel and then iteratively compare the results in parallel until the best is found. This greatly speeds up the execution of the pass, for example running on a 1081 qubit quantum volume circuit on 1081 qubit heavy hexagon coupling graph takes ~134 seconds with the previous iteration and ~0.144 seconds after this commit (on my local workstation with 32 physical cores and 64 logical cores, scaling likely won't be as good on smaller systems). The tradeoff here comes in slightly increased memory consumption as to have a shared representation of the adjacency matrix (and the error) between Python and Rust we use numpy arrays as they can be passed by reference between the languages. In practice this will not matter much until the graphs get truly large (e.g. to represent a 10000 qubit adjacency matrix and error matrix would require ~1.6 GB of memory) and if it does become an issue (either for memory or runtime performance) we can add a shared compressed sparse matrix representation to Qiskit for use in both Python in Rust. * Fix measurement error calculation * Fix lint * Compute adjacency matrix at init instead of run() * Fix test failures This commit fixes the 4 remaining test failures. The results from the rust version of the pass were correct but different than the results from the Python version. This is because the parallel reduce() was comparing in a different order that was returning a different subgraph. This commit reverses the arg order to correct this so the behavior should be identical to the previous implementation * Fix error and matrix building The error matrix building was not working because it was comparing a list of qubits to a tuple. This was used prior to the rust rewrite so we probably were not actually checking noise properties prior to this commit. * Add release notes * Add rust docstring * Update tests for fixed noise awareness In an earlier commit we fixed the noise awareness of the laoyout pass, doing this had a side effect of changing a test that was looking explicitly for the layout found by the pass. Since the pass is now correctly using error rates the best layout found is now different. This commit updates the tests to account for this. * Fix dag qubit count to include registerless bits * Update docs and fix release note typos Co-authored-by: Kevin Hartman <kevin@hart.mn> * Reduce overhead of subgraph creation This commit reduces the overhead of the internal loop over the bfs sorted nodes to create the subgraph by making 2 changes. First, instead of passing around the full bfs sorted list everywhere this commit truncates it to just the nodes we're going to use. We never use any qubits in the bfs sort > num_qubits so we can just truncate the Vec and not have to worry about limiting to the first num_qubits elements. The second change made is that instead of traversing the bfs nodes to check if a node is in the subgraph we create an intermediate set to do a constant time lookup for the membership check. * Truncate bfs sort instead of truncating result This commit adjusts the truncation logic around the bfs sort. In the previous commit we truncated the bfs result to just the first n qubits (where n is the number of qubits in the circuit) after performing a full traversal and generating the full list. Instead of doing that we can truncate the search when we've found n qubits already and save ourselves the extra work of continuing to traverse the adjacency matrix. * Simplify subgraph creation iterator using filter_map() * Only run multithreaded outside of parallel context This commit adds the env variable based switching to prevent us from running dense layout in parallel when we're already running under parallel_map to prevent overloading the system. Co-authored-by: Kevin Hartman <kevin@hart.mn>
2022-03-11 06:47:34 +08:00
]
[[package]]
name = "indoc"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "05a0bd019339e5d968b37855180087b7b9d512c5046fbd244cf8c95687927d6e"
[[package]]
name = "libc"
version = "0.2.126"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836"
[[package]]
name = "libm"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "33a33a362ce288760ec6a508b94caaec573ae7d3bbbd91b87aa0bad4456839db"
[[package]]
name = "lock_api"
version = "0.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "327fa5b6a6940e4699ec49a9beae1ea4845c6bab9314e4f84ac68742139d8c53"
dependencies = [
"autocfg",
"scopeguard",
]
[[package]]
name = "matrixmultiply"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "add85d4dd35074e6fedc608f8c8f513a3548619a9024b751949ef0e8e45a4d84"
dependencies = [
"rawpointer",
]
[[package]]
name = "memoffset"
version = "0.6.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce"
dependencies = [
"autocfg",
]
[[package]]
name = "ndarray"
version = "0.15.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dec23e6762830658d2b3d385a75aa212af2f67a4586d4442907144f3bb6a1ca8"
dependencies = [
"matrixmultiply",
"num-complex",
"num-integer",
"num-traits",
"rawpointer",
"rayon",
]
[[package]]
name = "num-bigint"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f93ab6289c7b344a8a9f60f88d80aa20032336fe78da341afc91c8a2341fc75f"
dependencies = [
"autocfg",
"num-integer",
"num-traits",
]
[[package]]
name = "num-complex"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ae39348c8bc5fbd7f40c727a9925f03517afd2ab27d46702108b6a7e5414c19"
dependencies = [
"num-traits",
]
[[package]]
name = "num-integer"
version = "0.1.45"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9"
dependencies = [
"autocfg",
"num-traits",
]
[[package]]
name = "num-traits"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd"
dependencies = [
"autocfg",
"libm",
]
[[package]]
name = "num_cpus"
version = "1.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1"
dependencies = [
"hermit-abi",
"libc",
]
[[package]]
name = "numpy"
version = "0.16.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "383ae168529a39fc97cbc1d9d4fa865377731a519bc27553ed96f50594de7c45"
dependencies = [
"libc",
"ndarray",
"num-complex",
"num-traits",
"pyo3",
]
[[package]]
name = "once_cell"
version = "1.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7709cef83f0c1f58f666e746a08b21e0085f7440fa6a29cc194d68aac97a4225"
[[package]]
name = "parking_lot"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f"
dependencies = [
"lock_api",
"parking_lot_core",
]
[[package]]
name = "parking_lot_core"
version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09a279cbf25cb0757810394fbc1e359949b59e348145c643a939a525692e6929"
dependencies = [
"cfg-if",
"libc",
"redox_syscall",
"smallvec",
"windows-sys",
]
[[package]]
name = "ppv-lite86"
version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872"
[[package]]
name = "proc-macro2"
version = "1.0.40"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd96a1e8ed2596c337f8eae5f24924ec83f5ad5ab21ea8e455d3566c69fbcaf7"
dependencies = [
"unicode-ident",
]
[[package]]
name = "pyo3"
version = "0.16.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e6302e85060011447471887705bb7838f14aba43fcb06957d823739a496b3dc"
dependencies = [
"cfg-if",
"hashbrown",
"indoc",
"libc",
"num-bigint",
Replace pauli expectation value cython with multithreaded rust implementation (#7702) * Replace pauli expectation value cython with rust implementation This commit replaces the cython implementation of the pauli expectation value functions with a multithreaded rust implementation. This was done primarily for two reasons, the first and primary reason for this change is because after #7658 this module was the only cython code left in the qiskit-terra repository so unifying on a single compiled language will reduce the maintanence burden in qiskit-terra. The second reason is similar to the rationale in #7658 around why using rust over cython for multi-threaded hybrid python module. The difference here though is unlike in stochastic swap this module isn't as performance critical as it's not nearly as widely used. * Tune single threaded performance for rust sum This commit tunes the sum for the single threaded path. Using the iterator sum() method is very convienent but for the single threaded path it doesn't create the most efficient output. This was causing a regression in performance over the previous cython version. To address that issue, this commit adds a new tuned function which does a chunked sum which the compiler can handle better. It more closely models how we'd do this with vectorized SIMD instructions. As a future step we can look at using simdeez https://github.com/jackmott/simdeez to further optimize this by doing runtime CPU feature detection and leveraging SIMD instrinsics (we might want to look at using `fast_sum()` in the multithreaded path if we do that too). * Add release notes * Fix lint * Add docstring and signature to rust functions * Define parallel threshold as a constant * Add attribution comment to fast_sum() * Rename eval_parallel_env -> getenv_use_multiple_threads * Use inline literal type for size Co-authored-by: Kevin Hartman <kevin@hart.mn> * Add overflow check on num_qubits The functions only work for at most for number of qubits < usize bits anything larger would cause an overflow. While rust provides overflow checking in debug mode it disables this for performance in release mode. Sice we ship binaries in release mode this commit adds an overflow check for the num_qubits argument to ensure that we don't overflow and produce incorrect results. * Remove unecessary setup_requires field from setup.py The setup_requires field in the setup.py is deprecated and has been superseded by the pyproject.toml to define build system dependencies. Since we're already relying on the pyproject.toml to install setuptools-rust for us having the setup_requires line will do nothing but potentially cause issues as it will use an older install mechanism that will potentially conflict with people's environments. * Drop `.iter().take(LANES)`. * Fix typo. Co-authored-by: Kevin Hartman <kevin@hart.mn>
2022-03-11 04:54:24 +08:00
"num-complex",
"parking_lot",
"pyo3-build-config",
"pyo3-ffi",
"pyo3-macros",
"unindent",
]
[[package]]
name = "pyo3-build-config"
version = "0.16.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5b65b546c35d8a3b1b2f0ddbac7c6a569d759f357f2b9df884f5d6b719152c8"
dependencies = [
"once_cell",
"target-lexicon",
]
[[package]]
name = "pyo3-ffi"
version = "0.16.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c275a07127c1aca33031a563e384ffdd485aee34ef131116fcd58e3430d1742b"
dependencies = [
"libc",
"pyo3-build-config",
]
[[package]]
name = "pyo3-macros"
version = "0.16.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "284fc4485bfbcc9850a6d661d627783f18d19c2ab55880b021671c4ba83e90f7"
dependencies = [
"proc-macro2",
"pyo3-macros-backend",
"quote",
"syn",
]
[[package]]
name = "pyo3-macros-backend"
version = "0.16.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "53bda0f58f73f5c5429693c96ed57f7abdb38fdfc28ae06da4101a257adb7faf"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "qiskit-terra"
version = "0.21.0"
dependencies = [
Reimplement DenseLayout as a Parallel Algorithm in Rust (#7740) * Reimplement DenseLayout as a Parallel Algorithm in Rust This commit reimplements the core of the DenseLayout transpiler pass in Rust and to run in multiple threads. Previously this algorithm used scipy sparse matrices to create a CSR sparse matrix representation of the coupling graph and iterate over that to find a densely connected subgraph in the coupling graph. This performs and scales well for modest sized circuits and coupling graphs. But as the size of the coupling graphs and circuits grows running the algorithm by iterating over the sparse matrix in Python starts to hit a limit. The underlying traversal can be efficiently executed in parallel using rust as the algorithm iterates over the coupling graph in BFS order for each node to try and find the best subgraph. We can do the BFS traversals in parallel and then iteratively compare the results in parallel until the best is found. This greatly speeds up the execution of the pass, for example running on a 1081 qubit quantum volume circuit on 1081 qubit heavy hexagon coupling graph takes ~134 seconds with the previous iteration and ~0.144 seconds after this commit (on my local workstation with 32 physical cores and 64 logical cores, scaling likely won't be as good on smaller systems). The tradeoff here comes in slightly increased memory consumption as to have a shared representation of the adjacency matrix (and the error) between Python and Rust we use numpy arrays as they can be passed by reference between the languages. In practice this will not matter much until the graphs get truly large (e.g. to represent a 10000 qubit adjacency matrix and error matrix would require ~1.6 GB of memory) and if it does become an issue (either for memory or runtime performance) we can add a shared compressed sparse matrix representation to Qiskit for use in both Python in Rust. * Fix measurement error calculation * Fix lint * Compute adjacency matrix at init instead of run() * Fix test failures This commit fixes the 4 remaining test failures. The results from the rust version of the pass were correct but different than the results from the Python version. This is because the parallel reduce() was comparing in a different order that was returning a different subgraph. This commit reverses the arg order to correct this so the behavior should be identical to the previous implementation * Fix error and matrix building The error matrix building was not working because it was comparing a list of qubits to a tuple. This was used prior to the rust rewrite so we probably were not actually checking noise properties prior to this commit. * Add release notes * Add rust docstring * Update tests for fixed noise awareness In an earlier commit we fixed the noise awareness of the laoyout pass, doing this had a side effect of changing a test that was looking explicitly for the layout found by the pass. Since the pass is now correctly using error rates the best layout found is now different. This commit updates the tests to account for this. * Fix dag qubit count to include registerless bits * Update docs and fix release note typos Co-authored-by: Kevin Hartman <kevin@hart.mn> * Reduce overhead of subgraph creation This commit reduces the overhead of the internal loop over the bfs sorted nodes to create the subgraph by making 2 changes. First, instead of passing around the full bfs sorted list everywhere this commit truncates it to just the nodes we're going to use. We never use any qubits in the bfs sort > num_qubits so we can just truncate the Vec and not have to worry about limiting to the first num_qubits elements. The second change made is that instead of traversing the bfs nodes to check if a node is in the subgraph we create an intermediate set to do a constant time lookup for the membership check. * Truncate bfs sort instead of truncating result This commit adjusts the truncation logic around the bfs sort. In the previous commit we truncated the bfs result to just the first n qubits (where n is the number of qubits in the circuit) after performing a full traversal and generating the full list. Instead of doing that we can truncate the search when we've found n qubits already and save ourselves the extra work of continuing to traverse the adjacency matrix. * Simplify subgraph creation iterator using filter_map() * Only run multithreaded outside of parallel context This commit adds the env variable based switching to prevent us from running dense layout in parallel when we're already running under parallel_map to prevent overloading the system. Co-authored-by: Kevin Hartman <kevin@hart.mn>
2022-03-11 06:47:34 +08:00
"ahash",
"hashbrown",
Reimplement DenseLayout as a Parallel Algorithm in Rust (#7740) * Reimplement DenseLayout as a Parallel Algorithm in Rust This commit reimplements the core of the DenseLayout transpiler pass in Rust and to run in multiple threads. Previously this algorithm used scipy sparse matrices to create a CSR sparse matrix representation of the coupling graph and iterate over that to find a densely connected subgraph in the coupling graph. This performs and scales well for modest sized circuits and coupling graphs. But as the size of the coupling graphs and circuits grows running the algorithm by iterating over the sparse matrix in Python starts to hit a limit. The underlying traversal can be efficiently executed in parallel using rust as the algorithm iterates over the coupling graph in BFS order for each node to try and find the best subgraph. We can do the BFS traversals in parallel and then iteratively compare the results in parallel until the best is found. This greatly speeds up the execution of the pass, for example running on a 1081 qubit quantum volume circuit on 1081 qubit heavy hexagon coupling graph takes ~134 seconds with the previous iteration and ~0.144 seconds after this commit (on my local workstation with 32 physical cores and 64 logical cores, scaling likely won't be as good on smaller systems). The tradeoff here comes in slightly increased memory consumption as to have a shared representation of the adjacency matrix (and the error) between Python and Rust we use numpy arrays as they can be passed by reference between the languages. In practice this will not matter much until the graphs get truly large (e.g. to represent a 10000 qubit adjacency matrix and error matrix would require ~1.6 GB of memory) and if it does become an issue (either for memory or runtime performance) we can add a shared compressed sparse matrix representation to Qiskit for use in both Python in Rust. * Fix measurement error calculation * Fix lint * Compute adjacency matrix at init instead of run() * Fix test failures This commit fixes the 4 remaining test failures. The results from the rust version of the pass were correct but different than the results from the Python version. This is because the parallel reduce() was comparing in a different order that was returning a different subgraph. This commit reverses the arg order to correct this so the behavior should be identical to the previous implementation * Fix error and matrix building The error matrix building was not working because it was comparing a list of qubits to a tuple. This was used prior to the rust rewrite so we probably were not actually checking noise properties prior to this commit. * Add release notes * Add rust docstring * Update tests for fixed noise awareness In an earlier commit we fixed the noise awareness of the laoyout pass, doing this had a side effect of changing a test that was looking explicitly for the layout found by the pass. Since the pass is now correctly using error rates the best layout found is now different. This commit updates the tests to account for this. * Fix dag qubit count to include registerless bits * Update docs and fix release note typos Co-authored-by: Kevin Hartman <kevin@hart.mn> * Reduce overhead of subgraph creation This commit reduces the overhead of the internal loop over the bfs sorted nodes to create the subgraph by making 2 changes. First, instead of passing around the full bfs sorted list everywhere this commit truncates it to just the nodes we're going to use. We never use any qubits in the bfs sort > num_qubits so we can just truncate the Vec and not have to worry about limiting to the first num_qubits elements. The second change made is that instead of traversing the bfs nodes to check if a node is in the subgraph we create an intermediate set to do a constant time lookup for the membership check. * Truncate bfs sort instead of truncating result This commit adjusts the truncation logic around the bfs sort. In the previous commit we truncated the bfs result to just the first n qubits (where n is the number of qubits in the circuit) after performing a full traversal and generating the full list. Instead of doing that we can truncate the search when we've found n qubits already and save ourselves the extra work of continuing to traverse the adjacency matrix. * Simplify subgraph creation iterator using filter_map() * Only run multithreaded outside of parallel context This commit adds the env variable based switching to prevent us from running dense layout in parallel when we're already running under parallel_map to prevent overloading the system. Co-authored-by: Kevin Hartman <kevin@hart.mn>
2022-03-11 06:47:34 +08:00
"indexmap",
"ndarray",
"num-bigint",
Replace pauli expectation value cython with multithreaded rust implementation (#7702) * Replace pauli expectation value cython with rust implementation This commit replaces the cython implementation of the pauli expectation value functions with a multithreaded rust implementation. This was done primarily for two reasons, the first and primary reason for this change is because after #7658 this module was the only cython code left in the qiskit-terra repository so unifying on a single compiled language will reduce the maintanence burden in qiskit-terra. The second reason is similar to the rationale in #7658 around why using rust over cython for multi-threaded hybrid python module. The difference here though is unlike in stochastic swap this module isn't as performance critical as it's not nearly as widely used. * Tune single threaded performance for rust sum This commit tunes the sum for the single threaded path. Using the iterator sum() method is very convienent but for the single threaded path it doesn't create the most efficient output. This was causing a regression in performance over the previous cython version. To address that issue, this commit adds a new tuned function which does a chunked sum which the compiler can handle better. It more closely models how we'd do this with vectorized SIMD instructions. As a future step we can look at using simdeez https://github.com/jackmott/simdeez to further optimize this by doing runtime CPU feature detection and leveraging SIMD instrinsics (we might want to look at using `fast_sum()` in the multithreaded path if we do that too). * Add release notes * Fix lint * Add docstring and signature to rust functions * Define parallel threshold as a constant * Add attribution comment to fast_sum() * Rename eval_parallel_env -> getenv_use_multiple_threads * Use inline literal type for size Co-authored-by: Kevin Hartman <kevin@hart.mn> * Add overflow check on num_qubits The functions only work for at most for number of qubits < usize bits anything larger would cause an overflow. While rust provides overflow checking in debug mode it disables this for performance in release mode. Sice we ship binaries in release mode this commit adds an overflow check for the num_qubits argument to ensure that we don't overflow and produce incorrect results. * Remove unecessary setup_requires field from setup.py The setup_requires field in the setup.py is deprecated and has been superseded by the pyproject.toml to define build system dependencies. Since we're already relying on the pyproject.toml to install setuptools-rust for us having the setup_requires line will do nothing but potentially cause issues as it will use an older install mechanism that will potentially conflict with people's environments. * Drop `.iter().take(LANES)`. * Fix typo. Co-authored-by: Kevin Hartman <kevin@hart.mn>
2022-03-11 04:54:24 +08:00
"num-complex",
"numpy",
"pyo3",
"rand",
"rand_distr",
"rand_pcg",
"rayon",
]
[[package]]
name = "quote"
version = "1.0.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3bcdf212e9776fbcb2d23ab029360416bb1706b1aea2d1a5ba002727cbcab804"
dependencies = [
"proc-macro2",
]
[[package]]
name = "rand"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
dependencies = [
"libc",
"rand_chacha",
"rand_core",
]
[[package]]
name = "rand_chacha"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
dependencies = [
"ppv-lite86",
"rand_core",
]
[[package]]
name = "rand_core"
version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7"
dependencies = [
"getrandom",
]
[[package]]
name = "rand_distr"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31"
dependencies = [
"num-traits",
"rand",
]
[[package]]
name = "rand_pcg"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "59cad018caf63deb318e5a4586d99a24424a364f40f1e5778c29aca23f4fc73e"
dependencies = [
"rand_core",
]
[[package]]
name = "rawpointer"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3"
[[package]]
name = "rayon"
version = "1.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bd99e5772ead8baa5215278c9b15bf92087709e9c1b2d1f97cdb5a183c933a7d"
dependencies = [
"autocfg",
"crossbeam-deque",
"either",
"rayon-core",
]
[[package]]
name = "rayon-core"
version = "1.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "258bcdb5ac6dad48491bb2992db6b7cf74878b0384908af124823d118c99683f"
dependencies = [
"crossbeam-channel",
"crossbeam-deque",
"crossbeam-utils",
"num_cpus",
]
[[package]]
name = "redox_syscall"
version = "0.2.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62f25bc4c7e55e0b0b7a1d43fb893f4fa1361d0abe38b9ce4f323c2adfe6ef42"
dependencies = [
"bitflags",
]
[[package]]
name = "scopeguard"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
[[package]]
name = "smallvec"
version = "1.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83"
[[package]]
name = "syn"
version = "1.0.98"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c50aef8a904de4c23c788f104b7dddc7d6f79c647c7c8ce4cc8f73eb0ca773dd"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "target-lexicon"
version = "0.12.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c02424087780c9b71cc96799eaeddff35af2bc513278cda5c99fc1f5d026d3c1"
[[package]]
name = "unicode-ident"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5bd2fe26506023ed7b5e1e315add59d6f584c621d037f9368fea9cfb988f368c"
[[package]]
name = "unindent"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "52fee519a3e570f7df377a06a1a7775cdbfb7aa460be7e08de2b1f0e69973a44"
[[package]]
name = "version_check"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "windows-sys"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2"
dependencies = [
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_msvc",
]
[[package]]
name = "windows_aarch64_msvc"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47"
[[package]]
name = "windows_i686_gnu"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6"
[[package]]
name = "windows_i686_msvc"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024"
[[package]]
name = "windows_x86_64_gnu"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1"
[[package]]
name = "windows_x86_64_msvc"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680"