qiskit/Cargo.lock

# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "ahash"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47"
dependencies = [
"getrandom",
"once_cell",
"version_check",
]
[[package]]
name = "autocfg"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "bitflags"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "crossbeam-channel"
version = "0.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c02a4d71819009c192cf4872265391563fd6a84c81ff2c0f2a7026ca4c1d85c"
dependencies = [
"cfg-if",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-deque"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6455c0ca19f0d2fbf751b908d5c55c1f5cbc65e03c4225427254b46890bdde1e"
dependencies = [
"cfg-if",
"crossbeam-epoch",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-epoch"
version = "0.9.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07db9d94cbd326813772c968ccd25999e5f8ae22f4f8d1b11effa37ef6ce281d"
dependencies = [
"autocfg",
"cfg-if",
"crossbeam-utils",
"memoffset",
"once_cell",
"scopeguard",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7d82ee10ce34d7bc12c2122495e7593a9c41347ecdd64185af4ecf72cb1a7f83"
dependencies = [
"cfg-if",
"once_cell",
]
[[package]]
name = "either"
version = "1.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f107b87b6afc2a64fd13cac55fe06d6c8859f12d4b14cbcdd2c67d0976781be"
[[package]]
name = "getrandom"
version = "0.2.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4eb1a864a501629691edf6c15a593b7a51eebaa1e8468e9ddc623de7c9b58ec6"
dependencies = [
"cfg-if",
"libc",
"wasi",
]
[[package]]
name = "hashbrown"
version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
dependencies = [
"ahash",
"rayon",
]
[[package]]
name = "hermit-abi"
version = "0.1.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
dependencies = [
"libc",
]
[[package]]
name = "indexmap"
version = "1.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "10a35a97730320ffe8e2d410b5d3b69279b98d2c14bdb8b70ea89ecf7888d41e"
dependencies = [
"autocfg",
"hashbrown",
"rayon",
]
[[package]]
name = "indoc"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "05a0bd019339e5d968b37855180087b7b9d512c5046fbd244cf8c95687927d6e"
[[package]]
name = "libc"
version = "0.2.126"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836"
[[package]]
name = "libm"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "33a33a362ce288760ec6a508b94caaec573ae7d3bbbd91b87aa0bad4456839db"
[[package]]
name = "lock_api"
version = "0.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "327fa5b6a6940e4699ec49a9beae1ea4845c6bab9314e4f84ac68742139d8c53"
dependencies = [
"autocfg",
"scopeguard",
]
[[package]]
name = "matrixmultiply"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "add85d4dd35074e6fedc608f8c8f513a3548619a9024b751949ef0e8e45a4d84"
dependencies = [
"rawpointer",
]
[[package]]
name = "memoffset"
version = "0.6.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce"
dependencies = [
"autocfg",
]
[[package]]
name = "ndarray"
version = "0.15.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "adb12d4e967ec485a5f71c6311fe28158e9d6f4bc4a447b474184d0f91a8fa32"
dependencies = [
"matrixmultiply",
"num-complex",
"num-integer",
"num-traits",
"rawpointer",
"rayon",
]
[[package]]
name = "num-bigint"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f93ab6289c7b344a8a9f60f88d80aa20032336fe78da341afc91c8a2341fc75f"
dependencies = [
"autocfg",
"num-integer",
"num-traits",
]
[[package]]
name = "num-complex"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ae39348c8bc5fbd7f40c727a9925f03517afd2ab27d46702108b6a7e5414c19"
dependencies = [
"num-traits",
]
[[package]]
name = "num-integer"
version = "0.1.45"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9"
dependencies = [
"autocfg",
"num-traits",
]
[[package]]
name = "num-traits"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd"
dependencies = [
"autocfg",
"libm",
]
[[package]]
name = "num_cpus"
version = "1.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1"
dependencies = [
"hermit-abi",
"libc",
]
[[package]]
name = "numpy"
version = "0.16.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "383ae168529a39fc97cbc1d9d4fa865377731a519bc27553ed96f50594de7c45"
dependencies = [
"libc",
"ndarray",
"num-complex",
"num-traits",
"pyo3",
]
[[package]]
name = "once_cell"
version = "1.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "18a6dbe30758c9f83eb00cbea4ac95966305f5a7772f3f42ebfc7fc7eddbd8e1"
[[package]]
name = "parking_lot"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f"
dependencies = [
"lock_api",
"parking_lot_core",
]
[[package]]
name = "parking_lot_core"
version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09a279cbf25cb0757810394fbc1e359949b59e348145c643a939a525692e6929"
dependencies = [
"cfg-if",
"libc",
"redox_syscall",
"smallvec",
"windows-sys",
]
[[package]]
name = "ppv-lite86"
version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872"
[[package]]
name = "proc-macro2"
version = "1.0.40"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd96a1e8ed2596c337f8eae5f24924ec83f5ad5ab21ea8e455d3566c69fbcaf7"
dependencies = [
"unicode-ident",
]
[[package]]
name = "pyo3"
version = "0.16.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e6302e85060011447471887705bb7838f14aba43fcb06957d823739a496b3dc"
dependencies = [
"cfg-if",
"hashbrown",
"indoc",
"libc",
"num-bigint",
"num-complex",
"parking_lot",
"pyo3-build-config",
"pyo3-ffi",
"pyo3-macros",
"unindent",
]
[[package]]
name = "pyo3-build-config"
version = "0.16.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5b65b546c35d8a3b1b2f0ddbac7c6a569d759f357f2b9df884f5d6b719152c8"
dependencies = [
"once_cell",
"target-lexicon",
]
[[package]]
name = "pyo3-ffi"
version = "0.16.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c275a07127c1aca33031a563e384ffdd485aee34ef131116fcd58e3430d1742b"
dependencies = [
"libc",
"pyo3-build-config",
]
[[package]]
name = "pyo3-macros"
version = "0.16.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "284fc4485bfbcc9850a6d661d627783f18d19c2ab55880b021671c4ba83e90f7"
dependencies = [
"proc-macro2",
"pyo3-macros-backend",
"quote",
"syn",
]
[[package]]
name = "pyo3-macros-backend"
version = "0.16.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "53bda0f58f73f5c5429693c96ed57f7abdb38fdfc28ae06da4101a257adb7faf"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "qiskit-terra"
version = "0.22.0"
dependencies = [
"ahash",
"hashbrown",
"indexmap",
"ndarray",
"num-bigint",
"num-complex",
"numpy",
"pyo3",
"rand",
"rand_distr",
"rand_pcg",
"rayon",
]
[[package]]
name = "quote"
version = "1.0.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3bcdf212e9776fbcb2d23ab029360416bb1706b1aea2d1a5ba002727cbcab804"
dependencies = [
"proc-macro2",
]
[[package]]
name = "rand"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
dependencies = [
"libc",
"rand_chacha",
"rand_core",
]
[[package]]
name = "rand_chacha"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
dependencies = [
"ppv-lite86",
"rand_core",
]
[[package]]
name = "rand_core"
version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7"
dependencies = [
"getrandom",
]
[[package]]
name = "rand_distr"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31"
dependencies = [
"num-traits",
"rand",
]
[[package]]
name = "rand_pcg"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "59cad018caf63deb318e5a4586d99a24424a364f40f1e5778c29aca23f4fc73e"
dependencies = [
"rand_core",
]
[[package]]
name = "rawpointer"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3"
[[package]]
name = "rayon"
version = "1.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bd99e5772ead8baa5215278c9b15bf92087709e9c1b2d1f97cdb5a183c933a7d"
dependencies = [
"autocfg",
"crossbeam-deque",
"either",
"rayon-core",
]
[[package]]
name = "rayon-core"
version = "1.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "258bcdb5ac6dad48491bb2992db6b7cf74878b0384908af124823d118c99683f"
dependencies = [
"crossbeam-channel",
"crossbeam-deque",
"crossbeam-utils",
"num_cpus",
]
[[package]]
name = "redox_syscall"
version = "0.2.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62f25bc4c7e55e0b0b7a1d43fb893f4fa1361d0abe38b9ce4f323c2adfe6ef42"
dependencies = [
"bitflags",
]
[[package]]
name = "scopeguard"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
[[package]]
name = "smallvec"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2fd0db749597d91ff862fd1d55ea87f7855a744a8425a64695b6fca237d1dad1"
[[package]]
name = "syn"
version = "1.0.98"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c50aef8a904de4c23c788f104b7dddc7d6f79c647c7c8ce4cc8f73eb0ca773dd"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "target-lexicon"
version = "0.12.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c02424087780c9b71cc96799eaeddff35af2bc513278cda5c99fc1f5d026d3c1"
[[package]]
name = "unicode-ident"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "15c61ba63f9235225a22310255a29b806b907c9b8c964bcbd0a2c70f3f2deea7"
[[package]]
name = "unindent"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "52fee519a3e570f7df377a06a1a7775cdbfb7aa460be7e08de2b1f0e69973a44"
[[package]]
name = "version_check"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "windows-sys"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2"
dependencies = [
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_msvc",
]
[[package]]
name = "windows_aarch64_msvc"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47"
[[package]]
name = "windows_i686_gnu"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6"
[[package]]
name = "windows_i686_msvc"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024"
[[package]]
name = "windows_x86_64_gnu"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1"
[[package]]
name = "windows_x86_64_msvc"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680"