mirror of https://github.com/Qiskit/qiskit-aer.git
Reduce warning messages (#2013)
* reduce warning messages * format * fix Windows * uint -> uint_t * fix Thrust seg fault * format
This commit is contained in:
parent
f7fcbc2b4c
commit
180a0b6431
|
@ -143,7 +143,7 @@ void bind_aer_circuit(MODULE m) {
|
|||
<< ", num_registers=" << circ.num_registers;
|
||||
|
||||
ss << ", ops={";
|
||||
for (auto i = 0; i < circ.ops.size(); ++i)
|
||||
for (uint_t i = 0; i < circ.ops.size(); ++i)
|
||||
if (i == 0)
|
||||
ss << circ.ops[i];
|
||||
else
|
||||
|
|
|
@ -130,8 +130,8 @@ void bind_aer_state(MODULE m) {
|
|||
size_t mat_len = (1UL << qubits.size());
|
||||
auto ptr = values.unchecked<2>();
|
||||
cmatrix_t mat(mat_len, mat_len);
|
||||
for (auto i = 0; i < mat_len; ++i)
|
||||
for (auto j = 0; j < mat_len; ++j)
|
||||
for (uint_t i = 0; i < mat_len; ++i)
|
||||
for (uint_t j = 0; j < mat_len; ++j)
|
||||
mat(i, j) = ptr(i, j);
|
||||
state.apply_unitary(qubits, mat);
|
||||
});
|
||||
|
@ -144,10 +144,10 @@ void bind_aer_state(MODULE m) {
|
|||
size_t mat_size = (1UL << control_qubits.size());
|
||||
auto ptr = values.unchecked<3>();
|
||||
std::vector<cmatrix_t> mats;
|
||||
for (auto i = 0; i < mat_size; ++i) {
|
||||
for (uint_t i = 0; i < mat_size; ++i) {
|
||||
cmatrix_t mat(mat_len, mat_len);
|
||||
for (auto j = 0; j < mat_len; ++j)
|
||||
for (auto k = 0; k < mat_len; ++k)
|
||||
for (uint_t j = 0; j < mat_len; ++j)
|
||||
for (uint_t k = 0; k < mat_len; ++k)
|
||||
mat(j, k) = ptr(i, j, k);
|
||||
mats.push_back(mat);
|
||||
}
|
||||
|
|
|
@ -414,7 +414,7 @@ size_t Controller::get_system_memory_mb() {
|
|||
size_t Controller::get_gpu_memory_mb() {
|
||||
size_t total_physical_memory = 0;
|
||||
#ifdef AER_THRUST_GPU
|
||||
for (int_t iDev = 0; iDev < target_gpus_.size(); iDev++) {
|
||||
for (uint_t iDev = 0; iDev < target_gpus_.size(); iDev++) {
|
||||
size_t freeMem, totalMem;
|
||||
cudaSetDevice(target_gpus_[iDev]);
|
||||
cudaMemGetInfo(&freeMem, &totalMem);
|
||||
|
@ -515,7 +515,7 @@ Result Controller::execute(std::vector<std::shared_ptr<Circuit>> &circuits,
|
|||
uint_t result_size;
|
||||
reg_t result_offset(circuits.size());
|
||||
result_size = 0;
|
||||
for (int_t i = 0; i < circuits.size(); i++) {
|
||||
for (uint_t i = 0; i < circuits.size(); i++) {
|
||||
result_offset[i] = result_size;
|
||||
result_size += circuits[i]->num_bind_params;
|
||||
}
|
||||
|
@ -532,11 +532,11 @@ Result Controller::execute(std::vector<std::shared_ptr<Circuit>> &circuits,
|
|||
// set parallelization for experiments
|
||||
try {
|
||||
uint_t res_pos = 0;
|
||||
for (int i = 0; i < circuits.size(); i++) {
|
||||
for (uint_t i = 0; i < circuits.size(); i++) {
|
||||
executors[i] = make_circuit_executor(methods[i]);
|
||||
required_memory_mb_list[i] =
|
||||
executors[i]->required_memory_mb(config, *circuits[i], noise_model);
|
||||
for (int j = 0; j < circuits[i]->num_bind_params; j++) {
|
||||
for (uint_t j = 0; j < circuits[i]->num_bind_params; j++) {
|
||||
result.results[res_pos++].metadata.add(required_memory_mb_list[i],
|
||||
"required_memory_mb");
|
||||
}
|
||||
|
@ -588,9 +588,9 @@ Result Controller::execute(std::vector<std::shared_ptr<Circuit>> &circuits,
|
|||
reg_t seeds(result_size);
|
||||
reg_t avg_seeds(result_size);
|
||||
int_t iseed = 0;
|
||||
for (int_t i = 0; i < circuits.size(); i++) {
|
||||
for (uint_t i = 0; i < circuits.size(); i++) {
|
||||
if (circuits[i]->num_bind_params > 1) {
|
||||
for (int_t j = 0; i < circuits[i]->num_bind_params; i++)
|
||||
for (uint_t j = 0; i < circuits[i]->num_bind_params; i++)
|
||||
seeds[iseed++] = circuits[i]->seed_for_params[j];
|
||||
} else
|
||||
seeds[iseed++] = circuits[i]->seed;
|
||||
|
@ -598,9 +598,9 @@ Result Controller::execute(std::vector<std::shared_ptr<Circuit>> &circuits,
|
|||
MPI_Allreduce(seeds.data(), avg_seeds.data(), result_size, MPI_UINT64_T,
|
||||
MPI_SUM, MPI_COMM_WORLD);
|
||||
iseed = 0;
|
||||
for (int_t i = 0; i < circuits.size(); i++) {
|
||||
for (uint_t i = 0; i < circuits.size(); i++) {
|
||||
if (circuits[i]->num_bind_params > 1) {
|
||||
for (int_t j = 0; i < circuits[i]->num_bind_params; i++)
|
||||
for (uint_t j = 0; i < circuits[i]->num_bind_params; i++)
|
||||
circuits[i]->seed_for_params[j] =
|
||||
avg_seeds[iseed++] / num_processes_;
|
||||
} else
|
||||
|
@ -626,7 +626,7 @@ Result Controller::execute(std::vector<std::shared_ptr<Circuit>> &circuits,
|
|||
|
||||
bool all_failed = true;
|
||||
result.status = Result::Status::completed;
|
||||
for (int i = 0; i < result.results.size(); ++i) {
|
||||
for (uint_t i = 0; i < result.results.size(); ++i) {
|
||||
auto &experiment = result.results[i];
|
||||
if (experiment.status == ExperimentResult::Status::completed) {
|
||||
all_failed = false;
|
||||
|
|
|
@ -118,13 +118,13 @@ Result controller_execute(std::vector<std::shared_ptr<Circuit>> &input_circs,
|
|||
param_circ->global_phase_for_params.resize(num_params);
|
||||
for (size_t j = 0; j < num_params; j++)
|
||||
param_circ->global_phase_for_params[j] = params.second[j];
|
||||
} else if (instr_pos >= num_instr) {
|
||||
} else if ((uint_t)instr_pos >= num_instr) {
|
||||
throw std::invalid_argument(
|
||||
R"(Invalid parameterized qobj: instruction position out of range)");
|
||||
}
|
||||
auto &op = param_circ->ops[instr_pos];
|
||||
if (!op.has_bind_params) {
|
||||
if (param_pos >= op.params.size()) {
|
||||
if ((uint_t)param_pos >= op.params.size()) {
|
||||
throw std::invalid_argument(
|
||||
R"(Invalid parameterized qobj: instruction param position out of range)");
|
||||
}
|
||||
|
@ -160,7 +160,7 @@ Result controller_execute(std::vector<std::shared_ptr<Circuit>> &input_circs,
|
|||
// negative position is for global phase
|
||||
circ->global_phase_angle = params.second[j];
|
||||
} else {
|
||||
if (instr_pos >= num_instr) {
|
||||
if ((uint_t)instr_pos >= num_instr) {
|
||||
std::cout << "Invalid parameterization: instruction position "
|
||||
"out of range: "
|
||||
<< instr_pos << std::endl;
|
||||
|
@ -168,7 +168,7 @@ Result controller_execute(std::vector<std::shared_ptr<Circuit>> &input_circs,
|
|||
R"(Invalid parameterization: instruction position out of range)");
|
||||
}
|
||||
auto &op = param_circ->ops[instr_pos];
|
||||
if (param_pos >= op.params.size()) {
|
||||
if ((uint_t)param_pos >= op.params.size()) {
|
||||
throw std::invalid_argument(
|
||||
R"(Invalid parameterization: instruction param position out of range)");
|
||||
}
|
||||
|
@ -215,7 +215,7 @@ Result controller_execute(std::vector<std::shared_ptr<Circuit>> &input_circs,
|
|||
for (auto &circ : circs) {
|
||||
circ->seed = seed + seed_shift;
|
||||
circ->seed_for_params.resize(circ->num_bind_params);
|
||||
for (int_t i = 0; i < circ->num_bind_params; i++) {
|
||||
for (uint_t i = 0; i < circ->num_bind_params; i++) {
|
||||
circ->seed_for_params[i] = seed + seed_shift;
|
||||
seed_shift += 2113;
|
||||
}
|
||||
|
|
|
@ -630,7 +630,7 @@ void AerState::set_seed(int_t seed) {
|
|||
reg_t AerState::allocate_qubits(uint_t num_qubits) {
|
||||
assert_not_initialized();
|
||||
reg_t ret;
|
||||
for (auto i = 0; i < num_qubits; ++i)
|
||||
for (uint_t i = 0; i < num_qubits; ++i)
|
||||
ret.push_back(num_of_qubits_++);
|
||||
return ret;
|
||||
};
|
||||
|
@ -816,7 +816,7 @@ reg_t AerState::initialize_statevector(uint_t num_of_qubits, complex_t *data,
|
|||
|
||||
reg_t ret;
|
||||
ret.reserve(num_of_qubits);
|
||||
for (auto i = 0; i < num_of_qubits; ++i)
|
||||
for (uint_t i = 0; i < num_of_qubits; ++i)
|
||||
ret.push_back(i);
|
||||
return ret;
|
||||
};
|
||||
|
@ -861,7 +861,7 @@ reg_t AerState::initialize_density_matrix(uint_t num_of_qubits, complex_t *data,
|
|||
|
||||
reg_t ret;
|
||||
ret.reserve(num_of_qubits);
|
||||
for (auto i = 0; i < num_of_qubits; ++i)
|
||||
for (uint_t i = 0; i < num_of_qubits; ++i)
|
||||
ret.push_back(i);
|
||||
return ret;
|
||||
};
|
||||
|
@ -892,7 +892,7 @@ AER::Vector<complex_t> AerState::move_to_vector() {
|
|||
throw std::runtime_error("move_to_vector() supports only statevector or "
|
||||
"matrix_product_state or density_matrix methods");
|
||||
}
|
||||
for (auto i = 0; i < num_of_qubits_; ++i)
|
||||
for (uint_t i = 0; i < num_of_qubits_; ++i)
|
||||
op.qubits.push_back(i);
|
||||
op.string_params.push_back("s");
|
||||
op.save_type = Operations::DataSubType::single;
|
||||
|
@ -907,7 +907,7 @@ AER::Vector<complex_t> AerState::move_to_vector() {
|
|||
.value()["s"]
|
||||
.value());
|
||||
clear();
|
||||
return std::move(vec);
|
||||
return vec;
|
||||
} else if (method_ == Method::density_matrix) {
|
||||
auto mat =
|
||||
std::move(static_cast<DataMap<AverageData, matrix<complex_t>, 1>>(
|
||||
|
@ -917,7 +917,7 @@ AER::Vector<complex_t> AerState::move_to_vector() {
|
|||
auto vec = Vector<complex_t>::move_from_buffer(
|
||||
mat.GetColumns() * mat.GetRows(), mat.move_to_buffer());
|
||||
clear();
|
||||
return std::move(vec);
|
||||
return vec;
|
||||
} else {
|
||||
throw std::runtime_error("move_to_vector() supports only statevector or "
|
||||
"matrix_product_state or density_matrix methods");
|
||||
|
@ -941,7 +941,7 @@ matrix<complex_t> AerState::move_to_matrix() {
|
|||
throw std::runtime_error("move_to_matrix() supports only statevector or "
|
||||
"matrix_product_state or density_matrix methods");
|
||||
}
|
||||
for (auto i = 0; i < num_of_qubits_; ++i)
|
||||
for (uint_t i = 0; i < num_of_qubits_; ++i)
|
||||
op.qubits.push_back(i);
|
||||
op.string_params.push_back("s");
|
||||
op.save_type = Operations::DataSubType::single;
|
||||
|
@ -966,7 +966,7 @@ matrix<complex_t> AerState::move_to_matrix() {
|
|||
.value())["s"]
|
||||
.value());
|
||||
clear();
|
||||
return std::move(mat);
|
||||
return mat;
|
||||
} else {
|
||||
throw std::runtime_error("move_to_matrix() supports only statevector or "
|
||||
"matrix_product_state or density_matrix methods");
|
||||
|
|
|
@ -263,7 +263,7 @@ void std::from_json(const json_t &js,
|
|||
template <typename RealType>
|
||||
void std::to_json(json_t &js, const AER::Vector<std::complex<RealType>> &vec) {
|
||||
std::vector<std::vector<RealType>> out;
|
||||
for (int64_t i = 0; i < vec.size(); ++i) {
|
||||
for (size_t i = 0; i < vec.size(); ++i) {
|
||||
auto &z = vec[i];
|
||||
out.push_back(std::vector<RealType>{real(z), imag(z)});
|
||||
}
|
||||
|
|
|
@ -35,7 +35,8 @@ T *malloc_data(size_t size) {
|
|||
// Data allocated here may need to be properly aligned to be compliant with
|
||||
// AVX2.
|
||||
void *data = nullptr;
|
||||
posix_memalign(&data, 64, sizeof(T) * size);
|
||||
if (posix_memalign(&data, 64, sizeof(T) * size) != 0)
|
||||
throw std::runtime_error("Cannot allocate memory by posix_memalign");
|
||||
return reinterpret_cast<T *>(data);
|
||||
#else
|
||||
return reinterpret_cast<T *>(malloc(sizeof(T) * size));
|
||||
|
|
|
@ -52,11 +52,13 @@ enum class BinaryOp {
|
|||
GreaterEqual
|
||||
};
|
||||
|
||||
bool isBoolBinaryOp(const BinaryOp binary_op);
|
||||
bool isBoolBinaryOp(const BinaryOp binary_op) {
|
||||
return binary_op != BinaryOp::BitAnd && binary_op != BinaryOp::BitOr &&
|
||||
binary_op != BinaryOp::BitXor;
|
||||
}
|
||||
|
||||
uint_t truncate(const uint_t val, const size_t width);
|
||||
uint_t truncate(const uint_t val, const size_t width) {
|
||||
size_t shift = 64 - width;
|
||||
return (val << shift) >> shift;
|
||||
|
@ -68,8 +70,8 @@ enum class ValueType { Bool, Uint };
|
|||
|
||||
class ScalarType {
|
||||
public:
|
||||
ScalarType(const ValueType type_, const size_t width_)
|
||||
: type(type_), width(width_) {}
|
||||
ScalarType(const ValueType _type, const size_t width_)
|
||||
: type(_type), width(width_) {}
|
||||
|
||||
public:
|
||||
const ValueType type;
|
||||
|
@ -97,8 +99,8 @@ public:
|
|||
|
||||
class CExpr {
|
||||
public:
|
||||
CExpr(const CExprType expr_type_, const std::shared_ptr<ScalarType> type_)
|
||||
: expr_type(expr_type_), type(type_) {}
|
||||
CExpr(const CExprType _expr_type, const std::shared_ptr<ScalarType> _type)
|
||||
: expr_type(_expr_type), type(_type) {}
|
||||
virtual bool eval_bool(const std::string &memory) { return false; };
|
||||
virtual uint_t eval_uint(const std::string &memory) { return 0ul; };
|
||||
|
||||
|
@ -109,9 +111,9 @@ public:
|
|||
|
||||
class CastExpr : public CExpr {
|
||||
public:
|
||||
CastExpr(std::shared_ptr<ScalarType> type,
|
||||
CastExpr(std::shared_ptr<ScalarType> _type,
|
||||
const std::shared_ptr<CExpr> operand_)
|
||||
: CExpr(CExprType::Cast, type), operand(operand_) {}
|
||||
: CExpr(CExprType::Cast, _type), operand(operand_) {}
|
||||
|
||||
virtual bool eval_bool(const std::string &memory) {
|
||||
if (type->type != ValueType::Bool)
|
||||
|
@ -143,9 +145,9 @@ public:
|
|||
|
||||
class VarExpr : public CExpr {
|
||||
public:
|
||||
VarExpr(std::shared_ptr<ScalarType> type,
|
||||
const std::vector<uint_t> &cbit_idxs)
|
||||
: CExpr(CExprType::Var, type), cbit_idxs(cbit_idxs) {}
|
||||
VarExpr(std::shared_ptr<ScalarType> _type,
|
||||
const std::vector<uint_t> &_cbit_idxs)
|
||||
: CExpr(CExprType::Var, _type), cbit_idxs(_cbit_idxs) {}
|
||||
|
||||
virtual bool eval_bool(const std::string &memory) {
|
||||
if (type->type != ValueType::Bool)
|
||||
|
@ -164,7 +166,6 @@ public:
|
|||
private:
|
||||
uint_t eval_uint_(const std::string &memory) {
|
||||
uint_t val = 0ul;
|
||||
const uint_t memory_size = memory.size();
|
||||
uint_t shift = 0;
|
||||
for (const uint_t cbit_idx : cbit_idxs) {
|
||||
if (memory.size() <= cbit_idx)
|
||||
|
@ -182,7 +183,8 @@ public:
|
|||
|
||||
class ValueExpr : public CExpr {
|
||||
public:
|
||||
ValueExpr(std::shared_ptr<ScalarType> type) : CExpr(CExprType::Value, type) {}
|
||||
ValueExpr(std::shared_ptr<ScalarType> _type)
|
||||
: CExpr(CExprType::Value, _type) {}
|
||||
};
|
||||
|
||||
class UintValue : public ValueExpr {
|
||||
|
@ -943,6 +945,11 @@ inline Op make_bfunc(const std::string &mask, const std::string &val,
|
|||
return op;
|
||||
}
|
||||
|
||||
Op make_gate(const std::string &name, const reg_t &qubits,
|
||||
const std::vector<complex_t> ¶ms,
|
||||
const std::vector<std::string> &string_params,
|
||||
const int_t conditional, const std::shared_ptr<CExpr> expr,
|
||||
const std::string &label);
|
||||
Op make_gate(const std::string &name, const reg_t &qubits,
|
||||
const std::vector<complex_t> ¶ms,
|
||||
const std::vector<std::string> &string_params,
|
||||
|
@ -1313,12 +1320,12 @@ inline Op bind_parameter(const Op &src, const uint_t iparam,
|
|||
if (src.params.size() > 0) {
|
||||
uint_t stride = src.params.size() / num_params;
|
||||
op.params.resize(stride);
|
||||
for (int_t i = 0; i < stride; i++)
|
||||
for (uint_t i = 0; i < stride; i++)
|
||||
op.params[i] = src.params[iparam * stride + i];
|
||||
} else if (src.mats.size() > 0) {
|
||||
uint_t stride = src.mats.size() / num_params;
|
||||
op.mats.resize(stride);
|
||||
for (int_t i = 0; i < stride; i++)
|
||||
for (uint_t i = 0; i < stride; i++)
|
||||
op.mats[i] = src.mats[iparam * stride + i];
|
||||
}
|
||||
return op;
|
||||
|
@ -1528,6 +1535,7 @@ json_t op_to_json(const Op &op) {
|
|||
return ret;
|
||||
}
|
||||
|
||||
void to_json(json_t &js, const OpType &type);
|
||||
void to_json(json_t &js, const OpType &type) {
|
||||
std::stringstream ss;
|
||||
ss << type;
|
||||
|
|
|
@ -32,6 +32,8 @@
|
|||
|
||||
#include "misc/warnings.hpp"
|
||||
DISABLE_WARNING_PUSH
|
||||
#pragma GCC diagnostic ignored "-Wfloat-equal"
|
||||
|
||||
#include <pybind11/cast.h>
|
||||
#include <pybind11/complex.h>
|
||||
#include <pybind11/numpy.h>
|
||||
|
@ -40,6 +42,7 @@ DISABLE_WARNING_PUSH
|
|||
|
||||
#include <nlohmann/json.hpp>
|
||||
DISABLE_WARNING_POP
|
||||
#pragma GCC diagnostic warning "-Wfloat-equal"
|
||||
|
||||
#include "framework/json.hpp"
|
||||
|
||||
|
@ -293,7 +296,7 @@ void std::from_json(const json_t &js, py::object &o) {
|
|||
o = py::str(js.get<nl::json::string_t>());
|
||||
} else if (js.is_array()) {
|
||||
std::vector<py::object> obj(js.size());
|
||||
for (auto i = 0; i < js.size(); i++) {
|
||||
for (size_t i = 0; i < js.size(); i++) {
|
||||
py::object tmp;
|
||||
from_json(js[i], tmp);
|
||||
obj[i] = tmp;
|
||||
|
|
|
@ -155,12 +155,12 @@ Qobj::Qobj(const inputdata_t &input) {
|
|||
// negative position is for global phase
|
||||
param_circuit->global_phase_angle = params.second[j];
|
||||
} else {
|
||||
if (instr_pos >= num_instr) {
|
||||
if ((uint_t)instr_pos >= num_instr) {
|
||||
throw std::invalid_argument(
|
||||
R"(Invalid parameterized qobj: instruction position out of range)");
|
||||
}
|
||||
auto &op = param_circuit->ops[instr_pos];
|
||||
if (param_pos >= op.params.size()) {
|
||||
if ((uint_t)param_pos >= op.params.size()) {
|
||||
throw std::invalid_argument(
|
||||
R"(Invalid parameterized qobj: instruction param position out of range)");
|
||||
}
|
||||
|
|
|
@ -1270,7 +1270,7 @@ uint_t (*popcount)(uint_t) = is_avx2_supported() ? &_instrinsic_weight
|
|||
bool (*hamming_parity)(uint_t) = &_naive_parity;
|
||||
uint_t (*popcount)(uint_t) = &_naive_weight;
|
||||
#endif
|
||||
|
||||
size_t get_system_memory_mb();
|
||||
size_t get_system_memory_mb() {
|
||||
size_t total_physical_memory = 0;
|
||||
#if defined(__linux__)
|
||||
|
|
|
@ -386,7 +386,7 @@ void NoiseModel::enable_superop_method(int num_threads) {
|
|||
exs.resize(std::max(num_threads, 1));
|
||||
#pragma omp parallel for if (num_threads > 1 && quantum_errors_.size() > 10) \
|
||||
num_threads(num_threads)
|
||||
for (int i = 0; i < quantum_errors_.size(); i++) {
|
||||
for (int i = 0; i < (int_t)quantum_errors_.size(); i++) {
|
||||
try {
|
||||
quantum_errors_[i].compute_superoperator();
|
||||
} catch (...) {
|
||||
|
@ -406,7 +406,7 @@ void NoiseModel::enable_kraus_method(int num_threads) {
|
|||
exs.resize(std::max(num_threads, 1));
|
||||
#pragma omp parallel for if (num_threads > 1 && quantum_errors_.size() > 10) \
|
||||
num_threads(num_threads)
|
||||
for (int i = 0; i < quantum_errors_.size(); i++) {
|
||||
for (int i = 0; i < (int_t)quantum_errors_.size(); i++) {
|
||||
try {
|
||||
quantum_errors_[i].compute_kraus();
|
||||
} catch (...) {
|
||||
|
@ -851,6 +851,8 @@ cmatrix_t NoiseModel::op2superop(const Operations::Op &op) const {
|
|||
case ParamGate::cu:
|
||||
return Linalg::SMatrix::cu(op.params[0], op.params[1], op.params[2],
|
||||
op.params[3]);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
// Check if we can convert this gate to a standard superoperator matrix
|
||||
|
@ -897,6 +899,8 @@ cmatrix_t NoiseModel::op2unitary(const Operations::Op &op) const {
|
|||
return Linalg::Matrix::rzx(op.params[0]);
|
||||
case ParamGate::cp:
|
||||
return Linalg::Matrix::cphase(op.params[0]);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
// Check if we can convert this gate to a standard superoperator matrix
|
||||
|
|
|
@ -133,7 +133,7 @@ void BatchShotsExecutor<state_t>::set_parallelization(
|
|||
enable_batch_multi_shots_ = false;
|
||||
if (batched_shots_gpu_ && Base::sim_device_ != Device::CPU) {
|
||||
enable_batch_multi_shots_ = true;
|
||||
if (circ.num_qubits > batched_shots_gpu_max_qubits_)
|
||||
if (circ.num_qubits > (uint_t)batched_shots_gpu_max_qubits_)
|
||||
enable_batch_multi_shots_ = false;
|
||||
else if (circ.shots == 1 && circ.num_bind_params == 1)
|
||||
enable_batch_multi_shots_ = false;
|
||||
|
@ -156,8 +156,7 @@ void BatchShotsExecutor<state_t>::run_circuit_with_sampling(
|
|||
}
|
||||
Noise::NoiseModel dummy_noise;
|
||||
state_t dummy_state;
|
||||
int_t i;
|
||||
int_t i_begin, n_shots;
|
||||
uint_t i_begin, n_shots;
|
||||
|
||||
Base::num_qubits_ = circ.num_qubits;
|
||||
Base::num_creg_memory_ = circ.num_memory;
|
||||
|
@ -195,7 +194,7 @@ void BatchShotsExecutor<state_t>::run_circuit_with_sampling(
|
|||
fusion_result);
|
||||
auto time_taken =
|
||||
std::chrono::duration<double>(myclock_t::now() - timer_start).count();
|
||||
for (i = 0; i < circ.num_bind_params; i++) {
|
||||
for (uint_t i = 0; i < circ.num_bind_params; i++) {
|
||||
ExperimentResult &result = *(result_it + i);
|
||||
result.metadata.copy(fusion_result.metadata);
|
||||
// Add batched multi-shots optimization metadata
|
||||
|
@ -222,13 +221,13 @@ void BatchShotsExecutor<state_t>::run_circuit_with_sampling(
|
|||
while (i_begin < Base::num_local_states_) {
|
||||
// loop for states can be stored in available memory
|
||||
n_shots = Base::num_local_states_ - i_begin;
|
||||
n_shots = std::min(n_shots, (int_t)Base::num_max_shots_);
|
||||
n_shots = std::min(n_shots, Base::num_max_shots_);
|
||||
|
||||
// allocate shots
|
||||
this->allocate_states(n_shots, config);
|
||||
|
||||
// Set state config
|
||||
for (i = 0; i < n_shots; i++) {
|
||||
for (uint_t i = 0; i < n_shots; i++) {
|
||||
Base::states_[i].set_parallelization(Base::parallel_state_update_);
|
||||
}
|
||||
|
||||
|
@ -256,7 +255,7 @@ void BatchShotsExecutor<state_t>::run_circuit_with_sampling(
|
|||
auto apply_ops_lambda = [this, circ, init_rng, first_meas, final_ops,
|
||||
dummy_noise, &result_it](int_t i) {
|
||||
std::vector<RngEngine> rng(Base::num_states_in_group_[i]);
|
||||
for (int_t j = 0; j < Base::num_states_in_group_[i]; j++) {
|
||||
for (uint_t j = 0; j < Base::num_states_in_group_[i]; j++) {
|
||||
uint_t iparam =
|
||||
Base::global_state_index_ + Base::top_state_of_group_[i] + j;
|
||||
if (iparam == 0)
|
||||
|
@ -284,8 +283,8 @@ void BatchShotsExecutor<state_t>::run_circuit_with_sampling(
|
|||
if (Base::num_process_per_experiment_ > 1) {
|
||||
Base::gather_creg_memory(Base::cregs_, Base::state_index_begin_);
|
||||
|
||||
for (i = 0; i < circ.num_bind_params; i++) {
|
||||
for (int_t j = 0; j < circ.shots; j++) {
|
||||
for (uint_t i = 0; i < circ.num_bind_params; i++) {
|
||||
for (uint_t j = 0; j < circ.shots; j++) {
|
||||
(result_it + i)
|
||||
->save_count_data(Base::cregs_[i * circ.shots + j],
|
||||
Base::save_creg_memory_);
|
||||
|
@ -304,7 +303,7 @@ void BatchShotsExecutor<state_t>::run_circuit_with_sampling(
|
|||
}
|
||||
if (nDev > Base::num_groups_)
|
||||
nDev = Base::num_groups_;
|
||||
for (i = 0; i < circ.num_bind_params; i++)
|
||||
for (uint_t i = 0; i < circ.num_bind_params; i++)
|
||||
(result_it + i)
|
||||
->metadata.add(nDev, "batched_shots_optimization_parallel_gpus");
|
||||
}
|
||||
|
@ -362,10 +361,9 @@ void BatchShotsExecutor<state_t>::run_circuit_shots(
|
|||
|
||||
Base::max_matrix_qubits_ = Base::get_max_matrix_qubits(circ_opt);
|
||||
|
||||
int_t i;
|
||||
int_t i_begin, n_shots;
|
||||
uint_t i_begin, n_shots;
|
||||
|
||||
for (i = 0; i < Base::num_bind_params_; i++) {
|
||||
for (uint_t i = 0; i < Base::num_bind_params_; i++) {
|
||||
ExperimentResult &result = *(result_it + i);
|
||||
result.metadata.copy(fusion_result.metadata);
|
||||
// Add batched multi-shots optimization metadata
|
||||
|
@ -382,13 +380,13 @@ void BatchShotsExecutor<state_t>::run_circuit_shots(
|
|||
while (i_begin < Base::num_local_states_) {
|
||||
// loop for states can be stored in available memory
|
||||
n_shots = Base::num_local_states_ - i_begin;
|
||||
n_shots = std::min(n_shots, (int_t)Base::num_max_shots_);
|
||||
n_shots = std::min(n_shots, Base::num_max_shots_);
|
||||
|
||||
// allocate shots
|
||||
this->allocate_states(n_shots, config);
|
||||
|
||||
// Set state config
|
||||
for (i = 0; i < n_shots; i++) {
|
||||
for (uint_t i = 0; i < n_shots; i++) {
|
||||
Base::states_[i].set_parallelization(Base::parallel_state_update_);
|
||||
}
|
||||
|
||||
|
@ -418,7 +416,7 @@ void BatchShotsExecutor<state_t>::run_circuit_shots(
|
|||
noise](int_t i) {
|
||||
par_results[i].resize(circ.num_bind_params);
|
||||
std::vector<RngEngine> rng(Base::num_states_in_group_[i]);
|
||||
for (int_t j = 0; j < Base::num_states_in_group_[i]; j++) {
|
||||
for (uint_t j = 0; j < Base::num_states_in_group_[i]; j++) {
|
||||
uint_t ishot =
|
||||
Base::global_state_index_ + Base::top_state_of_group_[i] + j;
|
||||
uint_t iparam = ishot / Base::num_shots_per_bind_param_;
|
||||
|
@ -441,13 +439,13 @@ void BatchShotsExecutor<state_t>::run_circuit_shots(
|
|||
Base::num_groups_, apply_ops_lambda, Base::num_groups_);
|
||||
|
||||
for (auto &res : par_results) {
|
||||
for (i = 0; i < Base::num_bind_params_; i++) {
|
||||
for (uint_t i = 0; i < Base::num_bind_params_; i++) {
|
||||
(result_it + i)->combine(std::move(res[i]));
|
||||
}
|
||||
}
|
||||
|
||||
// collect measured bits and copy memory
|
||||
for (i = 0; i < n_shots; i++) {
|
||||
for (uint_t i = 0; i < n_shots; i++) {
|
||||
if (Base::num_process_per_experiment_ > 1) {
|
||||
Base::states_[i].qreg().read_measured_data(
|
||||
Base::cregs_[Base::global_state_index_ + i_begin + i]);
|
||||
|
@ -469,7 +467,7 @@ void BatchShotsExecutor<state_t>::run_circuit_shots(
|
|||
if (Base::num_process_per_experiment_ > 1) {
|
||||
Base::gather_creg_memory(Base::cregs_, Base::state_index_begin_);
|
||||
|
||||
for (i = 0; i < circ_opt.shots; i++) {
|
||||
for (uint_t i = 0; i < circ_opt.shots; i++) {
|
||||
uint_t iparam = i / Base::num_shots_per_bind_param_;
|
||||
(result_it + iparam)
|
||||
->save_count_data(Base::cregs_[i], Base::save_creg_memory_);
|
||||
|
@ -487,7 +485,7 @@ void BatchShotsExecutor<state_t>::run_circuit_shots(
|
|||
}
|
||||
if (nDev > Base::num_groups_)
|
||||
nDev = Base::num_groups_;
|
||||
for (i = 0; i < Base::num_bind_params_; i++)
|
||||
for (uint_t i = 0; i < Base::num_bind_params_; i++)
|
||||
(result_it + i)
|
||||
->metadata.add(nDev, "batched_shots_optimization_parallel_gpus");
|
||||
}
|
||||
|
@ -516,7 +514,7 @@ void BatchShotsExecutor<state_t>::apply_ops_batched_shots_for_group(
|
|||
Base::states_[j].qreg().read_measured_data(Base::states_[j].creg());
|
||||
std::vector<Operations::Op> nops = noise.sample_noise_loc(
|
||||
*op, rng[j - Base::top_state_of_group_[i_group]]);
|
||||
for (int_t k = 0; k < nops.size(); k++) {
|
||||
for (uint_t k = 0; k < nops.size(); k++) {
|
||||
Base::states_[j].apply_op(
|
||||
nops[k], *result_it,
|
||||
rng[j - Base::top_state_of_group_[i_group]], false);
|
||||
|
@ -534,13 +532,13 @@ void BatchShotsExecutor<state_t>::apply_ops_batched_shots_for_group(
|
|||
uint_t non_pauli_gate_count = 0;
|
||||
if (num_inner_threads > 1) {
|
||||
#pragma omp parallel for reduction(+: count_ops,non_pauli_gate_count) num_threads(num_inner_threads)
|
||||
for (int_t j = 0; j < count; j++) {
|
||||
for (int_t j = 0; j < (int_t)count; j++) {
|
||||
noise_ops[j] = noise.sample_noise_loc(*op, rng[j]);
|
||||
|
||||
if (!(noise_ops[j].size() == 0 ||
|
||||
(noise_ops[j].size() == 1 && noise_ops[j][0].name == "id"))) {
|
||||
count_ops++;
|
||||
for (int_t k = 0; k < noise_ops[j].size(); k++) {
|
||||
for (uint_t k = 0; k < noise_ops[j].size(); k++) {
|
||||
if (noise_ops[j][k].name != "id" && noise_ops[j][k].name != "x" &&
|
||||
noise_ops[j][k].name != "y" && noise_ops[j][k].name != "z" &&
|
||||
noise_ops[j][k].name != "pauli") {
|
||||
|
@ -551,13 +549,13 @@ void BatchShotsExecutor<state_t>::apply_ops_batched_shots_for_group(
|
|||
}
|
||||
}
|
||||
} else {
|
||||
for (int_t j = 0; j < count; j++) {
|
||||
for (uint_t j = 0; j < count; j++) {
|
||||
noise_ops[j] = noise.sample_noise_loc(*op, rng[j]);
|
||||
|
||||
if (!(noise_ops[j].size() == 0 ||
|
||||
(noise_ops[j].size() == 1 && noise_ops[j][0].name == "id"))) {
|
||||
count_ops++;
|
||||
for (int_t k = 0; k < noise_ops[j].size(); k++) {
|
||||
for (uint_t k = 0; k < noise_ops[j].size(); k++) {
|
||||
if (noise_ops[j][k].name != "id" && noise_ops[j][k].name != "x" &&
|
||||
noise_ops[j][k].name != "y" && noise_ops[j][k].name != "z" &&
|
||||
noise_ops[j][k].name != "pauli") {
|
||||
|
@ -584,7 +582,7 @@ void BatchShotsExecutor<state_t>::apply_ops_batched_shots_for_group(
|
|||
continue;
|
||||
}
|
||||
// call apply_op for each state
|
||||
for (int_t j = 0; j < Base::num_states_in_group_[i_group]; j++) {
|
||||
for (uint_t j = 0; j < Base::num_states_in_group_[i_group]; j++) {
|
||||
uint_t is = Base::top_state_of_group_[i_group] + j;
|
||||
uint_t ip =
|
||||
(Base::global_state_index_ + is) / Base::num_shots_per_bind_param_;
|
||||
|
@ -602,13 +600,13 @@ template <class state_t>
|
|||
void BatchShotsExecutor<state_t>::apply_batched_noise_ops(
|
||||
const int_t i_group, const std::vector<std::vector<Operations::Op>> &ops,
|
||||
ResultItr result_it, std::vector<RngEngine> &rng) {
|
||||
int_t i, j, k, count, nop, pos = 0;
|
||||
uint_t count;
|
||||
uint_t istate = Base::top_state_of_group_[i_group];
|
||||
count = ops.size();
|
||||
|
||||
reg_t mask(count);
|
||||
std::vector<bool> finished(count, false);
|
||||
for (i = 0; i < count; i++) {
|
||||
for (uint_t i = 0; i < count; i++) {
|
||||
int_t cond_reg = -1;
|
||||
|
||||
if (finished[i])
|
||||
|
@ -620,7 +618,7 @@ void BatchShotsExecutor<state_t>::apply_batched_noise_ops(
|
|||
mask[i] = 1;
|
||||
|
||||
// find same ops to be executed in a batch
|
||||
for (j = i + 1; j < count; j++) {
|
||||
for (uint_t j = i + 1; j < count; j++) {
|
||||
if (finished[j]) {
|
||||
mask[j] = 0;
|
||||
continue;
|
||||
|
@ -638,7 +636,7 @@ void BatchShotsExecutor<state_t>::apply_batched_noise_ops(
|
|||
}
|
||||
|
||||
mask[j] = true;
|
||||
for (k = 0; k < ops[i].size(); k++) {
|
||||
for (uint_t k = 0; k < ops[i].size(); k++) {
|
||||
if (ops[i][k].conditional) {
|
||||
cond_reg = ops[i][k].conditional_reg;
|
||||
}
|
||||
|
@ -657,7 +655,7 @@ void BatchShotsExecutor<state_t>::apply_batched_noise_ops(
|
|||
cond_reg, mask);
|
||||
|
||||
// batched execution on same ops
|
||||
for (k = 0; k < ops[i].size(); k++) {
|
||||
for (uint_t k = 0; k < ops[i].size(); k++) {
|
||||
Operations::Op cop = ops[i][k];
|
||||
|
||||
// mark op conditional to mask shots
|
||||
|
@ -666,7 +664,7 @@ void BatchShotsExecutor<state_t>::apply_batched_noise_ops(
|
|||
|
||||
if (!apply_batched_op(istate, cop, result_it, rng, false)) {
|
||||
// call apply_op for each state
|
||||
for (int_t j = 0; j < Base::num_states_in_group_[i_group]; j++) {
|
||||
for (uint_t j = 0; j < Base::num_states_in_group_[i_group]; j++) {
|
||||
uint_t is = Base::top_state_of_group_[i_group] + j;
|
||||
uint_t ip = (Base::global_state_index_ + is) /
|
||||
Base::num_shots_per_bind_param_;
|
||||
|
@ -688,7 +686,7 @@ void BatchShotsExecutor<state_t>::apply_batched_expval(const int_t istate,
|
|||
ResultItr result) {
|
||||
std::vector<double> val;
|
||||
bool variance = (op.type == Operations::OpType::save_expval_var);
|
||||
for (int_t i = 0; i < op.expval_params.size(); i++) {
|
||||
for (uint_t i = 0; i < op.expval_params.size(); i++) {
|
||||
std::complex<double> cprm;
|
||||
|
||||
if (variance)
|
||||
|
@ -706,7 +704,7 @@ void BatchShotsExecutor<state_t>::apply_batched_expval(const int_t istate,
|
|||
return;
|
||||
|
||||
if (variance) {
|
||||
for (int_t i = 0; i < val.size() / 2; i++) {
|
||||
for (uint_t i = 0; i < val.size() / 2; i++) {
|
||||
uint_t ip = (Base::global_state_index_ + istate + i) /
|
||||
Base::num_shots_per_bind_param_;
|
||||
|
||||
|
@ -719,7 +717,7 @@ void BatchShotsExecutor<state_t>::apply_batched_expval(const int_t istate,
|
|||
op.save_type);
|
||||
}
|
||||
} else {
|
||||
for (int_t i = 0; i < val.size(); i++) {
|
||||
for (uint_t i = 0; i < val.size(); i++) {
|
||||
uint_t ip = (Base::global_state_index_ + istate + i) /
|
||||
Base::num_shots_per_bind_param_;
|
||||
|
||||
|
@ -737,7 +735,7 @@ void BatchShotsExecutor<state_t>::batched_measure_sampler(
|
|||
InputIterator first_meas, InputIterator last_meas, uint_t shots,
|
||||
uint_t i_group, ResultItr result, std::vector<RngEngine> &rng) {
|
||||
uint_t par_states = 1;
|
||||
if (Base::max_parallel_threads_ >= Base::num_groups_ * 2) {
|
||||
if ((uint_t)Base::max_parallel_threads_ >= Base::num_groups_ * 2) {
|
||||
par_states =
|
||||
std::min((uint_t)(Base::max_parallel_threads_ / Base::num_groups_),
|
||||
Base::num_states_in_group_[i_group]);
|
||||
|
@ -798,7 +796,7 @@ void BatchShotsExecutor<state_t>::batched_measure_sampler(
|
|||
state_end = Base::num_states_in_group_[i_group] * (i + 1) / par_states;
|
||||
|
||||
for (; i_state < state_end; i_state++) {
|
||||
for (int_t j = 0; j < shots; j++)
|
||||
for (uint_t j = 0; j < shots; j++)
|
||||
rnd_shots[i_state * shots + j] =
|
||||
rng[i_state].rand(0, 1) + (double)i_state;
|
||||
}
|
||||
|
@ -830,14 +828,14 @@ void BatchShotsExecutor<state_t>::batched_measure_sampler(
|
|||
uint_t is = Base::top_state_of_group_[i_group] + i_state;
|
||||
uint_t ip = (Base::global_state_index_ + is);
|
||||
|
||||
for (int_t i = 0; i < shots; i++) {
|
||||
for (uint_t i = 0; i < shots; i++) {
|
||||
ClassicalRegister creg;
|
||||
creg.initialize(num_memory, num_registers);
|
||||
reg_t all_samples(meas_qubits.size());
|
||||
|
||||
uint_t val = allbit_samples[i_state * shots + i] & mask;
|
||||
reg_t allbit_sample = Utils::int2reg(val, 2, Base::num_qubits_);
|
||||
for (int_t mq = 0; mq < meas_qubits.size(); mq++) {
|
||||
for (uint_t mq = 0; mq < meas_qubits.size(); mq++) {
|
||||
all_samples[mq] = allbit_sample[meas_qubits[mq]];
|
||||
}
|
||||
|
||||
|
@ -870,7 +868,7 @@ void BatchShotsExecutor<state_t>::batched_measure_sampler(
|
|||
auto time_taken =
|
||||
std::chrono::duration<double>(myclock_t::now() - timer_start).count();
|
||||
|
||||
for (int_t i_state = 0; i_state < Base::num_states_in_group_[i_group];
|
||||
for (uint_t i_state = 0; i_state < Base::num_states_in_group_[i_group];
|
||||
i_state++) {
|
||||
uint_t ip = Base::global_state_index_ + Base::top_state_of_group_[i_group] +
|
||||
i_state;
|
||||
|
|
|
@ -22,13 +22,22 @@ namespace AER {
|
|||
|
||||
namespace Chunk {
|
||||
|
||||
void get_qubits_inout(const int chunk_qubits, const reg_t &qubits,
|
||||
reg_t &qubits_in, reg_t &qubits_out);
|
||||
void get_inout_ctrl_qubits(const Operations::Op &op, const uint_t num_qubits,
|
||||
reg_t &qubits_in, reg_t &qubits_out);
|
||||
Operations::Op correct_gate_op_in_chunk(const Operations::Op &op,
|
||||
reg_t &qubits_in);
|
||||
void block_diagonal_matrix(const uint_t gid, const uint_t chunk_bits,
|
||||
reg_t &qubits, cvector_t &diag);
|
||||
|
||||
void get_qubits_inout(const int chunk_qubits, const reg_t &qubits,
|
||||
reg_t &qubits_in, reg_t &qubits_out) {
|
||||
int_t i;
|
||||
uint_t i;
|
||||
qubits_in.clear();
|
||||
qubits_out.clear();
|
||||
for (i = 0; i < qubits.size(); i++) {
|
||||
if (qubits[i] < chunk_qubits) { // in chunk
|
||||
if (qubits[i] < (uint_t)chunk_qubits) { // in chunk
|
||||
qubits_in.push_back(qubits[i]);
|
||||
} else {
|
||||
qubits_out.push_back(qubits[i]);
|
||||
|
@ -40,7 +49,7 @@ void get_inout_ctrl_qubits(const Operations::Op &op, const uint_t num_qubits,
|
|||
reg_t &qubits_in, reg_t &qubits_out) {
|
||||
if (op.type == Operations::OpType::gate &&
|
||||
(op.name[0] == 'c' || op.name.find("mc") == 0)) {
|
||||
for (int i = 0; i < op.qubits.size(); i++) {
|
||||
for (uint_t i = 0; i < op.qubits.size(); i++) {
|
||||
if (op.qubits[i] < num_qubits)
|
||||
qubits_in.push_back(op.qubits[i]);
|
||||
else
|
||||
|
|
|
@ -114,7 +114,7 @@ protected:
|
|||
uint_t distributed_group_; // group id of distribution
|
||||
int_t distributed_proc_bits_; // distributed_procs_=2^distributed_proc_bits_
|
||||
// (if nprocs != power of 2, set -1)
|
||||
int num_process_per_experiment_ = 1;
|
||||
uint_t num_process_per_experiment_ = 1;
|
||||
|
||||
#ifdef AER_MPI
|
||||
// communicator group to simulate a circuit (for multi-experiments)
|
||||
|
@ -215,6 +215,20 @@ protected:
|
|||
void gather_creg_memory(std::vector<ClassicalRegister> &cregs,
|
||||
reg_t &shot_index);
|
||||
#endif
|
||||
|
||||
// Sample n-measurement outcomes without applying the measure operation
|
||||
// to the system state
// Default implementation: the qubits, shots and rng arguments are not used
// and an empty vector is returned.
// NOTE(review): presumably concrete executors override this to return real
// samples -- confirm against the sub-classes.
|
||||
virtual std::vector<reg_t> sample_measure(const reg_t &qubits, uint_t shots,
|
||||
RngEngine &rng) const {
|
||||
std::vector<reg_t> ret;
|
||||
return ret;
|
||||
};
|
||||
// Sample measurement outcomes from `state` when a vector of RNG engines is
// supplied. This default forwards to state.sample_measure(qubits, shots,
// rng[0]), i.e. only the first engine in `rng` is used.
// NOTE(review): rng[0] is read without a size check, so `rng` is assumed to
// be non-empty -- confirm against callers.
virtual std::vector<reg_t> sample_measure(state_t &state, const reg_t &qubits,
|
||||
uint_t shots,
|
||||
std::vector<RngEngine> &rng) const {
|
||||
// this is for single rng, implement in sub-class for multi-shots case
|
||||
return state.sample_measure(qubits, shots, rng[0]);
|
||||
}
|
||||
};
|
||||
|
||||
template <class state_t>
|
||||
|
@ -437,7 +451,6 @@ void Executor<state_t>::set_parallelization(const Config &config,
|
|||
distributed_group_ = myrank_ / distributed_procs_;
|
||||
|
||||
distributed_proc_bits_ = 0;
|
||||
int proc_bits = 0;
|
||||
uint_t p = distributed_procs_;
|
||||
while (p > 1) {
|
||||
if ((p & 1) != 0) { // procs is not power of 2
|
||||
|
@ -518,11 +531,11 @@ void Executor<state_t>::set_parallelization(const Config &config,
|
|||
// Parallel shots is > 1
|
||||
// Limit parallel shots by available memory and number of shots
|
||||
// And assign the remaining threads to state update
|
||||
int circ_memory_mb =
|
||||
required_memory_mb(config, circ, noise) / num_process_per_experiment_;
|
||||
int circ_memory_mb = (int)(required_memory_mb(config, circ, noise) /
|
||||
num_process_per_experiment_);
|
||||
size_t mem_size =
|
||||
(sim_device_ == Device::GPU) ? max_gpu_memory_mb_ : max_memory_mb_;
|
||||
if (mem_size < circ_memory_mb)
|
||||
if (mem_size < (size_t)circ_memory_mb)
|
||||
throw std::runtime_error(
|
||||
"a circuit requires more memory than max_memory_mb.");
|
||||
// If circ memory is 0, set it to 1 so that we don't divide by zero
|
||||
|
@ -561,7 +574,7 @@ void Executor<state_t>::run_circuit(Circuit &circ,
|
|||
rng.set_seed(circ.seed);
|
||||
|
||||
// Output data container
|
||||
for (int_t i = 0; i < circ.num_bind_params; i++) {
|
||||
for (uint_t i = 0; i < circ.num_bind_params; i++) {
|
||||
ExperimentResult &result = *(result_it + i);
|
||||
result.set_config(config);
|
||||
result.metadata.add(method_names_.at(method), "method");
|
||||
|
@ -602,7 +615,7 @@ void Executor<state_t>::run_circuit(Circuit &circ,
|
|||
// Ideal circuit
|
||||
if (noise.is_ideal()) {
|
||||
opt_circ = circ;
|
||||
for (int_t i = 0; i < circ.num_bind_params; i++) {
|
||||
for (uint_t i = 0; i < circ.num_bind_params; i++) {
|
||||
ExperimentResult &result = *(result_it + i);
|
||||
result.metadata.add("ideal", "noise");
|
||||
}
|
||||
|
@ -610,7 +623,7 @@ void Executor<state_t>::run_circuit(Circuit &circ,
|
|||
// Readout error only
|
||||
else if (noise.has_quantum_errors() == false) {
|
||||
opt_circ = noise.sample_noise(circ, rng);
|
||||
for (int_t i = 0; i < circ.num_bind_params; i++) {
|
||||
for (uint_t i = 0; i < circ.num_bind_params; i++) {
|
||||
ExperimentResult &result = *(result_it + i);
|
||||
result.metadata.add("readout", "noise");
|
||||
}
|
||||
|
@ -621,7 +634,7 @@ void Executor<state_t>::run_circuit(Circuit &circ,
|
|||
// Sample noise using SuperOp method
|
||||
opt_circ =
|
||||
noise.sample_noise(circ, rng, Noise::NoiseModel::Method::superop);
|
||||
for (int_t i = 0; i < circ.num_bind_params; i++) {
|
||||
for (uint_t i = 0; i < circ.num_bind_params; i++) {
|
||||
ExperimentResult &result = *(result_it + i);
|
||||
result.metadata.add("superop", "noise");
|
||||
}
|
||||
|
@ -631,7 +644,7 @@ void Executor<state_t>::run_circuit(Circuit &circ,
|
|||
noise.opset().contains(Operations::OpType::superop)) {
|
||||
opt_circ =
|
||||
noise.sample_noise(circ, rng, Noise::NoiseModel::Method::kraus);
|
||||
for (int_t i = 0; i < circ.num_bind_params; i++) {
|
||||
for (uint_t i = 0; i < circ.num_bind_params; i++) {
|
||||
ExperimentResult &result = *(result_it + i);
|
||||
result.metadata.add("kraus", "noise");
|
||||
}
|
||||
|
@ -639,7 +652,7 @@ void Executor<state_t>::run_circuit(Circuit &circ,
|
|||
// General circuit noise sampling
|
||||
else {
|
||||
noise_sampling = true;
|
||||
for (int_t i = 0; i < circ.num_bind_params; i++) {
|
||||
for (uint_t i = 0; i < circ.num_bind_params; i++) {
|
||||
ExperimentResult &result = *(result_it + i);
|
||||
result.metadata.add("circuit", "noise");
|
||||
}
|
||||
|
@ -658,7 +671,7 @@ void Executor<state_t>::run_circuit(Circuit &circ,
|
|||
run_circuit_shots(opt_circ, noise, config, rng, result_it, false);
|
||||
}
|
||||
}
|
||||
for (int_t i = 0; i < circ.num_bind_params; i++) {
|
||||
for (uint_t i = 0; i < circ.num_bind_params; i++) {
|
||||
ExperimentResult &result = *(result_it + i);
|
||||
// Report success
|
||||
result.status = ExperimentResult::Status::completed;
|
||||
|
@ -692,7 +705,7 @@ void Executor<state_t>::run_circuit(Circuit &circ,
|
|||
auto timer_stop = myclock_t::now(); // stop timer
|
||||
double time_taken =
|
||||
std::chrono::duration<double>(timer_stop - timer_start).count();
|
||||
for (int_t i = 0; i < circ.num_bind_params; i++) {
|
||||
for (uint_t i = 0; i < circ.num_bind_params; i++) {
|
||||
ExperimentResult &result = *(result_it + i);
|
||||
result.time_taken = time_taken;
|
||||
// save time also to metadata to pick time in primitive result
|
||||
|
@ -701,7 +714,7 @@ void Executor<state_t>::run_circuit(Circuit &circ,
|
|||
}
|
||||
// If an exception occurs during execution, catch it and pass it to the output
|
||||
catch (std::exception &e) {
|
||||
for (int_t i = 0; i < circ.num_bind_params; i++) {
|
||||
for (uint_t i = 0; i < circ.num_bind_params; i++) {
|
||||
ExperimentResult &result = *(result_it + i);
|
||||
result.status = ExperimentResult::Status::error;
|
||||
result.message = e.what();
|
||||
|
@ -816,30 +829,26 @@ void Executor<state_t>::run_circuit_shots(
|
|||
std::vector<ClassicalRegister> cregs;
|
||||
reg_t shot_begin(distributed_procs_);
|
||||
reg_t shot_end(distributed_procs_);
|
||||
for (int_t i = 0; i < distributed_procs_; i++) {
|
||||
for (uint_t i = 0; i < distributed_procs_; i++) {
|
||||
shot_begin[i] = num_shots * i / distributed_procs_;
|
||||
shot_end[i] = num_shots * (i + 1) / distributed_procs_;
|
||||
}
|
||||
uint_t num_local_shots =
|
||||
shot_end[distributed_rank_] - shot_begin[distributed_rank_];
|
||||
|
||||
int max_matrix_qubits;
|
||||
auto fusion_pass = transpile_fusion(circ.opset(), config);
|
||||
int max_matrix_qubits = 1;
|
||||
if (!sample_noise) {
|
||||
Noise::NoiseModel dummy_noise;
|
||||
state_t dummy_state;
|
||||
auto fusion_pass = transpile_fusion(circ.opset(), config);
|
||||
ExperimentResult fusion_result;
|
||||
auto fusion_pass = transpile_fusion(circ.opset(), config);
|
||||
fusion_pass.optimize_circuit(circ, dummy_noise, dummy_state.opset(),
|
||||
fusion_result);
|
||||
for (int_t i = 0; i < circ.num_bind_params; i++) {
|
||||
for (uint_t i = 0; i < circ.num_bind_params; i++) {
|
||||
ExperimentResult &result = *(result_it + i);
|
||||
result.metadata.copy(fusion_result.metadata);
|
||||
}
|
||||
max_matrix_qubits = get_max_matrix_qubits(circ);
|
||||
} else {
|
||||
max_matrix_qubits = get_max_matrix_qubits(circ);
|
||||
max_matrix_qubits = std::max(max_matrix_qubits, (int)fusion_pass.max_qubit);
|
||||
}
|
||||
num_bind_params_ = circ.num_bind_params;
|
||||
|
||||
|
@ -857,9 +866,9 @@ void Executor<state_t>::run_circuit_shots(
|
|||
init_rng, max_matrix_qubits,
|
||||
num_local_shots](int_t i) {
|
||||
state_t state;
|
||||
uint_t i_shot, shot_end;
|
||||
uint_t i_shot, e_shot;
|
||||
i_shot = num_local_shots * i / par_shots;
|
||||
shot_end = num_local_shots * (i + 1) / par_shots;
|
||||
e_shot = num_local_shots * (i + 1) / par_shots;
|
||||
|
||||
auto fusion_pass = transpile_fusion(circ.opset(), config);
|
||||
|
||||
|
@ -871,7 +880,7 @@ void Executor<state_t>::run_circuit_shots(
|
|||
state.set_distribution(this->num_process_per_experiment_);
|
||||
state.set_num_global_qubits(circ.num_qubits);
|
||||
|
||||
for (; i_shot < shot_end; i_shot++) {
|
||||
for (; i_shot < e_shot; i_shot++) {
|
||||
RngEngine rng;
|
||||
uint_t shot_index = shot_begin[distributed_rank_] + i_shot;
|
||||
uint_t iparam = shot_index / circ.shots;
|
||||
|
@ -892,7 +901,9 @@ void Executor<state_t>::run_circuit_shots(
|
|||
circ_opt = noise.sample_noise(circ, rng);
|
||||
fusion_pass.optimize_circuit(circ_opt, dummy_noise, state.opset(),
|
||||
result);
|
||||
state.set_max_matrix_qubits(get_max_matrix_qubits(circ_opt));
|
||||
int max_bits = get_max_matrix_qubits(circ_opt);
|
||||
state.set_max_matrix_qubits(
|
||||
std::max(max_bits, (int)fusion_pass.max_qubit));
|
||||
} else
|
||||
state.set_max_matrix_qubits(max_matrix_qubits);
|
||||
|
||||
|
@ -947,11 +958,11 @@ void Executor<state_t>::run_circuit_shots(
|
|||
num_shots = circ.shots * circ.num_bind_params;
|
||||
auto save_cregs = [this, &par_results, par_shots, num_shots, circ,
|
||||
cregs](int_t i) {
|
||||
uint_t i_shot, shot_end;
|
||||
uint_t i_shot, e_shot;
|
||||
i_shot = num_shots * i / par_shots;
|
||||
shot_end = num_shots * (i + 1) / par_shots;
|
||||
e_shot = num_shots * (i + 1) / par_shots;
|
||||
|
||||
for (; i_shot < shot_end; i_shot++) {
|
||||
for (; i_shot < e_shot; i_shot++) {
|
||||
uint_t ip = i_shot / circ.shots;
|
||||
par_results[i][ip].save_count_data(cregs[i_shot], save_creg_memory_);
|
||||
}
|
||||
|
@ -962,12 +973,12 @@ void Executor<state_t>::run_circuit_shots(
|
|||
#endif
|
||||
|
||||
for (auto &res : par_results) {
|
||||
for (int_t i = 0; i < circ.num_bind_params; i++) {
|
||||
for (uint_t i = 0; i < circ.num_bind_params; i++) {
|
||||
(result_it + i)->combine(std::move(res[i]));
|
||||
}
|
||||
}
|
||||
if (sim_device_ == Device::GPU) {
|
||||
for (int_t i = 0; i < circ.num_bind_params; i++) {
|
||||
for (uint_t i = 0; i < circ.num_bind_params; i++) {
|
||||
#ifdef AER_CUSTATEVEC
|
||||
(result_it + i)->metadata.add(cuStateVec_enable_, "cuStateVec_enable");
|
||||
#endif
|
||||
|
@ -1292,7 +1303,7 @@ int_t Executor<state_t>::get_matrix_bits(const Operations::Op &op) const {
|
|||
template <class state_t>
|
||||
int_t Executor<state_t>::get_max_matrix_qubits(const Circuit &circ) const {
|
||||
int_t max_bits = 0;
|
||||
int_t i;
|
||||
uint_t i;
|
||||
|
||||
if (sim_device_ != Device::CPU) { // Only applicable for GPU (and Thrust)
|
||||
for (i = 0; i < circ.ops.size(); i++) {
|
||||
|
@ -1315,7 +1326,6 @@ bool Executor<state_t>::has_statevector_ops(const Circuit &circ) const {
|
|||
template <class state_t>
|
||||
void Executor<state_t>::gather_creg_memory(
|
||||
std::vector<ClassicalRegister> &cregs, reg_t &shot_index) {
|
||||
int_t i, j;
|
||||
uint_t n64, i64, ibit, num_local_shots;
|
||||
|
||||
if (distributed_procs_ == 0)
|
||||
|
@ -1337,9 +1347,9 @@ void Executor<state_t>::gather_creg_memory(
|
|||
|
||||
reg_t bin_memory(n64 * num_local_shots, 0);
|
||||
// compress memory string to binary
|
||||
#pragma omp parallel for private(i, j, i64, ibit)
|
||||
for (i = 0; i < num_local_shots; i++) {
|
||||
for (j = 0; j < size; j++) {
|
||||
#pragma omp parallel for private(i64, ibit)
|
||||
for (int_t i = 0; i < (int_t)num_local_shots; i++) {
|
||||
for (int_t j = 0; j < size; j++) {
|
||||
i64 = j >> 6;
|
||||
ibit = j & 63;
|
||||
if (cregs[shot_index[distributed_rank_] + i].creg_memory()[j] == '1') {
|
||||
|
@ -1352,21 +1362,22 @@ void Executor<state_t>::gather_creg_memory(
|
|||
std::vector<int> recv_counts(distributed_procs_);
|
||||
std::vector<int> recv_offset(distributed_procs_);
|
||||
|
||||
for (i = 0; i < distributed_procs_ - 1; i++) {
|
||||
for (uint_t i = 0; i < distributed_procs_ - 1; i++) {
|
||||
recv_offset[i] = shot_index[i];
|
||||
recv_counts[i] = shot_index[i + 1] - shot_index[i];
|
||||
}
|
||||
recv_offset[distributed_procs_ - 1] = shot_index[distributed_procs_ - 1];
|
||||
recv_counts[i] = cregs.size() - shot_index[distributed_procs_ - 1];
|
||||
recv_counts[distributed_procs_ - 1] =
|
||||
cregs.size() - shot_index[distributed_procs_ - 1];
|
||||
|
||||
MPI_Allgatherv(&bin_memory[0], n64 * num_local_shots, MPI_UINT64_T, &recv[0],
|
||||
&recv_counts[0], &recv_offset[0], MPI_UINT64_T,
|
||||
distributed_comm_);
|
||||
|
||||
// store gathered memory
|
||||
#pragma omp parallel for private(i, j, i64, ibit)
|
||||
for (i = 0; i < cregs.size(); i++) {
|
||||
for (j = 0; j < size; j++) {
|
||||
#pragma omp parallel for private(i64, ibit)
|
||||
for (int_t i = 0; i < (int_t)cregs.size(); i++) {
|
||||
for (int_t j = 0; j < size; j++) {
|
||||
i64 = j >> 6;
|
||||
ibit = j & 63;
|
||||
if (((recv[i * n64 + i64] >> ibit) & 1) == 1)
|
||||
|
|
|
@ -242,13 +242,13 @@ void DensityMatrix<data_t>::initialize_from_vector(list_t &&vec) {
|
|||
|
||||
template <typename data_t>
|
||||
void DensityMatrix<data_t>::transpose() {
|
||||
const size_t rows = BaseMatrix::num_rows();
|
||||
const int_t rows = BaseMatrix::num_rows();
|
||||
#pragma omp parallel for if (BaseVector::num_qubits_ > \
|
||||
BaseVector::omp_threshold_ && \
|
||||
BaseVector::omp_threads_ > 1) \
|
||||
num_threads(BaseVector::omp_threads_)
|
||||
for (int_t i = 0; i < rows; i++) {
|
||||
for (int_t j = i + 1; j < rows; j++) {
|
||||
for (uint_t j = i + 1; j < rows; j++) {
|
||||
const uint_t pos_a = i * rows + j;
|
||||
const uint_t pos_b = j * rows + i;
|
||||
const auto tmp = BaseVector::data_[pos_a];
|
||||
|
@ -483,7 +483,7 @@ DensityMatrix<data_t>::expval_pauli(const reg_t &qubits,
|
|||
auto lambda = [&](const int_t i, double &val_re, double &val_im) -> void {
|
||||
(void)val_im; // unused
|
||||
auto idx_vec = ((i << 1) & mask_u) | (i & mask_l);
|
||||
auto idx_mat = idx_vec ^ x_mask + nrows * idx_vec;
|
||||
auto idx_mat = (idx_vec ^ x_mask) + nrows * idx_vec;
|
||||
// Since rho is hermitian rho[i, j] + rho[j, i] = 2 real(rho[i, j])
|
||||
auto val = 2 * std::real(phase * BaseVector::data_[idx_mat]);
|
||||
if (z_mask && (AER::Utils::popcount(idx_vec & z_mask) & 1)) {
|
||||
|
@ -511,7 +511,7 @@ double DensityMatrix<data_t>::expval_pauli_non_diagonal_chunk(
|
|||
|
||||
auto lambda = [&](const int_t i, double &val_re, double &val_im) -> void {
|
||||
(void)val_im; // unused
|
||||
auto idx_mat = i ^ x_mask + nrows * i;
|
||||
auto idx_mat = (i ^ x_mask) + nrows * i;
|
||||
auto val = std::real(phase * BaseVector::data_[idx_mat]);
|
||||
if (z_mask && (AER::Utils::popcount(i & z_mask) & 1)) {
|
||||
val = -val;
|
||||
|
|
|
@ -40,6 +40,7 @@ class Executor : public CircuitExecutor::ParallelStateExecutor<state_t>,
|
|||
using Base = CircuitExecutor::MultiStateExecutor<state_t>;
|
||||
using BasePar = CircuitExecutor::ParallelStateExecutor<state_t>;
|
||||
using BaseBatch = CircuitExecutor::BatchShotsExecutor<state_t>;
|
||||
using Base::sample_measure;
|
||||
|
||||
protected:
|
||||
public:
|
||||
|
@ -203,14 +204,14 @@ protected:
|
|||
//-------------------------------------------------------------------------
|
||||
template <class densmat_t>
|
||||
void Executor<densmat_t>::initialize_qreg(uint_t num_qubits) {
|
||||
for (int_t i = 0; i < Base::states_.size(); i++) {
|
||||
for (uint_t i = 0; i < Base::states_.size(); i++) {
|
||||
Base::states_[i].qreg().set_num_qubits(BasePar::chunk_bits_);
|
||||
}
|
||||
|
||||
if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
|
||||
#pragma omp parallel for
|
||||
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
|
||||
for (int_t iChunk = Base::top_state_of_group_[ig];
|
||||
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
|
||||
for (uint_t iChunk = Base::top_state_of_group_[ig];
|
||||
iChunk < Base::top_state_of_group_[ig + 1]; iChunk++) {
|
||||
if (Base::global_state_index_ + iChunk == 0) {
|
||||
Base::states_[iChunk].qreg().initialize();
|
||||
|
@ -220,7 +221,7 @@ void Executor<densmat_t>::initialize_qreg(uint_t num_qubits) {
|
|||
}
|
||||
}
|
||||
} else {
|
||||
for (int_t i = 0; i < Base::states_.size(); i++) {
|
||||
for (uint_t i = 0; i < Base::states_.size(); i++) {
|
||||
if (Base::global_state_index_ + i == 0) {
|
||||
Base::states_[i].qreg().initialize();
|
||||
} else {
|
||||
|
@ -236,11 +237,10 @@ void Executor<densmat_t>::initialize_from_vector(const list_t &vec) {
|
|||
if ((1ull << (Base::num_qubits_ * 2)) == vec.size()) {
|
||||
BasePar::initialize_from_vector(vec);
|
||||
} else if ((1ull << (Base::num_qubits_ * 2)) == vec.size() * vec.size()) {
|
||||
int_t iChunk;
|
||||
if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
|
||||
#pragma omp parallel for
|
||||
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
|
||||
for (int_t iChunk = Base::top_state_of_group_[ig];
|
||||
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
|
||||
for (uint_t iChunk = Base::top_state_of_group_[ig];
|
||||
iChunk < Base::top_state_of_group_[ig + 1]; iChunk++) {
|
||||
uint_t irow_chunk = ((iChunk + Base::global_state_index_) >>
|
||||
((Base::num_qubits_ - BasePar::chunk_bits_)))
|
||||
|
@ -251,7 +251,7 @@ void Executor<densmat_t>::initialize_from_vector(const list_t &vec) {
|
|||
<< (BasePar::chunk_bits_);
|
||||
|
||||
// copy part of state for this chunk
|
||||
uint_t i, row, col;
|
||||
uint_t i;
|
||||
list_t vec1(1ull << BasePar::chunk_bits_);
|
||||
list_t vec2(1ull << BasePar::chunk_bits_);
|
||||
|
||||
|
@ -264,7 +264,7 @@ void Executor<densmat_t>::initialize_from_vector(const list_t &vec) {
|
|||
}
|
||||
}
|
||||
} else {
|
||||
for (iChunk = 0; iChunk < Base::states_.size(); iChunk++) {
|
||||
for (uint_t iChunk = 0; iChunk < Base::states_.size(); iChunk++) {
|
||||
uint_t irow_chunk = ((iChunk + Base::global_state_index_) >>
|
||||
((Base::num_qubits_ - BasePar::chunk_bits_)))
|
||||
<< (BasePar::chunk_bits_);
|
||||
|
@ -274,7 +274,7 @@ void Executor<densmat_t>::initialize_from_vector(const list_t &vec) {
|
|||
<< (BasePar::chunk_bits_);
|
||||
|
||||
// copy part of state for this chunk
|
||||
uint_t i, row, col;
|
||||
uint_t i;
|
||||
list_t vec1(1ull << BasePar::chunk_bits_);
|
||||
list_t vec2(1ull << BasePar::chunk_bits_);
|
||||
|
||||
|
@ -515,12 +515,12 @@ void Executor<densmat_t>::apply_save_amplitudes_sq(const Operations::Op &op,
|
|||
throw std::invalid_argument(
|
||||
"Invalid save_amplitudes_sq instructions (empty params).");
|
||||
}
|
||||
const int_t size = op.int_params.size();
|
||||
const uint_t size = op.int_params.size();
|
||||
rvector_t amps_sq(size);
|
||||
|
||||
int_t iChunk;
|
||||
#pragma omp parallel for if (BasePar::chunk_omp_parallel_) private(iChunk)
|
||||
for (iChunk = 0; iChunk < Base::states_.size(); iChunk++) {
|
||||
for (iChunk = 0; iChunk < (int_t)Base::states_.size(); iChunk++) {
|
||||
uint_t irow, icol;
|
||||
irow = (Base::global_state_index_ + iChunk) >>
|
||||
((Base::num_qubits_ - BasePar::chunk_bits_));
|
||||
|
@ -529,7 +529,7 @@ void Executor<densmat_t>::apply_save_amplitudes_sq(const Operations::Op &op,
|
|||
if (irow != icol)
|
||||
continue;
|
||||
|
||||
for (int_t i = 0; i < size; ++i) {
|
||||
for (uint_t i = 0; i < size; ++i) {
|
||||
uint_t idx = BasePar::mapped_index(op.int_params[i]);
|
||||
if (idx >= (irow << BasePar::chunk_bits_) &&
|
||||
idx < ((irow + 1) << BasePar::chunk_bits_))
|
||||
|
@ -691,7 +691,7 @@ cmatrix_t Executor<densmat_t>::reduced_density_matrix(const reg_t &qubits,
|
|||
if (qubits.empty()) {
|
||||
reduced_state = cmatrix_t(1, 1);
|
||||
std::complex<double> sum = 0.0;
|
||||
for (int_t i = 0; i < Base::states_.size(); i++) {
|
||||
for (uint_t i = 0; i < Base::states_.size(); i++) {
|
||||
sum += Base::states_[i].qreg().trace();
|
||||
}
|
||||
#ifdef AER_MPI
|
||||
|
@ -719,7 +719,7 @@ template <class densmat_t>
|
|||
cmatrix_t
|
||||
Executor<densmat_t>::reduced_density_matrix_helper(const reg_t &qubits,
|
||||
const reg_t &qubits_sorted) {
|
||||
int_t iChunk;
|
||||
uint_t iChunk;
|
||||
uint_t size = 1ull << (BasePar::chunk_bits_ * 2);
|
||||
uint_t mask = (1ull << (BasePar::chunk_bits_)) - 1;
|
||||
uint_t num_threads = Base::states_[0].qreg().get_omp_threads();
|
||||
|
@ -753,12 +753,12 @@ Executor<densmat_t>::reduced_density_matrix_helper(const reg_t &qubits,
|
|||
BasePar::recv_data(tmp.data(), size, 0, iChunk);
|
||||
#endif
|
||||
#pragma omp parallel for if (num_threads > 1) num_threads(num_threads)
|
||||
for (i = 0; i < size; i++) {
|
||||
for (i = 0; i < (int_t)size; i++) {
|
||||
uint_t irow = (i >> (BasePar::chunk_bits_)) + irow_chunk;
|
||||
uint_t icol = (i & mask) + icol_chunk;
|
||||
uint_t irow_out = 0;
|
||||
uint_t icol_out = 0;
|
||||
int j;
|
||||
uint_t j;
|
||||
for (j = 0; j < qubits.size(); j++) {
|
||||
if ((irow >> qubits[j]) & 1) {
|
||||
irow &= ~(1ull << qubits[j]);
|
||||
|
@ -803,7 +803,7 @@ void Executor<densmat_t>::apply_save_density_matrix(
|
|||
final_op);
|
||||
|
||||
std::vector<bool> copied(Base::num_bind_params_, false);
|
||||
for (int_t i = 0; i < root.num_shots(); i++) {
|
||||
for (uint_t i = 0; i < root.num_shots(); i++) {
|
||||
uint_t ip = root.param_index(i);
|
||||
if (!copied[ip]) {
|
||||
(result + ip)
|
||||
|
@ -843,7 +843,7 @@ void Executor<densmat_t>::apply_save_state(CircuitExecutor::Branch &root,
|
|||
std::vector<bool> copied(Base::num_bind_params_, false);
|
||||
if (final_op) {
|
||||
auto state = Base::states_[root.state_index()].move_to_matrix();
|
||||
for (int_t i = 0; i < root.num_shots(); i++) {
|
||||
for (uint_t i = 0; i < root.num_shots(); i++) {
|
||||
uint_t ip = root.param_index(i);
|
||||
if (!copied[ip]) {
|
||||
(result + ip)
|
||||
|
@ -855,7 +855,7 @@ void Executor<densmat_t>::apply_save_state(CircuitExecutor::Branch &root,
|
|||
} else {
|
||||
auto state = Base::states_[root.state_index()].copy_to_matrix();
|
||||
|
||||
for (int_t i = 0; i < root.num_shots(); i++) {
|
||||
for (uint_t i = 0; i < root.num_shots(); i++) {
|
||||
uint_t ip = root.param_index(i);
|
||||
if (!copied[ip]) {
|
||||
(result + ip)
|
||||
|
@ -878,7 +878,7 @@ void Executor<densmat_t>::apply_save_probs(CircuitExecutor::Branch &root,
|
|||
std::vector<bool> copied(Base::num_bind_params_, false);
|
||||
if (op.type == Operations::OpType::save_probs_ket) {
|
||||
// Convert to ket dict
|
||||
for (int_t i = 0; i < root.num_shots(); i++) {
|
||||
for (uint_t i = 0; i < root.num_shots(); i++) {
|
||||
uint_t ip = root.param_index(i);
|
||||
if (!copied[ip]) {
|
||||
(result + ip)
|
||||
|
@ -890,7 +890,7 @@ void Executor<densmat_t>::apply_save_probs(CircuitExecutor::Branch &root,
|
|||
}
|
||||
}
|
||||
} else {
|
||||
for (int_t i = 0; i < root.num_shots(); i++) {
|
||||
for (uint_t i = 0; i < root.num_shots(); i++) {
|
||||
uint_t ip = root.param_index(i);
|
||||
if (!copied[ip]) {
|
||||
(result + ip)
|
||||
|
@ -918,7 +918,7 @@ void Executor<densmat_t>::apply_save_amplitudes(CircuitExecutor::Branch &root,
|
|||
Base::states_[root.state_index()].qreg().probability(op.int_params[i]);
|
||||
}
|
||||
std::vector<bool> copied(Base::num_bind_params_, false);
|
||||
for (int_t i = 0; i < root.num_shots(); i++) {
|
||||
for (uint_t i = 0; i < root.num_shots(); i++) {
|
||||
uint_t ip = root.param_index(i);
|
||||
if (!copied[ip]) {
|
||||
(result + ip)
|
||||
|
@ -951,7 +951,7 @@ template <class densmat_t>
|
|||
rvector_t Executor<densmat_t>::measure_probs(const reg_t &qubits) const {
|
||||
uint_t dim = 1ull << qubits.size();
|
||||
rvector_t sum(dim, 0.0);
|
||||
int_t i, j, k;
|
||||
uint_t i, j, k;
|
||||
reg_t qubits_in_chunk;
|
||||
reg_t qubits_out_chunk;
|
||||
|
||||
|
@ -965,7 +965,7 @@ rvector_t Executor<densmat_t>::measure_probs(const reg_t &qubits) const {
|
|||
|
||||
if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
|
||||
#pragma omp parallel for private(i, j, k)
|
||||
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
|
||||
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
|
||||
for (i = Base::top_state_of_group_[ig];
|
||||
i < Base::top_state_of_group_[ig + 1]; i++) {
|
||||
uint_t irow, icol;
|
||||
|
@ -1084,14 +1084,14 @@ template <class densmat_t>
|
|||
void Executor<densmat_t>::apply_reset(const reg_t &qubits) {
|
||||
if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
|
||||
#pragma omp parallel for
|
||||
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
|
||||
for (int_t iChunk = Base::top_state_of_group_[ig];
|
||||
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
|
||||
for (uint_t iChunk = Base::top_state_of_group_[ig];
|
||||
iChunk < Base::top_state_of_group_[ig + 1]; iChunk++) {
|
||||
Base::states_[iChunk].qreg().apply_reset(qubits);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (int_t i = 0; i < Base::states_.size(); i++)
|
||||
for (uint_t i = 0; i < Base::states_.size(); i++)
|
||||
Base::states_[i].qreg().apply_reset(qubits);
|
||||
}
|
||||
}
|
||||
|
@ -1120,13 +1120,13 @@ void Executor<densmat_t>::measure_reset_update(const reg_t &qubits,
|
|||
mdiag[meas_state] = 1. / std::sqrt(meas_prob);
|
||||
if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
|
||||
#pragma omp parallel for
|
||||
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
|
||||
for (int_t i = Base::top_state_of_group_[ig];
|
||||
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
|
||||
for (uint_t i = Base::top_state_of_group_[ig];
|
||||
i < Base::top_state_of_group_[ig + 1]; i++)
|
||||
Base::states_[i].qreg().apply_diagonal_unitary_matrix(qubits, mdiag);
|
||||
}
|
||||
} else {
|
||||
for (int_t i = 0; i < Base::states_.size(); i++)
|
||||
for (uint_t i = 0; i < Base::states_.size(); i++)
|
||||
Base::states_[i].qreg().apply_diagonal_unitary_matrix(qubits, mdiag);
|
||||
}
|
||||
|
||||
|
@ -1135,13 +1135,13 @@ void Executor<densmat_t>::measure_reset_update(const reg_t &qubits,
|
|||
if (qubits[0] < BasePar::chunk_bits_) {
|
||||
if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
|
||||
#pragma omp parallel for
|
||||
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
|
||||
for (int_t i = Base::top_state_of_group_[ig];
|
||||
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
|
||||
for (uint_t i = Base::top_state_of_group_[ig];
|
||||
i < Base::top_state_of_group_[ig + 1]; i++)
|
||||
Base::states_[i].qreg().apply_x(qubits[0]);
|
||||
}
|
||||
} else {
|
||||
for (int_t i = 0; i < Base::states_.size(); i++)
|
||||
for (uint_t i = 0; i < Base::states_.size(); i++)
|
||||
Base::states_[i].qreg().apply_x(qubits[0]);
|
||||
}
|
||||
} else {
|
||||
|
@ -1158,13 +1158,13 @@ void Executor<densmat_t>::measure_reset_update(const reg_t &qubits,
|
|||
mdiag[meas_state] = 1. / std::sqrt(meas_prob);
|
||||
if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
|
||||
#pragma omp parallel for
|
||||
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
|
||||
for (int_t i = Base::top_state_of_group_[ig];
|
||||
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
|
||||
for (uint_t i = Base::top_state_of_group_[ig];
|
||||
i < Base::top_state_of_group_[ig + 1]; i++)
|
||||
Base::states_[i].qreg().apply_diagonal_unitary_matrix(qubits, mdiag);
|
||||
}
|
||||
} else {
|
||||
for (int_t i = 0; i < Base::states_.size(); i++)
|
||||
for (uint_t i = 0; i < Base::states_.size(); i++)
|
||||
Base::states_[i].qreg().apply_diagonal_unitary_matrix(qubits, mdiag);
|
||||
}
|
||||
|
||||
|
@ -1183,7 +1183,7 @@ void Executor<densmat_t>::measure_reset_update(const reg_t &qubits,
|
|||
reg_t qubits_in_chunk;
|
||||
reg_t qubits_out_chunk;
|
||||
|
||||
for (int_t i = 0; i < qubits.size(); i++) {
|
||||
for (uint_t i = 0; i < qubits.size(); i++) {
|
||||
if (qubits[i] < BasePar::chunk_bits_) {
|
||||
qubits_in_chunk.push_back(qubits[i]);
|
||||
} else {
|
||||
|
@ -1193,18 +1193,18 @@ void Executor<densmat_t>::measure_reset_update(const reg_t &qubits,
|
|||
if (qubits_in_chunk.size() > 0) { // in chunk exchange
|
||||
if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
|
||||
#pragma omp parallel for
|
||||
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
|
||||
for (int_t i = Base::top_state_of_group_[ig];
|
||||
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
|
||||
for (uint_t i = Base::top_state_of_group_[ig];
|
||||
i < Base::top_state_of_group_[ig + 1]; i++)
|
||||
Base::states_[i].qreg().apply_unitary_matrix(qubits, perm);
|
||||
}
|
||||
} else {
|
||||
for (int_t i = 0; i < Base::states_.size(); i++)
|
||||
for (uint_t i = 0; i < Base::states_.size(); i++)
|
||||
Base::states_[i].qreg().apply_unitary_matrix(qubits, perm);
|
||||
}
|
||||
}
|
||||
if (qubits_out_chunk.size() > 0) { // out of chunk exchange
|
||||
for (int_t i = 0; i < qubits_out_chunk.size(); i++) {
|
||||
for (uint_t i = 0; i < qubits_out_chunk.size(); i++) {
|
||||
BasePar::apply_chunk_x(qubits_out_chunk[i]);
|
||||
BasePar::apply_chunk_x(qubits_out_chunk[i] +
|
||||
(Base::num_qubits_ - BasePar::chunk_bits_));
|
||||
|
@ -1225,13 +1225,13 @@ std::vector<reg_t> Executor<densmat_t>::sample_measure(const reg_t &qubits,
|
|||
rnds.push_back(rng.rand(0, 1));
|
||||
reg_t allbit_samples(shots, 0);
|
||||
|
||||
int_t i, j;
|
||||
uint_t i, j;
|
||||
std::vector<double> chunkSum(Base::states_.size() + 1, 0);
|
||||
double sum, localSum;
|
||||
// calculate per chunk sum
|
||||
if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
|
||||
#pragma omp parallel for private(i)
|
||||
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
|
||||
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
|
||||
for (i = Base::top_state_of_group_[ig];
|
||||
i < Base::top_state_of_group_[ig + 1]; i++) {
|
||||
uint_t irow, icol;
|
||||
|
@ -1348,7 +1348,7 @@ Executor<state_t>::sample_measure_with_prob(CircuitExecutor::Branch &root,
|
|||
uint_t nshots = root.num_shots();
|
||||
reg_t shot_branch(nshots);
|
||||
|
||||
for (int_t i = 0; i < nshots; i++) {
|
||||
for (uint_t i = 0; i < nshots; i++) {
|
||||
shot_branch[i] = root.rng_shots()[i].rand_int(probs);
|
||||
}
|
||||
|
||||
|
@ -1382,11 +1382,11 @@ void Executor<state_t>::measure_reset_update(CircuitExecutor::Branch &root,
|
|||
root.branches()[i]->add_op_after_branch(op);
|
||||
|
||||
if (final_state >= 0 && final_state != i) {
|
||||
Operations::Op op;
|
||||
op.type = OpType::gate;
|
||||
op.name = "x";
|
||||
op.qubits = qubits;
|
||||
root.branches()[i]->add_op_after_branch(op);
|
||||
Operations::Op op2;
|
||||
op2.type = OpType::gate;
|
||||
op2.name = "x";
|
||||
op2.qubits = qubits;
|
||||
root.branches()[i]->add_op_after_branch(op2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1394,7 +1394,7 @@ void Executor<state_t>::measure_reset_update(CircuitExecutor::Branch &root,
|
|||
else {
|
||||
// Diagonal matrix for projecting and renormalizing to measurement outcome
|
||||
const size_t dim = 1ULL << qubits.size();
|
||||
for (int_t i = 0; i < dim; i++) {
|
||||
for (uint_t i = 0; i < dim; i++) {
|
||||
cvector_t mdiag(dim, 0.);
|
||||
mdiag[i] = 1. / std::sqrt(meas_probs[i]);
|
||||
|
||||
|
@ -1404,20 +1404,20 @@ void Executor<state_t>::measure_reset_update(CircuitExecutor::Branch &root,
|
|||
op.params = mdiag;
|
||||
root.branches()[i]->add_op_after_branch(op);
|
||||
|
||||
if (final_state >= 0 && final_state != i) {
|
||||
if (final_state >= 0 && final_state != (int_t)i) {
|
||||
// build vectorized permutation matrix
|
||||
cvector_t perm(dim * dim, 0.);
|
||||
perm[final_state * dim + i] = 1.;
|
||||
perm[i * dim + final_state] = 1.;
|
||||
for (size_t j = 0; j < dim; j++) {
|
||||
if (j != final_state && j != i)
|
||||
if ((int_t)j != final_state && j != i)
|
||||
perm[j * dim + j] = 1.;
|
||||
}
|
||||
Operations::Op op;
|
||||
op.type = OpType::matrix;
|
||||
op.qubits = qubits;
|
||||
op.mats.push_back(Utils::devectorize_matrix(perm));
|
||||
root.branches()[i]->add_op_after_branch(op);
|
||||
Operations::Op op2;
|
||||
op2.type = OpType::matrix;
|
||||
op2.qubits = qubits;
|
||||
op2.mats.push_back(Utils::devectorize_matrix(perm));
|
||||
root.branches()[i]->add_op_after_branch(op2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1430,41 +1430,23 @@ void Executor<state_t>::apply_measure(CircuitExecutor::Branch &root,
|
|||
rvector_t probs = sample_measure_with_prob(root, qubits);
|
||||
|
||||
// save result to cregs
|
||||
for (int_t i = 0; i < probs.size(); i++) {
|
||||
for (uint_t i = 0; i < probs.size(); i++) {
|
||||
const reg_t outcome = Utils::int2reg(i, 2, qubits.size());
|
||||
root.branches()[i]->creg().store_measure(outcome, cmemory, cregister);
|
||||
}
|
||||
|
||||
measure_reset_update(root, qubits, -1, probs);
|
||||
}
|
||||
/*
|
||||
template <class state_t>
|
||||
void Executor<state_t>::apply_reset(CircuitExecutor::Branch& root, const
|
||||
reg_t &qubits)
|
||||
{
|
||||
rvector_t probs = sample_measure_with_prob(root, qubits);
|
||||
|
||||
measure_reset_update(root, qubits, 0, probs);
|
||||
}
|
||||
*/
|
||||
|
||||
template <class state_t>
|
||||
std::vector<reg_t>
|
||||
Executor<state_t>::sample_measure(state_t &state, const reg_t &qubits,
|
||||
uint_t shots,
|
||||
std::vector<RngEngine> &rng) const {
|
||||
int_t i, j;
|
||||
uint_t i;
|
||||
std::vector<double> rnds;
|
||||
rnds.reserve(shots);
|
||||
|
||||
/*
|
||||
double norm = std::real( state.qreg().trace() );
|
||||
std::cout << " trace = " << norm<<std::endl;
|
||||
|
||||
for (i = 0; i < shots; ++i)
|
||||
rnds.push_back(rng[i].rand(0, norm));
|
||||
*/
|
||||
|
||||
for (i = 0; i < shots; ++i)
|
||||
rnds.push_back(rng[i].rand(0, 1));
|
||||
|
||||
|
@ -1496,15 +1478,15 @@ void Executor<densmat_t>::apply_kraus(const reg_t &qubits,
|
|||
const std::vector<cmatrix_t> &kmats) {
|
||||
if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
|
||||
#pragma omp parallel for
|
||||
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
|
||||
for (int_t iChunk = Base::top_state_of_group_[ig];
|
||||
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
|
||||
for (uint_t iChunk = Base::top_state_of_group_[ig];
|
||||
iChunk < Base::top_state_of_group_[ig + 1]; iChunk++) {
|
||||
Base::states_[iChunk].qreg().apply_superop_matrix(
|
||||
qubits, Utils::vectorize_matrix(Utils::kraus_superop(kmats)));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (int_t i = 0; i < Base::states_.size(); i++)
|
||||
for (uint_t i = 0; i < Base::states_.size(); i++)
|
||||
Base::states_[i].qreg().apply_superop_matrix(
|
||||
qubits, Utils::vectorize_matrix(Utils::kraus_superop(kmats)));
|
||||
}
|
||||
|
@ -1549,7 +1531,7 @@ template <class densmat_t>
|
|||
void Executor<densmat_t>::apply_multi_chunk_swap(const reg_t &qubits) {
|
||||
reg_t qubits_density;
|
||||
|
||||
for (int_t i = 0; i < qubits.size(); i += 2) {
|
||||
for (uint_t i = 0; i < qubits.size(); i += 2) {
|
||||
uint_t q0, q1;
|
||||
q0 = qubits[i * 2];
|
||||
q1 = qubits[i * 2 + 1];
|
||||
|
|
|
@ -362,7 +362,6 @@ void State<densmat_t>::initialize_qreg(uint_t num_qubits, densmat_t &&state) {
|
|||
|
||||
template <class densmat_t>
|
||||
void State<densmat_t>::initialize_omp() {
|
||||
uint_t i;
|
||||
BaseState::qreg_.set_omp_threshold(omp_qubit_threshold_);
|
||||
if (BaseState::threads_ > 0)
|
||||
BaseState::qreg_.set_omp_threads(
|
||||
|
@ -404,7 +403,6 @@ void State<densmat_t>::set_config(const Config &config) {
|
|||
|
||||
// Set threshold for truncating snapshots
|
||||
json_chop_threshold_ = config.chop_threshold;
|
||||
uint_t i;
|
||||
BaseState::qreg_.set_json_chop_threshold(json_chop_threshold_);
|
||||
|
||||
// Set OMP threshold for state update functions
|
||||
|
@ -650,7 +648,7 @@ void State<densmat_t>::apply_gate(const Operations::Op &op) {
|
|||
}
|
||||
if (qubits_out.size() > 0) {
|
||||
uint_t mask = 0;
|
||||
for (int i = 0; i < qubits_out.size(); i++) {
|
||||
for (uint_t i = 0; i < qubits_out.size(); i++) {
|
||||
mask |= (1ull << (qubits_out[i] - BaseState::qreg_.num_qubits()));
|
||||
}
|
||||
if ((BaseState::qreg_.chunk_index() & mask) != mask) {
|
||||
|
@ -670,7 +668,7 @@ void State<densmat_t>::apply_gate(const Operations::Op &op) {
|
|||
else if (ctrl_chunk)
|
||||
apply_gate_statevector(new_op);
|
||||
else {
|
||||
for (int i = 0; i < new_op.qubits.size(); i++)
|
||||
for (uint_t i = 0; i < new_op.qubits.size(); i++)
|
||||
new_op.qubits[i] += BaseState::qreg_.num_qubits();
|
||||
apply_gate_statevector(new_op);
|
||||
}
|
||||
|
@ -861,7 +859,7 @@ void State<densmat_t>::apply_diagonal_unitary_matrix(const reg_t &qubits,
|
|||
if (qubits_in.size() == qubits.size()) {
|
||||
BaseState::qreg_.apply_diagonal_unitary_matrix(qubits, diag);
|
||||
} else {
|
||||
for (int_t i = 0; i < qubits.size(); i++) {
|
||||
for (uint_t i = 0; i < qubits.size(); i++) {
|
||||
if (qubits[i] >= BaseState::qreg_.num_qubits())
|
||||
qubits_row[i] = qubits[i] + BaseState::num_global_qubits_ -
|
||||
BaseState::qreg_.num_qubits();
|
||||
|
@ -871,7 +869,7 @@ void State<densmat_t>::apply_diagonal_unitary_matrix(const reg_t &qubits,
|
|||
diag_row);
|
||||
|
||||
reg_t qubits_chunk(qubits_in.size() * 2);
|
||||
for (int_t i = 0; i < qubits_in.size(); i++) {
|
||||
for (uint_t i = 0; i < qubits_in.size(); i++) {
|
||||
qubits_chunk[i] = qubits_in[i];
|
||||
qubits_chunk[i + qubits_in.size()] =
|
||||
qubits_in[i] + BaseState::qreg_.num_qubits();
|
||||
|
|
|
@ -38,6 +38,7 @@ public:
|
|||
// Parent class aliases
|
||||
using BaseVector = QubitVectorThrust<data_t>;
|
||||
using BaseMatrix = UnitaryMatrixThrust<data_t>;
|
||||
using BaseVector::probabilities;
|
||||
|
||||
//-----------------------------------------------------------------------
|
||||
// Constructors and Destructor
|
||||
|
@ -449,9 +450,9 @@ public:
|
|||
template <typename data_t>
|
||||
class DensityDiagMatMultNxN : public Chunk::GateFuncBase<data_t> {
|
||||
protected:
|
||||
int nqubits_;
|
||||
int total_bits_;
|
||||
int chunk_bits_;
|
||||
uint_t nqubits_;
|
||||
uint_t total_bits_;
|
||||
uint_t chunk_bits_;
|
||||
|
||||
public:
|
||||
DensityDiagMatMultNxN(const reg_t &qb, int total, int chunk) {
|
||||
|
@ -541,7 +542,7 @@ public:
|
|||
offset_ = 1ull << qubits[qubits.size() - 1];
|
||||
offset_sp_ = 1ull << (qubits[qubits.size() - 1] + chunk_qubits_);
|
||||
cmask_ = 0;
|
||||
for (int i = 0; i < qubits.size() - 1; i++)
|
||||
for (uint_t i = 0; i < qubits.size() - 1; i++)
|
||||
cmask_ |= (1ull << qubits[i]);
|
||||
enable_batch_ = batch;
|
||||
}
|
||||
|
@ -629,7 +630,7 @@ public:
|
|||
offset_ = 1ull << qubits[qubits.size() - 1];
|
||||
offset_sp_ = 1ull << (qubits[qubits.size() - 1] + chunk_qubits_);
|
||||
cmask_ = 0;
|
||||
for (int i = 0; i < qubits.size() - 1; i++)
|
||||
for (uint_t i = 0; i < qubits.size() - 1; i++)
|
||||
cmask_ |= (1ull << qubits[i]);
|
||||
enable_batch_ = batch;
|
||||
}
|
||||
|
@ -1081,7 +1082,7 @@ public:
|
|||
vec = this->data_;
|
||||
|
||||
idx_vec = ((i << 1) & mask_u_) | (i & mask_l_);
|
||||
idx_mat = idx_vec ^ x_mask_ + rows_ * idx_vec;
|
||||
idx_mat = (idx_vec ^ x_mask_) + rows_ * idx_vec;
|
||||
|
||||
q0 = vec[idx_mat];
|
||||
q0 = 2 * phase_ * q0;
|
||||
|
@ -1158,7 +1159,7 @@ public:
|
|||
|
||||
vec = this->data_;
|
||||
|
||||
idx_mat = i ^ x_mask_ + rows_ * i;
|
||||
idx_mat = (i ^ x_mask_) + rows_ * i;
|
||||
|
||||
q0 = vec[idx_mat];
|
||||
q0 = phase_ * q0;
|
||||
|
@ -1353,7 +1354,7 @@ template <typename data_t>
|
|||
void DensityMatrixThrust<data_t>::apply_batched_measure(
|
||||
const reg_t &qubits, std::vector<RngEngine> &rng, const reg_t &cmemory,
|
||||
const reg_t &cregs) {
|
||||
const int_t DIM = 1 << qubits.size();
|
||||
const uint_t DIM = 1 << qubits.size();
|
||||
uint_t i, count = 1;
|
||||
if (BaseVector::enable_batch_) {
|
||||
if (BaseVector::chunk_.pos() != 0) {
|
||||
|
@ -1503,7 +1504,7 @@ void DensityMatrixThrust<data_t>::apply_reset(const reg_t &qubits) {
|
|||
auto qubits_sorted = qubits;
|
||||
std::sort(qubits_sorted.begin(), qubits_sorted.end());
|
||||
|
||||
for (int_t i = 0; i < qubits.size(); i++) {
|
||||
for (uint_t i = 0; i < qubits.size(); i++) {
|
||||
qubits_sorted.push_back(qubits[i]);
|
||||
}
|
||||
BaseVector::chunk_.StoreUintParams(qubits_sorted);
|
||||
|
|
|
@ -426,8 +426,8 @@ scalar_t StabilizerState::ProposeFlip(unsigned flip_pos) {
|
|||
|
||||
scalar_t amp;
|
||||
amp.e = 2 * Q.e;
|
||||
amp.p = -1 *
|
||||
(AER::Utils::popcount(v)); // each Hadamard gate contributes 1/sqrt(2)
|
||||
// each Hadamard gate contributes 1/sqrt(2)
|
||||
amp.p = -1 * (int)(AER::Utils::popcount(v));
|
||||
bool isNonZero = true;
|
||||
|
||||
for (unsigned q = 0; q < n; q++) {
|
||||
|
|
|
@ -743,7 +743,7 @@ void State::apply_measure(const reg_t &qubits, const reg_t &cmemory,
|
|||
const reg_t &cregister, RngEngine &rng) {
|
||||
rvector_t rands;
|
||||
rands.reserve(qubits.size());
|
||||
for (int_t i = 0; i < qubits.size(); ++i)
|
||||
for (uint_t i = 0; i < qubits.size(); ++i)
|
||||
rands.push_back(rng.rand(0., 1.));
|
||||
reg_t outcome = qreg_.apply_measure(qubits, rands);
|
||||
creg().store_measure(outcome, cmemory, cregister);
|
||||
|
@ -777,10 +777,10 @@ State::sample_measure_using_apply_measure(const reg_t &qubits, uint_t shots,
|
|||
all_samples.resize(shots);
|
||||
std::vector<rvector_t> rnds_list;
|
||||
rnds_list.reserve(shots);
|
||||
for (int_t i = 0; i < shots; ++i) {
|
||||
for (uint_t i = 0; i < shots; ++i) {
|
||||
rvector_t rands;
|
||||
rands.reserve(qubits.size());
|
||||
for (int_t j = 0; j < qubits.size(); ++j)
|
||||
for (uint_t j = 0; j < qubits.size(); ++j)
|
||||
rands.push_back(rng.rand(0., 1.));
|
||||
rnds_list.push_back(rands);
|
||||
}
|
||||
|
|
|
@ -53,7 +53,7 @@ void MPSSizeEstimator::initialize(uint_t nq) {
|
|||
qubit_map_.resize(nq);
|
||||
qubit_order_.resize(nq);
|
||||
|
||||
for (int_t i = 0; i < nq; i++) {
|
||||
for (uint_t i = 0; i < nq; i++) {
|
||||
tensor_size_[i].first = 1;
|
||||
tensor_size_[i].second = 1;
|
||||
|
||||
|
@ -66,7 +66,7 @@ void MPSSizeEstimator::initialize(uint_t nq) {
|
|||
|
||||
uint_t MPSSizeEstimator::estimate(const std::vector<Operations::Op> &ops) {
|
||||
uint_t n = ops.size();
|
||||
for (int_t i = 0; i < n; i++) {
|
||||
for (uint_t i = 0; i < n; i++) {
|
||||
switch (ops[i].type) {
|
||||
case Operations::OpType::gate:
|
||||
case Operations::OpType::matrix:
|
||||
|
@ -79,7 +79,7 @@ uint_t MPSSizeEstimator::estimate(const std::vector<Operations::Op> &ops) {
|
|||
}
|
||||
}
|
||||
uint_t max_bond = 0;
|
||||
for (int_t i = 0; i < num_qubits_ - 1; i++) {
|
||||
for (uint_t i = 0; i < num_qubits_ - 1; i++) {
|
||||
if (max_bond < bond_dimensions_[i])
|
||||
max_bond = bond_dimensions_[i];
|
||||
}
|
||||
|
@ -89,16 +89,16 @@ uint_t MPSSizeEstimator::estimate(const std::vector<Operations::Op> &ops) {
|
|||
void MPSSizeEstimator::apply_qubits(const reg_t &qubits) {
|
||||
reg_t sorted(qubits.size());
|
||||
|
||||
for (int_t i = 0; i < qubits.size(); i++) {
|
||||
for (uint_t i = 0; i < qubits.size(); i++) {
|
||||
sorted[i] = qubit_map_[qubits[i]];
|
||||
}
|
||||
std::sort(sorted.begin(), sorted.end());
|
||||
|
||||
for (int_t i = 1; i < qubits.size(); i++) {
|
||||
for (uint_t i = 1; i < qubits.size(); i++) {
|
||||
reorder_qubit(sorted[i - 1], sorted[i]);
|
||||
}
|
||||
|
||||
for (int_t i = 0; i < qubits.size() - 1; i++) {
|
||||
for (uint_t i = 0; i < qubits.size() - 1; i++) {
|
||||
update(sorted[i]);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -139,14 +139,6 @@ protected:
|
|||
void measure_sampler(InputIterator first_meas, InputIterator last_meas,
|
||||
Branch &branch, ResultItr result_it);
|
||||
|
||||
// sampling measure
|
||||
virtual std::vector<reg_t> sample_measure(state_t &state, const reg_t &qubits,
|
||||
uint_t shots,
|
||||
std::vector<RngEngine> &rng) const {
|
||||
// this is for a single rng; implement in a sub-class for the multi-shot case
|
||||
return state.sample_measure(qubits, shots, rng[0]);
|
||||
}
|
||||
|
||||
void apply_save_expval(Branch &root, const Operations::Op &op,
|
||||
ResultItr result);
|
||||
};
|
||||
|
@ -192,7 +184,7 @@ void MultiStateExecutor<state_t>::set_distribution(uint_t num_states) {
|
|||
|
||||
state_index_begin_.resize(Base::distributed_procs_);
|
||||
state_index_end_.resize(Base::distributed_procs_);
|
||||
for (int_t i = 0; i < Base::distributed_procs_; i++) {
|
||||
for (uint_t i = 0; i < Base::distributed_procs_; i++) {
|
||||
state_index_begin_[i] = num_global_states_ * i / Base::distributed_procs_;
|
||||
state_index_end_[i] =
|
||||
num_global_states_ * (i + 1) / Base::distributed_procs_;
|
||||
|
@ -212,7 +204,7 @@ void MultiStateExecutor<state_t>::set_parallelization(
|
|||
template <class state_t>
|
||||
bool MultiStateExecutor<state_t>::allocate_states(uint_t num_shots,
|
||||
const Config &config) {
|
||||
int_t i;
|
||||
uint_t i;
|
||||
bool ret = true;
|
||||
|
||||
states_.resize(num_shots);
|
||||
|
@ -281,12 +273,11 @@ void MultiStateExecutor<state_t>::run_circuit_shots(
|
|||
|
||||
Noise::NoiseModel dummy_noise;
|
||||
state_t dummy_state;
|
||||
RngEngine dummy_rng;
|
||||
dummy_rng.set_seed(circ.seed); // this is not used actually
|
||||
|
||||
Circuit circ_opt;
|
||||
if (sample_noise) {
|
||||
RngEngine dummy_rng;
|
||||
dummy_rng.set_seed(circ.seed);
|
||||
circ_opt = noise.sample_noise(circ, dummy_rng,
|
||||
Noise::NoiseModel::Method::circuit, true);
|
||||
auto fusion_pass = Base::transpile_fusion(circ_opt.opset(), config);
|
||||
|
@ -385,12 +376,12 @@ void MultiStateExecutor<state_t>::run_circuit_shots(
|
|||
#endif
|
||||
|
||||
for (auto &res : par_results) {
|
||||
for (int_t i = 0; i < Base::num_bind_params_; i++) {
|
||||
for (uint_t i = 0; i < Base::num_bind_params_; i++) {
|
||||
(result_it + i)->combine(std::move(res[i]));
|
||||
}
|
||||
}
|
||||
|
||||
for (int_t i = 0; i < Base::num_bind_params_; i++) {
|
||||
for (uint_t i = 0; i < Base::num_bind_params_; i++) {
|
||||
(result_it + i)->metadata.add(true, "shot_branching_enabled");
|
||||
(result_it + i)
|
||||
->metadata.add(sample_noise, "runtime_noise_sampling_enabled");
|
||||
|
@ -413,7 +404,7 @@ void MultiStateExecutor<state_t>::run_circuit_with_shot_branching(
|
|||
bool can_sample = false;
|
||||
OpItr measure_seq = last;
|
||||
OpItr it = last - 1;
|
||||
int_t num_measure = 0;
|
||||
uint_t num_measure = 0;
|
||||
|
||||
if (shot_branching_sampling_enable_) {
|
||||
do {
|
||||
|
@ -445,14 +436,14 @@ void MultiStateExecutor<state_t>::run_circuit_with_shot_branching(
|
|||
if (Base::num_bind_params_ > 1) {
|
||||
if (par_shots > 1) {
|
||||
#pragma omp parallel for num_threads(par_shots)
|
||||
for (int_t i = 0; i < nshots; i++) {
|
||||
for (int_t i = 0; i < (int_t)nshots; i++) {
|
||||
uint_t gid = global_state_index_ + ishot + i;
|
||||
uint_t ip = gid / Base::num_shots_per_bind_param_;
|
||||
shots_storage[i].set_seed(circ.seed_for_params[ip] +
|
||||
(gid % Base::num_shots_per_bind_param_));
|
||||
}
|
||||
} else {
|
||||
for (int_t i = 0; i < nshots; i++) {
|
||||
for (uint_t i = 0; i < nshots; i++) {
|
||||
uint_t gid = global_state_index_ + ishot + i;
|
||||
uint_t ip = gid / Base::num_shots_per_bind_param_;
|
||||
shots_storage[i].set_seed(circ.seed_for_params[ip] +
|
||||
|
@ -466,10 +457,10 @@ void MultiStateExecutor<state_t>::run_circuit_with_shot_branching(
|
|||
shots_storage[0].set_seed(circ.seed + global_state_index_ + ishot);
|
||||
if (par_shots > 1) {
|
||||
#pragma omp parallel for num_threads(par_shots)
|
||||
for (int_t i = 1; i < nshots; i++)
|
||||
for (int_t i = 1; i < (int_t)nshots; i++)
|
||||
shots_storage[i].set_seed(circ.seed + global_state_index_ + ishot + i);
|
||||
} else {
|
||||
for (int_t i = 1; i < nshots; i++)
|
||||
for (uint_t i = 1; i < nshots; i++)
|
||||
shots_storage[i].set_seed(circ.seed + global_state_index_ + ishot + i);
|
||||
}
|
||||
}
|
||||
|
@ -498,7 +489,7 @@ void MultiStateExecutor<state_t>::run_circuit_with_shot_branching(
|
|||
uint_t num_active_states = 1;
|
||||
|
||||
// set branches
|
||||
for (int_t i = 0; i < waiting_branches.size(); i++) {
|
||||
for (uint_t i = 0; i < waiting_branches.size(); i++) {
|
||||
if (i > num_states)
|
||||
break;
|
||||
uint_t sid = top_state + i;
|
||||
|
@ -547,9 +538,9 @@ void MultiStateExecutor<state_t>::run_circuit_with_shot_branching(
|
|||
if (branches[istate]->num_branches() > 0) {
|
||||
// if there are additional ops remaining, queue them on new
|
||||
// branches
|
||||
for (int_t k = iadd + 1;
|
||||
for (uint_t k = iadd + 1;
|
||||
k < branches[istate]->additional_ops().size(); k++) {
|
||||
for (int_t l = 0; l < branches[istate]->num_branches();
|
||||
for (uint_t l = 0; l < branches[istate]->num_branches();
|
||||
l++)
|
||||
branches[istate]->branches()[l]->add_op_after_branch(
|
||||
branches[istate]->additional_ops()[k]);
|
||||
|
@ -631,10 +622,10 @@ void MultiStateExecutor<state_t>::run_circuit_with_shot_branching(
|
|||
// repeat until new branch is available
|
||||
if (nbranch > 0) {
|
||||
uint_t num_states_prev = branches.size();
|
||||
for (int_t i = 0; i < num_states_prev; i++) {
|
||||
for (uint_t i = 0; i < num_states_prev; i++) {
|
||||
// add new branches
|
||||
if (branches[i]->num_branches() > 0) {
|
||||
for (int_t j = 0; j < branches[i]->num_branches(); j++) {
|
||||
for (uint_t j = 0; j < branches[i]->num_branches(); j++) {
|
||||
if (branches[i]->branches()[j]->num_shots() > 0) {
|
||||
// add new branched state
|
||||
uint_t pos = branches.size();
|
||||
|
@ -680,7 +671,7 @@ void MultiStateExecutor<state_t>::run_circuit_with_shot_branching(
|
|||
|
||||
// check if there are remaining ops
|
||||
num_active_states = 0;
|
||||
for (int_t i = 0; i < branches.size(); i++) {
|
||||
for (uint_t i = 0; i < branches.size(); i++) {
|
||||
if (branches[i]->op_iterator() != measure_seq ||
|
||||
branches[i]->additional_ops().size() > 0)
|
||||
num_active_states++;
|
||||
|
@ -707,7 +698,7 @@ void MultiStateExecutor<state_t>::run_circuit_with_shot_branching(
|
|||
Utils::apply_omp_parallel_for(can_parallel, 0, par_shots,
|
||||
sampling_measure_func, par_shots);
|
||||
|
||||
for (int_t i = 0; i < Base::num_bind_params_; i++)
|
||||
for (uint_t i = 0; i < Base::num_bind_params_; i++)
|
||||
(result_it + i)->metadata.add(true, "shot_branching_sampling_enabled");
|
||||
} else {
|
||||
// save cregs to result
|
||||
|
@ -718,7 +709,7 @@ void MultiStateExecutor<state_t>::run_circuit_with_shot_branching(
|
|||
|
||||
for (; istate < state_end; istate++) {
|
||||
if (Base::num_process_per_experiment_ > 1) {
|
||||
for (int_t j = 0; j < branches[istate]->num_shots(); j++) {
|
||||
for (uint_t j = 0; j < branches[istate]->num_shots(); j++) {
|
||||
uint_t idx = branches[istate]->rng_shots()[j].initial_seed();
|
||||
uint_t ip = branches[istate]->param_index(j);
|
||||
idx += ip * Base::num_shots_per_bind_param_;
|
||||
|
@ -728,13 +719,13 @@ void MultiStateExecutor<state_t>::run_circuit_with_shot_branching(
|
|||
} else {
|
||||
std::string memory_hex =
|
||||
states_[branches[istate]->state_index()].creg().memory_hex();
|
||||
for (int_t j = 0; j < branches[istate]->num_shots(); j++) {
|
||||
for (uint_t j = 0; j < branches[istate]->num_shots(); j++) {
|
||||
uint_t ip = branches[istate]->param_index(j);
|
||||
par_results[i][ip].data.add_accum(static_cast<uint_t>(1ULL),
|
||||
"counts", memory_hex);
|
||||
}
|
||||
if (Base::save_creg_memory_) {
|
||||
for (int_t j = 0; j < branches[istate]->num_shots(); j++) {
|
||||
for (uint_t j = 0; j < branches[istate]->num_shots(); j++) {
|
||||
uint_t ip = branches[istate]->param_index(j);
|
||||
par_results[i][ip].data.add_list(memory_hex, "memory");
|
||||
}
|
||||
|
@ -748,14 +739,14 @@ void MultiStateExecutor<state_t>::run_circuit_with_shot_branching(
|
|||
}
|
||||
|
||||
// clear
|
||||
for (int_t i = 0; i < branches.size(); i++) {
|
||||
for (uint_t i = 0; i < branches.size(); i++) {
|
||||
branches[i].reset();
|
||||
}
|
||||
branches.clear();
|
||||
}
|
||||
|
||||
for (auto &res : par_results) {
|
||||
for (int_t i = 0; i < Base::num_bind_params_; i++) {
|
||||
for (uint_t i = 0; i < Base::num_bind_params_; i++) {
|
||||
(result_it + i)->combine(std::move(res[i]));
|
||||
}
|
||||
}
|
||||
|
@ -777,7 +768,7 @@ void MultiStateExecutor<state_t>::apply_runtime_parameterization(
|
|||
root.branch_shots_by_params();
|
||||
|
||||
// add bound op after branch
|
||||
for (int_t i = 0; i < nparams; i++) {
|
||||
for (uint_t i = 0; i < nparams; i++) {
|
||||
uint_t ip = root.branches()[i]->param_index(0);
|
||||
Operations::Op bind_op =
|
||||
Operations::bind_parameter(op, ip, Base::num_bind_params_);
|
||||
|
@ -799,7 +790,7 @@ void MultiStateExecutor<state_t>::measure_sampler(InputIterator first_meas,
|
|||
// Check if meas_circ is empty, and if so return initial creg
|
||||
if (first_meas == last_meas) {
|
||||
if (Base::num_process_per_experiment_ > 1) {
|
||||
for (int_t i = 0; i < shots; i++) {
|
||||
for (uint_t i = 0; i < shots; i++) {
|
||||
uint_t idx = branch.rng_shots()[i].initial_seed();
|
||||
uint_t ip = branch.param_index(i);
|
||||
idx += ip * Base::num_shots_per_bind_param_;
|
||||
|
@ -807,7 +798,7 @@ void MultiStateExecutor<state_t>::measure_sampler(InputIterator first_meas,
|
|||
cregs_[idx] = state.creg();
|
||||
}
|
||||
} else {
|
||||
for (int_t i = 0; i < shots; i++) {
|
||||
for (uint_t i = 0; i < shots; i++) {
|
||||
uint_t ip = branch.param_index(i);
|
||||
(result + ip)->save_count_data(state.creg(), Base::save_creg_memory_);
|
||||
}
|
||||
|
@ -837,7 +828,7 @@ void MultiStateExecutor<state_t>::measure_sampler(InputIterator first_meas,
|
|||
|
||||
// Generate the samples
|
||||
std::vector<reg_t> all_samples;
|
||||
all_samples = sample_measure(state, meas_qubits, shots, rng);
|
||||
all_samples = this->sample_measure(state, meas_qubits, shots, rng);
|
||||
|
||||
// Make qubit map of position in vector of measured qubits
|
||||
std::unordered_map<uint_t, uint_t> qubit_map;
|
||||
|
@ -859,10 +850,6 @@ void MultiStateExecutor<state_t>::measure_sampler(InputIterator first_meas,
|
|||
}
|
||||
|
||||
// Process samples
|
||||
uint_t num_memory =
|
||||
(memory_map.empty()) ? 0ULL : 1 + memory_map.rbegin()->first;
|
||||
uint_t num_registers =
|
||||
(register_map.empty()) ? 0ULL : 1 + register_map.rbegin()->first;
|
||||
for (int_t i = all_samples.size() - 1; i >= 0; i--) {
|
||||
ClassicalRegister creg = state.creg();
|
||||
|
||||
|
@ -929,7 +916,7 @@ void MultiStateExecutor<state_t>::apply_save_expval(Branch &root,
|
|||
std::vector<double> expval_var(2);
|
||||
expval_var[0] = expval; // mean
|
||||
expval_var[1] = sq_expval - expval * expval; // variance
|
||||
for (int_t i = 0; i < root.num_shots(); i++) {
|
||||
for (uint_t i = 0; i < root.num_shots(); i++) {
|
||||
uint_t ip = root.param_index(i);
|
||||
if (!copied[ip]) {
|
||||
(result + ip)
|
||||
|
@ -940,7 +927,7 @@ void MultiStateExecutor<state_t>::apply_save_expval(Branch &root,
|
|||
}
|
||||
}
|
||||
} else {
|
||||
for (int_t i = 0; i < root.num_shots(); i++) {
|
||||
for (uint_t i = 0; i < root.num_shots(); i++) {
|
||||
uint_t ip = root.param_index(i);
|
||||
if (!copied[ip]) {
|
||||
(result + ip)
|
||||
|
|
|
@ -136,14 +136,6 @@ protected:
|
|||
// Apply a save expectation value instruction
|
||||
void apply_save_expval(const Operations::Op &op, ExperimentResult &result);
|
||||
|
||||
// Sample n-measurement outcomes without applying the measure operation
|
||||
// to the system state
|
||||
virtual std::vector<reg_t> sample_measure(const reg_t &qubits, uint_t shots,
|
||||
RngEngine &rng) const {
|
||||
std::vector<reg_t> ret;
|
||||
return ret;
|
||||
};
|
||||
|
||||
// swap between chunks
|
||||
virtual void apply_chunk_swap(const reg_t &qubits);
|
||||
|
||||
|
@ -270,7 +262,7 @@ ParallelStateExecutor<state_t>::transpile_cache_blocking(
|
|||
template <class state_t>
|
||||
bool ParallelStateExecutor<state_t>::allocate(uint_t num_qubits,
|
||||
const Config &config) {
|
||||
int_t i;
|
||||
uint_t i;
|
||||
Base::num_qubits_ = num_qubits;
|
||||
chunk_bits_ = cache_block_qubit_;
|
||||
|
||||
|
@ -312,9 +304,9 @@ bool ParallelStateExecutor<state_t>::allocate(uint_t num_qubits,
|
|||
template <class state_t>
|
||||
bool ParallelStateExecutor<state_t>::allocate_states(uint_t num_states,
|
||||
const Config &config) {
|
||||
int_t i;
|
||||
uint_t i;
|
||||
bool init_states = true;
|
||||
uint_t num_states_allocated;
|
||||
uint_t num_states_allocated = num_states;
|
||||
// deallocate qregs before reallocation
|
||||
if (Base::states_.size() > 0) {
|
||||
if (Base::states_.size() == num_states)
|
||||
|
@ -532,7 +524,7 @@ void ParallelStateExecutor<state_t>::run_circuit_shots(
|
|||
result.metadata.copy(fusion_result.metadata);
|
||||
}
|
||||
|
||||
for (int_t ishot = 0; ishot < circ.shots; ishot++) {
|
||||
for (uint_t ishot = 0; ishot < circ.shots; ishot++) {
|
||||
RngEngine rng;
|
||||
if (iparam == 0 && ishot == 0)
|
||||
rng = init_rng;
|
||||
|
@ -616,7 +608,7 @@ void ParallelStateExecutor<state_t>::measure_sampler(InputIterator first_meas,
|
|||
|
||||
// Generate the samples
|
||||
auto timer_start = myclock_t::now();
|
||||
auto all_samples = sample_measure(meas_qubits, shots, rng);
|
||||
auto all_samples = this->sample_measure(meas_qubits, shots, rng);
|
||||
auto time_taken =
|
||||
std::chrono::duration<double>(myclock_t::now() - timer_start).count();
|
||||
result.metadata.add(time_taken, "sample_measure_time");
|
||||
|
@ -792,11 +784,11 @@ void ParallelStateExecutor<state_t>::apply_ops_chunks(
|
|||
uint_t iOpBegin = iOp + 1;
|
||||
if (Base::num_groups_ > 1 && chunk_omp_parallel_) {
|
||||
#pragma omp parallel for num_threads(Base::num_groups_)
|
||||
for (int_t ig = 0; ig < Base::num_groups_; ig++)
|
||||
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++)
|
||||
apply_cache_blocking_ops(ig, first + iOpBegin, first + iOpEnd, result,
|
||||
rng, iparam);
|
||||
} else {
|
||||
for (int_t ig = 0; ig < Base::num_groups_; ig++)
|
||||
for (uint_t ig = 0; ig < Base::num_groups_; ig++)
|
||||
apply_cache_blocking_ops(ig, first + iOpBegin, first + iOpEnd, result,
|
||||
rng, iparam);
|
||||
}
|
||||
|
@ -810,11 +802,11 @@ void ParallelStateExecutor<state_t>::apply_ops_chunks(
|
|||
final_ops && nOp == iOp + 1)) {
|
||||
if (Base::num_groups_ > 1 && chunk_omp_parallel_) {
|
||||
#pragma omp parallel for num_threads(Base::num_groups_)
|
||||
for (int_t ig = 0; ig < Base::num_groups_; ig++)
|
||||
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++)
|
||||
apply_cache_blocking_ops(ig, bind_op.cbegin(), bind_op.cend(),
|
||||
result, rng, iparam);
|
||||
} else {
|
||||
for (int_t ig = 0; ig < Base::num_groups_; ig++)
|
||||
for (uint_t ig = 0; ig < Base::num_groups_; ig++)
|
||||
apply_cache_blocking_ops(ig, bind_op.cbegin(), bind_op.cend(),
|
||||
result, rng, iparam);
|
||||
}
|
||||
|
@ -824,11 +816,11 @@ void ParallelStateExecutor<state_t>::apply_ops_chunks(
|
|||
final_ops && nOp == iOp + 1)) {
|
||||
if (Base::num_groups_ > 1 && chunk_omp_parallel_) {
|
||||
#pragma omp parallel for num_threads(Base::num_groups_)
|
||||
for (int_t ig = 0; ig < Base::num_groups_; ig++)
|
||||
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++)
|
||||
apply_cache_blocking_ops(ig, first + iOp, first + iOp + 1, result,
|
||||
rng, iparam);
|
||||
} else {
|
||||
for (int_t ig = 0; ig < Base::num_groups_; ig++)
|
||||
for (uint_t ig = 0; ig < Base::num_groups_; ig++)
|
||||
apply_cache_blocking_ops(ig, first + iOp, first + iOp + 1, result,
|
||||
rng, iparam);
|
||||
}
|
||||
|
@ -843,10 +835,10 @@ void ParallelStateExecutor<state_t>::apply_ops_chunks(
|
|||
|
||||
if (Base::num_groups_ > 1 && chunk_omp_parallel_) {
|
||||
#pragma omp parallel for num_threads(Base::num_groups_)
|
||||
for (int_t ig = 0; ig < Base::num_groups_; ig++)
|
||||
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++)
|
||||
Base::states_[Base::top_state_of_group_[ig]].qreg().synchronize();
|
||||
} else {
|
||||
for (int_t ig = 0; ig < Base::num_groups_; ig++)
|
||||
for (uint_t ig = 0; ig < Base::num_groups_; ig++)
|
||||
Base::states_[Base::top_state_of_group_[ig]].qreg().synchronize();
|
||||
}
|
||||
|
||||
|
@ -881,7 +873,7 @@ void ParallelStateExecutor<state_t>::apply_cache_blocking_ops(
|
|||
const int_t iGroup, InputIterator first, InputIterator last,
|
||||
ExperimentResult &result, RngEngine &rng, uint_t iparam) {
|
||||
// for each chunk in group
|
||||
for (int_t iChunk = Base::top_state_of_group_[iGroup];
|
||||
for (uint_t iChunk = Base::top_state_of_group_[iGroup];
|
||||
iChunk < Base::top_state_of_group_[iGroup + 1]; iChunk++) {
|
||||
// fetch chunk in cache
|
||||
if (Base::states_[iChunk].qreg().fetch_chunk()) {
|
||||
|
@ -901,15 +893,15 @@ void ParallelStateExecutor<state_t>::apply_cache_blocking_ops(
|
|||
template <class state_t>
|
||||
template <typename list_t>
|
||||
void ParallelStateExecutor<state_t>::initialize_from_vector(const list_t &vec) {
|
||||
int_t iChunk;
|
||||
uint_t iChunk;
|
||||
|
||||
if (chunk_omp_parallel_ && Base::num_groups_ > 1) {
|
||||
#pragma omp parallel for private(iChunk)
|
||||
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
|
||||
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
|
||||
for (iChunk = Base::top_state_of_group_[ig];
|
||||
iChunk < Base::top_state_of_group_[ig + 1]; iChunk++) {
|
||||
list_t tmp(1ull << (chunk_bits_ * qubit_scale()));
|
||||
for (int_t i = 0; i < (1ull << (chunk_bits_ * qubit_scale())); i++) {
|
||||
for (uint_t i = 0; i < (1ull << (chunk_bits_ * qubit_scale())); i++) {
|
||||
tmp[i] = vec[((Base::global_state_index_ + iChunk)
|
||||
<< (chunk_bits_ * qubit_scale())) +
|
||||
i];
|
||||
|
@ -920,7 +912,7 @@ void ParallelStateExecutor<state_t>::initialize_from_vector(const list_t &vec) {
|
|||
} else {
|
||||
for (iChunk = 0; iChunk < Base::num_local_states_; iChunk++) {
|
||||
list_t tmp(1ull << (chunk_bits_ * qubit_scale()));
|
||||
for (int_t i = 0; i < (1ull << (chunk_bits_ * qubit_scale())); i++) {
|
||||
for (uint_t i = 0; i < (1ull << (chunk_bits_ * qubit_scale())); i++) {
|
||||
tmp[i] = vec[((Base::global_state_index_ + iChunk)
|
||||
<< (chunk_bits_ * qubit_scale())) +
|
||||
i];
|
||||
|
@ -933,10 +925,10 @@ void ParallelStateExecutor<state_t>::initialize_from_vector(const list_t &vec) {
|
|||
template <class state_t>
|
||||
template <typename list_t>
|
||||
void ParallelStateExecutor<state_t>::initialize_from_matrix(const list_t &mat) {
|
||||
int_t iChunk;
|
||||
uint_t iChunk;
|
||||
if (chunk_omp_parallel_ && Base::num_groups_ > 1) {
|
||||
#pragma omp parallel for private(iChunk)
|
||||
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
|
||||
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
|
||||
for (iChunk = Base::top_state_of_group_[ig];
|
||||
iChunk < Base::top_state_of_group_[ig + 1]; iChunk++) {
|
||||
list_t tmp(1ull << (chunk_bits_), 1ull << (chunk_bits_));
|
||||
|
@ -949,7 +941,7 @@ void ParallelStateExecutor<state_t>::initialize_from_matrix(const list_t &mat) {
|
|||
<< (chunk_bits_);
|
||||
|
||||
// copy part of state for this chunk
|
||||
uint_t i, row, col;
|
||||
uint_t i;
|
||||
for (i = 0; i < (1ull << (chunk_bits_ * qubit_scale())); i++) {
|
||||
uint_t icol = i & ((1ull << chunk_bits_) - 1);
|
||||
uint_t irow = i >> chunk_bits_;
|
||||
|
@ -970,7 +962,7 @@ void ParallelStateExecutor<state_t>::initialize_from_matrix(const list_t &mat) {
|
|||
<< (chunk_bits_);
|
||||
|
||||
// copy part of state for this chunk
|
||||
uint_t i, row, col;
|
||||
uint_t i;
|
||||
for (i = 0; i < (1ull << (chunk_bits_ * qubit_scale())); i++) {
|
||||
uint_t icol = i & ((1ull << chunk_bits_) - 1);
|
||||
uint_t irow = i >> chunk_bits_;
|
||||
|
@ -985,7 +977,7 @@ void ParallelStateExecutor<state_t>::initialize_from_matrix(const list_t &mat) {
|
|||
template <class state_t>
|
||||
auto ParallelStateExecutor<state_t>::apply_to_matrix(bool copy) {
|
||||
// this function is used to collect states over chunks
|
||||
int_t iChunk;
|
||||
uint_t iChunk;
|
||||
uint_t size = 1ull << (chunk_bits_ * qubit_scale());
|
||||
uint_t mask = (1ull << (chunk_bits_)) - 1;
|
||||
uint_t num_threads = Base::states_[0].qreg().get_omp_threads();
|
||||
|
@ -1024,7 +1016,7 @@ auto ParallelStateExecutor<state_t>::apply_to_matrix(bool copy) {
|
|||
recv_data(tmp.data(), size, 0, iChunk);
|
||||
#endif
|
||||
#pragma omp parallel for if (num_threads > 1) num_threads(num_threads)
|
||||
for (i = 0; i < size; i++) {
|
||||
for (i = 0; i < (int_t)size; i++) {
|
||||
uint_t irow = i >> (chunk_bits_);
|
||||
uint_t icol = i & mask;
|
||||
uint_t idx =
|
||||
|
@ -1093,9 +1085,7 @@ void ParallelStateExecutor<state_t>::apply_save_expval(
|
|||
|
||||
template <class state_t>
|
||||
void ParallelStateExecutor<state_t>::apply_chunk_swap(const reg_t &qubits) {
|
||||
uint_t nLarge = 1;
|
||||
uint_t q0, q1;
|
||||
int_t iChunk;
|
||||
|
||||
q0 = qubits[qubits.size() - 2];
|
||||
q1 = qubits[qubits.size() - 1];
|
||||
|
@ -1112,14 +1102,14 @@ void ParallelStateExecutor<state_t>::apply_chunk_swap(const reg_t &qubits) {
|
|||
// inside chunk
|
||||
if (chunk_omp_parallel_ && Base::num_groups_ > 1) {
|
||||
#pragma omp parallel for num_threads(Base::num_groups_)
|
||||
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
|
||||
for (int_t iChunk = Base::top_state_of_group_[ig];
|
||||
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
|
||||
for (uint_t iChunk = Base::top_state_of_group_[ig];
|
||||
iChunk < Base::top_state_of_group_[ig + 1]; iChunk++)
|
||||
Base::states_[iChunk].qreg().apply_mcswap(qubits);
|
||||
}
|
||||
} else {
|
||||
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
|
||||
for (int_t iChunk = Base::top_state_of_group_[ig];
|
||||
for (uint_t ig = 0; ig < Base::num_groups_; ig++) {
|
||||
for (uint_t iChunk = Base::top_state_of_group_[ig];
|
||||
iChunk < Base::top_state_of_group_[ig + 1]; iChunk++)
|
||||
Base::states_[iChunk].qreg().apply_mcswap(qubits);
|
||||
}
|
||||
|
@ -1139,7 +1129,7 @@ void ParallelStateExecutor<state_t>::apply_chunk_swap(const reg_t &qubits) {
|
|||
// processes
|
||||
// is needed
|
||||
auto apply_chunk_swap_1qubit = [this, mask1, qubits](int_t iGroup) {
|
||||
for (int_t ic = Base::top_state_of_group_[iGroup];
|
||||
for (uint_t ic = Base::top_state_of_group_[iGroup];
|
||||
ic < Base::top_state_of_group_[iGroup + 1]; ic++) {
|
||||
uint_t baseChunk;
|
||||
baseChunk = ic & (~mask1);
|
||||
|
@ -1150,7 +1140,7 @@ void ParallelStateExecutor<state_t>::apply_chunk_swap(const reg_t &qubits) {
|
|||
};
|
||||
auto apply_chunk_swap_2qubits = [this, mask0, mask1,
|
||||
qubits](int_t iGroup) {
|
||||
for (int_t ic = Base::top_state_of_group_[iGroup];
|
||||
for (uint_t ic = Base::top_state_of_group_[iGroup];
|
||||
ic < Base::top_state_of_group_[iGroup + 1]; ic++) {
|
||||
uint_t baseChunk;
|
||||
baseChunk = ic & (~(mask0 | mask1));
|
||||
|
@ -1172,7 +1162,8 @@ void ParallelStateExecutor<state_t>::apply_chunk_swap(const reg_t &qubits) {
|
|||
}
|
||||
#ifdef AER_MPI
|
||||
else {
|
||||
int_t iPair;
|
||||
uint_t nLarge = 1;
|
||||
uint_t iPair;
|
||||
uint_t nPair;
|
||||
uint_t baseChunk, iChunk1, iChunk2;
|
||||
|
||||
|
@ -1343,14 +1334,14 @@ void ParallelStateExecutor<state_t>::apply_multi_chunk_swap(
|
|||
// swap inside chunks to prepare for all-to-all shuffle
|
||||
if (chunk_omp_parallel_ && Base::num_groups_ > 1) {
|
||||
#pragma omp parallel for
|
||||
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
|
||||
for (int_t iChunk = Base::top_state_of_group_[ig];
|
||||
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
|
||||
for (uint_t iChunk = Base::top_state_of_group_[ig];
|
||||
iChunk < Base::top_state_of_group_[ig + 1]; iChunk++)
|
||||
Base::states_[iChunk].qreg().apply_multi_swaps(local_swaps);
|
||||
}
|
||||
} else {
|
||||
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
|
||||
for (int_t iChunk = Base::top_state_of_group_[ig];
|
||||
for (uint_t ig = 0; ig < Base::num_groups_; ig++) {
|
||||
for (uint_t iChunk = Base::top_state_of_group_[ig];
|
||||
iChunk < Base::top_state_of_group_[ig + 1]; iChunk++)
|
||||
Base::states_[iChunk].qreg().apply_multi_swaps(local_swaps);
|
||||
}
|
||||
|
@ -1366,7 +1357,7 @@ void ParallelStateExecutor<state_t>::apply_multi_chunk_swap(
|
|||
|
||||
for (uint_t i = 0; i < nchunk; i++) {
|
||||
chunk_offset[i] = 0;
|
||||
for (uint_t k = 0; k < nswap; k++) {
|
||||
for (int_t k = 0; k < nswap; k++) {
|
||||
if (((i >> k) & 1) != 0)
|
||||
chunk_offset[i] += (1ull << chunk_shuffle_qubits[k]);
|
||||
}
|
||||
|
@ -1381,7 +1372,7 @@ void ParallelStateExecutor<state_t>::apply_multi_chunk_swap(
|
|||
uint_t i1, i2, k, ii, t;
|
||||
baseChunk = 0;
|
||||
ii = iPair;
|
||||
for (k = 0; k < nswap; k++) {
|
||||
for (k = 0; k < (uint_t)nswap; k++) {
|
||||
t = ii & ((1ull << chunk_shuffle_qubits_sorted[k]) - 1);
|
||||
baseChunk += t;
|
||||
ii = (ii - t) << 1;
|
||||
|
@ -1395,7 +1386,6 @@ void ParallelStateExecutor<state_t>::apply_multi_chunk_swap(
|
|||
// all-to-all
|
||||
// send data
|
||||
for (uint_t iswap = 1; iswap < nchunk; iswap++) {
|
||||
uint_t sizeRecv, sizeSend;
|
||||
uint_t num_local_swap = 0;
|
||||
for (i1 = 0; i1 < nchunk; i1++) {
|
||||
i2 = i1 ^ iswap;
|
||||
|
@ -1412,6 +1402,7 @@ void ParallelStateExecutor<state_t>::apply_multi_chunk_swap(
|
|||
continue; // swap while data is exchanged between processes
|
||||
}
|
||||
#ifdef AER_MPI
|
||||
uint_t sizeRecv, sizeSend;
|
||||
uint_t offset1 = i1 << (chunk_bits_ * qubit_scale() - nswap);
|
||||
uint_t offset2 = i2 << (chunk_bits_ * qubit_scale() - nswap);
|
||||
uint_t iChunk1 =
|
||||
|
@ -1419,7 +1410,7 @@ void ParallelStateExecutor<state_t>::apply_multi_chunk_swap(
|
|||
uint_t iChunk2 =
|
||||
baseChunk + chunk_offset[i2] - Base::global_state_index_;
|
||||
|
||||
int_t tid = (iPair << nswap) + iswap;
|
||||
uint_t tid = (iPair << nswap) + iswap;
|
||||
|
||||
if (iProc1 == Base::distributed_rank_) {
|
||||
auto pRecv = Base::states_[iChunk1].qreg().recv_buffer(sizeRecv);
|
||||
|
@ -1499,14 +1490,14 @@ void ParallelStateExecutor<state_t>::apply_multi_chunk_swap(
|
|||
// restore qubits order
|
||||
if (chunk_omp_parallel_ && Base::num_groups_ > 1) {
|
||||
#pragma omp parallel for
|
||||
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
|
||||
for (int_t iChunk = Base::top_state_of_group_[ig];
|
||||
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
|
||||
for (uint_t iChunk = Base::top_state_of_group_[ig];
|
||||
iChunk < Base::top_state_of_group_[ig + 1]; iChunk++)
|
||||
Base::states_[iChunk].qreg().apply_multi_swaps(local_swaps);
|
||||
}
|
||||
} else {
|
||||
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
|
||||
for (int_t iChunk = Base::top_state_of_group_[ig];
|
||||
for (uint_t ig = 0; ig < Base::num_groups_; ig++) {
|
||||
for (uint_t iChunk = Base::top_state_of_group_[ig];
|
||||
iChunk < Base::top_state_of_group_[ig + 1]; iChunk++)
|
||||
Base::states_[iChunk].qreg().apply_multi_swaps(local_swaps);
|
||||
}
|
||||
|
@ -1515,13 +1506,10 @@ void ParallelStateExecutor<state_t>::apply_multi_chunk_swap(
|
|||
|
||||
template <class state_t>
|
||||
void ParallelStateExecutor<state_t>::apply_chunk_x(const uint_t qubit) {
|
||||
int_t iChunk;
|
||||
uint_t nLarge = 1;
|
||||
|
||||
if (qubit < chunk_bits_ * qubit_scale()) {
|
||||
auto apply_mcx = [this, qubit](int_t ig) {
|
||||
reg_t qubits(1, qubit);
|
||||
for (int_t iChunk = Base::top_state_of_group_[ig];
|
||||
for (uint_t iChunk = Base::top_state_of_group_[ig];
|
||||
iChunk < Base::top_state_of_group_[ig + 1]; iChunk++)
|
||||
Base::states_[iChunk].qreg().apply_mcx(qubits);
|
||||
};
|
||||
|
@ -1529,9 +1517,7 @@ void ParallelStateExecutor<state_t>::apply_chunk_x(const uint_t qubit) {
|
|||
(chunk_omp_parallel_ && Base::num_groups_ > 1), 0, Base::num_groups_,
|
||||
apply_mcx);
|
||||
} else { // exchange over chunks
|
||||
int_t iPair;
|
||||
uint_t nPair, mask;
|
||||
uint_t baseChunk, iChunk1, iChunk2;
|
||||
reg_t qubits(2);
|
||||
qubits[0] = qubit;
|
||||
qubits[1] = qubit;
|
||||
|
@ -1547,7 +1533,7 @@ void ParallelStateExecutor<state_t>::apply_chunk_x(const uint_t qubit) {
|
|||
nPair = Base::num_local_states_ >> 1;
|
||||
|
||||
auto apply_chunk_swap = [this, mask, qubits](int_t iGroup) {
|
||||
for (int_t ic = Base::top_state_of_group_[iGroup];
|
||||
for (uint_t ic = Base::top_state_of_group_[iGroup];
|
||||
ic < Base::top_state_of_group_[iGroup + 1]; ic++) {
|
||||
uint_t pairChunk;
|
||||
pairChunk = ic ^ mask;
|
||||
|
@ -1562,6 +1548,9 @@ void ParallelStateExecutor<state_t>::apply_chunk_x(const uint_t qubit) {
|
|||
}
|
||||
#ifdef AER_MPI
|
||||
else {
|
||||
uint_t iPair;
|
||||
uint_t baseChunk, iChunk1, iChunk2;
|
||||
|
||||
// chunk scheduler that supports any number of processes
|
||||
uint_t nu[3];
|
||||
uint_t ub[3];
|
||||
|
@ -1570,7 +1559,6 @@ void ParallelStateExecutor<state_t>::apply_chunk_x(const uint_t qubit) {
|
|||
uint_t iLocalChunk, iRemoteChunk, iProc;
|
||||
int i;
|
||||
|
||||
nLarge = 1;
|
||||
nu[0] = 1ull << (qubit - chunk_bits_ * qubit_scale());
|
||||
ub[0] = 0;
|
||||
iu[0] = 0;
|
||||
|
@ -1864,8 +1852,8 @@ void ParallelStateExecutor<state_t>::gather_state(
|
|||
AER::Vector<std::complex<data_t>> &state) {
|
||||
#ifdef AER_MPI
|
||||
if (Base::distributed_procs_ > 1) {
|
||||
uint_t size, local_size, global_size, offset;
|
||||
int i;
|
||||
uint_t global_size;
|
||||
uint_t i;
|
||||
|
||||
std::vector<int> recv_counts(Base::distributed_procs_);
|
||||
std::vector<int> recv_offset(Base::distributed_procs_);
|
||||
|
|
|
@ -83,7 +83,7 @@ public:
|
|||
void set_shots(std::vector<RngEngine> &shots) { shots_ = shots; }
|
||||
void initialize_shots(const uint_t nshots, const uint_t seed) {
|
||||
shots_.resize(nshots);
|
||||
for (int_t i = 0; i < nshots; i++) {
|
||||
for (uint_t i = 0; i < nshots; i++) {
|
||||
shots_[i].set_seed(seed + i);
|
||||
}
|
||||
}
|
||||
|
@ -151,7 +151,7 @@ public:
|
|||
if (param_index_.size() == 1) {
|
||||
return param_index_[0];
|
||||
}
|
||||
for (int_t i = 0; i < param_index_.size(); i++) {
|
||||
for (uint_t i = 0; i < param_index_.size(); i++) {
|
||||
if (param_shots_[i] > ishot) {
|
||||
return param_index_[i];
|
||||
}
|
||||
|
@ -174,13 +174,13 @@ void Branch::branch_shots(reg_t &shots, int_t nbranch) {
|
|||
if (param_index_.size() > 1) {
|
||||
branches_[i]->param_index_ = param_index_;
|
||||
branches_[i]->param_shots_.resize(param_index_.size());
|
||||
for (int_t j = 0; j < param_index_.size(); j++)
|
||||
for (uint_t j = 0; j < param_index_.size(); j++)
|
||||
branches_[i]->param_shots_[j] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
uint_t pos = 0;
|
||||
for (int_t i = 0; i < shots.size(); i++) {
|
||||
for (uint_t i = 0; i < shots.size(); i++) {
|
||||
branches_[shots[i]]->shots_.push_back(shots_[i]);
|
||||
|
||||
if (param_index_.size() > 1) {
|
||||
|
@ -193,19 +193,19 @@ void Branch::branch_shots(reg_t &shots, int_t nbranch) {
|
|||
// set parameter indices
|
||||
if (param_index_.size() > 1) {
|
||||
for (int_t i = 0; i < nbranch; i++) {
|
||||
uint_t pos = 0;
|
||||
while (pos < branches_[i]->param_index_.size()) {
|
||||
if (branches_[i]->param_shots_[pos] == 0) {
|
||||
uint_t ppos = 0;
|
||||
while (ppos < branches_[i]->param_index_.size()) {
|
||||
if (branches_[i]->param_shots_[ppos] == 0) {
|
||||
branches_[i]->param_index_.erase(branches_[i]->param_index_.begin() +
|
||||
pos);
|
||||
ppos);
|
||||
branches_[i]->param_shots_.erase(branches_[i]->param_index_.begin() +
|
||||
pos);
|
||||
ppos);
|
||||
} else {
|
||||
if (pos > 0) {
|
||||
branches_[i]->param_shots_[pos] +=
|
||||
branches_[i]->param_shots_[pos - 1];
|
||||
if (ppos > 0) {
|
||||
branches_[i]->param_shots_[ppos] +=
|
||||
branches_[i]->param_shots_[ppos - 1];
|
||||
}
|
||||
pos++;
|
||||
ppos++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -218,27 +218,27 @@ void Branch::branch_shots(reg_t &shots, int_t nbranch) {
|
|||
void Branch::branch_shots_by_params(void) {
|
||||
branches_.resize(param_index_.size());
|
||||
|
||||
for (int_t i = 0; i < param_index_.size(); i++) {
|
||||
for (uint_t i = 0; i < param_index_.size(); i++) {
|
||||
branches_[i] = std::make_shared<Branch>();
|
||||
branches_[i]->creg_ = creg_;
|
||||
branches_[i]->iter_ = iter_;
|
||||
branches_[i]->flow_marks_ = flow_marks_;
|
||||
}
|
||||
uint_t pos = 0;
|
||||
for (int_t i = 0; i < shots_.size(); i++) {
|
||||
for (uint_t i = 0; i < shots_.size(); i++) {
|
||||
if (i >= param_shots_[pos])
|
||||
pos++;
|
||||
branches_[pos]->shots_.push_back(shots_[i]);
|
||||
}
|
||||
|
||||
for (int_t i = 0; i < param_index_.size(); i++) {
|
||||
for (uint_t i = 0; i < param_index_.size(); i++) {
|
||||
branches_[i]->set_param_index(param_index_[i], 0);
|
||||
}
|
||||
}
|
||||
|
||||
void Branch::advance_iterator(void) {
|
||||
iter_++;
|
||||
for (int_t i = 0; i < branches_.size(); i++) {
|
||||
for (uint_t i = 0; i < branches_.size(); i++) {
|
||||
branches_[i]->iter_++;
|
||||
}
|
||||
}
|
||||
|
@ -253,24 +253,24 @@ bool Branch::apply_runtime_noise_sampling(const ClassicalRegister &creg,
|
|||
reg_t shot_map(nshots);
|
||||
std::vector<std::vector<Operations::Op>> noises;
|
||||
|
||||
for (int_t i = 0; i < nshots; i++) {
|
||||
for (uint_t i = 0; i < nshots; i++) {
|
||||
std::vector<Operations::Op> noise_ops =
|
||||
noise.sample_noise_loc(op, shots_[i]);
|
||||
|
||||
// search same noise ops
|
||||
int_t pos = -1;
|
||||
for (int_t j = 0; j < noises.size(); j++) {
|
||||
for (uint_t j = 0; j < noises.size(); j++) {
|
||||
if (noise_ops.size() != noises[j].size())
|
||||
continue;
|
||||
bool same = true;
|
||||
for (int_t k = 0; k < noise_ops.size(); k++) {
|
||||
for (uint_t k = 0; k < noise_ops.size(); k++) {
|
||||
if (noise_ops[k].type != noises[j][k].type ||
|
||||
noise_ops[k].name != noises[j][k].name)
|
||||
same = false;
|
||||
else if (noise_ops[k].qubits.size() != noises[j][k].qubits.size())
|
||||
same = false;
|
||||
else {
|
||||
for (int_t l = 0; l < noise_ops[k].qubits.size(); l++) {
|
||||
for (uint_t l = 0; l < noise_ops[k].qubits.size(); l++) {
|
||||
if (noise_ops[k].qubits[l] != noises[j][k].qubits[l]) {
|
||||
same = false;
|
||||
break;
|
||||
|
@ -286,7 +286,7 @@ bool Branch::apply_runtime_noise_sampling(const ClassicalRegister &creg,
|
|||
} else if (noise_ops[k].params.size() != noises[j][k].params.size())
|
||||
same = false;
|
||||
else {
|
||||
for (int_t l = 0; l < noise_ops[k].params.size(); l++) {
|
||||
for (uint_t l = 0; l < noise_ops[k].params.size(); l++) {
|
||||
if (noise_ops[k].params[l] != noises[j][k].params[l]) {
|
||||
same = false;
|
||||
break;
|
||||
|
@ -298,12 +298,12 @@ bool Branch::apply_runtime_noise_sampling(const ClassicalRegister &creg,
|
|||
if (noise_ops[k].mats.size() != noises[j][k].mats.size())
|
||||
same = false;
|
||||
else {
|
||||
for (int_t l = 0; l < noise_ops[k].mats.size(); l++) {
|
||||
for (uint_t l = 0; l < noise_ops[k].mats.size(); l++) {
|
||||
if (noise_ops[k].mats[l].size() != noises[j][k].mats[l].size()) {
|
||||
same = false;
|
||||
break;
|
||||
}
|
||||
for (int_t m = 0; m < noise_ops[k].mats[l].size(); m++) {
|
||||
for (uint_t m = 0; m < noise_ops[k].mats[l].size(); m++) {
|
||||
if (noise_ops[k].mats[l][m] != noises[j][k].mats[l][m]) {
|
||||
same = false;
|
||||
break;
|
||||
|
@ -333,7 +333,7 @@ bool Branch::apply_runtime_noise_sampling(const ClassicalRegister &creg,
|
|||
|
||||
creg_ = creg;
|
||||
branch_shots(shot_map, noises.size());
|
||||
for (int_t i = 0; i < noises.size(); i++) {
|
||||
for (uint_t i = 0; i < noises.size(); i++) {
|
||||
branches_[i]->copy_ops_after_branch(noises[i]);
|
||||
}
|
||||
|
||||
|
@ -342,7 +342,7 @@ bool Branch::apply_runtime_noise_sampling(const ClassicalRegister &creg,
|
|||
|
||||
void Branch::remove_empty_branches(void) {
|
||||
int_t istart = 0;
|
||||
for (int_t j = 0; j < branches_.size(); j++) {
|
||||
for (uint_t j = 0; j < branches_.size(); j++) {
|
||||
if (branches_[j]->num_shots() > 0) {
|
||||
// copy shots to the root
|
||||
shots_ = branches_[j]->rng_shots();
|
||||
|
@ -359,7 +359,7 @@ void Branch::remove_empty_branches(void) {
|
|||
|
||||
std::vector<std::shared_ptr<Branch>> new_branches;
|
||||
|
||||
for (int_t j = istart; j < branches_.size(); j++) {
|
||||
for (uint_t j = istart; j < branches_.size(); j++) {
|
||||
if (branches_[j]->num_shots() > 0)
|
||||
new_branches.push_back(branches_[j]);
|
||||
else
|
||||
|
@ -370,7 +370,7 @@ void Branch::remove_empty_branches(void) {
|
|||
|
||||
void Branch::reset_branch(void) {
|
||||
// reset random seeds
|
||||
for (int_t i = 0; i < shots_.size(); i++) {
|
||||
for (uint_t i = 0; i < shots_.size(); i++) {
|
||||
shots_[i].set_seed(shots_[i].initial_seed());
|
||||
}
|
||||
additional_ops_.clear();
|
||||
|
@ -390,7 +390,7 @@ void Branch::set_param_index(uint_t ishot, uint_t nshots_per_param) {
|
|||
param_shots_.clear();
|
||||
|
||||
param_index_.push_back(ishot / nshots_per_param);
|
||||
for (int_t i = 1; i < shots_.size(); i++) {
|
||||
for (uint_t i = 1; i < shots_.size(); i++) {
|
||||
uint_t ip = (ishot + i) / nshots_per_param;
|
||||
if (ip != param_index_[pos]) {
|
||||
param_shots_.push_back(i);
|
||||
|
|
|
@ -431,7 +431,7 @@ bool Clifford::measure_and_update(const uint64_t qubit,
|
|||
auto measure_non_determinisitic_func = [this, rS, row,
|
||||
qubit](AER::int_t i) {
|
||||
uint64_t row_mask = ~0ull;
|
||||
if ((row >> destabilizer_phases_.BLOCK_BITS) == i)
|
||||
if ((row >> destabilizer_phases_.BLOCK_BITS) == (uint_t)i)
|
||||
row_mask ^= (1ull << (row & destabilizer_phases_.BLOCK_MASK));
|
||||
|
||||
uint64_t d_mask = row_mask & destabilizer_table_[qubit].X(i);
|
||||
|
|
|
@ -255,7 +255,7 @@ void State::set_config(const Config &config) {
|
|||
}
|
||||
|
||||
bool State::validate_parameters(const std::vector<Operations::Op> &ops) const {
|
||||
for (int_t i = 0; i < ops.size(); i++) {
|
||||
for (uint_t i = 0; i < ops.size(); i++) {
|
||||
if (ops[i].type == OpType::gate) {
|
||||
// check parameter of R gates
|
||||
if (ops[i].name == "rx" || ops[i].name == "ry" || ops[i].name == "rz") {
|
||||
|
@ -639,7 +639,7 @@ template <typename T>
|
|||
void State::get_probabilities_auxiliary(const reg_t &qubits,
|
||||
std::string outcome,
|
||||
double outcome_prob, T &probs) {
|
||||
uint_t qubit_for_branching = -1;
|
||||
int_t qubit_for_branching = -1;
|
||||
for (uint_t i = 0; i < qubits.size(); ++i) {
|
||||
uint_t qubit = qubits[qubits.size() - i - 1];
|
||||
if (outcome[i] == 'X') {
|
||||
|
@ -690,7 +690,7 @@ void State::get_probability_helper(const reg_t &qubits,
|
|||
const std::string &outcome,
|
||||
std::string &outcome_carry,
|
||||
double &prob_carry) {
|
||||
uint_t qubit_for_branching = -1;
|
||||
int_t qubit_for_branching = -1;
|
||||
for (uint_t i = 0; i < qubits.size(); ++i) {
|
||||
uint_t qubit = qubits[qubits.size() - i - 1];
|
||||
if (outcome_carry[i] == 'X') {
|
||||
|
|
|
@ -395,7 +395,7 @@ void ChunkContainer<data_t>::UnmapBuffer(Chunk<data_t> &buf) {
|
|||
|
||||
template <typename data_t>
|
||||
void ChunkContainer<data_t>::unmap_all(void) {
|
||||
int_t i;
|
||||
uint_t i;
|
||||
for (i = 0; i < chunks_map_.size(); i++)
|
||||
chunks_map_[i] = false;
|
||||
num_chunk_mapped_ = 0;
|
||||
|
@ -804,14 +804,8 @@ void ChunkContainer<data_t>::ExecuteSum2(double *pSum, Function func,
|
|||
#endif
|
||||
}
|
||||
|
||||
void host_func_launcher(void *pParam) {
|
||||
HostFuncBase *func = reinterpret_cast<HostFuncBase *>(pParam);
|
||||
func->execute();
|
||||
}
|
||||
|
||||
template <typename data_t>
|
||||
void ChunkContainer<data_t>::allocate_chunks(void) {
|
||||
uint_t i;
|
||||
chunks_map_.resize(num_chunks_, false);
|
||||
|
||||
reduced_queue_begin_.resize(num_chunks_, 0);
|
||||
|
@ -855,7 +849,7 @@ void ChunkContainer<data_t>::apply_matrix(
|
|||
#else
|
||||
if (N <= 10) {
|
||||
#endif
|
||||
int i;
|
||||
uint_t i;
|
||||
for (i = 0; i < N; i++) {
|
||||
qubits_sorted.push_back(qubits[i]);
|
||||
}
|
||||
|
@ -918,7 +912,7 @@ void ChunkContainer<data_t>::apply_batched_matrix(
|
|||
} else {
|
||||
auto qubits_sorted = qubits;
|
||||
std::sort(qubits_sorted.begin(), qubits_sorted.end());
|
||||
for (int i = 0; i < N; i++) {
|
||||
for (uint_t i = 0; i < N; i++) {
|
||||
qubits_sorted.push_back(qubits[i]);
|
||||
}
|
||||
StoreUintParams(qubits_sorted, iChunk);
|
||||
|
@ -971,8 +965,8 @@ void ChunkContainer<data_t>::apply_phase(const uint_t iChunk,
|
|||
const int_t control_bits,
|
||||
const std::complex<double> phase,
|
||||
const uint_t gid, const uint_t count) {
|
||||
Execute(phase_func<data_t>(qubits, *(thrust::complex<double> *)&phase),
|
||||
iChunk, gid, count);
|
||||
thrust::complex<double> p(phase);
|
||||
Execute(phase_func<data_t>(qubits, p), iChunk, gid, count);
|
||||
}
|
||||
|
||||
template <typename data_t>
|
||||
|
@ -989,8 +983,8 @@ void ChunkContainer<data_t>::apply_multi_swaps(const uint_t iChunk,
|
|||
const uint_t gid,
|
||||
const uint_t count) {
|
||||
// max 5 swaps can be applied at once using GPU's shared memory
|
||||
for (int_t i = 0; i < qubits.size(); i += 10) {
|
||||
int_t n = 10;
|
||||
for (uint_t i = 0; i < qubits.size(); i += 10) {
|
||||
uint_t n = 10;
|
||||
if (i + n > qubits.size())
|
||||
n = qubits.size() - i;
|
||||
|
||||
|
@ -1009,7 +1003,6 @@ void ChunkContainer<data_t>::apply_permutation(
|
|||
const uint_t iChunk, const reg_t &qubits,
|
||||
const std::vector<std::pair<uint_t, uint_t>> &pairs, const uint_t gid,
|
||||
const uint_t count) {
|
||||
const size_t N = qubits.size();
|
||||
auto qubits_sorted = qubits;
|
||||
std::sort(qubits_sorted.begin(), qubits_sorted.end());
|
||||
|
||||
|
@ -1080,7 +1073,7 @@ void ChunkContainer<data_t>::probabilities(std::vector<double> &probs,
|
|||
|
||||
template <typename data_t>
|
||||
double ChunkContainer<data_t>::norm(uint_t iChunk, uint_t count) const {
|
||||
double ret;
|
||||
double ret = 0.0;
|
||||
ExecuteSum(&ret, norm_func<data_t>(), iChunk, count);
|
||||
|
||||
return ret;
|
||||
|
@ -1089,7 +1082,7 @@ double ChunkContainer<data_t>::norm(uint_t iChunk, uint_t count) const {
|
|||
template <typename data_t>
|
||||
double ChunkContainer<data_t>::trace(uint_t iChunk, uint_t row,
|
||||
uint_t count) const {
|
||||
double ret;
|
||||
double ret = 0.0;
|
||||
ExecuteSum(&ret, trace_func<data_t>(row), iChunk, count);
|
||||
|
||||
return ret;
|
||||
|
@ -1108,7 +1101,7 @@ double ChunkContainer<data_t>::expval_matrix(const uint_t iChunk,
|
|||
else {
|
||||
auto qubits_sorted = qubits;
|
||||
std::sort(qubits_sorted.begin(), qubits_sorted.end());
|
||||
for (int_t i = 0; i < N; i++) {
|
||||
for (uint_t i = 0; i < N; i++) {
|
||||
qubits_sorted.push_back(qubits[i]);
|
||||
}
|
||||
|
||||
|
@ -1166,7 +1159,6 @@ void ChunkContainer<data_t>::batched_expval_pauli(
|
|||
count, first);
|
||||
return;
|
||||
}
|
||||
double ret;
|
||||
// specialize x_max == 0
|
||||
if (x_mask == 0) {
|
||||
ExecuteSum2(nullptr,
|
||||
|
|
|
@ -35,11 +35,11 @@ protected:
|
|||
std::vector<std::shared_ptr<ChunkContainer<data_t>>>
|
||||
chunks_; // chunk containers for each device and host
|
||||
|
||||
int num_devices_; // number of devices
|
||||
int num_places_; // number of places (devices + host)
|
||||
uint_t num_devices_; // number of devices
|
||||
uint_t num_places_; // number of places (devices + host)
|
||||
|
||||
int chunk_bits_; // number of qubits of chunk
|
||||
int num_qubits_; // number of global qubits
|
||||
uint_t chunk_bits_; // number of qubits of chunk
|
||||
uint_t num_qubits_; // number of global qubits
|
||||
|
||||
uint_t num_chunks_; // number of chunks on this process
|
||||
uint_t chunk_index_; // global chunk index for the first chunk
|
||||
|
@ -105,7 +105,6 @@ public:
|
|||
|
||||
template <typename data_t>
|
||||
ChunkManager<data_t>::ChunkManager() {
|
||||
int i, j;
|
||||
num_places_ = 1;
|
||||
chunk_bits_ = 0;
|
||||
num_chunks_ = 0;
|
||||
|
@ -126,7 +125,9 @@ ChunkManager<data_t>::ChunkManager() {
|
|||
#else
|
||||
|
||||
#ifdef AER_THRUST_GPU
|
||||
if (cudaGetDeviceCount(&num_devices_) == cudaSuccess) {
|
||||
int ndev;
|
||||
if (cudaGetDeviceCount(&ndev) == cudaSuccess) {
|
||||
num_devices_ = ndev;
|
||||
num_places_ = num_devices_;
|
||||
} else {
|
||||
cudaGetLastError();
|
||||
|
@ -168,19 +169,21 @@ uint_t ChunkManager<data_t>::Allocate(int chunk_bits, int nqubits,
|
|||
bool density_mat, reg_t &gpus,
|
||||
bool enable_cuStatevec) {
|
||||
uint_t num_buffers;
|
||||
int iDev;
|
||||
uint_t iDev;
|
||||
uint_t is, ie, nc;
|
||||
int i;
|
||||
uint_t i;
|
||||
char *str;
|
||||
bool multi_gpu = false;
|
||||
bool hybrid = false;
|
||||
|
||||
bool hybrid = false;
|
||||
#ifdef AER_THRUST_GPU
|
||||
bool multi_gpu = false;
|
||||
//--- for test
|
||||
str = getenv("AER_MULTI_GPU");
|
||||
if (str) {
|
||||
multi_gpu = true;
|
||||
num_places_ = num_devices_;
|
||||
}
|
||||
#endif
|
||||
str = getenv("AER_HYBRID");
|
||||
if (str) {
|
||||
hybrid = true;
|
||||
|
@ -192,8 +195,10 @@ uint_t ChunkManager<data_t>::Allocate(int chunk_bits, int nqubits,
|
|||
target_gpus_ = gpus;
|
||||
if (target_gpus_.size() > 0) {
|
||||
num_devices_ = target_gpus_.size();
|
||||
#ifdef AER_THRUST_GPU
|
||||
if (num_devices_ > 1)
|
||||
multi_gpu = true;
|
||||
#endif
|
||||
} else {
|
||||
target_gpus_.resize(num_devices_);
|
||||
for (iDev = 0; iDev < num_devices_; iDev++) {
|
||||
|
@ -203,7 +208,7 @@ uint_t ChunkManager<data_t>::Allocate(int chunk_bits, int nqubits,
|
|||
|
||||
chunk_index_ = chunk_index;
|
||||
|
||||
if (num_qubits_ != nqubits || chunk_bits_ != chunk_bits ||
|
||||
if (num_qubits_ != (uint_t)nqubits || chunk_bits_ != (uint_t)chunk_bits ||
|
||||
nchunks > num_chunks_) {
|
||||
// free previous allocation
|
||||
Free();
|
||||
|
@ -224,7 +229,6 @@ uint_t ChunkManager<data_t>::Allocate(int chunk_bits, int nqubits,
|
|||
multi_shots_ = true;
|
||||
|
||||
#ifdef AER_THRUST_CPU
|
||||
multi_gpu = false;
|
||||
num_places_ = 1;
|
||||
#else
|
||||
if (chunk_distribution_enable_) {
|
||||
|
@ -260,7 +264,9 @@ uint_t ChunkManager<data_t>::Allocate(int chunk_bits, int nqubits,
|
|||
#endif
|
||||
} else { // single chunk
|
||||
num_buffers = 0;
|
||||
#ifdef AER_THRUST_GPU
|
||||
multi_gpu = false;
|
||||
#endif
|
||||
num_places_ = 1;
|
||||
num_chunks_ = nchunks;
|
||||
multi_shots_ = false;
|
||||
|
@ -346,7 +352,7 @@ uint_t ChunkManager<data_t>::Allocate(int chunk_bits, int nqubits,
|
|||
}
|
||||
}
|
||||
if (chunks_allocated < num_chunks_) {
|
||||
int nplaces_add = num_places_;
|
||||
uint_t nplaces_add = num_places_;
|
||||
if ((num_chunks_ - chunks_allocated) < nplaces_add)
|
||||
nplaces_add = (num_chunks_ - chunks_allocated);
|
||||
// rest of chunks are stored on host
|
||||
|
@ -391,7 +397,7 @@ uint_t ChunkManager<data_t>::Allocate(int chunk_bits, int nqubits,
|
|||
|
||||
template <typename data_t>
|
||||
void ChunkManager<data_t>::Free(void) {
|
||||
int i;
|
||||
uint_t i;
|
||||
|
||||
for (i = 0; i < chunks_.size(); i++) {
|
||||
chunks_[i]->Deallocate();
|
||||
|
@ -408,7 +414,7 @@ void ChunkManager<data_t>::Free(void) {
|
|||
|
||||
template <typename data_t>
|
||||
bool ChunkManager<data_t>::MapChunk(Chunk<data_t> &chunk, int iplace) {
|
||||
int i;
|
||||
uint_t i;
|
||||
|
||||
for (i = 0; i < num_places_; i++) {
|
||||
if (chunks_[(iplace + i) % num_places_]->MapChunk(chunk)) {
|
||||
|
@ -422,7 +428,7 @@ bool ChunkManager<data_t>::MapChunk(Chunk<data_t> &chunk, int iplace) {
|
|||
template <typename data_t>
|
||||
bool ChunkManager<data_t>::MapBufferChunk(Chunk<data_t> &out, int idev) {
|
||||
if (idev < 0) {
|
||||
int i;
|
||||
uint_t i;
|
||||
for (i = 0; i < num_devices_; i++) {
|
||||
if (chunks_[i]->MapBufferChunk(out))
|
||||
break;
|
||||
|
|
|
@ -377,9 +377,6 @@ void cuStateVecChunkContainer<data_t>::apply_diagonal_matrix(
|
|||
qubits32[i] = qubits[i];
|
||||
|
||||
int32_t *pQubits = &qubits32[control_bits];
|
||||
int32_t *pControl = nullptr;
|
||||
if (control_bits > 0)
|
||||
pControl = &qubits32[0];
|
||||
|
||||
uint_t bits;
|
||||
uint_t nc;
|
||||
|
@ -686,7 +683,6 @@ void cuStateVecChunkContainer<data_t>::apply_rotation(
|
|||
const uint_t iChunk, const reg_t &qubits, const Rotation r,
|
||||
const double theta, const uint_t gid, const uint_t count) {
|
||||
custatevecPauli_t pauli[2];
|
||||
int nPauli = 1;
|
||||
|
||||
BaseContainer::set_device();
|
||||
|
||||
|
@ -705,25 +701,21 @@ void cuStateVecChunkContainer<data_t>::apply_rotation(
|
|||
case Rotation::xx:
|
||||
pauli[0] = CUSTATEVEC_PAULI_X;
|
||||
pauli[1] = CUSTATEVEC_PAULI_X;
|
||||
nPauli = 2;
|
||||
control_bits--;
|
||||
break;
|
||||
case Rotation::yy:
|
||||
pauli[0] = CUSTATEVEC_PAULI_Y;
|
||||
pauli[1] = CUSTATEVEC_PAULI_Y;
|
||||
nPauli = 2;
|
||||
control_bits--;
|
||||
break;
|
||||
case Rotation::zz:
|
||||
pauli[0] = CUSTATEVEC_PAULI_Z;
|
||||
pauli[1] = CUSTATEVEC_PAULI_Z;
|
||||
nPauli = 2;
|
||||
control_bits--;
|
||||
break;
|
||||
case Rotation::zx:
|
||||
pauli[0] = CUSTATEVEC_PAULI_Z;
|
||||
pauli[1] = CUSTATEVEC_PAULI_X;
|
||||
nPauli = 2;
|
||||
control_bits--;
|
||||
break;
|
||||
default:
|
||||
|
@ -911,7 +903,7 @@ double cuStateVecChunkContainer<data_t>::expval_pauli(
|
|||
const custatevecPauli_t *pauliOperatorsArray[] = {pauliOps};
|
||||
const int32_t *basisBitsArray[] = {qubits32};
|
||||
double ret[1];
|
||||
const uint32_t nBasisBitsArray[] = {qubits.size()};
|
||||
const uint32_t nBasisBitsArray[] = {(uint32_t)qubits.size()};
|
||||
|
||||
custatevecStatus_t err;
|
||||
err = custatevecComputeExpectationsOnPauliBasis(
|
||||
|
|
|
@ -220,7 +220,7 @@ public:
|
|||
void allocate_creg(uint_t num_mem, uint_t num_reg);
|
||||
int measured_cbit(uint_t iChunk, int qubit) {
|
||||
uint_t n64, i64, ibit;
|
||||
if (qubit >= this->num_creg_bits_)
|
||||
if ((uint_t)qubit >= this->num_creg_bits_)
|
||||
return -1;
|
||||
n64 = (this->num_creg_bits_ + 63) >> 6;
|
||||
i64 = qubit >> 6;
|
||||
|
@ -324,7 +324,6 @@ uint_t DeviceChunkContainer<data_t>::Allocate(int idev, int chunk_bits,
|
|||
bool density_matrix) {
|
||||
uint_t nc = chunks;
|
||||
uint_t i;
|
||||
int mat_bits;
|
||||
|
||||
this->chunk_bits_ = chunk_bits;
|
||||
this->num_qubits_ = num_qubits;
|
||||
|
@ -359,13 +358,10 @@ uint_t DeviceChunkContainer<data_t>::Allocate(int idev, int chunk_bits,
|
|||
|
||||
if (multi_shots) { // mult-shot parallelization for small qubits
|
||||
multi_shots_ = true;
|
||||
mat_bits = AER_DEFAULT_MATRIX_BITS;
|
||||
nc = chunks;
|
||||
num_matrices_ = chunks;
|
||||
} else {
|
||||
multi_shots_ = false;
|
||||
|
||||
mat_bits = AER_DEFAULT_MATRIX_BITS;
|
||||
num_matrices_ = 1;
|
||||
nc = chunks;
|
||||
}
|
||||
|
@ -519,7 +515,7 @@ void DeviceChunkContainer<data_t>::calculate_matrix_buffer_size(int bits,
|
|||
if (shots > AER_MAX_SAMPLING_SHOTS)
|
||||
shots = AER_MAX_SAMPLING_SHOTS;
|
||||
uint_t b = this->matrix_bits_;
|
||||
while ((1ull << (b * 2)) < shots) {
|
||||
while ((1ull << (b * 2)) < (uint_t)shots) {
|
||||
b++;
|
||||
}
|
||||
this->matrix_bits_ = b;
|
||||
|
@ -545,7 +541,7 @@ void DeviceChunkContainer<data_t>::calculate_matrix_buffer_size(int bits,
|
|||
}
|
||||
params_buffer_size_ = size;
|
||||
|
||||
if (shots > 1 && params_buffer_size_ < shots) {
|
||||
if (shots > 1 && params_buffer_size_ < (uint_t)shots) {
|
||||
params_buffer_size_ = shots;
|
||||
}
|
||||
}
|
||||
|
@ -553,10 +549,9 @@ void DeviceChunkContainer<data_t>::calculate_matrix_buffer_size(int bits,
|
|||
template <typename data_t>
|
||||
void DeviceChunkContainer<data_t>::ResizeMatrixBuffers(int bits,
|
||||
int max_shots) {
|
||||
uint_t size;
|
||||
uint_t n = num_matrices_ + this->num_buffers_;
|
||||
|
||||
if (bits != this->matrix_bits_) {
|
||||
if ((uint_t)bits != this->matrix_bits_) {
|
||||
calculate_matrix_buffer_size(bits, max_shots);
|
||||
}
|
||||
|
||||
|
@ -941,7 +936,7 @@ void DeviceChunkContainer<data_t>::set_blocked_qubits(uint_t iChunk,
|
|||
auto qubits_sorted = qubits;
|
||||
std::sort(qubits_sorted.begin(), qubits_sorted.end());
|
||||
|
||||
int i;
|
||||
uint_t i;
|
||||
for (i = 0; i < qubits.size(); i++) {
|
||||
blocked_qubits_holder_[iBlock * QV_MAX_REGISTERS + i] = qubits_sorted[i];
|
||||
}
|
||||
|
@ -1010,8 +1005,7 @@ void DeviceChunkContainer<data_t>::queue_blocked_gate(
|
|||
}
|
||||
|
||||
cvector_t<double> mat(4, 0.0);
|
||||
int i;
|
||||
uint_t idx, idxParam, iBlock;
|
||||
uint_t iBlock;
|
||||
if (iChunk >= this->num_chunks_) { // for buffer chunks
|
||||
iBlock = num_matrices_ + iChunk - this->num_chunks_;
|
||||
} else {
|
||||
|
@ -1028,7 +1022,7 @@ void DeviceChunkContainer<data_t>::queue_blocked_gate(
|
|||
params.mask_ = mask;
|
||||
params.gate_ = gate;
|
||||
params.qubit_ = 0;
|
||||
for (i = 0; i < num_blocked_qubits_[iBlock]; i++) {
|
||||
for (uint_t i = 0; i < num_blocked_qubits_[iBlock]; i++) {
|
||||
if (blocked_qubits_holder_[iBlock * QV_MAX_REGISTERS + i] == qubit) {
|
||||
params.qubit_ = i;
|
||||
break;
|
||||
|
@ -1408,8 +1402,8 @@ void DeviceChunkContainer<data_t>::copy_reduce_buffer(std::vector<double> &ret,
|
|||
count * reduce_buffer_size_, tmp.begin());
|
||||
#endif
|
||||
|
||||
for (int_t i = 0; i < count; i++) {
|
||||
for (int_t j = 0; j < num_val; j++)
|
||||
for (uint_t i = 0; i < count; i++) {
|
||||
for (uint_t j = 0; j < num_val; j++)
|
||||
ret[i * num_val + j] = tmp[i * reduce_buffer_size_ + j];
|
||||
}
|
||||
}
|
||||
|
|
|
@ -121,7 +121,6 @@ uint_t HostChunkContainer<data_t>::Allocate(int idev, int chunk_bits,
|
|||
int matrix_bit, int max_shots,
|
||||
bool density_matrix) {
|
||||
uint_t nc = chunks;
|
||||
uint_t i;
|
||||
|
||||
ChunkContainer<data_t>::chunk_bits_ = chunk_bits;
|
||||
ChunkContainer<data_t>::num_qubits_ = num_qubits;
|
||||
|
|
|
@ -69,7 +69,10 @@ protected:
|
|||
public:
|
||||
GateFuncBase() {
|
||||
data_ = NULL;
|
||||
matrix_ = NULL;
|
||||
params_ = NULL;
|
||||
base_index_ = 0;
|
||||
chunk_bits_ = 0;
|
||||
cregs_ = NULL;
|
||||
num_creg_bits_ = 0;
|
||||
conditional_bit_ = -1;
|
||||
|
@ -147,7 +150,7 @@ public:
|
|||
template <typename data_t>
|
||||
class GateFuncWithCache : public GateFuncBase<data_t> {
|
||||
protected:
|
||||
int nqubits_;
|
||||
uint_t nqubits_;
|
||||
|
||||
public:
|
||||
GateFuncWithCache(uint_t nq) { nqubits_ = nq; }
|
||||
|
@ -210,7 +213,7 @@ public:
|
|||
template <typename data_t>
|
||||
class GateFuncSumWithCache : public GateFuncBase<data_t> {
|
||||
protected:
|
||||
int nqubits_;
|
||||
uint_t nqubits_;
|
||||
|
||||
public:
|
||||
GateFuncSumWithCache(uint_t nq) { nqubits_ = nq; }
|
||||
|
@ -276,7 +279,7 @@ public:
|
|||
: public thrust::unary_function<difference_type, difference_type> {
|
||||
difference_type stride;
|
||||
|
||||
stride_functor(difference_type stride) : stride(stride) {}
|
||||
stride_functor(difference_type _stride) : stride(_stride) {}
|
||||
|
||||
__host__ __device__ difference_type
|
||||
operator()(const difference_type &i) const {
|
||||
|
@ -301,8 +304,8 @@ public:
|
|||
typedef PermutationIterator iterator;
|
||||
|
||||
// construct strided_range for the range [first,last)
|
||||
strided_range(Iterator first, Iterator last, difference_type stride)
|
||||
: first(first), last(last), stride(stride) {}
|
||||
strided_range(Iterator _first, Iterator _last, difference_type _stride)
|
||||
: first(_first), last(_last), stride(_stride) {}
|
||||
|
||||
iterator begin(void) const {
|
||||
return PermutationIterator(
|
||||
|
@ -409,7 +412,7 @@ public:
|
|||
template <typename data_t>
|
||||
class initialize_component_func : public GateFuncBase<data_t> {
|
||||
protected:
|
||||
int nqubits;
|
||||
uint_t nqubits;
|
||||
uint_t offset;
|
||||
uint_t mat_pos;
|
||||
uint_t mat_num;
|
||||
|
@ -825,7 +828,7 @@ public:
|
|||
int qubits_count(void) { return 4; }
|
||||
|
||||
__host__ __device__ void operator()(const uint_t &i) const {
|
||||
uint_t i0, i1, i2, i3, i4, offset, f0, f1, f2;
|
||||
uint_t i0, i1, i2, i3, i4, offset;
|
||||
thrust::complex<data_t> *vec;
|
||||
thrust::complex<data_t> q0, q1, q2, q3, q4, q5, q6, q7;
|
||||
thrust::complex<data_t> q8, q9, q10, q11, q12, q13, q14, q15;
|
||||
|
@ -865,9 +868,6 @@ public:
|
|||
q15 = vec[i0 + offset3 + offset2 + offset1 + offset0];
|
||||
|
||||
offset = 0;
|
||||
f0 = 0;
|
||||
f1 = 0;
|
||||
f2 = 0;
|
||||
for (j = 0; j < 16; j++) {
|
||||
r = pMat[0 + j] * q0;
|
||||
r += pMat[16 + j] * q1;
|
||||
|
@ -936,9 +936,9 @@ public:
|
|||
template <typename data_t>
|
||||
class MatrixMultNxN_LU : public GateFuncBase<data_t> {
|
||||
protected:
|
||||
int nqubits;
|
||||
uint_t nqubits;
|
||||
uint_t matSize;
|
||||
int nswap;
|
||||
uint_t nswap;
|
||||
|
||||
public:
|
||||
MatrixMultNxN_LU(const cvector_t<double> &mat, const reg_t &qb,
|
||||
|
@ -978,7 +978,7 @@ public:
|
|||
params[nqubits + i] = j;
|
||||
}
|
||||
|
||||
if (dmax != 0) {
|
||||
if (dmax > 0) {
|
||||
c0 = matLU[(i << nqubits) + params[nqubits + i]];
|
||||
|
||||
for (j = i + 1; j < matSize; j++) {
|
||||
|
@ -1211,7 +1211,7 @@ protected:
|
|||
public:
|
||||
BatchedMatrixMult2x2(const reg_t &qubits, uint_t imat,
|
||||
uint_t nshots_per_mat) {
|
||||
int i;
|
||||
uint_t i;
|
||||
nqubits_ = qubits.size();
|
||||
|
||||
offset_ = 1ull << qubits[nqubits_ - 1];
|
||||
|
@ -1402,7 +1402,7 @@ public:
|
|||
template <typename data_t>
|
||||
class DiagonalMultNxN : public GateFuncBase<data_t> {
|
||||
protected:
|
||||
int nqubits;
|
||||
uint_t nqubits;
|
||||
|
||||
public:
|
||||
DiagonalMultNxN(const reg_t &qb) { nqubits = qb.size(); }
|
||||
|
@ -1504,7 +1504,7 @@ protected:
|
|||
public:
|
||||
BatchedDiagonalMatrixMult2x2(const reg_t &qubits, uint_t imat,
|
||||
uint_t nshots_per_mat) {
|
||||
int i;
|
||||
uint_t i;
|
||||
nqubits_ = qubits.size();
|
||||
|
||||
mask_ = (1ull << qubits[nqubits_ - 1]);
|
||||
|
@ -1557,7 +1557,6 @@ protected:
|
|||
public:
|
||||
BatchedDiagonalMatrixMultNxN(const uint_t nq, uint_t imat,
|
||||
uint_t nshots_per_mat) {
|
||||
int i;
|
||||
nqubits_ = nq;
|
||||
|
||||
matrix_begin_ = imat;
|
||||
|
@ -1894,9 +1893,8 @@ public:
|
|||
CSwapChunk_func(const reg_t &qubits, uint_t block_bits,
|
||||
thrust::complex<data_t> *pVec0,
|
||||
thrust::complex<data_t> *pVec1, bool wb) {
|
||||
int i;
|
||||
int nqubits;
|
||||
int qubit_t;
|
||||
uint_t nqubits;
|
||||
uint_t qubit_t;
|
||||
nqubits = qubits.size();
|
||||
|
||||
if (qubits[nqubits - 2] < qubits[nqubits - 1]) {
|
||||
|
@ -2078,10 +2076,8 @@ public:
|
|||
thrust::complex<data_t> q, r;
|
||||
thrust::complex<double> m;
|
||||
uint_t mat_size, irow;
|
||||
thrust::complex<data_t> *vec;
|
||||
thrust::complex<double> *pMat;
|
||||
|
||||
vec = this->data_;
|
||||
pMat = this->matrix_;
|
||||
|
||||
mat_size = 1ull << this->nqubits_;
|
||||
|
@ -2492,7 +2488,7 @@ public:
|
|||
operator()(const uint_t &i) const {
|
||||
thrust::complex<data_t> q;
|
||||
thrust::complex<data_t> *vec;
|
||||
double d, dv;
|
||||
double d, dv = 0.0;
|
||||
|
||||
vec = this->data_;
|
||||
q = vec[i];
|
||||
|
@ -2529,7 +2525,7 @@ public:
|
|||
operator()(const uint_t &i) const {
|
||||
thrust::complex<data_t> *vec;
|
||||
thrust::complex<data_t> q0;
|
||||
double d, dv;
|
||||
double d, dv = 0.0;
|
||||
|
||||
vec = this->data_;
|
||||
|
||||
|
@ -2585,7 +2581,7 @@ public:
|
|||
thrust::complex<data_t> q1;
|
||||
thrust::complex<data_t> q0p;
|
||||
thrust::complex<data_t> q1p;
|
||||
double d0, d1, ret, ret_v;
|
||||
double d0, d1, ret, ret_v = 0.0;
|
||||
uint_t idx0, idx1;
|
||||
|
||||
vec = this->data_;
|
||||
|
|
|
@ -955,7 +955,9 @@ void QubitVector<data_t>::allocate_mem(size_t data_size) {
|
|||
if (data_ == nullptr) {
|
||||
#if !defined(_WIN64) && !defined(_WIN32)
|
||||
void *data = nullptr;
|
||||
posix_memalign(&data, 64, sizeof(std::complex<data_t>) * data_size);
|
||||
if (posix_memalign(&data, 64, sizeof(std::complex<data_t>) * data_size) !=
|
||||
0)
|
||||
throw std::runtime_error("Cannot allocate memory by posix_memalign");
|
||||
data_ = reinterpret_cast<std::complex<data_t> *>(data);
|
||||
#else
|
||||
data_ = reinterpret_cast<std::complex<data_t> *>(
|
||||
|
@ -969,7 +971,8 @@ void QubitVector<data_t>::allocate_checkpoint(size_t data_size) {
|
|||
free_checkpoint();
|
||||
#if !defined(_WIN64) && !defined(_WIN32)
|
||||
void *data = nullptr;
|
||||
posix_memalign(&data, 64, sizeof(std::complex<data_t>) * data_size);
|
||||
if (posix_memalign(&data, 64, sizeof(std::complex<data_t>) * data_size) != 0)
|
||||
throw std::runtime_error("Cannot allocate memory by posix_memalign");
|
||||
checkpoint_ = reinterpret_cast<std::complex<data_t> *>(data);
|
||||
#else
|
||||
checkpoint_ = reinterpret_cast<std::complex<data_t> *>(
|
||||
|
@ -1765,13 +1768,13 @@ void QubitVector<data_t>::apply_chunk_swap(const reg_t &qubits,
|
|||
if (write_back) {
|
||||
#pragma omp parallel for if (num_qubits_ > omp_threshold_ && omp_threads_ > 1) \
|
||||
num_threads(omp_threads_)
|
||||
for (int_t k = 0; k < data_size_; ++k) {
|
||||
for (int_t k = 0; k < (int_t)data_size_; ++k) {
|
||||
std::swap(data_[k], src.data_[k]);
|
||||
}
|
||||
} else {
|
||||
#pragma omp parallel for if (num_qubits_ > omp_threshold_ && omp_threads_ > 1) \
|
||||
num_threads(omp_threads_)
|
||||
for (int_t k = 0; k < data_size_; ++k) {
|
||||
for (int_t k = 0; k < (int_t)data_size_; ++k) {
|
||||
data_[k] = src.data_[k];
|
||||
}
|
||||
}
|
||||
|
@ -1803,7 +1806,7 @@ void QubitVector<data_t>::apply_chunk_swap(const reg_t &qubits,
|
|||
if (q0 >= num_qubits_) { // exchange whole of chunk each other
|
||||
#pragma omp parallel for if (num_qubits_ > omp_threshold_ && omp_threads_ > 1) \
|
||||
num_threads(omp_threads_)
|
||||
for (int_t k = 0; k < data_size_; ++k) {
|
||||
for (int_t k = 0; k < (int_t)data_size_; ++k) {
|
||||
data_[k] = recv_buffer_[k];
|
||||
}
|
||||
} else {
|
||||
|
@ -1824,13 +1827,13 @@ void QubitVector<data_t>::apply_chunk_swap(QubitVector<data_t> &src,
|
|||
if (src.chunk_index_ == chunk_index_) {
|
||||
#pragma omp parallel for if (num_qubits_ > omp_threshold_ && omp_threads_ > 1) \
|
||||
num_threads(omp_threads_)
|
||||
for (int_t k = 0; k < size; ++k) {
|
||||
for (int_t k = 0; k < (int_t)size; ++k) {
|
||||
data_[dest_offset + k] = src.recv_buffer_[src_offset + k];
|
||||
}
|
||||
} else {
|
||||
#pragma omp parallel for if (num_qubits_ > omp_threshold_ && omp_threads_ > 1) \
|
||||
num_threads(omp_threads_)
|
||||
for (int_t k = 0; k < size; ++k) {
|
||||
for (int_t k = 0; k < (int_t)size; ++k) {
|
||||
std::swap(data_[dest_offset + k], src.data_[src_offset + k]);
|
||||
}
|
||||
}
|
||||
|
@ -1838,8 +1841,8 @@ void QubitVector<data_t>::apply_chunk_swap(QubitVector<data_t> &src,
|
|||
|
||||
template <typename data_t>
|
||||
void QubitVector<data_t>::apply_multi_swaps(const reg_t &qubits) {
|
||||
for (int_t i = 0; i < qubits.size(); i += 10) {
|
||||
int_t n = 10;
|
||||
for (uint_t i = 0; i < qubits.size(); i += 10) {
|
||||
uint_t n = 10;
|
||||
if (i + n > qubits.size())
|
||||
n = qubits.size() - i;
|
||||
|
||||
|
@ -1850,17 +1853,17 @@ void QubitVector<data_t>::apply_multi_swaps(const reg_t &qubits) {
|
|||
|
||||
auto lambda = [&](const indexes_t &inds) -> void {
|
||||
cvector_t<data_t> cache(size);
|
||||
for (int_t i = 0; i < size; i++)
|
||||
cache[i] = data_[inds[i]];
|
||||
for (uint_t ii = 0; ii < size; ii++)
|
||||
cache[ii] = data_[inds[ii]];
|
||||
|
||||
for (int_t i = 0; i < size; i++) {
|
||||
uint_t pos = i;
|
||||
for (int_t j = 0; j < nq; j += 2) {
|
||||
for (uint_t ii = 0; ii < size; ii++) {
|
||||
uint_t pos = ii;
|
||||
for (uint_t j = 0; j < nq; j += 2) {
|
||||
if ((((pos >> j) & 1) ^ ((pos >> (j + 1)) & 1)) != 0) {
|
||||
pos ^= ((1ull << j) | (1ull << (j + 1)));
|
||||
}
|
||||
}
|
||||
data_[inds[i]] = cache[pos];
|
||||
data_[inds[ii]] = cache[pos];
|
||||
}
|
||||
};
|
||||
apply_lambda(lambda, qubits_swap);
|
||||
|
|
|
@ -819,17 +819,17 @@ void QubitVectorThrust<data_t>::initialize_component(
|
|||
std::sort(qubits_sorted.begin(), qubits_sorted.end());
|
||||
|
||||
auto qubits_param = qubits;
|
||||
int i;
|
||||
uint_t i;
|
||||
for (i = 0; i < qubits.size(); i++)
|
||||
qubits_param.push_back(qubits_sorted[i]);
|
||||
|
||||
int nbit = chunk_.container()->matrix_bits();
|
||||
uint_t nbit = chunk_.container()->matrix_bits();
|
||||
if (nbit > qubits.size())
|
||||
nbit = qubits.size();
|
||||
|
||||
uint_t dim = 1ull << qubits.size();
|
||||
uint_t sub_dim = 1ull << nbit;
|
||||
for (uint_t i = 0; i < dim; i += sub_dim) {
|
||||
for (i = 0; i < dim; i += sub_dim) {
|
||||
cvector_t<double> state(sub_dim);
|
||||
for (uint_t j = 0; j < sub_dim; j++)
|
||||
state[j] = state0[dim - sub_dim - i + j];
|
||||
|
@ -872,7 +872,7 @@ uint_t QubitVectorThrust<data_t>::chunk_setup(int chunk_bits, int num_qubits,
|
|||
|
||||
if (chunk_manager_->chunk_bits() == chunk_bits &&
|
||||
chunk_manager_->num_qubits() == num_qubits) {
|
||||
bool mapped = chunk_manager_->MapChunk(chunk_, 0);
|
||||
chunk_manager_->MapChunk(chunk_, 0);
|
||||
chunk_.set_chunk_index(chunk_index_);
|
||||
return num_local_chunks;
|
||||
}
|
||||
|
@ -903,8 +903,8 @@ uint_t QubitVectorThrust<data_t>::chunk_setup(int chunk_bits, int num_qubits,
|
|||
recv_chunk_.unmap();
|
||||
|
||||
// mapping/setting chunk
|
||||
bool mapped = chunk_manager_->MapChunk(chunk_, 0);
|
||||
chunk_.set_chunk_index(chunk_index_);
|
||||
chunk_manager_->MapChunk(chunk_, 0);
|
||||
|
||||
return num_chunks_allocated;
|
||||
}
|
||||
|
@ -932,7 +932,7 @@ QubitVectorThrust<data_t>::chunk_setup(const QubitVectorThrust<data_t> &base,
|
|||
|
||||
// mapping/setting chunk
|
||||
chunk_manager_ = base.chunk_manager_;
|
||||
bool mapped = chunk_manager_->MapChunk(chunk_, 0);
|
||||
chunk_manager_->MapChunk(chunk_, 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -1260,7 +1260,7 @@ void QubitVectorThrust<data_t>::initialize_from_vector(const list_t &statevec) {
|
|||
int_t i;
|
||||
#pragma omp parallel for if (num_qubits_ > omp_threshold_ && omp_threads_ > 1) \
|
||||
num_threads(omp_threads_)
|
||||
for (i = 0; i < data_size_; i++) {
|
||||
for (i = 0; i < (int_t)data_size_; i++) {
|
||||
tmp[i] = statevec[i];
|
||||
}
|
||||
initialize_from_data(&tmp[0], tmp.size());
|
||||
|
@ -1322,7 +1322,7 @@ void QubitVectorThrust<data_t>::initialize_creg(
|
|||
if (chunk_.pos() == 0) {
|
||||
chunk_.container()->allocate_creg(num_cmem_bits_, num_creg_bits_);
|
||||
|
||||
int_t i;
|
||||
uint_t i;
|
||||
for (i = 0; i < num_register; i++) {
|
||||
if (register_hex[register_hex.size() - 1 - i] == '0') {
|
||||
store_cregister(i, 0);
|
||||
|
@ -1528,7 +1528,6 @@ void QubitVectorThrust<data_t>::apply_multiplexer(
|
|||
for (const auto &q : control_qubits) {
|
||||
qubits.push_back(q);
|
||||
}
|
||||
size_t N = qubits.size();
|
||||
|
||||
cvector_t<double> matMP(DIM * DIM, 0.0);
|
||||
uint_t b, i, j;
|
||||
|
@ -1627,7 +1626,7 @@ void QubitVectorThrust<data_t>::apply_mcx(const reg_t &qubits) {
|
|||
return;
|
||||
|
||||
if (register_blocking_) {
|
||||
int i;
|
||||
uint_t i;
|
||||
uint_t mask = 0;
|
||||
for (i = 0; i < qubits.size() - 1; i++) {
|
||||
mask |= (1ull << qubits[i]);
|
||||
|
@ -1645,7 +1644,7 @@ void QubitVectorThrust<data_t>::apply_mcy(const reg_t &qubits) {
|
|||
return;
|
||||
|
||||
if (register_blocking_) {
|
||||
int i;
|
||||
uint_t i;
|
||||
uint_t mask = 0;
|
||||
for (i = 0; i < qubits.size() - 1; i++) {
|
||||
mask |= (1ull << qubits[i]);
|
||||
|
@ -1678,7 +1677,7 @@ template <typename data_t>
|
|||
void QubitVectorThrust<data_t>::apply_chunk_swap(const reg_t &qubits,
|
||||
QubitVectorThrust<data_t> &src,
|
||||
bool write_back) {
|
||||
int q0, q1, t;
|
||||
uint_t q0, q1, t;
|
||||
|
||||
q0 = qubits[0];
|
||||
q1 = qubits[1];
|
||||
|
@ -1759,7 +1758,7 @@ void QubitVectorThrust<data_t>::apply_chunk_swap(const reg_t &qubits,
|
|||
template <typename data_t>
|
||||
void QubitVectorThrust<data_t>::apply_chunk_swap(const reg_t &qubits,
|
||||
uint_t remote_chunk_index) {
|
||||
int q0, q1, t;
|
||||
uint_t q0, q1, t;
|
||||
|
||||
q0 = qubits[qubits.size() - 2];
|
||||
q1 = qubits[qubits.size() - 1];
|
||||
|
@ -1840,7 +1839,7 @@ void QubitVectorThrust<data_t>::apply_mcphase(
|
|||
return;
|
||||
|
||||
if (register_blocking_) {
|
||||
int i;
|
||||
uint_t i;
|
||||
uint_t mask = 0;
|
||||
for (i = 0; i < qubits.size() - 1; i++) {
|
||||
mask |= (1ull << qubits[i]);
|
||||
|
@ -1875,7 +1874,7 @@ void QubitVectorThrust<data_t>::apply_mcu(const reg_t &qubits,
|
|||
return;
|
||||
} else {
|
||||
if (register_blocking_) {
|
||||
int i;
|
||||
uint_t i;
|
||||
uint_t mask = 0;
|
||||
for (i = 0; i < qubits.size() - 1; i++) {
|
||||
mask |= (1ull << qubits[i]);
|
||||
|
@ -1897,7 +1896,7 @@ void QubitVectorThrust<data_t>::apply_mcu(const reg_t &qubits,
|
|||
return;
|
||||
} else {
|
||||
if (register_blocking_) {
|
||||
int i;
|
||||
uint_t i;
|
||||
uint_t mask = 0;
|
||||
for (i = 0; i < qubits.size() - 1; i++) {
|
||||
mask |= (1ull << qubits[i]);
|
||||
|
@ -2252,7 +2251,7 @@ template <typename data_t>
|
|||
void QubitVectorThrust<data_t>::apply_batched_measure(
|
||||
const reg_t &qubits, std::vector<RngEngine> &rng, const reg_t &cmemory,
|
||||
const reg_t &cregs) {
|
||||
const int_t DIM = 1 << qubits.size();
|
||||
const uint_t DIM = 1 << qubits.size();
|
||||
uint_t i, count = 1;
|
||||
if (enable_batch_) {
|
||||
if (chunk_.pos() != 0) {
|
||||
|
@ -2386,7 +2385,7 @@ public:
|
|||
template <typename data_t>
|
||||
void QubitVectorThrust<data_t>::apply_batched_reset(
|
||||
const reg_t &qubits, std::vector<RngEngine> &rng) {
|
||||
const int_t DIM = 1 << qubits.size();
|
||||
const uint_t DIM = 1 << qubits.size();
|
||||
uint_t i, count = 1;
|
||||
if (enable_batch_) {
|
||||
if (chunk_.pos() != 0) {
|
||||
|
@ -2547,7 +2546,6 @@ public:
|
|||
uint_t *mask;
|
||||
uint_t val = 1;
|
||||
n64 = (this->num_creg_bits_ + 63) >> 6;
|
||||
int j;
|
||||
|
||||
mask = this->params_;
|
||||
|
||||
|
@ -2686,7 +2684,7 @@ void QubitVectorThrust<data_t>::batched_expval_pauli(
|
|||
std::vector<double> &val, const reg_t &qubits, const std::string &pauli,
|
||||
bool variance, std::complex<double> param, bool last,
|
||||
const complex_t initial_phase) const {
|
||||
uint_t i, count = 1;
|
||||
uint_t count = 1;
|
||||
if (enable_batch_) {
|
||||
if (chunk_.pos() != 0) {
|
||||
return; // first chunk execute all in batch
|
||||
|
@ -2898,12 +2896,11 @@ void QubitVectorThrust<data_t>::apply_batched_pauli_ops(
|
|||
}
|
||||
uint_t count = ops.size();
|
||||
int num_inner_threads = omp_get_max_threads() / num_threads_per_group_;
|
||||
int_t i;
|
||||
|
||||
reg_t params(4 * count);
|
||||
|
||||
auto count_paulis = [this, ¶ms, ops](int_t i) {
|
||||
int_t j;
|
||||
uint_t j;
|
||||
uint_t x_max = 0;
|
||||
uint_t num_y = 0;
|
||||
uint_t x_mask = 0;
|
||||
|
@ -2975,7 +2972,6 @@ public:
|
|||
thrust::complex<data_t> q0, q1;
|
||||
thrust::complex<data_t> *vec0;
|
||||
thrust::complex<data_t> *vec1;
|
||||
double p, p0, p1, rnd;
|
||||
|
||||
uint_t iChunk = i >> this->chunk_bits_;
|
||||
double scale =
|
||||
|
@ -3012,7 +3008,7 @@ public:
|
|||
__host__ __device__ void
|
||||
run_with_cache(uint_t _tid, uint_t _idx,
|
||||
thrust::complex<data_t> *_cache) const {
|
||||
uint_t j, threadID;
|
||||
uint_t j;
|
||||
thrust::complex<data_t> q, r;
|
||||
thrust::complex<double> m;
|
||||
uint_t mat_size, irow;
|
||||
|
@ -3066,7 +3062,6 @@ public:
|
|||
__host__ __device__ void operator()(const uint_t &i) const {
|
||||
uint_t iChunk = i;
|
||||
double p0, p1, rnd;
|
||||
bool mult = false;
|
||||
|
||||
p0 = reduce_[iChunk * reduce_buf_size_];
|
||||
probs_[iChunk + QV_RESET_CURRENT_PROB * prob_buf_size_] = p0;
|
||||
|
@ -3103,7 +3098,6 @@ void QubitVectorThrust<data_t>::apply_batched_kraus(
|
|||
std::vector<RngEngine> &rng) {
|
||||
const size_t N = qubits.size();
|
||||
uint_t i, count;
|
||||
double ret;
|
||||
|
||||
count = chunk_.container()->num_chunks();
|
||||
|
||||
|
@ -3266,7 +3260,7 @@ void QubitVectorThrust<data_t>::apply_bfunc(const Operations::Op &op) {
|
|||
return; // first chunk execute all in batch
|
||||
|
||||
reg_t params;
|
||||
int_t i, n64, n, iparam;
|
||||
uint_t i, n64, n, iparam;
|
||||
|
||||
// registers to be updated
|
||||
for (i = 0; i < op.registers.size(); i++)
|
||||
|
@ -3377,7 +3371,7 @@ void QubitVectorThrust<data_t>::apply_roerror(const Operations::Op &op,
|
|||
|
||||
reg_t params;
|
||||
std::vector<double> probs;
|
||||
int_t i, j, offset;
|
||||
uint_t i, offset;
|
||||
|
||||
for (i = 0; i < op.memory.size(); i++)
|
||||
params.push_back(op.memory[i]);
|
||||
|
|
|
@ -43,7 +43,7 @@ namespace {
|
|||
/** Remember we cannot use STL (or memcpy) **/
|
||||
template <typename T, typename U>
|
||||
void copy(T dest, const U orig, size_t size) {
|
||||
for (auto i = 0; i < size; ++i)
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
dest[i] = orig[i];
|
||||
}
|
||||
|
||||
|
@ -1114,7 +1114,8 @@ Avx apply_diagonal_matrix_avx<double>(
|
|||
#endif
|
||||
#if !defined(_WIN64) && !defined(_WIN32)
|
||||
void *data = nullptr;
|
||||
posix_memalign(&data, 64, sizeof(std::complex<double>) * 2);
|
||||
if (posix_memalign(&data, 64, sizeof(std::complex<double>) * 2) != 0)
|
||||
throw std::runtime_error("Cannot allocate memory by posix_memalign");
|
||||
auto double_tmp = reinterpret_cast<std::complex<double> *>(data);
|
||||
#else
|
||||
auto double_tmp = reinterpret_cast<std::complex<double> *>(
|
||||
|
@ -1122,7 +1123,7 @@ Avx apply_diagonal_matrix_avx<double>(
|
|||
#endif
|
||||
|
||||
size_t q0_mask_ = 0;
|
||||
for (int i = 0; i < qregs_size; ++i) {
|
||||
for (size_t i = 0; i < qregs_size; ++i) {
|
||||
if (qregs[i] == 0) {
|
||||
q0_mask_ = 1UL << i;
|
||||
break;
|
||||
|
@ -1135,9 +1136,9 @@ Avx apply_diagonal_matrix_avx<double>(
|
|||
|
||||
#pragma omp for
|
||||
for (int64_t k = 0; k < END; k += 1) {
|
||||
const auto base = k << (batch + 1);
|
||||
const auto until = base + (1UL << (batch + 1));
|
||||
for (auto i = base; i < until; i += 2) {
|
||||
const int64_t base = k << (batch + 1);
|
||||
const int64_t until = base + (1UL << (batch + 1));
|
||||
for (int64_t i = base; i < until; i += 2) {
|
||||
auto tgt_qv_data =
|
||||
_mm256_load(reinterpret_cast<double *>(&(qv_data[i])));
|
||||
auto input_data = _load_diagonal_input(input_vec, double_tmp, i, qregs,
|
||||
|
@ -1171,7 +1172,8 @@ Avx apply_diagonal_matrix_avx<float>(float *qv_data_, const uint64_t data_size,
|
|||
{
|
||||
#if !defined(_WIN64) && !defined(_WIN32)
|
||||
void *data = nullptr;
|
||||
posix_memalign(&data, 64, sizeof(std::complex<float>) * 4);
|
||||
if (posix_memalign(&data, 64, sizeof(std::complex<float>) * 4) != 0)
|
||||
throw std::runtime_error("Cannot allocate memory by posix_memalign");
|
||||
auto float_tmp = reinterpret_cast<std::complex<float> *>(data);
|
||||
#else
|
||||
auto float_tmp = reinterpret_cast<std::complex<float> *>(
|
||||
|
@ -1199,9 +1201,9 @@ Avx apply_diagonal_matrix_avx<float>(float *qv_data_, const uint64_t data_size,
|
|||
|
||||
#pragma omp for
|
||||
for (int64_t k = 0; k < END; k += 1) {
|
||||
const auto base = k << (batch + 2);
|
||||
const auto until = base + (1UL << (batch + 2));
|
||||
for (auto i = base; i < until; i += 4) {
|
||||
const int64_t base = k << (batch + 2);
|
||||
const int64_t until = base + (1UL << (batch + 2));
|
||||
for (int64_t i = base; i < until; i += 4) {
|
||||
m256_t<float> tgt_qv_data =
|
||||
_mm256_load(reinterpret_cast<float *>(&(qv_data[i])));
|
||||
auto input_data = _load_diagonal_input(input_vec, float_tmp, i, qregs,
|
||||
|
|
|
@ -41,6 +41,7 @@ class Executor : public CircuitExecutor::ParallelStateExecutor<state_t>,
|
|||
using Base = CircuitExecutor::MultiStateExecutor<state_t>;
|
||||
using BasePar = CircuitExecutor::ParallelStateExecutor<state_t>;
|
||||
using BaseBatch = CircuitExecutor::BatchShotsExecutor<state_t>;
|
||||
using Base::sample_measure;
|
||||
|
||||
protected:
|
||||
public:
|
||||
|
@ -434,7 +435,7 @@ bool Executor<state_t>::apply_branching_op(CircuitExecutor::Branch &root,
|
|||
|
||||
template <class state_t>
|
||||
void Executor<state_t>::initialize_qreg(uint_t num_qubits) {
|
||||
int_t i;
|
||||
uint_t i;
|
||||
|
||||
for (i = 0; i < Base::states_.size(); i++) {
|
||||
Base::states_[i].qreg().set_num_qubits(BasePar::chunk_bits_);
|
||||
|
@ -442,8 +443,8 @@ void Executor<state_t>::initialize_qreg(uint_t num_qubits) {
|
|||
|
||||
if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
|
||||
#pragma omp parallel for
|
||||
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
|
||||
for (int_t iChunk = Base::top_state_of_group_[ig];
|
||||
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
|
||||
for (uint_t iChunk = Base::top_state_of_group_[ig];
|
||||
iChunk < Base::top_state_of_group_[ig + 1]; iChunk++) {
|
||||
if (Base::global_state_index_ + iChunk == 0 ||
|
||||
this->num_qubits_ == this->chunk_bits_) {
|
||||
|
@ -482,7 +483,7 @@ auto Executor<state_t>::move_to_vector(void) {
|
|||
state.resize(Base::num_local_states_ << BasePar::chunk_bits_);
|
||||
|
||||
#pragma omp parallel for if (BasePar::chunk_omp_parallel_) private(iChunk)
|
||||
for (iChunk = 1; iChunk < Base::states_.size(); iChunk++) {
|
||||
for (iChunk = 1; iChunk < (int_t)Base::states_.size(); iChunk++) {
|
||||
auto tmp = Base::states_[iChunk].qreg().move_to_vector();
|
||||
uint_t j, offset = iChunk << BasePar::chunk_bits_;
|
||||
for (j = 0; j < tmp.size(); j++) {
|
||||
|
@ -511,7 +512,7 @@ auto Executor<state_t>::copy_to_vector(void) {
|
|||
state.resize(Base::num_local_states_ << BasePar::chunk_bits_);
|
||||
|
||||
#pragma omp parallel for if (BasePar::chunk_omp_parallel_) private(iChunk)
|
||||
for (iChunk = 1; iChunk < Base::states_.size(); iChunk++) {
|
||||
for (iChunk = 1; iChunk < (int_t)Base::states_.size(); iChunk++) {
|
||||
auto tmp = Base::states_[iChunk].qreg().copy_to_vector();
|
||||
uint_t j, offset = iChunk << BasePar::chunk_bits_;
|
||||
for (j = 0; j < tmp.size(); j++) {
|
||||
|
@ -553,12 +554,12 @@ double Executor<state_t>::expval_pauli(const reg_t &qubits,
|
|||
reg_t qubits_out_chunk;
|
||||
std::string pauli_in_chunk;
|
||||
std::string pauli_out_chunk;
|
||||
int_t i, n;
|
||||
uint_t n;
|
||||
double expval(0.);
|
||||
|
||||
// get inner/outer chunk pauli string
|
||||
n = pauli.size();
|
||||
for (i = 0; i < n; i++) {
|
||||
for (uint_t i = 0; i < n; i++) {
|
||||
if (qubits[i] < BasePar::chunk_bits_) {
|
||||
qubits_in_chunk.push_back(qubits[i]);
|
||||
pauli_in_chunk.push_back(pauli[n - i - 1]);
|
||||
|
@ -583,17 +584,17 @@ double Executor<state_t>::expval_pauli(const reg_t &qubits,
|
|||
if (x_mask != 0) { // pairing state is out of chunk
|
||||
bool on_same_process = true;
|
||||
#ifdef AER_MPI
|
||||
int proc_bits = 0;
|
||||
uint_t proc_bits = 0;
|
||||
uint_t procs = Base::distributed_procs_;
|
||||
while (procs > 1) {
|
||||
if ((procs & 1) != 0) {
|
||||
proc_bits = -1;
|
||||
proc_bits = 0;
|
||||
break;
|
||||
}
|
||||
proc_bits++;
|
||||
procs >>= 1;
|
||||
}
|
||||
if (x_mask & (~((1ull << (Base::num_qubits_ - proc_bits)) - 1)) !=
|
||||
if ((x_mask & (~((1ull << (Base::num_qubits_ - proc_bits)) - 1))) !=
|
||||
0) { // data exchange between processes is required
|
||||
on_same_process = false;
|
||||
}
|
||||
|
@ -609,8 +610,8 @@ double Executor<state_t>::expval_pauli(const reg_t &qubits,
|
|||
auto apply_expval_pauli_chunk = [this, x_mask, z_mask, x_max, mask_u,
|
||||
mask_l, qubits_in_chunk,
|
||||
pauli_in_chunk, phase](int_t iGroup) {
|
||||
double expval = 0.0;
|
||||
for (int_t iChunk = Base::top_state_of_group_[iGroup];
|
||||
double expval_t = 0.0;
|
||||
for (uint_t iChunk = Base::top_state_of_group_[iGroup];
|
||||
iChunk < Base::top_state_of_group_[iGroup + 1]; iChunk++) {
|
||||
uint_t pair_chunk = iChunk ^ x_mask;
|
||||
if (iChunk < pair_chunk) {
|
||||
|
@ -618,20 +619,20 @@ double Executor<state_t>::expval_pauli(const reg_t &qubits,
|
|||
z_count = AER::Utils::popcount(iChunk & z_mask);
|
||||
z_count_pair = AER::Utils::popcount(pair_chunk & z_mask);
|
||||
|
||||
expval += Base::states_[iChunk - Base::global_state_index_]
|
||||
expval_t += Base::states_[iChunk - Base::global_state_index_]
|
||||
.qreg()
|
||||
.expval_pauli(qubits_in_chunk, pauli_in_chunk,
|
||||
Base::states_[pair_chunk].qreg(),
|
||||
z_count, z_count_pair, phase);
|
||||
}
|
||||
}
|
||||
return expval;
|
||||
return expval_t;
|
||||
};
|
||||
expval += Utils::apply_omp_parallel_for_reduction(
|
||||
(BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1), 0,
|
||||
Base::num_global_states_ / 2, apply_expval_pauli_chunk);
|
||||
} else {
|
||||
for (int_t i = 0; i < Base::num_global_states_ / 2; i++) {
|
||||
for (uint_t i = 0; i < Base::num_global_states_ / 2; i++) {
|
||||
uint_t iChunk = ((i << 1) & mask_u) | (i & mask_l);
|
||||
uint_t pair_chunk = iChunk ^ x_mask;
|
||||
uint_t iProc = BasePar::get_process_by_chunk(pair_chunk);
|
||||
|
@ -675,9 +676,9 @@ double Executor<state_t>::expval_pauli(const reg_t &qubits,
|
|||
z_mask >>= BasePar::chunk_bits_;
|
||||
if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
|
||||
#pragma omp parallel for reduction(+ : expval)
|
||||
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
|
||||
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
|
||||
double e_tmp = 0.0;
|
||||
for (int_t iChunk = Base::top_state_of_group_[ig];
|
||||
for (uint_t iChunk = Base::top_state_of_group_[ig];
|
||||
iChunk < Base::top_state_of_group_[ig + 1]; iChunk++) {
|
||||
double sign = 1.0;
|
||||
if (z_mask && (AER::Utils::popcount(
|
||||
|
@ -690,7 +691,7 @@ double Executor<state_t>::expval_pauli(const reg_t &qubits,
|
|||
expval += e_tmp;
|
||||
}
|
||||
} else {
|
||||
for (i = 0; i < Base::states_.size(); i++) {
|
||||
for (uint_t i = 0; i < Base::states_.size(); i++) {
|
||||
double sign = 1.0;
|
||||
if (z_mask &&
|
||||
(AER::Utils::popcount((i + Base::global_state_index_) & z_mask) &
|
||||
|
@ -704,15 +705,15 @@ double Executor<state_t>::expval_pauli(const reg_t &qubits,
|
|||
} else { // all bits are inside chunk
|
||||
if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
|
||||
#pragma omp parallel for reduction(+ : expval)
|
||||
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
|
||||
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
|
||||
double e_tmp = 0.0;
|
||||
for (int_t iChunk = Base::top_state_of_group_[ig];
|
||||
for (uint_t iChunk = Base::top_state_of_group_[ig];
|
||||
iChunk < Base::top_state_of_group_[ig + 1]; iChunk++)
|
||||
e_tmp += Base::states_[iChunk].qreg().expval_pauli(qubits, pauli);
|
||||
expval += e_tmp;
|
||||
}
|
||||
} else {
|
||||
for (i = 0; i < Base::states_.size(); i++)
|
||||
for (uint_t i = 0; i < Base::states_.size(); i++)
|
||||
expval += Base::states_[i].qreg().expval_pauli(qubits, pauli);
|
||||
}
|
||||
}
|
||||
|
@ -777,10 +778,10 @@ void Executor<state_t>::apply_save_density_matrix(const Operations::Op &op,
|
|||
double sum = 0.0;
|
||||
if (BasePar::chunk_omp_parallel_) {
|
||||
#pragma omp parallel for reduction(+ : sum)
|
||||
for (int_t i = 0; i < Base::states_.size(); i++)
|
||||
for (int_t i = 0; i < (int_t)Base::states_.size(); i++)
|
||||
sum += Base::states_[i].qreg().norm();
|
||||
} else {
|
||||
for (int_t i = 0; i < Base::states_.size(); i++)
|
||||
for (uint_t i = 0; i < Base::states_.size(); i++)
|
||||
sum += Base::states_[i].qreg().norm();
|
||||
}
|
||||
#ifdef AER_MPI
|
||||
|
@ -906,7 +907,7 @@ template <class state_t>
|
|||
rvector_t Executor<state_t>::measure_probs(const reg_t &qubits) const {
|
||||
uint_t dim = 1ull << qubits.size();
|
||||
rvector_t sum(dim, 0.0);
|
||||
int_t i, j, k;
|
||||
uint_t i, j, k;
|
||||
reg_t qubits_in_chunk;
|
||||
reg_t qubits_out_chunk;
|
||||
|
||||
|
@ -916,8 +917,8 @@ rvector_t Executor<state_t>::measure_probs(const reg_t &qubits) const {
|
|||
if (qubits_in_chunk.size() > 0) {
|
||||
if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
|
||||
#pragma omp parallel for private(i, j, k)
|
||||
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
|
||||
for (int_t i = Base::top_state_of_group_[ig];
|
||||
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
|
||||
for (i = Base::top_state_of_group_[ig];
|
||||
i < Base::top_state_of_group_[ig + 1]; i++) {
|
||||
auto chunkSum =
|
||||
Base::states_[i].qreg().probabilities(qubits_in_chunk);
|
||||
|
@ -983,8 +984,8 @@ rvector_t Executor<state_t>::measure_probs(const reg_t &qubits) const {
|
|||
} else { // there is no bit in chunk
|
||||
if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
|
||||
#pragma omp parallel for private(i, j, k)
|
||||
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
|
||||
for (int_t i = Base::top_state_of_group_[ig];
|
||||
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
|
||||
for (i = Base::top_state_of_group_[ig];
|
||||
i < Base::top_state_of_group_[ig + 1]; i++) {
|
||||
auto nr = std::real(Base::states_[i].qreg().norm());
|
||||
int idx = 0;
|
||||
|
@ -1002,7 +1003,7 @@ rvector_t Executor<state_t>::measure_probs(const reg_t &qubits) const {
|
|||
} else {
|
||||
for (i = 0; i < Base::states_.size(); i++) {
|
||||
auto nr = std::real(Base::states_[i].qreg().norm());
|
||||
int idx = 0;
|
||||
uint_t idx = 0;
|
||||
for (k = 0; k < qubits_out_chunk.size(); k++) {
|
||||
if ((((i + Base::global_state_index_) << (BasePar::chunk_bits_)) >>
|
||||
qubits_out_chunk[k]) &
|
||||
|
@ -1058,14 +1059,14 @@ void Executor<state_t>::measure_reset_update(const std::vector<uint_t> &qubits,
|
|||
|
||||
if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
|
||||
#pragma omp parallel for
|
||||
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
|
||||
for (int_t ic = Base::top_state_of_group_[ig];
|
||||
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
|
||||
for (uint_t ic = Base::top_state_of_group_[ig];
|
||||
ic < Base::top_state_of_group_[ig + 1]; ic++)
|
||||
Base::states_[ic].apply_diagonal_matrix(qubits, mdiag);
|
||||
}
|
||||
} else {
|
||||
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
|
||||
for (int_t ic = Base::top_state_of_group_[ig];
|
||||
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
|
||||
for (uint_t ic = Base::top_state_of_group_[ig];
|
||||
ic < Base::top_state_of_group_[ig + 1]; ic++)
|
||||
Base::states_[ic].apply_diagonal_matrix(qubits, mdiag);
|
||||
}
|
||||
|
@ -1085,14 +1086,14 @@ void Executor<state_t>::measure_reset_update(const std::vector<uint_t> &qubits,
|
|||
|
||||
if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
|
||||
#pragma omp parallel for
|
||||
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
|
||||
for (int_t ic = Base::top_state_of_group_[ig];
|
||||
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
|
||||
for (uint_t ic = Base::top_state_of_group_[ig];
|
||||
ic < Base::top_state_of_group_[ig + 1]; ic++)
|
||||
Base::states_[ic].apply_diagonal_matrix(qubits, mdiag);
|
||||
}
|
||||
} else {
|
||||
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
|
||||
for (int_t ic = Base::top_state_of_group_[ig];
|
||||
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
|
||||
for (uint_t ic = Base::top_state_of_group_[ig];
|
||||
ic < Base::top_state_of_group_[ig + 1]; ic++)
|
||||
Base::states_[ic].apply_diagonal_matrix(qubits, mdiag);
|
||||
}
|
||||
|
@ -1120,20 +1121,20 @@ void Executor<state_t>::measure_reset_update(const std::vector<uint_t> &qubits,
|
|||
// apply permutation to swap state
|
||||
if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
|
||||
#pragma omp parallel for
|
||||
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
|
||||
for (int_t ic = Base::top_state_of_group_[ig];
|
||||
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
|
||||
for (uint_t ic = Base::top_state_of_group_[ig];
|
||||
ic < Base::top_state_of_group_[ig + 1]; ic++)
|
||||
Base::states_[ic].qreg().apply_matrix(qubits, perm);
|
||||
}
|
||||
} else {
|
||||
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
|
||||
for (int_t ic = Base::top_state_of_group_[ig];
|
||||
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
|
||||
for (uint_t ic = Base::top_state_of_group_[ig];
|
||||
ic < Base::top_state_of_group_[ig + 1]; ic++)
|
||||
Base::states_[ic].qreg().apply_matrix(qubits, perm);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (int_t i = 0; i < qubits.size(); i++) {
|
||||
for (int_t i = 0; i < (int_t)qubits.size(); i++) {
|
||||
if (((final_state >> i) & 1) != ((meas_state >> i) & 1)) {
|
||||
BasePar::apply_chunk_x(qubits[i]);
|
||||
}
|
||||
|
@ -1147,7 +1148,7 @@ template <class state_t>
|
|||
std::vector<reg_t> Executor<state_t>::sample_measure(const reg_t &qubits,
|
||||
uint_t shots,
|
||||
RngEngine &rng) const {
|
||||
int_t i, j;
|
||||
uint_t i, j;
|
||||
// Generate flat register for storing
|
||||
std::vector<double> rnds;
|
||||
rnds.reserve(shots);
|
||||
|
@ -1162,8 +1163,8 @@ std::vector<reg_t> Executor<state_t>::sample_measure(const reg_t &qubits,
|
|||
// calculate per chunk sum
|
||||
if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
|
||||
#pragma omp parallel for
|
||||
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
|
||||
for (int_t ic = Base::top_state_of_group_[ig];
|
||||
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
|
||||
for (uint_t ic = Base::top_state_of_group_[ig];
|
||||
ic < Base::top_state_of_group_[ig + 1]; ic++) {
|
||||
bool batched = Base::states_[ic].qreg().enable_batch(
|
||||
true); // return sum of all chunks in group
|
||||
|
@ -1172,8 +1173,8 @@ std::vector<reg_t> Executor<state_t>::sample_measure(const reg_t &qubits,
|
|||
}
|
||||
}
|
||||
} else {
|
||||
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
|
||||
for (int_t ic = Base::top_state_of_group_[ig];
|
||||
for (uint_t ig = 0; ig < Base::num_groups_; ig++) {
|
||||
for (uint_t ic = Base::top_state_of_group_[ig];
|
||||
ic < Base::top_state_of_group_[ig + 1]; ic++) {
|
||||
bool batched = Base::states_[ic].qreg().enable_batch(
|
||||
true); // return sum of all chunks in group
|
||||
|
@ -1271,9 +1272,9 @@ void Executor<state_t>::apply_initialize(const reg_t &qubits,
|
|||
auto apply_global_phase = [&tmp, &params_in, global_phase](int_t i) {
|
||||
tmp[i] = params_in[i] * global_phase;
|
||||
};
|
||||
Utils::apply_omp_parallel_for((qubits.size() > Base::omp_qubit_threshold_),
|
||||
0, params_in.size(), apply_global_phase,
|
||||
Base::parallel_state_update_);
|
||||
Utils::apply_omp_parallel_for(
|
||||
(qubits.size() > (uint_t)Base::omp_qubit_threshold_), 0,
|
||||
params_in.size(), apply_global_phase, Base::parallel_state_update_);
|
||||
}
|
||||
const cvector_t &params = tmp.empty() ? params_in : tmp;
|
||||
if (qubits.size() == Base::num_qubits_) {
|
||||
|
@ -1296,13 +1297,13 @@ void Executor<state_t>::apply_initialize(const reg_t &qubits,
|
|||
if (qubits_out_chunk.size() == 0) { // no qubits outside of chunk
|
||||
if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
|
||||
#pragma omp parallel for
|
||||
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
|
||||
for (int_t i = Base::top_state_of_group_[ig];
|
||||
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
|
||||
for (uint_t i = Base::top_state_of_group_[ig];
|
||||
i < Base::top_state_of_group_[ig + 1]; i++)
|
||||
Base::states_[i].qreg().initialize_component(qubits, params);
|
||||
}
|
||||
} else {
|
||||
for (int_t i = 0; i < Base::states_.size(); i++)
|
||||
for (uint_t i = 0; i < Base::states_.size(); i++)
|
||||
Base::states_[i].qreg().initialize_component(qubits, params);
|
||||
}
|
||||
} else {
|
||||
|
@ -1311,16 +1312,16 @@ void Executor<state_t>::apply_initialize(const reg_t &qubits,
|
|||
// scatter inside chunks
|
||||
const size_t dim = 1ULL << qubits_in_chunk.size();
|
||||
cvector_t perm(dim * dim, 0.);
|
||||
for (int_t i = 0; i < dim; i++) {
|
||||
for (uint_t i = 0; i < dim; i++) {
|
||||
perm[i] = 1.0;
|
||||
}
|
||||
|
||||
if (BasePar::chunk_omp_parallel_) {
|
||||
#pragma omp parallel for
|
||||
for (int_t i = 0; i < Base::states_.size(); i++)
|
||||
for (int_t i = 0; i < (int_t)Base::states_.size(); i++)
|
||||
Base::states_[i].qreg().apply_matrix(qubits_in_chunk, perm);
|
||||
} else {
|
||||
for (int_t i = 0; i < Base::states_.size(); i++)
|
||||
for (uint_t i = 0; i < Base::states_.size(); i++)
|
||||
Base::states_[i].qreg().apply_matrix(qubits_in_chunk, perm);
|
||||
}
|
||||
}
|
||||
|
@ -1329,7 +1330,8 @@ void Executor<state_t>::apply_initialize(const reg_t &qubits,
|
|||
auto sorted_qubits_out = qubits_out_chunk;
|
||||
std::sort(sorted_qubits_out.begin(), sorted_qubits_out.end());
|
||||
|
||||
for (int_t i = 0; i < (1ull << (Base::num_qubits_ - BasePar::chunk_bits_ -
|
||||
for (uint_t i = 0;
|
||||
i < (1ull << (Base::num_qubits_ - BasePar::chunk_bits_ -
|
||||
qubits_out_chunk.size()));
|
||||
i++) {
|
||||
uint_t baseChunk = 0;
|
||||
|
@ -1344,7 +1346,7 @@ void Executor<state_t>::apply_initialize(const reg_t &qubits,
|
|||
baseChunk >>= BasePar::chunk_bits_;
|
||||
|
||||
for (j = 1; j < (1ull << qubits_out_chunk.size()); j++) {
|
||||
int_t ic = baseChunk;
|
||||
uint_t ic = baseChunk;
|
||||
for (t = 0; t < qubits_out_chunk.size(); t++) {
|
||||
if ((j >> t) & 1)
|
||||
ic += (1ull << (qubits_out_chunk[t] - BasePar::chunk_bits_));
|
||||
|
@ -1385,13 +1387,13 @@ void Executor<state_t>::apply_initialize(const reg_t &qubits,
|
|||
// initialize by params
|
||||
if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
|
||||
#pragma omp parallel for
|
||||
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
|
||||
for (int_t i = Base::top_state_of_group_[ig];
|
||||
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
|
||||
for (uint_t i = Base::top_state_of_group_[ig];
|
||||
i < Base::top_state_of_group_[ig + 1]; i++)
|
||||
Base::states_[i].qreg().apply_diagonal_matrix(qubits, params);
|
||||
}
|
||||
} else {
|
||||
for (int_t i = 0; i < Base::states_.size(); i++)
|
||||
for (uint_t i = 0; i < Base::states_.size(); i++)
|
||||
Base::states_[i].qreg().apply_diagonal_matrix(qubits, params);
|
||||
}
|
||||
}
|
||||
|
@ -1402,7 +1404,7 @@ void Executor<state_t>::initialize_from_vector(const cvector_t &params) {
|
|||
uint_t local_offset = Base::global_state_index_ << BasePar::chunk_bits_;
|
||||
|
||||
#pragma omp parallel for if (BasePar::chunk_omp_parallel_)
|
||||
for (int_t i = 0; i < Base::states_.size(); i++) {
|
||||
for (int_t i = 0; i < (int_t)Base::states_.size(); i++) {
|
||||
// copy part of state for this chunk
|
||||
cvector_t tmp(1ull << BasePar::chunk_bits_);
|
||||
std::copy(params.begin() + local_offset + (i << BasePar::chunk_bits_),
|
||||
|
@ -1443,13 +1445,13 @@ void Executor<state_t>::apply_kraus(const reg_t &qubits,
|
|||
p = 0.0;
|
||||
if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
|
||||
#pragma omp parallel for reduction(+ : p)
|
||||
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
|
||||
for (int_t i = Base::top_state_of_group_[ig];
|
||||
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
|
||||
for (uint_t i = Base::top_state_of_group_[ig];
|
||||
i < Base::top_state_of_group_[ig + 1]; i++)
|
||||
p += Base::states_[i].qreg().norm(qubits, vmat);
|
||||
}
|
||||
} else {
|
||||
for (int_t i = 0; i < Base::states_.size(); i++)
|
||||
for (uint_t i = 0; i < Base::states_.size(); i++)
|
||||
p += Base::states_[i].qreg().norm(qubits, vmat);
|
||||
}
|
||||
|
||||
|
@ -1465,14 +1467,14 @@ void Executor<state_t>::apply_kraus(const reg_t &qubits,
|
|||
// apply Kraus projection operator
|
||||
if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
|
||||
#pragma omp parallel for
|
||||
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
|
||||
for (int_t ic = Base::top_state_of_group_[ig];
|
||||
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
|
||||
for (uint_t ic = Base::top_state_of_group_[ig];
|
||||
ic < Base::top_state_of_group_[ig + 1]; ic++)
|
||||
Base::states_[ic].qreg().apply_matrix(qubits, vmat);
|
||||
}
|
||||
} else {
|
||||
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
|
||||
for (int_t ic = Base::top_state_of_group_[ig];
|
||||
for (uint_t ig = 0; ig < Base::num_groups_; ig++) {
|
||||
for (uint_t ic = Base::top_state_of_group_[ig];
|
||||
ic < Base::top_state_of_group_[ig + 1]; ic++)
|
||||
Base::states_[ic].qreg().apply_matrix(qubits, vmat);
|
||||
}
|
||||
|
@ -1489,14 +1491,14 @@ void Executor<state_t>::apply_kraus(const reg_t &qubits,
|
|||
auto vmat = Utils::vectorize_matrix(renorm * kmats.back());
|
||||
if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
|
||||
#pragma omp parallel for
|
||||
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
|
||||
for (int_t ic = Base::top_state_of_group_[ig];
|
||||
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
|
||||
for (uint_t ic = Base::top_state_of_group_[ig];
|
||||
ic < Base::top_state_of_group_[ig + 1]; ic++)
|
||||
Base::states_[ic].qreg().apply_matrix(qubits, vmat);
|
||||
}
|
||||
} else {
|
||||
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
|
||||
for (int_t ic = Base::top_state_of_group_[ig];
|
||||
for (uint_t ig = 0; ig < Base::num_groups_; ig++) {
|
||||
for (uint_t ic = Base::top_state_of_group_[ig];
|
||||
ic < Base::top_state_of_group_[ig + 1]; ic++)
|
||||
Base::states_[ic].qreg().apply_matrix(qubits, vmat);
|
||||
}
|
||||
|
@ -1513,7 +1515,7 @@ Executor<state_t>::sample_measure_with_prob(CircuitExecutor::Branch &root,
|
|||
uint_t nshots = root.num_shots();
|
||||
reg_t shot_branch(nshots);
|
||||
|
||||
for (int_t i = 0; i < nshots; i++) {
|
||||
for (uint_t i = 0; i < nshots; i++) {
|
||||
shot_branch[i] = root.rng_shots()[i].rand_int(probs);
|
||||
}
|
||||
|
||||
|
@ -1547,11 +1549,11 @@ void Executor<state_t>::measure_reset_update(CircuitExecutor::Branch &root,
|
|||
root.branches()[i]->add_op_after_branch(op);
|
||||
|
||||
if (final_state >= 0 && final_state != i) {
|
||||
Operations::Op op;
|
||||
op.type = OpType::gate;
|
||||
op.name = "mcx";
|
||||
op.qubits = qubits;
|
||||
root.branches()[i]->add_op_after_branch(op);
|
||||
Operations::Op op2;
|
||||
op2.type = OpType::gate;
|
||||
op2.name = "mcx";
|
||||
op2.qubits = qubits;
|
||||
root.branches()[i]->add_op_after_branch(op2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1559,7 +1561,7 @@ void Executor<state_t>::measure_reset_update(CircuitExecutor::Branch &root,
|
|||
else {
|
||||
// Diagonal matrix for projecting and renormalizing to measurement outcome
|
||||
const size_t dim = 1ULL << qubits.size();
|
||||
for (int_t i = 0; i < dim; i++) {
|
||||
for (uint_t i = 0; i < dim; i++) {
|
||||
cvector_t mdiag(dim, 0.);
|
||||
mdiag[i] = 1. / std::sqrt(meas_probs[i]);
|
||||
|
||||
|
@ -1569,20 +1571,20 @@ void Executor<state_t>::measure_reset_update(CircuitExecutor::Branch &root,
|
|||
op.params = mdiag;
|
||||
root.branches()[i]->add_op_after_branch(op);
|
||||
|
||||
if (final_state >= 0 && final_state != i) {
|
||||
if (final_state >= 0 && final_state != (int_t)i) {
|
||||
// build vectorized permutation matrix
|
||||
cvector_t perm(dim * dim, 0.);
|
||||
perm[final_state * dim + i] = 1.;
|
||||
perm[i * dim + final_state] = 1.;
|
||||
for (size_t j = 0; j < dim; j++) {
|
||||
if (j != final_state && j != i)
|
||||
for (uint_t j = 0; j < dim; j++) {
|
||||
if ((int_t)j != final_state && j != i)
|
||||
perm[j * dim + j] = 1.;
|
||||
}
|
||||
Operations::Op op;
|
||||
op.type = OpType::matrix;
|
||||
op.qubits = qubits;
|
||||
op.mats.push_back(Utils::devectorize_matrix(perm));
|
||||
root.branches()[i]->add_op_after_branch(op);
|
||||
Operations::Op op2;
|
||||
op2.type = OpType::matrix;
|
||||
op2.qubits = qubits;
|
||||
op2.mats.push_back(Utils::devectorize_matrix(perm));
|
||||
root.branches()[i]->add_op_after_branch(op2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1595,7 +1597,7 @@ void Executor<state_t>::apply_measure(CircuitExecutor::Branch &root,
|
|||
rvector_t probs = sample_measure_with_prob(root, qubits);
|
||||
|
||||
// save result to cregs
|
||||
for (int_t i = 0; i < probs.size(); i++) {
|
||||
for (uint_t i = 0; i < probs.size(); i++) {
|
||||
const reg_t outcome = Utils::int2reg(i, 2, qubits.size());
|
||||
root.branches()[i]->creg().store_measure(outcome, cmemory, cregister);
|
||||
}
|
||||
|
@ -1624,9 +1626,9 @@ void Executor<state_t>::apply_initialize(CircuitExecutor::Branch &root,
|
|||
auto apply_global_phase = [&tmp, params_in, global_phase](int_t i) {
|
||||
tmp[i] = params_in[i] * global_phase;
|
||||
};
|
||||
Utils::apply_omp_parallel_for((qubits.size() > Base::omp_qubit_threshold_),
|
||||
0, params_in.size(), apply_global_phase,
|
||||
Base::parallel_state_update_);
|
||||
Utils::apply_omp_parallel_for(
|
||||
(qubits.size() > (uint_t)Base::omp_qubit_threshold_), 0,
|
||||
params_in.size(), apply_global_phase, Base::parallel_state_update_);
|
||||
}
|
||||
const cvector_t &params = tmp.empty() ? params_in : tmp;
|
||||
if (qubits.size() == Base::num_qubits_) {
|
||||
|
@ -1648,7 +1650,7 @@ void Executor<state_t>::apply_initialize(CircuitExecutor::Branch &root,
|
|||
op.name = "initialize";
|
||||
op.qubits = qubits;
|
||||
op.params = params;
|
||||
for (int_t i = 0; i < root.num_branches(); i++) {
|
||||
for (uint_t i = 0; i < root.num_branches(); i++) {
|
||||
root.branches()[i]->add_op_after_branch(op);
|
||||
}
|
||||
return; // initialization will be done in next call because of shot
|
||||
|
@ -1672,10 +1674,8 @@ void Executor<state_t>::apply_kraus(CircuitExecutor::Branch &root,
|
|||
// So we only compute probabilities for the first N-1 kraus operators
|
||||
// and infer the probability of the last one from 1 - sum of the previous
|
||||
|
||||
double r;
|
||||
double accum = 0.;
|
||||
double p;
|
||||
bool complete = false;
|
||||
|
||||
reg_t shot_branch;
|
||||
uint_t nshots;
|
||||
|
@ -1685,7 +1685,7 @@ void Executor<state_t>::apply_kraus(CircuitExecutor::Branch &root,
|
|||
nshots = root.num_shots();
|
||||
shot_branch.resize(nshots);
|
||||
rshots.resize(nshots);
|
||||
for (int_t i = 0; i < nshots; i++) {
|
||||
for (uint_t i = 0; i < nshots; i++) {
|
||||
shot_branch[i] = kmats.size() - 1;
|
||||
rshots[i] = root.rng_shots()[i].rand(0., 1.);
|
||||
}
|
||||
|
@ -1701,7 +1701,7 @@ void Executor<state_t>::apply_kraus(CircuitExecutor::Branch &root,
|
|||
|
||||
// check if we need to apply this operator
|
||||
pmats[j] = p;
|
||||
for (int_t i = 0; i < nshots; i++) {
|
||||
for (uint_t i = 0; i < nshots; i++) {
|
||||
if (shot_branch[i] >= kmats.size() - 1) {
|
||||
if (accum > rshots[i]) {
|
||||
shot_branch[i] = j;
|
||||
|
@ -1710,23 +1710,21 @@ void Executor<state_t>::apply_kraus(CircuitExecutor::Branch &root,
|
|||
}
|
||||
}
|
||||
if (nshots_multiplied >= nshots) {
|
||||
complete = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// check if we haven't applied a kraus operator yet
|
||||
pmats[pmats.size() - 1] = 1. - accum;
|
||||
|
||||
root.creg() = Base::states_[root.state_index()].creg();
|
||||
root.branch_shots(shot_branch, kmats.size());
|
||||
for (int_t i = 0; i < kmats.size(); i++) {
|
||||
for (uint_t i = 0; i < kmats.size(); i++) {
|
||||
Operations::Op op;
|
||||
op.type = OpType::matrix;
|
||||
op.qubits = qubits;
|
||||
op.mats.push_back(kmats[i]);
|
||||
p = 1 / std::sqrt(pmats[i]);
|
||||
for (int_t j = 0; j < op.mats[0].size(); j++)
|
||||
for (uint_t j = 0; j < op.mats[0].size(); j++)
|
||||
op.mats[0][j] *= p;
|
||||
root.branches()[i]->add_op_after_branch(op);
|
||||
}
|
||||
|
@ -1748,7 +1746,7 @@ void Executor<state_t>::apply_save_density_matrix(CircuitExecutor::Branch &root,
|
|||
}
|
||||
|
||||
std::vector<bool> copied(Base::num_bind_params_, false);
|
||||
for (int_t i = 0; i < root.num_shots(); i++) {
|
||||
for (uint_t i = 0; i < root.num_shots(); i++) {
|
||||
uint_t ip = root.param_index(i);
|
||||
if (!copied[ip]) {
|
||||
(result + ip)
|
||||
|
@ -1771,7 +1769,7 @@ void Executor<state_t>::apply_save_probs(CircuitExecutor::Branch &root,
|
|||
std::vector<bool> copied(Base::num_bind_params_, false);
|
||||
if (op.type == Operations::OpType::save_probs_ket) {
|
||||
// Convert to ket dict
|
||||
for (int_t i = 0; i < root.num_shots(); i++) {
|
||||
for (uint_t i = 0; i < root.num_shots(); i++) {
|
||||
uint_t ip = root.param_index(i);
|
||||
if (!copied[ip]) {
|
||||
(result + ip)
|
||||
|
@ -1783,7 +1781,7 @@ void Executor<state_t>::apply_save_probs(CircuitExecutor::Branch &root,
|
|||
}
|
||||
}
|
||||
} else {
|
||||
for (int_t i = 0; i < root.num_shots(); i++) {
|
||||
for (uint_t i = 0; i < root.num_shots(); i++) {
|
||||
uint_t ip = root.param_index(i);
|
||||
if (!copied[ip]) {
|
||||
(result + ip)
|
||||
|
@ -1810,7 +1808,7 @@ void Executor<state_t>::apply_save_statevector(CircuitExecutor::Branch &root,
|
|||
|
||||
if (last_op) {
|
||||
const auto v = Base::states_[root.state_index()].move_to_vector();
|
||||
for (int_t i = 0; i < root.num_shots(); i++) {
|
||||
for (uint_t i = 0; i < root.num_shots(); i++) {
|
||||
uint_t ip = root.param_index(i);
|
||||
(result + ip)
|
||||
->save_data_pershot(Base::states_[root.state_index()].creg(), key, v,
|
||||
|
@ -1818,7 +1816,7 @@ void Executor<state_t>::apply_save_statevector(CircuitExecutor::Branch &root,
|
|||
}
|
||||
} else {
|
||||
const auto v = Base::states_[root.state_index()].copy_to_vector();
|
||||
for (int_t i = 0; i < root.num_shots(); i++) {
|
||||
for (uint_t i = 0; i < root.num_shots(); i++) {
|
||||
uint_t ip = root.param_index(i);
|
||||
(result + ip)
|
||||
->save_data_pershot(Base::states_[root.state_index()].creg(), key, v,
|
||||
|
@ -1841,7 +1839,7 @@ void Executor<state_t>::apply_save_statevector_dict(
|
|||
for (auto const &it : state_ket) {
|
||||
result_state_ket[it.first] = it.second;
|
||||
}
|
||||
for (int_t i = 0; i < root.num_shots(); i++) {
|
||||
for (uint_t i = 0; i < root.num_shots(); i++) {
|
||||
uint_t ip = root.param_index(i);
|
||||
(result + ip)
|
||||
->save_data_pershot(
|
||||
|
@ -1866,7 +1864,7 @@ void Executor<state_t>::apply_save_amplitudes(CircuitExecutor::Branch &root,
|
|||
amps[i] =
|
||||
Base::states_[root.state_index()].qreg().get_state(op.int_params[i]);
|
||||
}
|
||||
for (int_t i = 0; i < root.num_shots(); i++) {
|
||||
for (uint_t i = 0; i < root.num_shots(); i++) {
|
||||
uint_t ip = root.param_index(i);
|
||||
(result + ip)
|
||||
->save_data_pershot(
|
||||
|
@ -1880,7 +1878,7 @@ void Executor<state_t>::apply_save_amplitudes(CircuitExecutor::Branch &root,
|
|||
op.int_params[i]);
|
||||
}
|
||||
std::vector<bool> copied(Base::num_bind_params_, false);
|
||||
for (int_t i = 0; i < root.num_shots(); i++) {
|
||||
for (uint_t i = 0; i < root.num_shots(); i++) {
|
||||
uint_t ip = root.param_index(i);
|
||||
if (!copied[ip]) {
|
||||
(result + ip)
|
||||
|
@ -1898,7 +1896,7 @@ std::vector<reg_t>
|
|||
Executor<state_t>::sample_measure(state_t &state, const reg_t &qubits,
|
||||
uint_t shots,
|
||||
std::vector<RngEngine> &rng) const {
|
||||
int_t i, j;
|
||||
uint_t i;
|
||||
std::vector<double> rnds;
|
||||
rnds.reserve(shots);
|
||||
|
||||
|
|
|
@ -402,7 +402,6 @@ const stringmap_t<Gates> State<statevec_t>::gateset_(
|
|||
|
||||
template <class statevec_t>
|
||||
void State<statevec_t>::initialize_qreg(uint_t num_qubits) {
|
||||
int_t i;
|
||||
initialize_omp();
|
||||
|
||||
BaseState::qreg_.set_num_qubits(num_qubits);
|
||||
|
@ -426,8 +425,6 @@ void State<statevec_t>::initialize_statevector(uint_t num_qubits,
|
|||
|
||||
template <class statevec_t>
|
||||
void State<statevec_t>::initialize_omp() {
|
||||
uint_t i;
|
||||
|
||||
BaseState::qreg_.set_omp_threshold(omp_qubit_threshold_);
|
||||
if (BaseState::threads_ > 0) // set allowed OMP threads in qubitvector
|
||||
BaseState::qreg_.set_omp_threads(BaseState::threads_);
|
||||
|
@ -701,7 +698,7 @@ cmatrix_t State<statevec_t>::vec2density(const reg_t &qubits, const T &vec) {
|
|||
cmatrix_t densmat(DIM, DIM);
|
||||
if ((N == BaseState::qreg_.num_qubits()) && (qubits == qubits_sorted)) {
|
||||
const int_t mask = QV::MASKS[N];
|
||||
#pragma omp parallel for if (2 * N > omp_qubit_threshold_ && \
|
||||
#pragma omp parallel for if (2 * N > (size_t)omp_qubit_threshold_ && \
|
||||
BaseState::threads_ > 1) \
|
||||
num_threads(BaseState::threads_)
|
||||
for (int_t rowcol = 0; rowcol < int_t(DIM * DIM); ++rowcol) {
|
||||
|
@ -750,7 +747,7 @@ void State<statevec_t>::apply_gate(const Operations::Op &op) {
|
|||
}
|
||||
if (qubits_out.size() > 0) {
|
||||
uint_t mask = 0;
|
||||
for (int i = 0; i < qubits_out.size(); i++) {
|
||||
for (uint_t i = 0; i < qubits_out.size(); i++) {
|
||||
mask |= (1ull << (qubits_out[i] - BaseState::qreg_.num_qubits()));
|
||||
}
|
||||
if ((BaseState::qreg_.chunk_index() & mask) == mask) {
|
||||
|
@ -1026,7 +1023,7 @@ template <class statevec_t>
|
|||
std::vector<reg_t> State<statevec_t>::sample_measure(const reg_t &qubits,
|
||||
uint_t shots,
|
||||
RngEngine &rng) {
|
||||
int_t i, j;
|
||||
uint_t i;
|
||||
// Generate flat register for storing
|
||||
std::vector<double> rnds;
|
||||
rnds.reserve(shots);
|
||||
|
@ -1066,9 +1063,9 @@ void State<statevec_t>::apply_initialize(const reg_t &qubits,
|
|||
auto apply_global_phase = [&tmp, &params_in, this](int_t i) {
|
||||
tmp[i] = params_in[i] * BaseState::global_phase_;
|
||||
};
|
||||
Utils::apply_omp_parallel_for((qubits.size() > omp_qubit_threshold_), 0,
|
||||
params_in.size(), apply_global_phase,
|
||||
BaseState::threads_);
|
||||
Utils::apply_omp_parallel_for(
|
||||
(qubits.size() > (uint_t)omp_qubit_threshold_), 0, params_in.size(),
|
||||
apply_global_phase, BaseState::threads_);
|
||||
}
|
||||
const cvector_t &params = tmp.empty() ? params_in : tmp;
|
||||
if (qubits.size() == BaseState::qreg_.num_qubits()) {
|
||||
|
|
|
@ -244,9 +244,9 @@ void Transformer<Container, data_t>::apply_diagonal_matrix(
|
|||
auto func = [&](const areg_t<2> &inds,
|
||||
const cvector_t<data_t> &_diag) -> void {
|
||||
for (int_t i = 0; i < 2; ++i) {
|
||||
const int_t k = inds[i];
|
||||
const uint_t k = inds[i];
|
||||
int_t iv = 0;
|
||||
for (int_t j = 0; j < N; j++)
|
||||
for (uint_t j = 0; j < N; j++)
|
||||
if ((k & (1ULL << qubits[j])) != 0)
|
||||
iv += (1ULL << j);
|
||||
if (_diag[iv] != (data_t)1.0)
|
||||
|
|
|
@ -177,7 +177,7 @@ void Tensor<data_t>::set_conj(const reg_t &qubits,
|
|||
std::vector<std::complex<data_t>> &mat) {
|
||||
set(qubits, mat);
|
||||
|
||||
for (int i = 0; i < tensor_.size(); i++)
|
||||
for (uint_t i = 0; i < tensor_.size(); i++)
|
||||
tensor_[i] = std::conj(tensor_[i]);
|
||||
sp_tensor_ = true;
|
||||
}
|
||||
|
|
|
@ -374,7 +374,7 @@ template <typename data_t>
|
|||
TensorNet<data_t>::TensorNet(const TensorNet &obj) {}
|
||||
template <typename data_t>
|
||||
TensorNet<data_t>::~TensorNet() {
|
||||
int i;
|
||||
uint_t i;
|
||||
for (i = 0; i < tensors_.size(); i++) {
|
||||
tensors_[i].reset();
|
||||
}
|
||||
|
@ -417,7 +417,7 @@ void TensorNet<data_t>::buffer_statevector(void) const {
|
|||
std::vector<int64_t> extents_out(num_qubits_);
|
||||
|
||||
// output tensor
|
||||
for (int_t i = 0; i < num_qubits_; i++) {
|
||||
for (uint_t i = 0; i < num_qubits_; i++) {
|
||||
modes_out[i] = modes_qubits_[i];
|
||||
extents_out[i] = 2;
|
||||
}
|
||||
|
@ -464,9 +464,9 @@ TensorNet<data_t>::reduced_density_matrix(const reg_t &qubits) {
|
|||
uint_t nqubits = qubits.size();
|
||||
|
||||
// connect qubits not to be reduced
|
||||
for (int_t i = 0; i < num_qubits_; i++) {
|
||||
for (uint_t i = 0; i < num_qubits_; i++) {
|
||||
bool check = false;
|
||||
for (int_t j = 0; j < qubits.size(); j++) {
|
||||
for (uint_t j = 0; j < qubits.size(); j++) {
|
||||
if (i == qubits[j]) {
|
||||
check = true;
|
||||
break;
|
||||
|
@ -491,7 +491,7 @@ TensorNet<data_t>::reduced_density_matrix(const reg_t &qubits) {
|
|||
std::vector<std::complex<data_t>> trace;
|
||||
|
||||
// output tensor
|
||||
for (int_t i = 0; i < nqubits; i++) {
|
||||
for (uint_t i = 0; i < nqubits; i++) {
|
||||
modes_out[i] = modes_qubits_[qubits[i]];
|
||||
modes_out[i + nqubits] = modes_qubits_sp_[qubits[i]];
|
||||
extents_out[i] = 2;
|
||||
|
@ -505,9 +505,9 @@ TensorNet<data_t>::reduced_density_matrix(const reg_t &qubits) {
|
|||
delete contractor;
|
||||
|
||||
// recover connectted qubits
|
||||
for (int_t i = 0; i < num_qubits_; i++) {
|
||||
for (uint_t i = 0; i < num_qubits_; i++) {
|
||||
bool check = false;
|
||||
for (int_t j = 0; j < qubits.size(); j++) {
|
||||
for (uint_t j = 0; j < qubits.size(); j++) {
|
||||
if (i == qubits[j]) {
|
||||
check = true;
|
||||
break;
|
||||
|
@ -538,7 +538,7 @@ void TensorNet<data_t>::initialize_component(const reg_t &qubits,
|
|||
statevector_.clear(); // invalidate statevector buffer
|
||||
|
||||
cvector_t<data_t> state(state0.size());
|
||||
for (int_t i = 0; i < state0.size(); i++)
|
||||
for (uint_t i = 0; i < state0.size(); i++)
|
||||
state[i] = (std::complex<data_t>)state0[i];
|
||||
|
||||
tensors_.push_back(std::make_shared<Tensor<data_t>>());
|
||||
|
@ -547,7 +547,7 @@ void TensorNet<data_t>::initialize_component(const reg_t &qubits,
|
|||
tensors_.push_back(std::make_shared<Tensor<data_t>>());
|
||||
tensors_[last + 1]->set_conj(qubits, state);
|
||||
|
||||
for (int i = 0; i < qubits.size(); i++) {
|
||||
for (uint_t i = 0; i < qubits.size(); i++) {
|
||||
modes_qubits_[qubits[i]] = mode_index_;
|
||||
tensors_[last]->modes()[i] = mode_index_++;
|
||||
qubits_[qubits[i]] = tensors_[last];
|
||||
|
@ -584,7 +584,7 @@ void TensorNet<data_t>::add_tensor(const reg_t &qubits,
|
|||
tensors_.push_back(std::make_shared<Tensor<data_t>>());
|
||||
uint_t last = tensors_.size() - 1;
|
||||
tensors_[last]->set(qubits, mat);
|
||||
for (int i = 0; i < qubits.size(); i++) {
|
||||
for (uint_t i = 0; i < qubits.size(); i++) {
|
||||
tensors_[last]->modes()[i] = modes_qubits_[qubits[i]];
|
||||
modes_qubits_[qubits[i]] = mode_index_;
|
||||
tensors_[last]->modes()[qubits.size() + i] = mode_index_++;
|
||||
|
@ -594,7 +594,7 @@ void TensorNet<data_t>::add_tensor(const reg_t &qubits,
|
|||
tensors_.push_back(std::make_shared<Tensor<data_t>>());
|
||||
last++;
|
||||
tensors_[last]->set_conj(qubits, mat);
|
||||
for (int i = 0; i < qubits.size(); i++) {
|
||||
for (uint_t i = 0; i < qubits.size(); i++) {
|
||||
tensors_[last]->modes()[i] = modes_qubits_sp_[qubits[i]];
|
||||
modes_qubits_sp_[qubits[i]] = mode_index_;
|
||||
tensors_[last]->modes()[qubits.size() + i] = mode_index_++;
|
||||
|
@ -614,13 +614,13 @@ void TensorNet<data_t>::add_superop_tensor(
|
|||
uint_t last = tensors_.size() - 1;
|
||||
tensors_[last]->set(qubits, mat);
|
||||
|
||||
for (int i = 0; i < size; i++) {
|
||||
for (uint_t i = 0; i < size; i++) {
|
||||
tensors_[last]->modes()[i] = modes_qubits_[qubits[i]];
|
||||
modes_qubits_[qubits[i]] = mode_index_;
|
||||
tensors_[last]->modes()[size * 2 + i] = mode_index_++;
|
||||
qubits_[qubits[i]] = tensors_[last];
|
||||
}
|
||||
for (int i = 0; i < size; i++) {
|
||||
for (uint_t i = 0; i < size; i++) {
|
||||
tensors_[last]->modes()[size + i] = modes_qubits_sp_[qubits[i]];
|
||||
modes_qubits_sp_[qubits[i]] = mode_index_;
|
||||
tensors_[last]->modes()[size * 3 + i] = mode_index_++;
|
||||
|
@ -636,7 +636,7 @@ void TensorNet<data_t>::add_superop_tensor(
|
|||
|
||||
template <typename data_t>
|
||||
void TensorNet<data_t>::initialize() {
|
||||
int i;
|
||||
uint_t i;
|
||||
|
||||
if (statevector_.size() > 0)
|
||||
statevector_.clear(); // invalidate statevector buffer
|
||||
|
@ -658,7 +658,7 @@ void TensorNet<data_t>::initialize() {
|
|||
for (i = 0; i < num_qubits_; i++) {
|
||||
tensors_.push_back(std::make_shared<Tensor<data_t>>());
|
||||
uint_t last = tensors_.size() - 1;
|
||||
tensors_[last]->set({i}, init);
|
||||
tensors_[last]->set({(int)i}, init);
|
||||
|
||||
modes_qubits_[i] = mode_index_;
|
||||
tensors_[last]->modes()[0] = mode_index_++;
|
||||
|
@ -667,7 +667,7 @@ void TensorNet<data_t>::initialize() {
|
|||
for (i = 0; i < num_qubits_; i++) { // for super qubits
|
||||
tensors_.push_back(std::make_shared<Tensor<data_t>>());
|
||||
uint_t last = tensors_.size() - 1;
|
||||
tensors_[last]->set({i}, init);
|
||||
tensors_[last]->set({(int)i}, init);
|
||||
|
||||
modes_qubits_sp_[i] = mode_index_;
|
||||
tensors_[last]->modes()[0] = mode_index_++;
|
||||
|
@ -700,19 +700,19 @@ void TensorNet<data_t>::initialize(const TensorNet<data_t> &obj) {
|
|||
template <typename data_t>
|
||||
void TensorNet<data_t>::initialize_from_matrix(const cmatrix_t &matrix0) {
|
||||
cvector_t<data_t> matrix(matrix0.size());
|
||||
for (int_t i = 0; i < matrix0.size(); i++)
|
||||
for (uint_t i = 0; i < matrix0.size(); i++)
|
||||
matrix[i] = (std::complex<data_t>)matrix0[i];
|
||||
|
||||
tensors_.push_back(std::make_shared<Tensor<data_t>>());
|
||||
uint_t last = tensors_.size() - 1;
|
||||
tensors_[last]->set(num_qubits_, matrix);
|
||||
|
||||
for (int i = 0; i < num_qubits_; i++) {
|
||||
for (uint_t i = 0; i < num_qubits_; i++) {
|
||||
modes_qubits_[i] = mode_index_++;
|
||||
tensors_[last]->modes()[i] = modes_qubits_[i];
|
||||
qubits_[i] = tensors_[last];
|
||||
}
|
||||
for (int i = 0; i < num_qubits_; i++) {
|
||||
for (uint_t i = 0; i < num_qubits_; i++) {
|
||||
modes_qubits_sp_[i] = mode_index_++;
|
||||
tensors_[last]->modes()[i + num_qubits_] = modes_qubits_sp_[i];
|
||||
qubits_sp_[i] = tensors_[last];
|
||||
|
@ -772,7 +772,6 @@ void TensorNet<data_t>::apply_multiplexer(const reg_t &control_qubits,
|
|||
for (const auto &q : control_qubits) {
|
||||
qubits.push_back(q);
|
||||
}
|
||||
size_t N = qubits.size();
|
||||
|
||||
cvector_t<double> matMP(DIM * DIM, 0.0);
|
||||
uint_t b, i, j;
|
||||
|
@ -794,11 +793,10 @@ template <typename data_t>
|
|||
void TensorNet<data_t>::apply_diagonal_matrix(const reg_t &qubits,
|
||||
const cvector_t<double> &diag) {
|
||||
cvector_t<data_t> mat(diag.size() * diag.size(), 0.0);
|
||||
for (int_t i = 0; i < diag.size(); i++) {
|
||||
for (uint_t i = 0; i < diag.size(); i++) {
|
||||
mat[i * (diag.size() + 1)] = diag[i];
|
||||
}
|
||||
|
||||
Tensor<data_t> *t = new Tensor<data_t>;
|
||||
add_tensor(qubits, mat);
|
||||
}
|
||||
|
||||
|
@ -806,7 +804,7 @@ template <typename data_t>
|
|||
void TensorNet<data_t>::apply_diagonal_superop_matrix(
|
||||
const reg_t &qubits, const cvector_t<double> &diag) {
|
||||
cvector_t<data_t> mat(diag.size() * diag.size(), 0.0);
|
||||
for (int_t i = 0; i < diag.size(); i++) {
|
||||
for (uint_t i = 0; i < diag.size(); i++) {
|
||||
mat[i * (diag.size() + 1)] = diag[i];
|
||||
}
|
||||
add_superop_tensor(qubits, mat);
|
||||
|
@ -833,7 +831,7 @@ void TensorNet<data_t>::apply_mcx(const reg_t &qubits) {
|
|||
|
||||
reg_t qubits_t;
|
||||
qubits_t.push_back(qubits[qubits.size() - 1]);
|
||||
for (int i = 0; i < qubits.size() - 1; i++)
|
||||
for (uint_t i = 0; i < qubits.size() - 1; i++)
|
||||
qubits_t.push_back(qubits[i]);
|
||||
|
||||
add_tensor(qubits_t, mat);
|
||||
|
@ -850,7 +848,7 @@ void TensorNet<data_t>::apply_mcy(const reg_t &qubits) {
|
|||
|
||||
reg_t qubits_t;
|
||||
qubits_t.push_back(qubits[qubits.size() - 1]);
|
||||
for (int i = 0; i < qubits.size() - 1; i++)
|
||||
for (uint_t i = 0; i < qubits.size() - 1; i++)
|
||||
qubits_t.push_back(qubits[i]);
|
||||
|
||||
add_tensor(qubits_t, mat);
|
||||
|
@ -869,7 +867,7 @@ void TensorNet<data_t>::apply_mcswap(const reg_t &qubits) {
|
|||
reg_t qubits_t;
|
||||
qubits_t.push_back(qubits[qubits.size() - 2]);
|
||||
qubits_t.push_back(qubits[qubits.size() - 1]);
|
||||
for (int i = 0; i < qubits.size() - 2; i++)
|
||||
for (uint_t i = 0; i < qubits.size() - 2; i++)
|
||||
qubits_t.push_back(qubits[i]);
|
||||
|
||||
add_tensor(qubits_t, mat);
|
||||
|
@ -886,7 +884,7 @@ void TensorNet<data_t>::apply_mcphase(const reg_t &qubits,
|
|||
|
||||
reg_t qubits_t;
|
||||
qubits_t.push_back(qubits[qubits.size() - 1]);
|
||||
for (int i = 0; i < qubits.size() - 1; i++)
|
||||
for (uint_t i = 0; i < qubits.size() - 1; i++)
|
||||
qubits_t.push_back(qubits[i]);
|
||||
|
||||
add_tensor(qubits_t, mat);
|
||||
|
@ -907,7 +905,7 @@ void TensorNet<data_t>::apply_mcu(const reg_t &qubits,
|
|||
|
||||
reg_t qubits_t;
|
||||
qubits_t.push_back(qubits[qubits.size() - 1]);
|
||||
for (int i = 0; i < qubits.size() - 1; i++)
|
||||
for (uint_t i = 0; i < qubits.size() - 1; i++)
|
||||
qubits_t.push_back(qubits[i]);
|
||||
|
||||
add_tensor(qubits_t, matR);
|
||||
|
@ -951,7 +949,7 @@ void TensorNet<data_t>::apply_rotation(const reg_t &qubits, const Rotation r,
|
|||
template <typename data_t>
|
||||
double TensorNet<data_t>::norm() const {
|
||||
// connect qubits not used for trace
|
||||
for (int_t i = 1; i < num_qubits_; i++) {
|
||||
for (uint_t i = 1; i < num_qubits_; i++) {
|
||||
for (int_t j = 0; j < qubits_sp_[i]->rank(); j++) {
|
||||
if (qubits_sp_[i]->modes()[j] == modes_qubits_sp_[i]) {
|
||||
qubits_sp_[i]->modes()[j] = modes_qubits_[i];
|
||||
|
@ -980,7 +978,7 @@ double TensorNet<data_t>::norm() const {
|
|||
delete contractor;
|
||||
|
||||
// restore connected qubits
|
||||
for (int_t i = 1; i < num_qubits_; i++) {
|
||||
for (uint_t i = 1; i < num_qubits_; i++) {
|
||||
for (int_t j = 0; j < qubits_sp_[i]->rank(); j++) {
|
||||
if (qubits_sp_[i]->modes()[j] == modes_qubits_[i]) {
|
||||
qubits_sp_[i]->modes()[j] = modes_qubits_sp_[i];
|
||||
|
@ -1002,26 +1000,26 @@ double TensorNet<data_t>::norm(const reg_t &qubits,
|
|||
|
||||
// additional matrix
|
||||
std::vector<std::complex<data_t>> mat_t(mat.size());
|
||||
for (int_t i = 0; i < mat.size(); i++)
|
||||
for (uint_t i = 0; i < mat.size(); i++)
|
||||
mat_t[i] = mat[i];
|
||||
|
||||
mat_tensors[0] = std::make_shared<Tensor<data_t>>();
|
||||
mat_tensors[0]->set(qubits, mat_t);
|
||||
for (int i = 0; i < qubits.size(); i++) {
|
||||
for (uint_t i = 0; i < qubits.size(); i++) {
|
||||
mat_tensors[0]->modes()[i] = tmp_modes[qubits[i]];
|
||||
tmp_modes[qubits[i]] = tmp_index;
|
||||
mat_tensors[0]->modes()[qubits.size() + i] = tmp_index++;
|
||||
}
|
||||
mat_tensors[1] = std::make_shared<Tensor<data_t>>();
|
||||
mat_tensors[1]->set_conj(qubits, mat_t);
|
||||
for (int i = 0; i < qubits.size(); i++) {
|
||||
for (uint_t i = 0; i < qubits.size(); i++) {
|
||||
mat_tensors[1]->modes()[i] = tmp_modes_sp[qubits[i]];
|
||||
tmp_modes_sp[qubits[i]] = tmp_index;
|
||||
mat_tensors[1]->modes()[qubits.size() + i] = tmp_index++;
|
||||
}
|
||||
|
||||
// connect qubits not used for trace
|
||||
for (int_t i = 0; i < num_qubits_; i++) {
|
||||
for (uint_t i = 0; i < num_qubits_; i++) {
|
||||
if (i != qubits[0]) {
|
||||
for (int_t j = 0; j < qubits_sp_[i]->rank(); j++) {
|
||||
if (qubits_sp_[i]->modes()[j] == modes_qubits_sp_[i]) {
|
||||
|
@ -1054,7 +1052,7 @@ double TensorNet<data_t>::norm(const reg_t &qubits,
|
|||
delete contractor;
|
||||
|
||||
// restore connected qubits
|
||||
for (int_t i = 1; i < num_qubits_; i++) {
|
||||
for (uint_t i = 1; i < num_qubits_; i++) {
|
||||
if (i != qubits[0]) {
|
||||
for (int_t j = 0; j < qubits_sp_[i]->rank(); j++) {
|
||||
if (qubits_sp_[i]->modes()[j] == tmp_modes[i]) {
|
||||
|
@ -1085,7 +1083,7 @@ double TensorNet<data_t>::probability(const uint_t outcome) const {
|
|||
template <typename data_t>
|
||||
std::vector<double> TensorNet<data_t>::probabilities() const {
|
||||
reg_t qubits(num_qubits_);
|
||||
for (int_t i = 0; i < num_qubits_; i++)
|
||||
for (uint_t i = 0; i < num_qubits_; i++)
|
||||
qubits[i] = i;
|
||||
return probabilities(qubits);
|
||||
}
|
||||
|
@ -1099,9 +1097,9 @@ TensorNet<data_t>::probabilities(const reg_t &qubits) const {
|
|||
std::vector<int64_t> extents_out(nqubits * 2);
|
||||
std::vector<std::complex<data_t>> trace;
|
||||
// connect qubits not to be measured
|
||||
for (int_t i = 0; i < num_qubits_; i++) {
|
||||
for (uint_t i = 0; i < num_qubits_; i++) {
|
||||
bool check = false;
|
||||
for (int_t j = 0; j < qubits.size(); j++) {
|
||||
for (uint_t j = 0; j < qubits.size(); j++) {
|
||||
if (i == qubits[j]) {
|
||||
check = true;
|
||||
break;
|
||||
|
@ -1122,7 +1120,7 @@ TensorNet<data_t>::probabilities(const reg_t &qubits) const {
|
|||
contractor->set_network(tensors_);
|
||||
|
||||
// output tensor
|
||||
for (int_t i = 0; i < nqubits; i++) {
|
||||
for (uint_t i = 0; i < nqubits; i++) {
|
||||
modes_out[i] = modes_qubits_[qubits[i]];
|
||||
modes_out[i + nqubits] = modes_qubits_sp_[qubits[i]];
|
||||
extents_out[i] = 2;
|
||||
|
@ -1147,9 +1145,9 @@ TensorNet<data_t>::probabilities(const reg_t &qubits) const {
|
|||
delete contractor;
|
||||
|
||||
// recover connected qubits
|
||||
for (int_t i = 0; i < num_qubits_; i++) {
|
||||
for (uint_t i = 0; i < num_qubits_; i++) {
|
||||
bool check = false;
|
||||
for (int_t j = 0; j < qubits.size(); j++) {
|
||||
for (uint_t j = 0; j < qubits.size(); j++) {
|
||||
if (i == qubits[j]) {
|
||||
check = true;
|
||||
break;
|
||||
|
@ -1201,7 +1199,7 @@ void TensorNet<data_t>::sample_measure_branch(std::vector<reg_t> &samples,
|
|||
const reg_t &input_shot_index,
|
||||
const reg_t &input_measured_probs,
|
||||
const uint_t pos_measured) const {
|
||||
const int_t SHOTS = rnds.size();
|
||||
const uint_t SHOTS = rnds.size();
|
||||
|
||||
/*---------------------------------------------------------------------------
|
||||
| cccccccccccc | oooooooooooooo | ************** | xxxxxxxxxxxxxx |
|
||||
|
@ -1233,7 +1231,7 @@ void TensorNet<data_t>::sample_measure_branch(std::vector<reg_t> &samples,
|
|||
// output tensor
|
||||
std::vector<int32_t> modes_out(nqubits * 2);
|
||||
std::vector<int64_t> extents_out(nqubits * 2);
|
||||
for (int_t i = 0; i < nqubits; i++) {
|
||||
for (uint_t i = 0; i < nqubits; i++) {
|
||||
modes_out[i] = modes_qubits_[pos_measured - nqubits + i];
|
||||
modes_out[i + nqubits] = modes_qubits_sp_[pos_measured - nqubits + i];
|
||||
extents_out[i] = 2;
|
||||
|
@ -1245,7 +1243,7 @@ void TensorNet<data_t>::sample_measure_branch(std::vector<reg_t> &samples,
|
|||
|
||||
// connect qubits not to be measured
|
||||
if (pos_measured - nqubits > 0) {
|
||||
for (int_t i = 0; i < pos_measured - nqubits; i++) {
|
||||
for (uint_t i = 0; i < pos_measured - nqubits; i++) {
|
||||
for (int_t j = 0; j < qubits_sp_[i]->rank(); j++) {
|
||||
if (qubits_sp_[i]->modes()[j] == modes_qubits_sp_[i]) {
|
||||
qubits_sp_[i]->modes()[j] = modes_qubits_[i];
|
||||
|
@ -1266,7 +1264,7 @@ void TensorNet<data_t>::sample_measure_branch(std::vector<reg_t> &samples,
|
|||
shots[0] = rnds;
|
||||
shot_index[0] = input_shot_index;
|
||||
} else {
|
||||
for (int_t i = 0; i < SHOTS; i++) {
|
||||
for (uint_t i = 0; i < SHOTS; i++) {
|
||||
shots[input_sample_index[i]].push_back(rnds[i]);
|
||||
shot_index[input_sample_index[i]].push_back(input_shot_index[i]);
|
||||
}
|
||||
|
@ -1276,7 +1274,7 @@ void TensorNet<data_t>::sample_measure_branch(std::vector<reg_t> &samples,
|
|||
std::vector<std::shared_ptr<Tensor<data_t>>> measured_tensors;
|
||||
if (measured_qubits > 0) {
|
||||
measured_tensors.resize(measured_qubits * 2);
|
||||
for (int_t i = 0; i < measured_qubits; i++) {
|
||||
for (uint_t i = 0; i < measured_qubits; i++) {
|
||||
std::vector<std::complex<data_t>> prob(2, 0.0);
|
||||
prob[input_measured_probs[pos_measured + i]] = 1.0;
|
||||
measured_tensors[i * 2] = std::make_shared<Tensor<data_t>>();
|
||||
|
@ -1293,11 +1291,11 @@ void TensorNet<data_t>::sample_measure_branch(std::vector<reg_t> &samples,
|
|||
|
||||
// 1st loop, sampling each branch before traversing branches to reuse tensor
|
||||
// network
|
||||
for (int_t ib = 0; ib < num_branches; ib++) {
|
||||
for (uint_t ib = 0; ib < num_branches; ib++) {
|
||||
if (shots[ib].size() > 0) {
|
||||
if (nqubits_branch > 0) {
|
||||
// tensors for measuredirmed probabilities
|
||||
for (int_t i = 0; i < nqubits_branch; i++) {
|
||||
for (uint_t i = 0; i < nqubits_branch; i++) {
|
||||
std::vector<std::complex<data_t>> prob(2, 0.0);
|
||||
if (((ib >> i) & 1) == 0)
|
||||
prob[0] = 1.0;
|
||||
|
@ -1317,7 +1315,7 @@ void TensorNet<data_t>::sample_measure_branch(std::vector<reg_t> &samples,
|
|||
|
||||
// recover connected qubits
|
||||
if (pos_measured - nqubits > 0) {
|
||||
for (int_t i = 0; i < pos_measured - nqubits; i++) {
|
||||
for (uint_t i = 0; i < pos_measured - nqubits; i++) {
|
||||
for (int_t j = 0; j < qubits_sp_[i]->rank(); j++) {
|
||||
if (qubits_sp_[i]->modes()[j] == modes_qubits_[i]) {
|
||||
qubits_sp_[i]->modes()[j] = modes_qubits_sp_[i];
|
||||
|
@ -1326,16 +1324,16 @@ void TensorNet<data_t>::sample_measure_branch(std::vector<reg_t> &samples,
|
|||
}
|
||||
}
|
||||
}
|
||||
for (int_t i = 0; i < measured_tensors.size(); i++)
|
||||
for (uint_t i = 0; i < measured_tensors.size(); i++)
|
||||
measured_tensors[i].reset();
|
||||
delete contractor;
|
||||
|
||||
// 2nd loop traverse branches
|
||||
if (pos_measured - nqubits > 0) {
|
||||
for (int_t ib = 0; ib < num_branches; ib++) {
|
||||
for (uint_t ib = 0; ib < num_branches; ib++) {
|
||||
if (shots[ib].size() > 0) {
|
||||
reg_t measured_probs = input_measured_probs;
|
||||
for (int_t i = 0; i < nqubits_branch; i++)
|
||||
for (uint_t i = 0; i < nqubits_branch; i++)
|
||||
measured_probs[pos_measured + i] = ((ib >> i) & 1);
|
||||
|
||||
sample_measure_branch(samples, shots[ib], sample_index[ib],
|
||||
|
@ -1345,15 +1343,15 @@ void TensorNet<data_t>::sample_measure_branch(std::vector<reg_t> &samples,
|
|||
}
|
||||
} else {
|
||||
// save samples
|
||||
for (int_t ib = 0; ib < num_branches; ib++) {
|
||||
for (uint_t ib = 0; ib < num_branches; ib++) {
|
||||
if (shots[ib].size() > 0) {
|
||||
reg_t sample = input_measured_probs;
|
||||
for (int_t i = 0; i < nqubits_branch; i++)
|
||||
for (uint_t i = 0; i < nqubits_branch; i++)
|
||||
sample[pos_measured + i] = ((ib >> i) & 1);
|
||||
for (int_t i = 0; i < shots[ib].size(); i++) {
|
||||
for (uint_t i = 0; i < shots[ib].size(); i++) {
|
||||
uint_t shot_id = shot_index[ib][i];
|
||||
samples[shot_id] = sample;
|
||||
for (int_t j = 0; j < nqubits; j++) {
|
||||
for (uint_t j = 0; j < nqubits; j++) {
|
||||
samples[shot_id][j] = ((sample_index[ib][i] >> j) & 1);
|
||||
}
|
||||
}
|
||||
|
@ -1385,7 +1383,7 @@ double TensorNet<data_t>::expval_pauli(const reg_t &qubits,
|
|||
mat_phase[3] = initial_phase;
|
||||
|
||||
// add Pauli ops to qubits
|
||||
for (int_t i = 0; i < size; i++) {
|
||||
for (uint_t i = 0; i < size; i++) {
|
||||
cvector_t<data_t> mat(4, 0.0);
|
||||
|
||||
switch (pauli[size - 1 - i]) {
|
||||
|
@ -1421,7 +1419,7 @@ double TensorNet<data_t>::expval_pauli(const reg_t &qubits,
|
|||
}
|
||||
|
||||
// connect qubits not used for trace
|
||||
for (int_t i = 0; i < num_qubits_; i++) {
|
||||
for (uint_t i = 0; i < num_qubits_; i++) {
|
||||
if (i != qubits[0]) {
|
||||
for (int_t j = 0; j < qubits_sp_[i]->rank(); j++) {
|
||||
if (qubits_sp_[i]->modes()[j] == modes_qubits_sp_[i]) {
|
||||
|
@ -1454,7 +1452,7 @@ double TensorNet<data_t>::expval_pauli(const reg_t &qubits,
|
|||
delete contractor;
|
||||
|
||||
// restore connected qubits
|
||||
for (int_t i = 0; i < num_qubits_; i++) {
|
||||
for (uint_t i = 0; i < num_qubits_; i++) {
|
||||
if (i != qubits[0]) {
|
||||
for (int_t j = 0; j < qubits_sp_[i]->rank(); j++) {
|
||||
if (qubits_sp_[i]->modes()[j] == tmp_modes[i]) {
|
||||
|
@ -1465,7 +1463,7 @@ double TensorNet<data_t>::expval_pauli(const reg_t &qubits,
|
|||
}
|
||||
}
|
||||
|
||||
for (int_t i = 0; i < pauli_tensors.size(); i++) {
|
||||
for (uint_t i = 0; i < pauli_tensors.size(); i++) {
|
||||
pauli_tensors[i].reset();
|
||||
}
|
||||
|
||||
|
|
|
@ -84,6 +84,7 @@ protected:
|
|||
uint_t tensor_size_;
|
||||
uint_t additional_tensor_size_;
|
||||
uint_t out_size_;
|
||||
uint_t work_size_limit_;
|
||||
uint_t work_size_;
|
||||
uint_t sampling_buffer_size_;
|
||||
|
||||
|
@ -484,6 +485,12 @@ uint_t RawTensorData<data_t>::optimize_contraction(void) {
|
|||
cutensornetStatus_t err;
|
||||
cudaSetDevice(device_id_);
|
||||
|
||||
size_t freeMem, totalMem;
|
||||
int nid = omp_get_num_threads();
|
||||
|
||||
HANDLE_CUDA_ERROR(cudaMemGetInfo(&freeMem, &totalMem));
|
||||
work_size_limit_ = (freeMem / nid) * 0.9;
|
||||
|
||||
/*******************************
|
||||
* Find "optimal" contraction order and slicing
|
||||
*******************************/
|
||||
|
@ -510,7 +517,7 @@ uint_t RawTensorData<data_t>::optimize_contraction(void) {
|
|||
cutensornetGetErrorString(err));
|
||||
|
||||
err = cutensornetContractionOptimize(hTensorNet_, tn_desc_, optimizer_config_,
|
||||
work_size_, optimizer_info_);
|
||||
work_size_limit_, optimizer_info_);
|
||||
if (err != CUTENSORNET_STATUS_SUCCESS)
|
||||
assert_error("cutensornetContractionOptimize",
|
||||
cutensornetGetErrorString(err));
|
||||
|
@ -540,27 +547,26 @@ void RawTensorData<data_t>::create_contraction_plan(bool use_autotune) {
|
|||
assert_error("cutensornetCreateWorkspaceDescriptor",
|
||||
cutensornetGetErrorString(err));
|
||||
|
||||
uint64_t requiredWorkspaceSize = 0;
|
||||
err = cutensornetWorkspaceComputeSizes(hTensorNet_, tn_desc_, optimizer_info_,
|
||||
work_desc_);
|
||||
int64_t requiredWorkspaceSize = 0;
|
||||
err = cutensornetWorkspaceComputeContractionSizes(
|
||||
hTensorNet_, tn_desc_, optimizer_info_, work_desc_);
|
||||
if (err != CUTENSORNET_STATUS_SUCCESS)
|
||||
assert_error("cutensornetWorkspaceComputeSizes",
|
||||
cutensornetGetErrorString(err));
|
||||
|
||||
err = cutensornetWorkspaceGetSize(
|
||||
err = cutensornetWorkspaceGetMemorySize(
|
||||
hTensorNet_, work_desc_, CUTENSORNET_WORKSIZE_PREF_MIN,
|
||||
CUTENSORNET_MEMSPACE_DEVICE, &requiredWorkspaceSize);
|
||||
CUTENSORNET_MEMSPACE_DEVICE, CUTENSORNET_WORKSPACE_SCRATCH,
|
||||
&requiredWorkspaceSize);
|
||||
if (err != CUTENSORNET_STATUS_SUCCESS)
|
||||
assert_error("cutensornetWorkspaceGetSize", cutensornetGetErrorString(err));
|
||||
|
||||
if (work_size_ < requiredWorkspaceSize) {
|
||||
throw std::runtime_error("ERROR : TensorNet::contractor required memory "
|
||||
"size for workspace is not enough");
|
||||
}
|
||||
allocate_work(requiredWorkspaceSize);
|
||||
|
||||
err = cutensornetWorkspaceSet(
|
||||
err = cutensornetWorkspaceSetMemory(
|
||||
hTensorNet_, work_desc_, CUTENSORNET_MEMSPACE_DEVICE,
|
||||
thrust::raw_pointer_cast(dev_work_.data()), work_size_);
|
||||
CUTENSORNET_WORKSPACE_SCRATCH, thrust::raw_pointer_cast(dev_work_.data()),
|
||||
work_size_);
|
||||
if (err != CUTENSORNET_STATUS_SUCCESS)
|
||||
assert_error("cutensornetWorkspaceSet", cutensornetGetErrorString(err));
|
||||
|
||||
|
@ -967,8 +973,6 @@ void TensorNetContractor_cuTensorNet<data_t>::allocate_additional_tensors(
|
|||
template <typename data_t>
|
||||
void TensorNetContractor_cuTensorNet<data_t>::set_additional_tensors(
|
||||
const std::vector<std::shared_ptr<Tensor<data_t>>> &tensors) {
|
||||
uint_t size = 0;
|
||||
|
||||
remove_additional_tensors();
|
||||
|
||||
num_additional_tensors_ = tensors.size();
|
||||
|
@ -1021,10 +1025,6 @@ void TensorNetContractor_cuTensorNet<data_t>::set_output(
|
|||
template <typename data_t>
|
||||
void TensorNetContractor_cuTensorNet<data_t>::setup_contraction(
|
||||
bool use_autotune) {
|
||||
int nid = omp_get_num_threads();
|
||||
cutensornetStatus_t err;
|
||||
size_t freeMem, totalMem;
|
||||
uint_t work_size;
|
||||
|
||||
// for MPI distribution
|
||||
#ifdef AER_MPI
|
||||
|
@ -1032,14 +1032,6 @@ void TensorNetContractor_cuTensorNet<data_t>::setup_contraction(
|
|||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank_);
|
||||
#endif
|
||||
|
||||
// allocate work buffer on GPU
|
||||
if (!tensor_data_[0].work_allocated()) {
|
||||
cudaSetDevice(target_gpus_[0]);
|
||||
HANDLE_CUDA_ERROR(cudaMemGetInfo(&freeMem, &totalMem));
|
||||
work_size = (freeMem / nid) * 0.9;
|
||||
tensor_data_[0].allocate_work(work_size);
|
||||
}
|
||||
|
||||
num_devices_used_ = 1;
|
||||
|
||||
// setup first device
|
||||
|
@ -1060,12 +1052,6 @@ void TensorNetContractor_cuTensorNet<data_t>::setup_contraction(
|
|||
|
||||
if (ns > 0) {
|
||||
// setup for the device
|
||||
if (!tensor_data_[i].work_allocated()) {
|
||||
cudaSetDevice(target_gpus_[i]);
|
||||
HANDLE_CUDA_ERROR(cudaMemGetInfo(&freeMem, &totalMem));
|
||||
work_size = (freeMem / nid) * 0.9;
|
||||
tensor_data_[i].allocate_work(work_size);
|
||||
}
|
||||
tensor_data_[i].copy_tensors_from_device(
|
||||
tensor_data_[0]); // copy data from the first device
|
||||
tensor_data_[i].create_contraction_descriptor(
|
||||
|
|
|
@ -37,6 +37,7 @@ using ResultItr = std::vector<ExperimentResult>::iterator;
|
|||
template <class state_t>
|
||||
class Executor : public CircuitExecutor::MultiStateExecutor<state_t> {
|
||||
using Base = CircuitExecutor::MultiStateExecutor<state_t>;
|
||||
using Base::sample_measure;
|
||||
|
||||
protected:
|
||||
public:
|
||||
|
@ -148,7 +149,7 @@ Executor<state_t>::sample_measure_with_prob(CircuitExecutor::Branch &root,
|
|||
uint_t nshots = root.num_shots();
|
||||
reg_t shot_branch(nshots);
|
||||
|
||||
for (int_t i = 0; i < nshots; i++) {
|
||||
for (uint_t i = 0; i < nshots; i++) {
|
||||
shot_branch[i] = root.rng_shots()[i].rand_int(probs);
|
||||
}
|
||||
|
||||
|
@ -182,11 +183,11 @@ void Executor<state_t>::measure_reset_update(CircuitExecutor::Branch &root,
|
|||
root.branches()[i]->add_op_after_branch(op);
|
||||
|
||||
if (final_state >= 0 && final_state != i) {
|
||||
Operations::Op op;
|
||||
op.type = OpType::gate;
|
||||
op.name = "mcx";
|
||||
op.qubits = qubits;
|
||||
root.branches()[i]->add_op_after_branch(op);
|
||||
Operations::Op op2;
|
||||
op2.type = OpType::gate;
|
||||
op2.name = "mcx";
|
||||
op2.qubits = qubits;
|
||||
root.branches()[i]->add_op_after_branch(op2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -194,7 +195,7 @@ void Executor<state_t>::measure_reset_update(CircuitExecutor::Branch &root,
|
|||
else {
|
||||
// Diagonal matrix for projecting and renormalizing to measurement outcome
|
||||
const size_t dim = 1ULL << qubits.size();
|
||||
for (int_t i = 0; i < dim; i++) {
|
||||
for (uint_t i = 0; i < dim; i++) {
|
||||
cvector_t<double> mdiag(dim, 0.);
|
||||
mdiag[i] = 1. / std::sqrt(meas_probs[i]);
|
||||
|
||||
|
@ -204,20 +205,20 @@ void Executor<state_t>::measure_reset_update(CircuitExecutor::Branch &root,
|
|||
op.params = mdiag;
|
||||
root.branches()[i]->add_op_after_branch(op);
|
||||
|
||||
if (final_state >= 0 && final_state != i) {
|
||||
if (final_state >= 0 && final_state != (int_t)i) {
|
||||
// build vectorized permutation matrix
|
||||
cvector_t<double> perm(dim * dim, 0.);
|
||||
perm[final_state * dim + i] = 1.;
|
||||
perm[i * dim + final_state] = 1.;
|
||||
for (size_t j = 0; j < dim; j++) {
|
||||
if (j != final_state && j != i)
|
||||
if (j != (size_t)final_state && j != i)
|
||||
perm[j * dim + j] = 1.;
|
||||
}
|
||||
Operations::Op op;
|
||||
op.type = OpType::matrix;
|
||||
op.qubits = qubits;
|
||||
op.mats.push_back(Utils::devectorize_matrix(perm));
|
||||
root.branches()[i]->add_op_after_branch(op);
|
||||
Operations::Op op2;
|
||||
op2.type = OpType::matrix;
|
||||
op2.qubits = qubits;
|
||||
op2.mats.push_back(Utils::devectorize_matrix(perm));
|
||||
root.branches()[i]->add_op_after_branch(op2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -230,7 +231,7 @@ void Executor<state_t>::apply_measure(CircuitExecutor::Branch &root,
|
|||
rvector_t probs = sample_measure_with_prob(root, qubits);
|
||||
|
||||
// save result to cregs
|
||||
for (int_t i = 0; i < probs.size(); i++) {
|
||||
for (uint_t i = 0; i < probs.size(); i++) {
|
||||
const reg_t outcome = Utils::int2reg(i, 2, qubits.size());
|
||||
root.branches()[i]->creg().store_measure(outcome, cmemory, cregister);
|
||||
}
|
||||
|
@ -259,9 +260,9 @@ void Executor<state_t>::apply_initialize(CircuitExecutor::Branch &root,
|
|||
auto apply_global_phase = [&tmp, params_in, global_phase](int_t i) {
|
||||
tmp[i] = params_in[i] * global_phase;
|
||||
};
|
||||
Utils::apply_omp_parallel_for((qubits.size() > Base::omp_qubit_threshold_),
|
||||
0, params_in.size(), apply_global_phase,
|
||||
Base::parallel_state_update_);
|
||||
Utils::apply_omp_parallel_for(
|
||||
(qubits.size() > (uint_t)Base::omp_qubit_threshold_), 0,
|
||||
params_in.size(), apply_global_phase, Base::parallel_state_update_);
|
||||
}
|
||||
const cvector_t<double> ¶ms = tmp.empty() ? params_in : tmp;
|
||||
if (qubits.size() == Base::num_qubits_) {
|
||||
|
@ -283,7 +284,7 @@ void Executor<state_t>::apply_initialize(CircuitExecutor::Branch &root,
|
|||
op.name = "initialize";
|
||||
op.qubits = qubits;
|
||||
op.params = params;
|
||||
for (int_t i = 0; i < root.num_branches(); i++) {
|
||||
for (uint_t i = 0; i < root.num_branches(); i++) {
|
||||
root.branches()[i]->add_op_after_branch(op);
|
||||
}
|
||||
return; // initialization will be done in next call because of shot
|
||||
|
@ -307,10 +308,8 @@ void Executor<state_t>::apply_kraus(CircuitExecutor::Branch &root,
|
|||
// So we only compute probabilities for the first N-1 kraus operators
|
||||
// and infer the probability of the last one from 1 - sum of the previous
|
||||
|
||||
double r;
|
||||
double accum = 0.;
|
||||
double p;
|
||||
bool complete = false;
|
||||
|
||||
reg_t shot_branch;
|
||||
uint_t nshots;
|
||||
|
@ -320,7 +319,7 @@ void Executor<state_t>::apply_kraus(CircuitExecutor::Branch &root,
|
|||
nshots = root.num_shots();
|
||||
shot_branch.resize(nshots);
|
||||
rshots.resize(nshots);
|
||||
for (int_t i = 0; i < nshots; i++) {
|
||||
for (uint_t i = 0; i < nshots; i++) {
|
||||
shot_branch[i] = kmats.size() - 1;
|
||||
rshots[i] = root.rng_shots()[i].rand(0., 1.);
|
||||
}
|
||||
|
@ -336,7 +335,7 @@ void Executor<state_t>::apply_kraus(CircuitExecutor::Branch &root,
|
|||
|
||||
// check if we need to apply this operator
|
||||
pmats[j] = p;
|
||||
for (int_t i = 0; i < nshots; i++) {
|
||||
for (uint_t i = 0; i < nshots; i++) {
|
||||
if (shot_branch[i] >= kmats.size() - 1) {
|
||||
if (accum > rshots[i]) {
|
||||
shot_branch[i] = j;
|
||||
|
@ -345,7 +344,6 @@ void Executor<state_t>::apply_kraus(CircuitExecutor::Branch &root,
|
|||
}
|
||||
}
|
||||
if (nshots_multiplied >= nshots) {
|
||||
complete = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -355,13 +353,13 @@ void Executor<state_t>::apply_kraus(CircuitExecutor::Branch &root,
|
|||
|
||||
root.creg() = Base::states_[root.state_index()].creg();
|
||||
root.branch_shots(shot_branch, kmats.size());
|
||||
for (int_t i = 0; i < kmats.size(); i++) {
|
||||
for (uint_t i = 0; i < kmats.size(); i++) {
|
||||
Operations::Op op;
|
||||
op.type = OpType::matrix;
|
||||
op.qubits = qubits;
|
||||
op.mats.push_back(kmats[i]);
|
||||
p = 1 / std::sqrt(pmats[i]);
|
||||
for (int_t j = 0; j < op.mats[0].size(); j++)
|
||||
for (uint_t j = 0; j < op.mats[0].size(); j++)
|
||||
op.mats[0][j] *= p;
|
||||
root.branches()[i]->add_op_after_branch(op);
|
||||
}
|
||||
|
@ -385,7 +383,7 @@ void Executor<state_t>::apply_save_density_matrix(CircuitExecutor::Branch &root,
|
|||
}
|
||||
|
||||
std::vector<bool> copied(Base::num_bind_params_, false);
|
||||
for (int_t i = 0; i < root.num_shots(); i++) {
|
||||
for (uint_t i = 0; i < root.num_shots(); i++) {
|
||||
uint_t ip = root.param_index(i);
|
||||
if (!copied[ip]) {
|
||||
(result + ip)
|
||||
|
@ -408,7 +406,7 @@ void Executor<state_t>::apply_save_probs(CircuitExecutor::Branch &root,
|
|||
std::vector<bool> copied(Base::num_bind_params_, false);
|
||||
if (op.type == Operations::OpType::save_probs_ket) {
|
||||
// Convert to ket dict
|
||||
for (int_t i = 0; i < root.num_shots(); i++) {
|
||||
for (uint_t i = 0; i < root.num_shots(); i++) {
|
||||
uint_t ip = root.param_index(i);
|
||||
if (!copied[ip]) {
|
||||
(result + ip)
|
||||
|
@ -420,7 +418,7 @@ void Executor<state_t>::apply_save_probs(CircuitExecutor::Branch &root,
|
|||
}
|
||||
}
|
||||
} else {
|
||||
for (int_t i = 0; i < root.num_shots(); i++) {
|
||||
for (uint_t i = 0; i < root.num_shots(); i++) {
|
||||
uint_t ip = root.param_index(i);
|
||||
if (!copied[ip]) {
|
||||
(result + ip)
|
||||
|
@ -447,7 +445,7 @@ void Executor<state_t>::apply_save_statevector(CircuitExecutor::Branch &root,
|
|||
|
||||
if (last_op) {
|
||||
const auto v = Base::states_[root.state_index()].move_to_vector();
|
||||
for (int_t i = 0; i < root.num_shots(); i++) {
|
||||
for (uint_t i = 0; i < root.num_shots(); i++) {
|
||||
uint_t ip = root.param_index(i);
|
||||
(result + ip)
|
||||
->save_data_pershot(Base::states_[root.state_index()].creg(), key, v,
|
||||
|
@ -455,7 +453,7 @@ void Executor<state_t>::apply_save_statevector(CircuitExecutor::Branch &root,
|
|||
}
|
||||
} else {
|
||||
const auto v = Base::states_[root.state_index()].copy_to_vector();
|
||||
for (int_t i = 0; i < root.num_shots(); i++) {
|
||||
for (uint_t i = 0; i < root.num_shots(); i++) {
|
||||
uint_t ip = root.param_index(i);
|
||||
(result + ip)
|
||||
->save_data_pershot(Base::states_[root.state_index()].creg(), key, v,
|
||||
|
@ -478,7 +476,7 @@ void Executor<state_t>::apply_save_statevector_dict(
|
|||
for (auto const &it : state_ket) {
|
||||
result_state_ket[it.first] = it.second;
|
||||
}
|
||||
for (int_t i = 0; i < root.num_shots(); i++) {
|
||||
for (uint_t i = 0; i < root.num_shots(); i++) {
|
||||
uint_t ip = root.param_index(i);
|
||||
(result + ip)
|
||||
->save_data_pershot(
|
||||
|
@ -496,14 +494,14 @@ void Executor<state_t>::apply_save_amplitudes(CircuitExecutor::Branch &root,
|
|||
throw std::invalid_argument(
|
||||
"Invalid save_amplitudes instructions (empty params).");
|
||||
}
|
||||
const int_t size = op.int_params.size();
|
||||
const uint_t size = op.int_params.size();
|
||||
if (op.type == Operations::OpType::save_amps) {
|
||||
Vector<complex_t> amps(size, false);
|
||||
for (int_t i = 0; i < size; ++i) {
|
||||
for (uint_t i = 0; i < size; ++i) {
|
||||
amps[i] =
|
||||
Base::states_[root.state_index()].qreg().get_state(op.int_params[i]);
|
||||
}
|
||||
for (int_t i = 0; i < root.num_shots(); i++) {
|
||||
for (uint_t i = 0; i < root.num_shots(); i++) {
|
||||
uint_t ip = root.param_index(i);
|
||||
(result + ip)
|
||||
->save_data_pershot(
|
||||
|
@ -512,12 +510,12 @@ void Executor<state_t>::apply_save_amplitudes(CircuitExecutor::Branch &root,
|
|||
}
|
||||
} else {
|
||||
rvector_t amps_sq(size, 0);
|
||||
for (int_t i = 0; i < size; ++i) {
|
||||
for (uint_t i = 0; i < size; ++i) {
|
||||
amps_sq[i] = Base::states_[root.state_index()].qreg().probability(
|
||||
op.int_params[i]);
|
||||
}
|
||||
std::vector<bool> copied(Base::num_bind_params_, false);
|
||||
for (int_t i = 0; i < root.num_shots(); i++) {
|
||||
for (uint_t i = 0; i < root.num_shots(); i++) {
|
||||
uint_t ip = root.param_index(i);
|
||||
if (!copied[ip]) {
|
||||
(result + ip)
|
||||
|
@ -539,23 +537,23 @@ Executor<state_t>::sample_measure(state_t &state, const reg_t &qubits,
|
|||
std::vector<double> rnds;
|
||||
rnds.reserve(shots);
|
||||
|
||||
for (i = 0; i < shots; ++i)
|
||||
for (i = 0; i < (int_t)shots; ++i)
|
||||
rnds.push_back(rng[i].rand(0, 1));
|
||||
|
||||
std::vector<reg_t> samples = state.qreg().sample_measure(rnds);
|
||||
std::vector<reg_t> ret(shots);
|
||||
|
||||
if (omp_get_num_threads() > 1) {
|
||||
for (i = 0; i < shots; ++i) {
|
||||
for (i = 0; i < (int_t)shots; ++i) {
|
||||
ret[i].resize(qubits.size());
|
||||
for (j = 0; j < qubits.size(); j++)
|
||||
for (j = 0; j < (int_t)qubits.size(); j++)
|
||||
ret[i][j] = samples[i][qubits[j]];
|
||||
}
|
||||
} else {
|
||||
#pragma omp parallel for private(j)
|
||||
for (i = 0; i < shots; ++i) {
|
||||
for (i = 0; i < (int_t)shots; ++i) {
|
||||
ret[i].resize(qubits.size());
|
||||
for (j = 0; j < qubits.size(); j++)
|
||||
for (j = 0; j < (int_t)qubits.size(); j++)
|
||||
ret[i][j] = samples[i][qubits[j]];
|
||||
}
|
||||
}
|
||||
|
|
|
@ -899,27 +899,26 @@ template <class tensor_net_t>
|
|||
std::vector<reg_t> State<tensor_net_t>::sample_measure(const reg_t &qubits,
|
||||
uint_t shots,
|
||||
RngEngine &rng) {
|
||||
int_t i, j;
|
||||
// Generate flat register for storing
|
||||
std::vector<double> rnds(shots);
|
||||
|
||||
for (i = 0; i < shots; ++i)
|
||||
for (uint_t i = 0; i < shots; ++i)
|
||||
rnds[i] = rng.rand(0, 1);
|
||||
|
||||
std::vector<reg_t> samples = BaseState::qreg_.sample_measure(rnds);
|
||||
std::vector<reg_t> ret(shots);
|
||||
|
||||
if (omp_get_num_threads() > 1) {
|
||||
for (i = 0; i < shots; ++i) {
|
||||
for (uint_t i = 0; i < shots; ++i) {
|
||||
ret[i].resize(qubits.size());
|
||||
for (j = 0; j < qubits.size(); j++)
|
||||
for (uint_t j = 0; j < qubits.size(); j++)
|
||||
ret[i][j] = samples[i][qubits[j]];
|
||||
}
|
||||
} else {
|
||||
#pragma omp parallel for private(j)
|
||||
for (i = 0; i < shots; ++i) {
|
||||
#pragma omp parallel for
|
||||
for (int_t i = 0; i < (int_t)shots; ++i) {
|
||||
ret[i].resize(qubits.size());
|
||||
for (j = 0; j < qubits.size(); j++)
|
||||
for (uint_t j = 0; j < qubits.size(); j++)
|
||||
ret[i][j] = samples[i][qubits[j]];
|
||||
}
|
||||
}
|
||||
|
@ -963,7 +962,7 @@ void State<tensor_net_t>::initialize_from_vector(
|
|||
BaseState::qreg_.initialize();
|
||||
|
||||
reg_t qubits(BaseState::qreg_.num_qubits());
|
||||
for (int_t i = 0; i < BaseState::qreg_.num_qubits(); i++)
|
||||
for (uint_t i = 0; i < BaseState::qreg_.num_qubits(); i++)
|
||||
qubits[i] = i;
|
||||
BaseState::qreg_.initialize_component(qubits, params);
|
||||
}
|
||||
|
|
|
@ -84,14 +84,14 @@ void Executor<state_t>::set_config(const Config &config) {
|
|||
|
||||
template <class state_t>
|
||||
void Executor<state_t>::initialize_qreg(uint_t num_qubits) {
|
||||
int_t iChunk;
|
||||
uint_t iChunk;
|
||||
for (iChunk = 0; iChunk < Base::states_.size(); iChunk++) {
|
||||
Base::states_[iChunk].qreg().set_num_qubits(Base::chunk_bits_);
|
||||
}
|
||||
|
||||
if (Base::chunk_omp_parallel_ && Base::num_groups_ > 1) {
|
||||
#pragma omp parallel for private(iChunk)
|
||||
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
|
||||
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
|
||||
for (iChunk = Base::top_state_of_group_[ig];
|
||||
iChunk < Base::top_state_of_group_[ig + 1]; iChunk++) {
|
||||
uint_t irow, icol;
|
||||
|
|
|
@ -369,7 +369,6 @@ void State<unitary_matrix_t>::initialize_qreg(uint_t num_qubits,
|
|||
|
||||
template <class unitary_matrix_t>
|
||||
void State<unitary_matrix_t>::initialize_omp() {
|
||||
uint_t i;
|
||||
BaseState::qreg_.set_omp_threshold(omp_qubit_threshold_);
|
||||
if (BaseState::threads_ > 0)
|
||||
BaseState::qreg_.set_omp_threads(
|
||||
|
@ -414,7 +413,7 @@ void State<unitary_matrix_t>::apply_gate(const Operations::Op &op) {
|
|||
}
|
||||
if (qubits_out.size() > 0) {
|
||||
uint_t mask = 0;
|
||||
for (int i = 0; i < qubits_out.size(); i++) {
|
||||
for (uint_t i = 0; i < qubits_out.size(); i++) {
|
||||
mask |= (1ull << (qubits_out[i] - BaseState::qreg_.num_qubits()));
|
||||
}
|
||||
if ((BaseState::qreg_.chunk_index() & mask) == mask) {
|
||||
|
|
|
@ -237,7 +237,6 @@ void UnitaryMatrix<data_t>::initialize() {
|
|||
// Zero the underlying vector
|
||||
BaseVector::zero();
|
||||
// Set to be identity matrix
|
||||
const int_t nrows = rows_; // end for k loop
|
||||
auto initialize_proc = [this](int_t i) {
|
||||
BaseVector::data_[i * (rows_ + 1)] = 1.0;
|
||||
};
|
||||
|
@ -261,7 +260,7 @@ void UnitaryMatrix<data_t>::initialize_from_matrix(
|
|||
").");
|
||||
}
|
||||
auto initialize_proc = [this, &mat](int_t row) {
|
||||
for (int_t col = 0; col < rows_; ++col) {
|
||||
for (uint_t col = 0; col < rows_; ++col) {
|
||||
BaseVector::data_[row + rows_ * col] = mat(row, col);
|
||||
}
|
||||
};
|
||||
|
|
|
@ -212,13 +212,11 @@ UnitaryMatrixThrust<data_t>::copy_to_matrix() const {
|
|||
|
||||
cvector_t<data_t> qreg = BaseVector::vector();
|
||||
|
||||
int_t i;
|
||||
uint_t irow, icol;
|
||||
#pragma omp parallel for private( \
|
||||
i, irow, icol) if (BaseVector::num_qubits_ > BaseVector::omp_threshold_ && \
|
||||
#pragma omp parallel for if (BaseVector::num_qubits_ > \
|
||||
BaseVector::omp_threshold_ && \
|
||||
BaseVector::omp_threads_ > 1) \
|
||||
num_threads(BaseVector::omp_threads_)
|
||||
for (i = 0; i < csize; i++) {
|
||||
for (int_t i = 0; i < (int_t)csize; i++) {
|
||||
ret[i] = qreg[i];
|
||||
}
|
||||
return ret;
|
||||
|
|
|
@ -100,7 +100,7 @@ void BatchConverter::optimize_circuit(Circuit &circ, Noise::NoiseModel &noise,
|
|||
const opset_t &allowed_opset,
|
||||
ExperimentResult &result) const {
|
||||
// convert operations for batch shots execution
|
||||
for (int_t i = 0; i < circ.ops.size(); i++) {
|
||||
for (uint_t i = 0; i < circ.ops.size(); i++) {
|
||||
if (circ.ops[i].has_bind_params) {
|
||||
if (circ.ops[i].type == Operations::OpType::gate) {
|
||||
gate_to_matrix(circ.ops[i], circ.num_bind_params);
|
||||
|
@ -108,8 +108,8 @@ void BatchConverter::optimize_circuit(Circuit &circ, Noise::NoiseModel &noise,
|
|||
// convert matrix to cvector_t in params
|
||||
uint_t matrix_size = circ.ops[i].mats[0].size();
|
||||
circ.ops[i].params.resize(matrix_size * circ.num_bind_params);
|
||||
for (int_t j = 0; j < circ.num_bind_params; j++) {
|
||||
for (int_t k = 0; k < matrix_size; k++)
|
||||
for (uint_t j = 0; j < circ.num_bind_params; j++) {
|
||||
for (uint_t k = 0; k < matrix_size; k++)
|
||||
circ.ops[i].params[j * matrix_size + k] = circ.ops[i].mats[j][k];
|
||||
}
|
||||
circ.ops[i].mats.clear();
|
||||
|
@ -120,7 +120,7 @@ void BatchConverter::optimize_circuit(Circuit &circ, Noise::NoiseModel &noise,
|
|||
// convert global phase to diagonal matrix
|
||||
if (circ.global_phase_for_params.size() == circ.num_bind_params) {
|
||||
bool has_global_phase = false;
|
||||
for (int_t j = 0; j < circ.num_bind_params; j++) {
|
||||
for (uint_t j = 0; j < circ.num_bind_params; j++) {
|
||||
if (!Linalg::almost_equal(circ.global_phase_for_params[j], 0.0)) {
|
||||
has_global_phase = true;
|
||||
break;
|
||||
|
@ -132,7 +132,7 @@ void BatchConverter::optimize_circuit(Circuit &circ, Noise::NoiseModel &noise,
|
|||
phase_op.type = Operations::OpType::diagonal_matrix;
|
||||
phase_op.has_bind_params = true;
|
||||
phase_op.params.resize(2 * circ.num_bind_params);
|
||||
for (int_t j = 0; j < circ.num_bind_params; j++) {
|
||||
for (uint_t j = 0; j < circ.num_bind_params; j++) {
|
||||
auto t = std::exp(complex_t(0.0, circ.global_phase_for_params[j]));
|
||||
phase_op.params[j * 2] = t;
|
||||
phase_op.params[j * 2 + 1] = t;
|
||||
|
@ -173,64 +173,64 @@ void BatchConverter::gate_to_matrix(Operations::Op &op,
|
|||
|
||||
auto store_matrix = [&matrix_array, matrix_size](int_t iparam,
|
||||
cvector_t mat) {
|
||||
for (int_t j = 0; j < matrix_size; j++)
|
||||
for (uint_t j = 0; j < matrix_size; j++)
|
||||
matrix_array[iparam * matrix_size + j] = mat[j];
|
||||
};
|
||||
|
||||
switch (it->second) {
|
||||
case ParamGates::mcr:
|
||||
for (int_t i = 0; i < num_params; i++)
|
||||
for (uint_t i = 0; i < num_params; i++)
|
||||
store_matrix(i,
|
||||
Linalg::VMatrix::r(op.params[i * 2], op.params[i * 2 + 1]));
|
||||
break;
|
||||
case ParamGates::mcrx:
|
||||
for (int_t i = 0; i < num_params; i++)
|
||||
for (uint_t i = 0; i < num_params; i++)
|
||||
store_matrix(i, Linalg::VMatrix::rx(std::real(op.params[i])));
|
||||
break;
|
||||
case ParamGates::mcry:
|
||||
for (int_t i = 0; i < num_params; i++)
|
||||
for (uint_t i = 0; i < num_params; i++)
|
||||
store_matrix(i, Linalg::VMatrix::ry(std::real(op.params[i])));
|
||||
break;
|
||||
case ParamGates::mcrz:
|
||||
for (int_t i = 0; i < num_params; i++)
|
||||
for (uint_t i = 0; i < num_params; i++)
|
||||
store_matrix(i, Linalg::VMatrix::rz_diag(std::real(op.params[i])));
|
||||
break;
|
||||
case ParamGates::rxx:
|
||||
for (int_t i = 0; i < num_params; i++)
|
||||
for (uint_t i = 0; i < num_params; i++)
|
||||
store_matrix(i, Linalg::VMatrix::rxx(std::real(op.params[i])));
|
||||
break;
|
||||
case ParamGates::ryy:
|
||||
for (int_t i = 0; i < num_params; i++)
|
||||
for (uint_t i = 0; i < num_params; i++)
|
||||
store_matrix(i, Linalg::VMatrix::ryy(std::real(op.params[i])));
|
||||
break;
|
||||
case ParamGates::rzz:
|
||||
for (int_t i = 0; i < num_params; i++)
|
||||
for (uint_t i = 0; i < num_params; i++)
|
||||
store_matrix(i, Linalg::VMatrix::rzz_diag(std::real(op.params[i])));
|
||||
break;
|
||||
case ParamGates::rzx:
|
||||
for (int_t i = 0; i < num_params; i++)
|
||||
for (uint_t i = 0; i < num_params; i++)
|
||||
store_matrix(i, Linalg::VMatrix::rzx(std::real(op.params[i])));
|
||||
break;
|
||||
case ParamGates::mcu3:
|
||||
for (int_t i = 0; i < num_params; i++)
|
||||
for (uint_t i = 0; i < num_params; i++)
|
||||
store_matrix(i, Linalg::VMatrix::u3(std::real(op.params[i * 3]),
|
||||
std::real(op.params[i * 3 + 1]),
|
||||
std::real(op.params[i * 3 + 2])));
|
||||
break;
|
||||
case ParamGates::mcu:
|
||||
for (int_t i = 0; i < num_params; i++)
|
||||
for (uint_t i = 0; i < num_params; i++)
|
||||
store_matrix(i, Linalg::VMatrix::u4(std::real(op.params[i * 4]),
|
||||
std::real(op.params[i * 4 + 1]),
|
||||
std::real(op.params[i * 4 + 2]),
|
||||
std::real(op.params[i * 4 + 3])));
|
||||
break;
|
||||
case ParamGates::mcu2:
|
||||
for (int_t i = 0; i < num_params; i++)
|
||||
for (uint_t i = 0; i < num_params; i++)
|
||||
store_matrix(i, Linalg::VMatrix::u2(std::real(op.params[i * 2]),
|
||||
std::real(op.params[i * 2 + 1])));
|
||||
break;
|
||||
case ParamGates::mcp:
|
||||
for (int_t i = 0; i < num_params; i++)
|
||||
for (uint_t i = 0; i < num_params; i++)
|
||||
store_matrix(i, Linalg::VMatrix::phase_diag(std::real(op.params[i])));
|
||||
break;
|
||||
default:
|
||||
|
|
|
@ -68,16 +68,16 @@ public:
|
|||
void set_num_processes(int np) { num_processes_ = np; }
|
||||
|
||||
protected:
|
||||
mutable int block_bits_; // qubits less than this will be blocked
|
||||
mutable int qubits_;
|
||||
mutable uint_t block_bits_; // qubits less than this will be blocked
|
||||
mutable uint_t qubits_;
|
||||
mutable reg_t qubitMap_;
|
||||
mutable reg_t qubitSwapped_;
|
||||
mutable bool blocking_enabled_;
|
||||
mutable bool sample_measure_ = false;
|
||||
mutable bool restore_qubit_map_ = false;
|
||||
int memory_blocking_bits_ = 0;
|
||||
uint_t memory_blocking_bits_ = 0;
|
||||
bool density_matrix_ = false;
|
||||
int num_processes_ = 1;
|
||||
uint_t num_processes_ = 1;
|
||||
|
||||
bool block_circuit(Circuit &circ, bool doSwap) const;
|
||||
|
||||
|
@ -150,7 +150,6 @@ void CacheBlocking::set_blocking(int bits, size_t min_memory, uint_t n_place,
|
|||
size_t complex_size, bool is_matrix) {
|
||||
int chunk_bits = bits;
|
||||
uint_t scale = is_matrix ? 2 : 1;
|
||||
size_t size;
|
||||
|
||||
// get largest possible chunk bits
|
||||
while ((complex_size << (scale * chunk_bits)) > min_memory) {
|
||||
|
@ -215,7 +214,7 @@ void CacheBlocking::optimize_circuit(Circuit &circ, Noise::NoiseModel &noise,
|
|||
|
||||
// loop over operations to find max number of parameters for cross-qubits
|
||||
// operations
|
||||
int_t max_params = 1;
|
||||
uint_t max_params = 1;
|
||||
for (uint_t i = 0; i < circ.ops.size(); i++) {
|
||||
if (is_blockable_operation(circ.ops[i]) &&
|
||||
is_cross_qubits_op(circ.ops[i])) {
|
||||
|
@ -302,7 +301,7 @@ void CacheBlocking::define_blocked_qubits(std::vector<Operations::Op> &ops,
|
|||
reg_t &blockedQubits,
|
||||
bool crossQubitOnly) const {
|
||||
uint_t i, j, iq;
|
||||
int nq, nb;
|
||||
uint_t nq;
|
||||
bool exist;
|
||||
for (i = 0; i < ops.size(); i++) {
|
||||
if (blockedQubits.size() >= block_bits_)
|
||||
|
@ -384,7 +383,7 @@ bool CacheBlocking::can_reorder(
|
|||
}
|
||||
|
||||
bool CacheBlocking::block_circuit(Circuit &circ, bool doSwap) const {
|
||||
uint_t i, n;
|
||||
uint_t n;
|
||||
std::vector<Operations::Op> out;
|
||||
std::vector<Operations::Op> queue;
|
||||
std::vector<Operations::Op> queue_next;
|
||||
|
@ -523,11 +522,8 @@ uint_t CacheBlocking::add_ops(std::vector<Operations::Op> &ops,
|
|||
std::vector<Operations::Op> &queue, bool doSwap,
|
||||
bool first, bool crossQubitOnly) const {
|
||||
uint_t i, j, iq;
|
||||
|
||||
int nqubitUsed = 0;
|
||||
reg_t blockedQubits;
|
||||
int nq;
|
||||
bool exist;
|
||||
uint_t nq;
|
||||
uint_t pos_begin, num_gates_added;
|
||||
bool end_block_inserted;
|
||||
|
||||
|
@ -807,7 +803,7 @@ bool CacheBlocking::split_pauli(const Operations::Op &op,
|
|||
reg_t qubits_out_chunk;
|
||||
std::string pauli_in_chunk;
|
||||
std::string pauli_out_chunk;
|
||||
int_t i, j, n;
|
||||
uint_t i, j, n;
|
||||
bool inside;
|
||||
|
||||
// get inner/outer chunk pauli string
|
||||
|
@ -857,7 +853,7 @@ bool CacheBlocking::split_op(const Operations::Op &op,
|
|||
std::vector<Operations::Op> &queue) const {
|
||||
reg_t qubits_in_chunk;
|
||||
reg_t qubits_out_chunk;
|
||||
int_t i, j, n;
|
||||
uint_t i, j, n;
|
||||
bool inside;
|
||||
|
||||
n = op.qubits.size();
|
||||
|
|
|
@ -67,7 +67,7 @@ public:
|
|||
}
|
||||
} else {
|
||||
// loop for runtime parameter binding
|
||||
for (int_t p = 0; p < num_params_; p++) {
|
||||
for (uint_t p = 0; p < num_params_; p++) {
|
||||
std::vector<op_t> ops;
|
||||
ops.reserve(fusioned_ops.size());
|
||||
for (auto &op : fusioned_ops) {
|
||||
|
@ -449,18 +449,18 @@ bool NQubitFusion<N>::aggregate_operations(oplist_t &ops,
|
|||
std::vector<std::pair<uint_t, std::vector<op_t>>> targets;
|
||||
bool fused = false;
|
||||
|
||||
for (uint_t op_idx = fusion_start; op_idx < fusion_end; ++op_idx) {
|
||||
for (int op_idx = fusion_start; op_idx < fusion_end; ++op_idx) {
|
||||
// skip operations to be ignored
|
||||
if (!method.can_apply(ops[op_idx], max_fused_qubits) ||
|
||||
ops[op_idx].type == optype_t::nop)
|
||||
continue;
|
||||
|
||||
// 1. find a N-qubit operation
|
||||
if (ops[op_idx].qubits.size() != N)
|
||||
if (ops[op_idx].qubits.size() != N) {
|
||||
continue;
|
||||
}
|
||||
|
||||
std::vector<uint_t> fusing_op_idxs = {op_idx};
|
||||
|
||||
std::vector<uint_t> fusing_op_idxs = {(uint_t)op_idx};
|
||||
std::vector<uint_t> fusing_qubits;
|
||||
fusing_qubits.insert(fusing_qubits.end(), ops[op_idx].qubits.begin(),
|
||||
ops[op_idx].qubits.end());
|
||||
|
@ -895,14 +895,14 @@ void Fusion::optimize_circuit(Circuit &circ, Noise::NoiseModel &noise,
|
|||
|
||||
if (parallelization_ > 1) {
|
||||
#pragma omp parallel for num_threads(parallelization_)
|
||||
for (int_t i = 0; i < parallelization_; i++) {
|
||||
for (int_t i = 0; i < (int_t)parallelization_; i++) {
|
||||
int_t start = unit * i;
|
||||
int_t end = std::min(start + unit, (int_t)circ.ops.size());
|
||||
optimize_circuit(circ, noise, allowed_opset, start, end, fuser,
|
||||
method);
|
||||
}
|
||||
} else {
|
||||
for (int_t i = 0; i < parallelization_; i++) {
|
||||
for (uint_t i = 0; i < parallelization_; i++) {
|
||||
int_t start = unit * i;
|
||||
int_t end = std::min(start + unit, (int_t)circ.ops.size());
|
||||
optimize_circuit(circ, noise, allowed_opset, start, end, fuser,
|
||||
|
|
Loading…
Reference in New Issue