Reduce warning messages (#2013)

* reduce warning messages

* format

* fix Windows

* uint -> uint_t

* fix Thrust seg fault

* format
Jun Doi 2023-12-22 15:06:32 +09:00 committed by GitHub
parent f7fcbc2b4c
commit 180a0b6431
51 changed files with 776 additions and 840 deletions
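
Most of the diffs below follow one pattern: loop counters declared as int, int_t, or auto (which deduces int) were compared against unsigned values such as std::vector::size(), which triggers -Wsign-compare under -Wall, so the counter type is switched to an unsigned one. A minimal sketch of the recurring fix, assuming uint_t is a 64-bit unsigned alias as in Aer's framework/types.hpp:

  #include <cstdint>
  #include <vector>

  using uint_t = uint64_t; // assumption: mirrors AER::uint_t

  void scale(std::vector<double> &v) {
    // before: for (auto i = 0; i < v.size(); ++i)  // int vs size_t warns
    for (uint_t i = 0; i < v.size(); ++i)
      v[i] *= 2.0;
  }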

View File

@ -143,7 +143,7 @@ void bind_aer_circuit(MODULE m) {
<< ", num_registers=" << circ.num_registers;
ss << ", ops={";
for (auto i = 0; i < circ.ops.size(); ++i)
for (uint_t i = 0; i < circ.ops.size(); ++i)
if (i == 0)
ss << circ.ops[i];
else

View File

@ -130,8 +130,8 @@ void bind_aer_state(MODULE m) {
size_t mat_len = (1UL << qubits.size());
auto ptr = values.unchecked<2>();
cmatrix_t mat(mat_len, mat_len);
for (auto i = 0; i < mat_len; ++i)
for (auto j = 0; j < mat_len; ++j)
for (uint_t i = 0; i < mat_len; ++i)
for (uint_t j = 0; j < mat_len; ++j)
mat(i, j) = ptr(i, j);
state.apply_unitary(qubits, mat);
});
@ -144,10 +144,10 @@ void bind_aer_state(MODULE m) {
size_t mat_size = (1UL << control_qubits.size());
auto ptr = values.unchecked<3>();
std::vector<cmatrix_t> mats;
for (auto i = 0; i < mat_size; ++i) {
for (uint_t i = 0; i < mat_size; ++i) {
cmatrix_t mat(mat_len, mat_len);
for (auto j = 0; j < mat_len; ++j)
for (auto k = 0; k < mat_len; ++k)
for (uint_t j = 0; j < mat_len; ++j)
for (uint_t k = 0; k < mat_len; ++k)
mat(j, k) = ptr(i, j, k);
mats.push_back(mat);
}

View File

@ -414,7 +414,7 @@ size_t Controller::get_system_memory_mb() {
size_t Controller::get_gpu_memory_mb() {
size_t total_physical_memory = 0;
#ifdef AER_THRUST_GPU
for (int_t iDev = 0; iDev < target_gpus_.size(); iDev++) {
for (uint_t iDev = 0; iDev < target_gpus_.size(); iDev++) {
size_t freeMem, totalMem;
cudaSetDevice(target_gpus_[iDev]);
cudaMemGetInfo(&freeMem, &totalMem);
@ -515,7 +515,7 @@ Result Controller::execute(std::vector<std::shared_ptr<Circuit>> &circuits,
uint_t result_size;
reg_t result_offset(circuits.size());
result_size = 0;
for (int_t i = 0; i < circuits.size(); i++) {
for (uint_t i = 0; i < circuits.size(); i++) {
result_offset[i] = result_size;
result_size += circuits[i]->num_bind_params;
}
@ -532,11 +532,11 @@ Result Controller::execute(std::vector<std::shared_ptr<Circuit>> &circuits,
// set parallelization for experiments
try {
uint_t res_pos = 0;
for (int i = 0; i < circuits.size(); i++) {
for (uint_t i = 0; i < circuits.size(); i++) {
executors[i] = make_circuit_executor(methods[i]);
required_memory_mb_list[i] =
executors[i]->required_memory_mb(config, *circuits[i], noise_model);
for (int j = 0; j < circuits[i]->num_bind_params; j++) {
for (uint_t j = 0; j < circuits[i]->num_bind_params; j++) {
result.results[res_pos++].metadata.add(required_memory_mb_list[i],
"required_memory_mb");
}
@ -588,9 +588,9 @@ Result Controller::execute(std::vector<std::shared_ptr<Circuit>> &circuits,
reg_t seeds(result_size);
reg_t avg_seeds(result_size);
int_t iseed = 0;
for (int_t i = 0; i < circuits.size(); i++) {
for (uint_t i = 0; i < circuits.size(); i++) {
if (circuits[i]->num_bind_params > 1) {
for (int_t j = 0; i < circuits[i]->num_bind_params; i++)
for (uint_t j = 0; i < circuits[i]->num_bind_params; i++)
seeds[iseed++] = circuits[i]->seed_for_params[j];
} else
seeds[iseed++] = circuits[i]->seed;
@ -598,9 +598,9 @@ Result Controller::execute(std::vector<std::shared_ptr<Circuit>> &circuits,
MPI_Allreduce(seeds.data(), avg_seeds.data(), result_size, MPI_UINT64_T,
MPI_SUM, MPI_COMM_WORLD);
iseed = 0;
for (int_t i = 0; i < circuits.size(); i++) {
for (uint_t i = 0; i < circuits.size(); i++) {
if (circuits[i]->num_bind_params > 1) {
for (int_t j = 0; i < circuits[i]->num_bind_params; i++)
for (uint_t j = 0; i < circuits[i]->num_bind_params; i++)
circuits[i]->seed_for_params[j] =
avg_seeds[iseed++] / num_processes_;
} else
@ -626,7 +626,7 @@ Result Controller::execute(std::vector<std::shared_ptr<Circuit>> &circuits,
bool all_failed = true;
result.status = Result::Status::completed;
for (int i = 0; i < result.results.size(); ++i) {
for (uint_t i = 0; i < result.results.size(); ++i) {
auto &experiment = result.results[i];
if (experiment.status == ExperimentResult::Status::completed) {
all_failed = false;

View File

@ -118,13 +118,13 @@ Result controller_execute(std::vector<std::shared_ptr<Circuit>> &input_circs,
param_circ->global_phase_for_params.resize(num_params);
for (size_t j = 0; j < num_params; j++)
param_circ->global_phase_for_params[j] = params.second[j];
} else if (instr_pos >= num_instr) {
} else if ((uint_t)instr_pos >= num_instr) {
throw std::invalid_argument(
R"(Invalid parameterized qobj: instruction position out of range)");
}
auto &op = param_circ->ops[instr_pos];
if (!op.has_bind_params) {
if (param_pos >= op.params.size()) {
if ((uint_t)param_pos >= op.params.size()) {
throw std::invalid_argument(
R"(Invalid parameterized qobj: instruction param position out of range)");
}
@ -160,7 +160,7 @@ Result controller_execute(std::vector<std::shared_ptr<Circuit>> &input_circs,
// negative position is for global phase
circ->global_phase_angle = params.second[j];
} else {
if (instr_pos >= num_instr) {
if ((uint_t)instr_pos >= num_instr) {
std::cout << "Invalid parameterization: instruction position "
"out of range: "
<< instr_pos << std::endl;
@ -168,7 +168,7 @@ Result controller_execute(std::vector<std::shared_ptr<Circuit>> &input_circs,
R"(Invalid parameterization: instruction position out of range)");
}
auto &op = param_circ->ops[instr_pos];
if (param_pos >= op.params.size()) {
if ((uint_t)param_pos >= op.params.size()) {
throw std::invalid_argument(
R"(Invalid parameterization: instruction param position out of range)");
}
@ -215,7 +215,7 @@ Result controller_execute(std::vector<std::shared_ptr<Circuit>> &input_circs,
for (auto &circ : circs) {
circ->seed = seed + seed_shift;
circ->seed_for_params.resize(circ->num_bind_params);
for (int_t i = 0; i < circ->num_bind_params; i++) {
for (uint_t i = 0; i < circ->num_bind_params; i++) {
circ->seed_for_params[i] = seed + seed_shift;
seed_shift += 2113;
}
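
The casts in this file, rather than type changes, are deliberate: instr_pos and param_pos stay signed because a negative position encodes the global phase (see the comment above), so the value is cast only at the comparison, once it is known to be non-negative. A sketch of the idea with hypothetical names:

  #include <cstdint>
  #include <stdexcept>

  void check_position(int64_t instr_pos, uint64_t num_instr) {
    if (instr_pos < 0)
      return; // negative position is the global-phase sentinel
    if ((uint64_t)instr_pos >= num_instr)
      throw std::invalid_argument("instruction position out of range");
  }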

View File

@ -630,7 +630,7 @@ void AerState::set_seed(int_t seed) {
reg_t AerState::allocate_qubits(uint_t num_qubits) {
assert_not_initialized();
reg_t ret;
for (auto i = 0; i < num_qubits; ++i)
for (uint_t i = 0; i < num_qubits; ++i)
ret.push_back(num_of_qubits_++);
return ret;
};
@ -816,7 +816,7 @@ reg_t AerState::initialize_statevector(uint_t num_of_qubits, complex_t *data,
reg_t ret;
ret.reserve(num_of_qubits);
for (auto i = 0; i < num_of_qubits; ++i)
for (uint_t i = 0; i < num_of_qubits; ++i)
ret.push_back(i);
return ret;
};
@ -861,7 +861,7 @@ reg_t AerState::initialize_density_matrix(uint_t num_of_qubits, complex_t *data,
reg_t ret;
ret.reserve(num_of_qubits);
for (auto i = 0; i < num_of_qubits; ++i)
for (uint_t i = 0; i < num_of_qubits; ++i)
ret.push_back(i);
return ret;
};
@ -892,7 +892,7 @@ AER::Vector<complex_t> AerState::move_to_vector() {
throw std::runtime_error("move_to_vector() supports only statevector or "
"matrix_product_state or density_matrix methods");
}
for (auto i = 0; i < num_of_qubits_; ++i)
for (uint_t i = 0; i < num_of_qubits_; ++i)
op.qubits.push_back(i);
op.string_params.push_back("s");
op.save_type = Operations::DataSubType::single;
@ -907,7 +907,7 @@ AER::Vector<complex_t> AerState::move_to_vector() {
.value()["s"]
.value());
clear();
return std::move(vec);
return vec;
} else if (method_ == Method::density_matrix) {
auto mat =
std::move(static_cast<DataMap<AverageData, matrix<complex_t>, 1>>(
@ -917,7 +917,7 @@ AER::Vector<complex_t> AerState::move_to_vector() {
auto vec = Vector<complex_t>::move_from_buffer(
mat.GetColumns() * mat.GetRows(), mat.move_to_buffer());
clear();
return std::move(vec);
return vec;
} else {
throw std::runtime_error("move_to_vector() supports only statevector or "
"matrix_product_state or density_matrix methods");
@ -941,7 +941,7 @@ matrix<complex_t> AerState::move_to_matrix() {
throw std::runtime_error("move_to_matrix() supports only statevector or "
"matrix_product_state or density_matrix methods");
}
for (auto i = 0; i < num_of_qubits_; ++i)
for (uint_t i = 0; i < num_of_qubits_; ++i)
op.qubits.push_back(i);
op.string_params.push_back("s");
op.save_type = Operations::DataSubType::single;
@ -966,7 +966,7 @@ matrix<complex_t> AerState::move_to_matrix() {
.value())["s"]
.value());
clear();
return std::move(mat);
return mat;
} else {
throw std::runtime_error("move_to_matrix() supports only statevector or "
"matrix_product_state or density_matrix methods");

View File

@ -263,7 +263,7 @@ void std::from_json(const json_t &js,
template <typename RealType>
void std::to_json(json_t &js, const AER::Vector<std::complex<RealType>> &vec) {
std::vector<std::vector<RealType>> out;
for (int64_t i = 0; i < vec.size(); ++i) {
for (size_t i = 0; i < vec.size(); ++i) {
auto &z = vec[i];
out.push_back(std::vector<RealType>{real(z), imag(z)});
}

View File

@ -35,7 +35,8 @@ T *malloc_data(size_t size) {
// Data allocated here may need to be properly aligned to be compliant with
// AVX2.
void *data = nullptr;
- posix_memalign(&data, 64, sizeof(T) * size);
+ if (posix_memalign(&data, 64, sizeof(T) * size) != 0)
+ throw std::runtime_error("Cannot allocate memory by posix_memalign");
return reinterpret_cast<T *>(data);
#else
return reinterpret_cast<T *>(malloc(sizeof(T) * size));
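
glibc declares posix_memalign with __attribute__((warn_unused_result)), so discarding its return value warns under -Wunused-result; the fix checks the status and throws on failure. A standalone sketch of the same guard (POSIX-only, like the original):

  #include <cstdlib>
  #include <stdexcept>

  void *alloc_aligned64(std::size_t bytes) {
    void *data = nullptr;
    // 64-byte alignment keeps the buffer AVX2-compliant
    if (posix_memalign(&data, 64, bytes) != 0)
      throw std::runtime_error("Cannot allocate memory by posix_memalign");
    return data;
  }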

View File

@ -52,11 +52,13 @@ enum class BinaryOp {
GreaterEqual
};
+ bool isBoolBinaryOp(const BinaryOp binary_op);
bool isBoolBinaryOp(const BinaryOp binary_op) {
return binary_op != BinaryOp::BitAnd && binary_op != BinaryOp::BitOr &&
binary_op != BinaryOp::BitXor;
}
+ uint_t truncate(const uint_t val, const size_t width);
uint_t truncate(const uint_t val, const size_t width) {
size_t shift = 64 - width;
return (val << shift) >> shift;
@ -68,8 +70,8 @@ enum class ValueType { Bool, Uint };
class ScalarType {
public:
ScalarType(const ValueType type_, const size_t width_)
: type(type_), width(width_) {}
ScalarType(const ValueType _type, const size_t width_)
: type(_type), width(width_) {}
public:
const ValueType type;
@ -97,8 +99,8 @@ public:
class CExpr {
public:
CExpr(const CExprType expr_type_, const std::shared_ptr<ScalarType> type_)
: expr_type(expr_type_), type(type_) {}
CExpr(const CExprType _expr_type, const std::shared_ptr<ScalarType> _type)
: expr_type(_expr_type), type(_type) {}
virtual bool eval_bool(const std::string &memory) { return false; };
virtual uint_t eval_uint(const std::string &memory) { return 0ul; };
@ -109,9 +111,9 @@ public:
class CastExpr : public CExpr {
public:
CastExpr(std::shared_ptr<ScalarType> type,
CastExpr(std::shared_ptr<ScalarType> _type,
const std::shared_ptr<CExpr> operand_)
: CExpr(CExprType::Cast, type), operand(operand_) {}
: CExpr(CExprType::Cast, _type), operand(operand_) {}
virtual bool eval_bool(const std::string &memory) {
if (type->type != ValueType::Bool)
@ -143,9 +145,9 @@ public:
class VarExpr : public CExpr {
public:
VarExpr(std::shared_ptr<ScalarType> type,
const std::vector<uint_t> &cbit_idxs)
: CExpr(CExprType::Var, type), cbit_idxs(cbit_idxs) {}
VarExpr(std::shared_ptr<ScalarType> _type,
const std::vector<uint_t> &_cbit_idxs)
: CExpr(CExprType::Var, _type), cbit_idxs(_cbit_idxs) {}
virtual bool eval_bool(const std::string &memory) {
if (type->type != ValueType::Bool)
@ -164,7 +166,6 @@ public:
private:
uint_t eval_uint_(const std::string &memory) {
uint_t val = 0ul;
- const uint_t memory_size = memory.size();
uint_t shift = 0;
for (const uint_t cbit_idx : cbit_idxs) {
if (memory.size() <= cbit_idx)
@ -182,7 +183,8 @@ public:
class ValueExpr : public CExpr {
public:
- ValueExpr(std::shared_ptr<ScalarType> type) : CExpr(CExprType::Value, type) {}
+ ValueExpr(std::shared_ptr<ScalarType> _type)
+ : CExpr(CExprType::Value, _type) {}
};
class UintValue : public ValueExpr {
@ -943,6 +945,11 @@ inline Op make_bfunc(const std::string &mask, const std::string &val,
return op;
}
+ Op make_gate(const std::string &name, const reg_t &qubits,
+ const std::vector<complex_t> &params,
+ const std::vector<std::string> &string_params,
+ const int_t conditional, const std::shared_ptr<CExpr> expr,
+ const std::string &label);
Op make_gate(const std::string &name, const reg_t &qubits,
const std::vector<complex_t> &params,
const std::vector<std::string> &string_params,
@ -1313,12 +1320,12 @@ inline Op bind_parameter(const Op &src, const uint_t iparam,
if (src.params.size() > 0) {
uint_t stride = src.params.size() / num_params;
op.params.resize(stride);
for (int_t i = 0; i < stride; i++)
for (uint_t i = 0; i < stride; i++)
op.params[i] = src.params[iparam * stride + i];
} else if (src.mats.size() > 0) {
uint_t stride = src.mats.size() / num_params;
op.mats.resize(stride);
for (int_t i = 0; i < stride; i++)
for (uint_t i = 0; i < stride; i++)
op.mats[i] = src.mats[iparam * stride + i];
}
return op;
@ -1528,6 +1535,7 @@ json_t op_to_json(const Op &op) {
return ret;
}
+ void to_json(json_t &js, const OpType &type);
void to_json(json_t &js, const OpType &type) {
std::stringstream ss;
ss << type;
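
Two warning classes are addressed in this header. Constructor parameters named type shadowed the type member inherited from CExpr (-Wshadow on GCC/Clang, C4458 on MSVC), hence the _type renames, and functions defined in the header without a prior prototype trip -Wmissing-declarations, hence the added forward declarations. A compressed sketch of both fixes with simplified types:

  bool is_bool_op(int op);  // declaration first silences
  bool is_bool_op(int op) { // -Wmissing-declarations on the definition
    return op != 0;
  }

  class Expr {
  public:
    explicit Expr(int _type) : type(_type) {} // a parameter named `type`
    const int type;                           // would shadow this member
  };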

View File

@ -32,6 +32,8 @@
#include "misc/warnings.hpp"
DISABLE_WARNING_PUSH
+ #pragma GCC diagnostic ignored "-Wfloat-equal"
#include <pybind11/cast.h>
#include <pybind11/complex.h>
#include <pybind11/numpy.h>
@ -40,6 +42,7 @@ DISABLE_WARNING_PUSH
#include <nlohmann/json.hpp>
DISABLE_WARNING_POP
+ #pragma GCC diagnostic warning "-Wfloat-equal"
#include "framework/json.hpp"
@ -293,7 +296,7 @@ void std::from_json(const json_t &js, py::object &o) {
o = py::str(js.get<nl::json::string_t>());
} else if (js.is_array()) {
std::vector<py::object> obj(js.size());
for (auto i = 0; i < js.size(); i++) {
for (size_t i = 0; i < js.size(); i++) {
py::object tmp;
from_json(js[i], tmp);
obj[i] = tmp;
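
The bundled pybind11 and nlohmann-json headers compare floating-point values for exact equality internally, which -Wfloat-equal flags in every translation unit that includes them, so the diagnostic is disabled just around the third-party includes and re-enabled afterwards. The standard push/pop form of the pattern:

  #pragma GCC diagnostic push
  #pragma GCC diagnostic ignored "-Wfloat-equal"
  #include <pybind11/pybind11.h> // third-party header that trips the warning
  #pragma GCC diagnostic pop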

View File

@ -155,12 +155,12 @@ Qobj::Qobj(const inputdata_t &input) {
// negative position is for global phase
param_circuit->global_phase_angle = params.second[j];
} else {
if (instr_pos >= num_instr) {
if ((uint_t)instr_pos >= num_instr) {
throw std::invalid_argument(
R"(Invalid parameterized qobj: instruction position out of range)");
}
auto &op = param_circuit->ops[instr_pos];
if (param_pos >= op.params.size()) {
if ((uint_t)param_pos >= op.params.size()) {
throw std::invalid_argument(
R"(Invalid parameterized qobj: instruction param position out of range)");
}

View File

@ -1270,7 +1270,7 @@ uint_t (*popcount)(uint_t) = is_avx2_supported() ? &_instrinsic_weight
bool (*hamming_parity)(uint_t) = &_naive_parity;
uint_t (*popcount)(uint_t) = &_naive_weight;
#endif
+ size_t get_system_memory_mb();
size_t get_system_memory_mb() {
size_t total_physical_memory = 0;
#if defined(__linux__)

View File

@ -386,7 +386,7 @@ void NoiseModel::enable_superop_method(int num_threads) {
exs.resize(std::max(num_threads, 1));
#pragma omp parallel for if (num_threads > 1 && quantum_errors_.size() > 10) \
num_threads(num_threads)
for (int i = 0; i < quantum_errors_.size(); i++) {
for (int i = 0; i < (int_t)quantum_errors_.size(); i++) {
try {
quantum_errors_[i].compute_superoperator();
} catch (...) {
@ -406,7 +406,7 @@ void NoiseModel::enable_kraus_method(int num_threads) {
exs.resize(std::max(num_threads, 1));
#pragma omp parallel for if (num_threads > 1 && quantum_errors_.size() > 10) \
num_threads(num_threads)
for (int i = 0; i < quantum_errors_.size(); i++) {
for (int i = 0; i < (int_t)quantum_errors_.size(); i++) {
try {
quantum_errors_[i].compute_kraus();
} catch (...) {
@ -851,6 +851,8 @@ cmatrix_t NoiseModel::op2superop(const Operations::Op &op) const {
case ParamGate::cu:
return Linalg::SMatrix::cu(op.params[0], op.params[1], op.params[2],
op.params[3]);
+ default:
+ break;
}
} else {
// Check if we can convert this gate to a standard superoperator matrix
@ -897,6 +899,8 @@ cmatrix_t NoiseModel::op2unitary(const Operations::Op &op) const {
return Linalg::Matrix::rzx(op.params[0]);
case ParamGate::cp:
return Linalg::Matrix::cphase(op.params[0]);
+ default:
+ break;
}
} else {
// Check if we can convert this gate to a standard superoperator matrix
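
The added default: break; arms quiet unhandled-enumerator warnings (the -Wswitch family) on switches that intentionally handle only a few ParamGate cases and fall through to generic matrix handling. Sketch with a stand-in enum:

  enum class Gate { x, cu, cp, other };

  int special_case(Gate g) {
    switch (g) {
    case Gate::cu:
      return 1;
    case Gate::cp:
      return 2;
    default:
      break; // remaining gates use the generic path below
    }
    return 0;
  }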

View File

@ -133,7 +133,7 @@ void BatchShotsExecutor<state_t>::set_parallelization(
enable_batch_multi_shots_ = false;
if (batched_shots_gpu_ && Base::sim_device_ != Device::CPU) {
enable_batch_multi_shots_ = true;
if (circ.num_qubits > batched_shots_gpu_max_qubits_)
if (circ.num_qubits > (uint_t)batched_shots_gpu_max_qubits_)
enable_batch_multi_shots_ = false;
else if (circ.shots == 1 && circ.num_bind_params == 1)
enable_batch_multi_shots_ = false;
@ -156,8 +156,7 @@ void BatchShotsExecutor<state_t>::run_circuit_with_sampling(
}
Noise::NoiseModel dummy_noise;
state_t dummy_state;
- int_t i;
- int_t i_begin, n_shots;
+ uint_t i_begin, n_shots;
Base::num_qubits_ = circ.num_qubits;
Base::num_creg_memory_ = circ.num_memory;
@ -195,7 +194,7 @@ void BatchShotsExecutor<state_t>::run_circuit_with_sampling(
fusion_result);
auto time_taken =
std::chrono::duration<double>(myclock_t::now() - timer_start).count();
for (i = 0; i < circ.num_bind_params; i++) {
for (uint_t i = 0; i < circ.num_bind_params; i++) {
ExperimentResult &result = *(result_it + i);
result.metadata.copy(fusion_result.metadata);
// Add batched multi-shots optimizaiton metadata
@ -222,13 +221,13 @@ void BatchShotsExecutor<state_t>::run_circuit_with_sampling(
while (i_begin < Base::num_local_states_) {
// loop for states can be stored in available memory
n_shots = Base::num_local_states_ - i_begin;
n_shots = std::min(n_shots, (int_t)Base::num_max_shots_);
n_shots = std::min(n_shots, Base::num_max_shots_);
// allocate shots
this->allocate_states(n_shots, config);
// Set state config
for (i = 0; i < n_shots; i++) {
for (uint_t i = 0; i < n_shots; i++) {
Base::states_[i].set_parallelization(Base::parallel_state_update_);
}
@ -256,7 +255,7 @@ void BatchShotsExecutor<state_t>::run_circuit_with_sampling(
auto apply_ops_lambda = [this, circ, init_rng, first_meas, final_ops,
dummy_noise, &result_it](int_t i) {
std::vector<RngEngine> rng(Base::num_states_in_group_[i]);
for (int_t j = 0; j < Base::num_states_in_group_[i]; j++) {
for (uint_t j = 0; j < Base::num_states_in_group_[i]; j++) {
uint_t iparam =
Base::global_state_index_ + Base::top_state_of_group_[i] + j;
if (iparam == 0)
@ -284,8 +283,8 @@ void BatchShotsExecutor<state_t>::run_circuit_with_sampling(
if (Base::num_process_per_experiment_ > 1) {
Base::gather_creg_memory(Base::cregs_, Base::state_index_begin_);
for (i = 0; i < circ.num_bind_params; i++) {
for (int_t j = 0; j < circ.shots; j++) {
for (uint_t i = 0; i < circ.num_bind_params; i++) {
for (uint_t j = 0; j < circ.shots; j++) {
(result_it + i)
->save_count_data(Base::cregs_[i * circ.shots + j],
Base::save_creg_memory_);
@ -304,7 +303,7 @@ void BatchShotsExecutor<state_t>::run_circuit_with_sampling(
}
if (nDev > Base::num_groups_)
nDev = Base::num_groups_;
for (i = 0; i < circ.num_bind_params; i++)
for (uint_t i = 0; i < circ.num_bind_params; i++)
(result_it + i)
->metadata.add(nDev, "batched_shots_optimization_parallel_gpus");
}
@ -362,10 +361,9 @@ void BatchShotsExecutor<state_t>::run_circuit_shots(
Base::max_matrix_qubits_ = Base::get_max_matrix_qubits(circ_opt);
- int_t i;
- int_t i_begin, n_shots;
+ uint_t i_begin, n_shots;
for (i = 0; i < Base::num_bind_params_; i++) {
for (uint_t i = 0; i < Base::num_bind_params_; i++) {
ExperimentResult &result = *(result_it + i);
result.metadata.copy(fusion_result.metadata);
// Add batched multi-shots optimizaiton metadata
@ -382,13 +380,13 @@ void BatchShotsExecutor<state_t>::run_circuit_shots(
while (i_begin < Base::num_local_states_) {
// loop for states can be stored in available memory
n_shots = Base::num_local_states_ - i_begin;
n_shots = std::min(n_shots, (int_t)Base::num_max_shots_);
n_shots = std::min(n_shots, Base::num_max_shots_);
// allocate shots
this->allocate_states(n_shots, config);
// Set state config
for (i = 0; i < n_shots; i++) {
for (uint_t i = 0; i < n_shots; i++) {
Base::states_[i].set_parallelization(Base::parallel_state_update_);
}
@ -418,7 +416,7 @@ void BatchShotsExecutor<state_t>::run_circuit_shots(
noise](int_t i) {
par_results[i].resize(circ.num_bind_params);
std::vector<RngEngine> rng(Base::num_states_in_group_[i]);
for (int_t j = 0; j < Base::num_states_in_group_[i]; j++) {
for (uint_t j = 0; j < Base::num_states_in_group_[i]; j++) {
uint_t ishot =
Base::global_state_index_ + Base::top_state_of_group_[i] + j;
uint_t iparam = ishot / Base::num_shots_per_bind_param_;
@ -441,13 +439,13 @@ void BatchShotsExecutor<state_t>::run_circuit_shots(
Base::num_groups_, apply_ops_lambda, Base::num_groups_);
for (auto &res : par_results) {
for (i = 0; i < Base::num_bind_params_; i++) {
for (uint_t i = 0; i < Base::num_bind_params_; i++) {
(result_it + i)->combine(std::move(res[i]));
}
}
// collect measured bits and copy memory
for (i = 0; i < n_shots; i++) {
for (uint_t i = 0; i < n_shots; i++) {
if (Base::num_process_per_experiment_ > 1) {
Base::states_[i].qreg().read_measured_data(
Base::cregs_[Base::global_state_index_ + i_begin + i]);
@ -469,7 +467,7 @@ void BatchShotsExecutor<state_t>::run_circuit_shots(
if (Base::num_process_per_experiment_ > 1) {
Base::gather_creg_memory(Base::cregs_, Base::state_index_begin_);
for (i = 0; i < circ_opt.shots; i++) {
for (uint_t i = 0; i < circ_opt.shots; i++) {
uint_t iparam = i / Base::num_shots_per_bind_param_;
(result_it + iparam)
->save_count_data(Base::cregs_[i], Base::save_creg_memory_);
@ -487,7 +485,7 @@ void BatchShotsExecutor<state_t>::run_circuit_shots(
}
if (nDev > Base::num_groups_)
nDev = Base::num_groups_;
for (i = 0; i < Base::num_bind_params_; i++)
for (uint_t i = 0; i < Base::num_bind_params_; i++)
(result_it + i)
->metadata.add(nDev, "batched_shots_optimization_parallel_gpus");
}
@ -516,7 +514,7 @@ void BatchShotsExecutor<state_t>::apply_ops_batched_shots_for_group(
Base::states_[j].qreg().read_measured_data(Base::states_[j].creg());
std::vector<Operations::Op> nops = noise.sample_noise_loc(
*op, rng[j - Base::top_state_of_group_[i_group]]);
for (int_t k = 0; k < nops.size(); k++) {
for (uint_t k = 0; k < nops.size(); k++) {
Base::states_[j].apply_op(
nops[k], *result_it,
rng[j - Base::top_state_of_group_[i_group]], false);
@ -534,13 +532,13 @@ void BatchShotsExecutor<state_t>::apply_ops_batched_shots_for_group(
uint_t non_pauli_gate_count = 0;
if (num_inner_threads > 1) {
#pragma omp parallel for reduction(+: count_ops,non_pauli_gate_count) num_threads(num_inner_threads)
for (int_t j = 0; j < count; j++) {
for (int_t j = 0; j < (int_t)count; j++) {
noise_ops[j] = noise.sample_noise_loc(*op, rng[j]);
if (!(noise_ops[j].size() == 0 ||
(noise_ops[j].size() == 1 && noise_ops[j][0].name == "id"))) {
count_ops++;
for (int_t k = 0; k < noise_ops[j].size(); k++) {
for (uint_t k = 0; k < noise_ops[j].size(); k++) {
if (noise_ops[j][k].name != "id" && noise_ops[j][k].name != "x" &&
noise_ops[j][k].name != "y" && noise_ops[j][k].name != "z" &&
noise_ops[j][k].name != "pauli") {
@ -551,13 +549,13 @@ void BatchShotsExecutor<state_t>::apply_ops_batched_shots_for_group(
}
}
} else {
for (int_t j = 0; j < count; j++) {
for (uint_t j = 0; j < count; j++) {
noise_ops[j] = noise.sample_noise_loc(*op, rng[j]);
if (!(noise_ops[j].size() == 0 ||
(noise_ops[j].size() == 1 && noise_ops[j][0].name == "id"))) {
count_ops++;
for (int_t k = 0; k < noise_ops[j].size(); k++) {
for (uint_t k = 0; k < noise_ops[j].size(); k++) {
if (noise_ops[j][k].name != "id" && noise_ops[j][k].name != "x" &&
noise_ops[j][k].name != "y" && noise_ops[j][k].name != "z" &&
noise_ops[j][k].name != "pauli") {
@ -584,7 +582,7 @@ void BatchShotsExecutor<state_t>::apply_ops_batched_shots_for_group(
continue;
}
// call apply_op for each state
for (int_t j = 0; j < Base::num_states_in_group_[i_group]; j++) {
for (uint_t j = 0; j < Base::num_states_in_group_[i_group]; j++) {
uint_t is = Base::top_state_of_group_[i_group] + j;
uint_t ip =
(Base::global_state_index_ + is) / Base::num_shots_per_bind_param_;
@ -602,13 +600,13 @@ template <class state_t>
void BatchShotsExecutor<state_t>::apply_batched_noise_ops(
const int_t i_group, const std::vector<std::vector<Operations::Op>> &ops,
ResultItr result_it, std::vector<RngEngine> &rng) {
- int_t i, j, k, count, nop, pos = 0;
+ uint_t count;
uint_t istate = Base::top_state_of_group_[i_group];
count = ops.size();
reg_t mask(count);
std::vector<bool> finished(count, false);
for (i = 0; i < count; i++) {
for (uint_t i = 0; i < count; i++) {
int_t cond_reg = -1;
if (finished[i])
@ -620,7 +618,7 @@ void BatchShotsExecutor<state_t>::apply_batched_noise_ops(
mask[i] = 1;
// find same ops to be exectuted in a batch
for (j = i + 1; j < count; j++) {
for (uint_t j = i + 1; j < count; j++) {
if (finished[j]) {
mask[j] = 0;
continue;
@ -638,7 +636,7 @@ void BatchShotsExecutor<state_t>::apply_batched_noise_ops(
}
mask[j] = true;
for (k = 0; k < ops[i].size(); k++) {
for (uint_t k = 0; k < ops[i].size(); k++) {
if (ops[i][k].conditional) {
cond_reg = ops[i][k].conditional_reg;
}
@ -657,7 +655,7 @@ void BatchShotsExecutor<state_t>::apply_batched_noise_ops(
cond_reg, mask);
// batched execution on same ops
for (k = 0; k < ops[i].size(); k++) {
for (uint_t k = 0; k < ops[i].size(); k++) {
Operations::Op cop = ops[i][k];
// mark op conditional to mask shots
@ -666,7 +664,7 @@ void BatchShotsExecutor<state_t>::apply_batched_noise_ops(
if (!apply_batched_op(istate, cop, result_it, rng, false)) {
// call apply_op for each state
for (int_t j = 0; j < Base::num_states_in_group_[i_group]; j++) {
for (uint_t j = 0; j < Base::num_states_in_group_[i_group]; j++) {
uint_t is = Base::top_state_of_group_[i_group] + j;
uint_t ip = (Base::global_state_index_ + is) /
Base::num_shots_per_bind_param_;
@ -688,7 +686,7 @@ void BatchShotsExecutor<state_t>::apply_batched_expval(const int_t istate,
ResultItr result) {
std::vector<double> val;
bool variance = (op.type == Operations::OpType::save_expval_var);
for (int_t i = 0; i < op.expval_params.size(); i++) {
for (uint_t i = 0; i < op.expval_params.size(); i++) {
std::complex<double> cprm;
if (variance)
@ -706,7 +704,7 @@ void BatchShotsExecutor<state_t>::apply_batched_expval(const int_t istate,
return;
if (variance) {
for (int_t i = 0; i < val.size() / 2; i++) {
for (uint_t i = 0; i < val.size() / 2; i++) {
uint_t ip = (Base::global_state_index_ + istate + i) /
Base::num_shots_per_bind_param_;
@ -719,7 +717,7 @@ void BatchShotsExecutor<state_t>::apply_batched_expval(const int_t istate,
op.save_type);
}
} else {
for (int_t i = 0; i < val.size(); i++) {
for (uint_t i = 0; i < val.size(); i++) {
uint_t ip = (Base::global_state_index_ + istate + i) /
Base::num_shots_per_bind_param_;
@ -737,7 +735,7 @@ void BatchShotsExecutor<state_t>::batched_measure_sampler(
InputIterator first_meas, InputIterator last_meas, uint_t shots,
uint_t i_group, ResultItr result, std::vector<RngEngine> &rng) {
uint_t par_states = 1;
if (Base::max_parallel_threads_ >= Base::num_groups_ * 2) {
if ((uint_t)Base::max_parallel_threads_ >= Base::num_groups_ * 2) {
par_states =
std::min((uint_t)(Base::max_parallel_threads_ / Base::num_groups_),
Base::num_states_in_group_[i_group]);
@ -798,7 +796,7 @@ void BatchShotsExecutor<state_t>::batched_measure_sampler(
state_end = Base::num_states_in_group_[i_group] * (i + 1) / par_states;
for (; i_state < state_end; i_state++) {
for (int_t j = 0; j < shots; j++)
for (uint_t j = 0; j < shots; j++)
rnd_shots[i_state * shots + j] =
rng[i_state].rand(0, 1) + (double)i_state;
}
@ -830,14 +828,14 @@ void BatchShotsExecutor<state_t>::batched_measure_sampler(
uint_t is = Base::top_state_of_group_[i_group] + i_state;
uint_t ip = (Base::global_state_index_ + is);
for (int_t i = 0; i < shots; i++) {
for (uint_t i = 0; i < shots; i++) {
ClassicalRegister creg;
creg.initialize(num_memory, num_registers);
reg_t all_samples(meas_qubits.size());
uint_t val = allbit_samples[i_state * shots + i] & mask;
reg_t allbit_sample = Utils::int2reg(val, 2, Base::num_qubits_);
for (int_t mq = 0; mq < meas_qubits.size(); mq++) {
for (uint_t mq = 0; mq < meas_qubits.size(); mq++) {
all_samples[mq] = allbit_sample[meas_qubits[mq]];
}
@ -870,7 +868,7 @@ void BatchShotsExecutor<state_t>::batched_measure_sampler(
auto time_taken =
std::chrono::duration<double>(myclock_t::now() - timer_start).count();
for (int_t i_state = 0; i_state < Base::num_states_in_group_[i_group];
for (uint_t i_state = 0; i_state < Base::num_states_in_group_[i_group];
i_state++) {
uint_t ip = Base::global_state_index_ + Base::top_state_of_group_[i_group] +
i_state;
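
One wrinkle in this file: loops under #pragma omp parallel for keep a signed int_t index with a cast on the bound, as in for (int_t j = 0; j < (int_t)count; j++), because MSVC's OpenMP 2.0 support requires a signed induction variable (likely the "fix Windows" item in the commit message), while plain loops switch the index to uint_t. A sketch of both forms:

  #include <cstdint>
  #include <vector>

  using int_t = int64_t;
  using uint_t = uint64_t;

  void touch(std::vector<double> &v) {
    uint_t count = v.size();
  #pragma omp parallel for
    for (int_t j = 0; j < (int_t)count; j++) // signed index for OpenMP
      v[j] *= 2.0;
    for (uint_t j = 0; j < count; j++) // serial loop: unsigned is fine
      v[j] += 1.0;
  }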

View File

@ -22,13 +22,22 @@ namespace AER {
namespace Chunk {
+ void get_qubits_inout(const int chunk_qubits, const reg_t &qubits,
+ reg_t &qubits_in, reg_t &qubits_out);
+ void get_inout_ctrl_qubits(const Operations::Op &op, const uint_t num_qubits,
+ reg_t &qubits_in, reg_t &qubits_out);
+ Operations::Op correct_gate_op_in_chunk(const Operations::Op &op,
+ reg_t &qubits_in);
+ void block_diagonal_matrix(const uint_t gid, const uint_t chunk_bits,
+ reg_t &qubits, cvector_t &diag);
void get_qubits_inout(const int chunk_qubits, const reg_t &qubits,
reg_t &qubits_in, reg_t &qubits_out) {
int_t i;
uint_t i;
qubits_in.clear();
qubits_out.clear();
for (i = 0; i < qubits.size(); i++) {
if (qubits[i] < chunk_qubits) { // in chunk
if (qubits[i] < (uint_t)chunk_qubits) { // in chunk
qubits_in.push_back(qubits[i]);
} else {
qubits_out.push_back(qubits[i]);
@ -40,7 +49,7 @@ void get_inout_ctrl_qubits(const Operations::Op &op, const uint_t num_qubits,
reg_t &qubits_in, reg_t &qubits_out) {
if (op.type == Operations::OpType::gate &&
(op.name[0] == 'c' || op.name.find("mc") == 0)) {
for (int i = 0; i < op.qubits.size(); i++) {
for (uint_t i = 0; i < op.qubits.size(); i++) {
if (op.qubits[i] < num_qubits)
qubits_in.push_back(op.qubits[i]);
else

View File

@ -114,7 +114,7 @@ protected:
uint_t distributed_group_; // group id of distribution
int_t distributed_proc_bits_; // distributed_procs_=2^distributed_proc_bits_
// (if nprocs != power of 2, set -1)
int num_process_per_experiment_ = 1;
uint_t num_process_per_experiment_ = 1;
#ifdef AER_MPI
// communicator group to simulate a circuit (for multi-experiments)
@ -215,6 +215,20 @@ protected:
void gather_creg_memory(std::vector<ClassicalRegister> &cregs,
reg_t &shot_index);
#endif
+ // Sample n-measurement outcomes without applying the measure operation
+ // to the system state
+ virtual std::vector<reg_t> sample_measure(const reg_t &qubits, uint_t shots,
+ RngEngine &rng) const {
+ std::vector<reg_t> ret;
+ return ret;
+ };
+ virtual std::vector<reg_t> sample_measure(state_t &state, const reg_t &qubits,
+ uint_t shots,
+ std::vector<RngEngine> &rng) const {
+ // this is for single rng, impement in sub-class for multi-shots case
+ return state.sample_measure(qubits, shots, rng[0]);
+ }
};
template <class state_t>
@ -437,7 +451,6 @@ void Executor<state_t>::set_parallelization(const Config &config,
distributed_group_ = myrank_ / distributed_procs_;
distributed_proc_bits_ = 0;
- int proc_bits = 0;
uint_t p = distributed_procs_;
while (p > 1) {
if ((p & 1) != 0) { // procs is not power of 2
@ -518,11 +531,11 @@ void Executor<state_t>::set_parallelization(const Config &config,
// Parallel shots is > 1
// Limit parallel shots by available memory and number of shots
// And assign the remaining threads to state update
int circ_memory_mb =
required_memory_mb(config, circ, noise) / num_process_per_experiment_;
int circ_memory_mb = (int)(required_memory_mb(config, circ, noise) /
num_process_per_experiment_);
size_t mem_size =
(sim_device_ == Device::GPU) ? max_gpu_memory_mb_ : max_memory_mb_;
if (mem_size < circ_memory_mb)
if (mem_size < (size_t)circ_memory_mb)
throw std::runtime_error(
"a circuit requires more memory than max_memory_mb.");
// If circ memory is 0, set it to 1 so that we don't divide by zero
@ -561,7 +574,7 @@ void Executor<state_t>::run_circuit(Circuit &circ,
rng.set_seed(circ.seed);
// Output data container
for (int_t i = 0; i < circ.num_bind_params; i++) {
for (uint_t i = 0; i < circ.num_bind_params; i++) {
ExperimentResult &result = *(result_it + i);
result.set_config(config);
result.metadata.add(method_names_.at(method), "method");
@ -602,7 +615,7 @@ void Executor<state_t>::run_circuit(Circuit &circ,
// Ideal circuit
if (noise.is_ideal()) {
opt_circ = circ;
for (int_t i = 0; i < circ.num_bind_params; i++) {
for (uint_t i = 0; i < circ.num_bind_params; i++) {
ExperimentResult &result = *(result_it + i);
result.metadata.add("ideal", "noise");
}
@ -610,7 +623,7 @@ void Executor<state_t>::run_circuit(Circuit &circ,
// Readout error only
else if (noise.has_quantum_errors() == false) {
opt_circ = noise.sample_noise(circ, rng);
for (int_t i = 0; i < circ.num_bind_params; i++) {
for (uint_t i = 0; i < circ.num_bind_params; i++) {
ExperimentResult &result = *(result_it + i);
result.metadata.add("readout", "noise");
}
@ -621,7 +634,7 @@ void Executor<state_t>::run_circuit(Circuit &circ,
// Sample noise using SuperOp method
opt_circ =
noise.sample_noise(circ, rng, Noise::NoiseModel::Method::superop);
for (int_t i = 0; i < circ.num_bind_params; i++) {
for (uint_t i = 0; i < circ.num_bind_params; i++) {
ExperimentResult &result = *(result_it + i);
result.metadata.add("superop", "noise");
}
@ -631,7 +644,7 @@ void Executor<state_t>::run_circuit(Circuit &circ,
noise.opset().contains(Operations::OpType::superop)) {
opt_circ =
noise.sample_noise(circ, rng, Noise::NoiseModel::Method::kraus);
for (int_t i = 0; i < circ.num_bind_params; i++) {
for (uint_t i = 0; i < circ.num_bind_params; i++) {
ExperimentResult &result = *(result_it + i);
result.metadata.add("kraus", "noise");
}
@ -639,7 +652,7 @@ void Executor<state_t>::run_circuit(Circuit &circ,
// General circuit noise sampling
else {
noise_sampling = true;
for (int_t i = 0; i < circ.num_bind_params; i++) {
for (uint_t i = 0; i < circ.num_bind_params; i++) {
ExperimentResult &result = *(result_it + i);
result.metadata.add("circuit", "noise");
}
@ -658,7 +671,7 @@ void Executor<state_t>::run_circuit(Circuit &circ,
run_circuit_shots(opt_circ, noise, config, rng, result_it, false);
}
}
for (int_t i = 0; i < circ.num_bind_params; i++) {
for (uint_t i = 0; i < circ.num_bind_params; i++) {
ExperimentResult &result = *(result_it + i);
// Report success
result.status = ExperimentResult::Status::completed;
@ -692,7 +705,7 @@ void Executor<state_t>::run_circuit(Circuit &circ,
auto timer_stop = myclock_t::now(); // stop timer
double time_taken =
std::chrono::duration<double>(timer_stop - timer_start).count();
for (int_t i = 0; i < circ.num_bind_params; i++) {
for (uint_t i = 0; i < circ.num_bind_params; i++) {
ExperimentResult &result = *(result_it + i);
result.time_taken = time_taken;
// save time also to metadata to pick time in primitive result
@ -701,7 +714,7 @@ void Executor<state_t>::run_circuit(Circuit &circ,
}
// If an exception occurs during execution, catch it and pass it to the output
catch (std::exception &e) {
for (int_t i = 0; i < circ.num_bind_params; i++) {
for (uint_t i = 0; i < circ.num_bind_params; i++) {
ExperimentResult &result = *(result_it + i);
result.status = ExperimentResult::Status::error;
result.message = e.what();
@ -816,30 +829,26 @@ void Executor<state_t>::run_circuit_shots(
std::vector<ClassicalRegister> cregs;
reg_t shot_begin(distributed_procs_);
reg_t shot_end(distributed_procs_);
for (int_t i = 0; i < distributed_procs_; i++) {
for (uint_t i = 0; i < distributed_procs_; i++) {
shot_begin[i] = num_shots * i / distributed_procs_;
shot_end[i] = num_shots * (i + 1) / distributed_procs_;
}
uint_t num_local_shots =
shot_end[distributed_rank_] - shot_begin[distributed_rank_];
- int max_matrix_qubits;
+ auto fusion_pass = transpile_fusion(circ.opset(), config);
+ int max_matrix_qubits = 1;
if (!sample_noise) {
Noise::NoiseModel dummy_noise;
state_t dummy_state;
- auto fusion_pass = transpile_fusion(circ.opset(), config);
ExperimentResult fusion_result;
fusion_pass.optimize_circuit(circ, dummy_noise, dummy_state.opset(),
fusion_result);
for (int_t i = 0; i < circ.num_bind_params; i++) {
for (uint_t i = 0; i < circ.num_bind_params; i++) {
ExperimentResult &result = *(result_it + i);
result.metadata.copy(fusion_result.metadata);
}
max_matrix_qubits = get_max_matrix_qubits(circ);
- } else {
- max_matrix_qubits = get_max_matrix_qubits(circ);
+ max_matrix_qubits = std::max(max_matrix_qubits, (int)fusion_pass.max_qubit);
}
num_bind_params_ = circ.num_bind_params;
@ -857,9 +866,9 @@ void Executor<state_t>::run_circuit_shots(
init_rng, max_matrix_qubits,
num_local_shots](int_t i) {
state_t state;
uint_t i_shot, shot_end;
uint_t i_shot, e_shot;
i_shot = num_local_shots * i / par_shots;
shot_end = num_local_shots * (i + 1) / par_shots;
e_shot = num_local_shots * (i + 1) / par_shots;
auto fusion_pass = transpile_fusion(circ.opset(), config);
@ -871,7 +880,7 @@ void Executor<state_t>::run_circuit_shots(
state.set_distribution(this->num_process_per_experiment_);
state.set_num_global_qubits(circ.num_qubits);
for (; i_shot < shot_end; i_shot++) {
for (; i_shot < e_shot; i_shot++) {
RngEngine rng;
uint_t shot_index = shot_begin[distributed_rank_] + i_shot;
uint_t iparam = shot_index / circ.shots;
@ -892,7 +901,9 @@ void Executor<state_t>::run_circuit_shots(
circ_opt = noise.sample_noise(circ, rng);
fusion_pass.optimize_circuit(circ_opt, dummy_noise, state.opset(),
result);
- state.set_max_matrix_qubits(get_max_matrix_qubits(circ_opt));
+ int max_bits = get_max_matrix_qubits(circ_opt);
+ state.set_max_matrix_qubits(
+ std::max(max_bits, (int)fusion_pass.max_qubit));
} else
state.set_max_matrix_qubits(max_matrix_qubits);
@ -947,11 +958,11 @@ void Executor<state_t>::run_circuit_shots(
num_shots = circ.shots * circ.num_bind_params;
auto save_cregs = [this, &par_results, par_shots, num_shots, circ,
cregs](int_t i) {
uint_t i_shot, shot_end;
uint_t i_shot, e_shot;
i_shot = num_shots * i / par_shots;
shot_end = num_shots * (i + 1) / par_shots;
e_shot = num_shots * (i + 1) / par_shots;
for (; i_shot < shot_end; i_shot++) {
for (; i_shot < e_shot; i_shot++) {
uint_t ip = i_shot / circ.shots;
par_results[i][ip].save_count_data(cregs[i_shot], save_creg_memory_);
}
@ -962,12 +973,12 @@ void Executor<state_t>::run_circuit_shots(
#endif
for (auto &res : par_results) {
for (int_t i = 0; i < circ.num_bind_params; i++) {
for (uint_t i = 0; i < circ.num_bind_params; i++) {
(result_it + i)->combine(std::move(res[i]));
}
}
if (sim_device_ == Device::GPU) {
for (int_t i = 0; i < circ.num_bind_params; i++) {
for (uint_t i = 0; i < circ.num_bind_params; i++) {
#ifdef AER_CUSTATEVEC
(result_it + i)->metadata.add(cuStateVec_enable_, "cuStateVec_enable");
#endif
@ -1292,7 +1303,7 @@ int_t Executor<state_t>::get_matrix_bits(const Operations::Op &op) const {
template <class state_t>
int_t Executor<state_t>::get_max_matrix_qubits(const Circuit &circ) const {
int_t max_bits = 0;
int_t i;
uint_t i;
if (sim_device_ != Device::CPU) { // Only applicable for GPU (and Thrust)
for (i = 0; i < circ.ops.size(); i++) {
@ -1315,7 +1326,6 @@ bool Executor<state_t>::has_statevector_ops(const Circuit &circ) const {
template <class state_t>
void Executor<state_t>::gather_creg_memory(
std::vector<ClassicalRegister> &cregs, reg_t &shot_index) {
- int_t i, j;
uint_t n64, i64, ibit, num_local_shots;
if (distributed_procs_ == 0)
@ -1337,9 +1347,9 @@ void Executor<state_t>::gather_creg_memory(
reg_t bin_memory(n64 * num_local_shots, 0);
// compress memory string to binary
- #pragma omp parallel for private(i, j, i64, ibit)
- for (i = 0; i < num_local_shots; i++) {
- for (j = 0; j < size; j++) {
+ #pragma omp parallel for private(i64, ibit)
+ for (int_t i = 0; i < (int_t)num_local_shots; i++) {
+ for (int_t j = 0; j < size; j++) {
i64 = j >> 6;
ibit = j & 63;
if (cregs[shot_index[distributed_rank_] + i].creg_memory()[j] == '1') {
@ -1352,21 +1362,22 @@ void Executor<state_t>::gather_creg_memory(
std::vector<int> recv_counts(distributed_procs_);
std::vector<int> recv_offset(distributed_procs_);
for (i = 0; i < distributed_procs_ - 1; i++) {
for (uint_t i = 0; i < distributed_procs_ - 1; i++) {
recv_offset[i] = shot_index[i];
recv_counts[i] = shot_index[i + 1] - shot_index[i];
}
recv_offset[distributed_procs_ - 1] = shot_index[distributed_procs_ - 1];
- recv_counts[i] = cregs.size() - shot_index[distributed_procs_ - 1];
+ recv_counts[distributed_procs_ - 1] =
+ cregs.size() - shot_index[distributed_procs_ - 1];
MPI_Allgatherv(&bin_memory[0], n64 * num_local_shots, MPI_UINT64_T, &recv[0],
&recv_counts[0], &recv_offset[0], MPI_UINT64_T,
distributed_comm_);
// store gathered memory
- #pragma omp parallel for private(i, j, i64, ibit)
- for (i = 0; i < cregs.size(); i++) {
- for (j = 0; j < size; j++) {
+ #pragma omp parallel for private(i64, ibit)
+ for (int_t i = 0; i < (int_t)cregs.size(); i++) {
+ for (int_t j = 0; j < size; j++) {
i64 = j >> 6;
ibit = j & 63;
if (((recv[i * n64 + i64] >> ibit) & 1) == 1)
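
The gather_creg_memory hunks show the companion OpenMP fix: once i and j move into the for statements they are implicitly private and must not appear in the private() clause (they are no longer visible at the directive), so only the enclosing-scope temporaries i64 and ibit stay listed. A simplified sketch of the bit-packing loop with a hypothetical signature:

  #include <cstdint>
  #include <string>
  #include <vector>

  using int_t = int64_t;

  void pack_bits(const std::vector<std::string> &mem,
                 std::vector<uint64_t> &bin, int_t size, int_t n64) {
    uint64_t i64, ibit; // declared outside the loop, so still in private()
  #pragma omp parallel for private(i64, ibit)
    for (int_t i = 0; i < (int_t)mem.size(); i++) {
      for (int_t j = 0; j < size; j++) {
        i64 = j >> 6;
        ibit = j & 63;
        if (mem[i][j] == '1')
          bin[i * n64 + i64] |= (1ull << ibit);
      }
    }
  }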

View File

@ -242,13 +242,13 @@ void DensityMatrix<data_t>::initialize_from_vector(list_t &&vec) {
template <typename data_t>
void DensityMatrix<data_t>::transpose() {
const size_t rows = BaseMatrix::num_rows();
const int_t rows = BaseMatrix::num_rows();
#pragma omp parallel for if (BaseVector::num_qubits_ > \
BaseVector::omp_threshold_ && \
BaseVector::omp_threads_ > 1) \
num_threads(BaseVector::omp_threads_)
for (int_t i = 0; i < rows; i++) {
for (int_t j = i + 1; j < rows; j++) {
for (uint_t j = i + 1; j < rows; j++) {
const uint_t pos_a = i * rows + j;
const uint_t pos_b = j * rows + i;
const auto tmp = BaseVector::data_[pos_a];
@ -483,7 +483,7 @@ DensityMatrix<data_t>::expval_pauli(const reg_t &qubits,
auto lambda = [&](const int_t i, double &val_re, double &val_im) -> void {
(void)val_im; // unused
auto idx_vec = ((i << 1) & mask_u) | (i & mask_l);
auto idx_mat = idx_vec ^ x_mask + nrows * idx_vec;
auto idx_mat = (idx_vec ^ x_mask) + nrows * idx_vec;
// Since rho is hermitian rho[i, j] + rho[j, i] = 2 real(rho[i, j])
auto val = 2 * std::real(phase * BaseVector::data_[idx_mat]);
if (z_mask && (AER::Utils::popcount(idx_vec & z_mask) & 1)) {
@ -511,7 +511,7 @@ double DensityMatrix<data_t>::expval_pauli_non_diagonal_chunk(
auto lambda = [&](const int_t i, double &val_re, double &val_im) -> void {
(void)val_im; // unused
auto idx_mat = i ^ x_mask + nrows * i;
auto idx_mat = (i ^ x_mask) + nrows * i;
auto val = std::real(phase * BaseVector::data_[idx_mat]);
if (z_mask && (AER::Utils::popcount(i & z_mask) & 1)) {
val = -val;
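
The parenthesization change in expval_pauli is a genuine bug fix, not just warning hygiene: + binds tighter than ^, so idx_vec ^ x_mask + nrows * idx_vec parsed as idx_vec ^ (x_mask + nrows * idx_vec), which is not the intended element index; -Wparentheses points at exactly this. A check of the two readings:

  #include <cassert>
  #include <cstdint>

  int main() {
    uint64_t idx_vec = 1, x_mask = 1, nrows = 1;
    uint64_t intended = (idx_vec ^ x_mask) + nrows * idx_vec; // (1^1)+1 == 1
    uint64_t parsed = idx_vec ^ (x_mask + nrows * idx_vec);   // 1^(1+1) == 3
    assert(intended == 1 && parsed == 3);
    return 0;
  }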

View File

@ -40,6 +40,7 @@ class Executor : public CircuitExecutor::ParallelStateExecutor<state_t>,
using Base = CircuitExecutor::MultiStateExecutor<state_t>;
using BasePar = CircuitExecutor::ParallelStateExecutor<state_t>;
using BaseBatch = CircuitExecutor::BatchShotsExecutor<state_t>;
+ using Base::sample_measure;
protected:
public:
@ -203,14 +204,14 @@ protected:
//-------------------------------------------------------------------------
template <class densmat_t>
void Executor<densmat_t>::initialize_qreg(uint_t num_qubits) {
for (int_t i = 0; i < Base::states_.size(); i++) {
for (uint_t i = 0; i < Base::states_.size(); i++) {
Base::states_[i].qreg().set_num_qubits(BasePar::chunk_bits_);
}
if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
#pragma omp parallel for
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
for (int_t iChunk = Base::top_state_of_group_[ig];
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
for (uint_t iChunk = Base::top_state_of_group_[ig];
iChunk < Base::top_state_of_group_[ig + 1]; iChunk++) {
if (Base::global_state_index_ + iChunk == 0) {
Base::states_[iChunk].qreg().initialize();
@ -220,7 +221,7 @@ void Executor<densmat_t>::initialize_qreg(uint_t num_qubits) {
}
}
} else {
for (int_t i = 0; i < Base::states_.size(); i++) {
for (uint_t i = 0; i < Base::states_.size(); i++) {
if (Base::global_state_index_ + i == 0) {
Base::states_[i].qreg().initialize();
} else {
@ -236,11 +237,10 @@ void Executor<densmat_t>::initialize_from_vector(const list_t &vec) {
if ((1ull << (Base::num_qubits_ * 2)) == vec.size()) {
BasePar::initialize_from_vector(vec);
} else if ((1ull << (Base::num_qubits_ * 2)) == vec.size() * vec.size()) {
- int_t iChunk;
if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
#pragma omp parallel for
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
for (int_t iChunk = Base::top_state_of_group_[ig];
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
for (uint_t iChunk = Base::top_state_of_group_[ig];
iChunk < Base::top_state_of_group_[ig + 1]; iChunk++) {
uint_t irow_chunk = ((iChunk + Base::global_state_index_) >>
((Base::num_qubits_ - BasePar::chunk_bits_)))
@ -251,7 +251,7 @@ void Executor<densmat_t>::initialize_from_vector(const list_t &vec) {
<< (BasePar::chunk_bits_);
// copy part of state for this chunk
uint_t i, row, col;
uint_t i;
list_t vec1(1ull << BasePar::chunk_bits_);
list_t vec2(1ull << BasePar::chunk_bits_);
@ -264,7 +264,7 @@ void Executor<densmat_t>::initialize_from_vector(const list_t &vec) {
}
}
} else {
for (iChunk = 0; iChunk < Base::states_.size(); iChunk++) {
for (uint_t iChunk = 0; iChunk < Base::states_.size(); iChunk++) {
uint_t irow_chunk = ((iChunk + Base::global_state_index_) >>
((Base::num_qubits_ - BasePar::chunk_bits_)))
<< (BasePar::chunk_bits_);
@ -274,7 +274,7 @@ void Executor<densmat_t>::initialize_from_vector(const list_t &vec) {
<< (BasePar::chunk_bits_);
// copy part of state for this chunk
uint_t i, row, col;
uint_t i;
list_t vec1(1ull << BasePar::chunk_bits_);
list_t vec2(1ull << BasePar::chunk_bits_);
@ -515,12 +515,12 @@ void Executor<densmat_t>::apply_save_amplitudes_sq(const Operations::Op &op,
throw std::invalid_argument(
"Invalid save_amplitudes_sq instructions (empty params).");
}
const int_t size = op.int_params.size();
const uint_t size = op.int_params.size();
rvector_t amps_sq(size);
int_t iChunk;
#pragma omp parallel for if (BasePar::chunk_omp_parallel_) private(iChunk)
for (iChunk = 0; iChunk < Base::states_.size(); iChunk++) {
for (iChunk = 0; iChunk < (int_t)Base::states_.size(); iChunk++) {
uint_t irow, icol;
irow = (Base::global_state_index_ + iChunk) >>
((Base::num_qubits_ - BasePar::chunk_bits_));
@ -529,7 +529,7 @@ void Executor<densmat_t>::apply_save_amplitudes_sq(const Operations::Op &op,
if (irow != icol)
continue;
for (int_t i = 0; i < size; ++i) {
for (uint_t i = 0; i < size; ++i) {
uint_t idx = BasePar::mapped_index(op.int_params[i]);
if (idx >= (irow << BasePar::chunk_bits_) &&
idx < ((irow + 1) << BasePar::chunk_bits_))
@ -691,7 +691,7 @@ cmatrix_t Executor<densmat_t>::reduced_density_matrix(const reg_t &qubits,
if (qubits.empty()) {
reduced_state = cmatrix_t(1, 1);
std::complex<double> sum = 0.0;
for (int_t i = 0; i < Base::states_.size(); i++) {
for (uint_t i = 0; i < Base::states_.size(); i++) {
sum += Base::states_[i].qreg().trace();
}
#ifdef AER_MPI
@ -719,7 +719,7 @@ template <class densmat_t>
cmatrix_t
Executor<densmat_t>::reduced_density_matrix_helper(const reg_t &qubits,
const reg_t &qubits_sorted) {
int_t iChunk;
uint_t iChunk;
uint_t size = 1ull << (BasePar::chunk_bits_ * 2);
uint_t mask = (1ull << (BasePar::chunk_bits_)) - 1;
uint_t num_threads = Base::states_[0].qreg().get_omp_threads();
@ -753,12 +753,12 @@ Executor<densmat_t>::reduced_density_matrix_helper(const reg_t &qubits,
BasePar::recv_data(tmp.data(), size, 0, iChunk);
#endif
#pragma omp parallel for if (num_threads > 1) num_threads(num_threads)
for (i = 0; i < size; i++) {
for (i = 0; i < (int_t)size; i++) {
uint_t irow = (i >> (BasePar::chunk_bits_)) + irow_chunk;
uint_t icol = (i & mask) + icol_chunk;
uint_t irow_out = 0;
uint_t icol_out = 0;
int j;
uint_t j;
for (j = 0; j < qubits.size(); j++) {
if ((irow >> qubits[j]) & 1) {
irow &= ~(1ull << qubits[j]);
@ -803,7 +803,7 @@ void Executor<densmat_t>::apply_save_density_matrix(
final_op);
std::vector<bool> copied(Base::num_bind_params_, false);
for (int_t i = 0; i < root.num_shots(); i++) {
for (uint_t i = 0; i < root.num_shots(); i++) {
uint_t ip = root.param_index(i);
if (!copied[ip]) {
(result + ip)
@ -843,7 +843,7 @@ void Executor<densmat_t>::apply_save_state(CircuitExecutor::Branch &root,
std::vector<bool> copied(Base::num_bind_params_, false);
if (final_op) {
auto state = Base::states_[root.state_index()].move_to_matrix();
for (int_t i = 0; i < root.num_shots(); i++) {
for (uint_t i = 0; i < root.num_shots(); i++) {
uint_t ip = root.param_index(i);
if (!copied[ip]) {
(result + ip)
@ -855,7 +855,7 @@ void Executor<densmat_t>::apply_save_state(CircuitExecutor::Branch &root,
} else {
auto state = Base::states_[root.state_index()].copy_to_matrix();
for (int_t i = 0; i < root.num_shots(); i++) {
for (uint_t i = 0; i < root.num_shots(); i++) {
uint_t ip = root.param_index(i);
if (!copied[ip]) {
(result + ip)
@ -878,7 +878,7 @@ void Executor<densmat_t>::apply_save_probs(CircuitExecutor::Branch &root,
std::vector<bool> copied(Base::num_bind_params_, false);
if (op.type == Operations::OpType::save_probs_ket) {
// Convert to ket dict
for (int_t i = 0; i < root.num_shots(); i++) {
for (uint_t i = 0; i < root.num_shots(); i++) {
uint_t ip = root.param_index(i);
if (!copied[ip]) {
(result + ip)
@ -890,7 +890,7 @@ void Executor<densmat_t>::apply_save_probs(CircuitExecutor::Branch &root,
}
}
} else {
for (int_t i = 0; i < root.num_shots(); i++) {
for (uint_t i = 0; i < root.num_shots(); i++) {
uint_t ip = root.param_index(i);
if (!copied[ip]) {
(result + ip)
@ -918,7 +918,7 @@ void Executor<densmat_t>::apply_save_amplitudes(CircuitExecutor::Branch &root,
Base::states_[root.state_index()].qreg().probability(op.int_params[i]);
}
std::vector<bool> copied(Base::num_bind_params_, false);
for (int_t i = 0; i < root.num_shots(); i++) {
for (uint_t i = 0; i < root.num_shots(); i++) {
uint_t ip = root.param_index(i);
if (!copied[ip]) {
(result + ip)
@ -951,7 +951,7 @@ template <class densmat_t>
rvector_t Executor<densmat_t>::measure_probs(const reg_t &qubits) const {
uint_t dim = 1ull << qubits.size();
rvector_t sum(dim, 0.0);
int_t i, j, k;
uint_t i, j, k;
reg_t qubits_in_chunk;
reg_t qubits_out_chunk;
@ -965,7 +965,7 @@ rvector_t Executor<densmat_t>::measure_probs(const reg_t &qubits) const {
if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
#pragma omp parallel for private(i, j, k)
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
for (i = Base::top_state_of_group_[ig];
i < Base::top_state_of_group_[ig + 1]; i++) {
uint_t irow, icol;
@ -1084,14 +1084,14 @@ template <class densmat_t>
void Executor<densmat_t>::apply_reset(const reg_t &qubits) {
if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
#pragma omp parallel for
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
for (int_t iChunk = Base::top_state_of_group_[ig];
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
for (uint_t iChunk = Base::top_state_of_group_[ig];
iChunk < Base::top_state_of_group_[ig + 1]; iChunk++) {
Base::states_[iChunk].qreg().apply_reset(qubits);
}
}
} else {
for (int_t i = 0; i < Base::states_.size(); i++)
for (uint_t i = 0; i < Base::states_.size(); i++)
Base::states_[i].qreg().apply_reset(qubits);
}
}
@ -1120,13 +1120,13 @@ void Executor<densmat_t>::measure_reset_update(const reg_t &qubits,
mdiag[meas_state] = 1. / std::sqrt(meas_prob);
if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
#pragma omp parallel for
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
for (int_t i = Base::top_state_of_group_[ig];
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
for (uint_t i = Base::top_state_of_group_[ig];
i < Base::top_state_of_group_[ig + 1]; i++)
Base::states_[i].qreg().apply_diagonal_unitary_matrix(qubits, mdiag);
}
} else {
for (int_t i = 0; i < Base::states_.size(); i++)
for (uint_t i = 0; i < Base::states_.size(); i++)
Base::states_[i].qreg().apply_diagonal_unitary_matrix(qubits, mdiag);
}
@ -1135,13 +1135,13 @@ void Executor<densmat_t>::measure_reset_update(const reg_t &qubits,
if (qubits[0] < BasePar::chunk_bits_) {
if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
#pragma omp parallel for
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
for (int_t i = Base::top_state_of_group_[ig];
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
for (uint_t i = Base::top_state_of_group_[ig];
i < Base::top_state_of_group_[ig + 1]; i++)
Base::states_[i].qreg().apply_x(qubits[0]);
}
} else {
for (int_t i = 0; i < Base::states_.size(); i++)
for (uint_t i = 0; i < Base::states_.size(); i++)
Base::states_[i].qreg().apply_x(qubits[0]);
}
} else {
@ -1158,13 +1158,13 @@ void Executor<densmat_t>::measure_reset_update(const reg_t &qubits,
mdiag[meas_state] = 1. / std::sqrt(meas_prob);
if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
#pragma omp parallel for
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
for (int_t i = Base::top_state_of_group_[ig];
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
for (uint_t i = Base::top_state_of_group_[ig];
i < Base::top_state_of_group_[ig + 1]; i++)
Base::states_[i].qreg().apply_diagonal_unitary_matrix(qubits, mdiag);
}
} else {
for (int_t i = 0; i < Base::states_.size(); i++)
for (uint_t i = 0; i < Base::states_.size(); i++)
Base::states_[i].qreg().apply_diagonal_unitary_matrix(qubits, mdiag);
}
@ -1183,7 +1183,7 @@ void Executor<densmat_t>::measure_reset_update(const reg_t &qubits,
reg_t qubits_in_chunk;
reg_t qubits_out_chunk;
for (int_t i = 0; i < qubits.size(); i++) {
for (uint_t i = 0; i < qubits.size(); i++) {
if (qubits[i] < BasePar::chunk_bits_) {
qubits_in_chunk.push_back(qubits[i]);
} else {
@ -1193,18 +1193,18 @@ void Executor<densmat_t>::measure_reset_update(const reg_t &qubits,
if (qubits_in_chunk.size() > 0) { // in chunk exchange
if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
#pragma omp parallel for
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
for (int_t i = Base::top_state_of_group_[ig];
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
for (uint_t i = Base::top_state_of_group_[ig];
i < Base::top_state_of_group_[ig + 1]; i++)
Base::states_[i].qreg().apply_unitary_matrix(qubits, perm);
}
} else {
for (int_t i = 0; i < Base::states_.size(); i++)
for (uint_t i = 0; i < Base::states_.size(); i++)
Base::states_[i].qreg().apply_unitary_matrix(qubits, perm);
}
}
if (qubits_out_chunk.size() > 0) { // out of chunk exchange
for (int_t i = 0; i < qubits_out_chunk.size(); i++) {
for (uint_t i = 0; i < qubits_out_chunk.size(); i++) {
BasePar::apply_chunk_x(qubits_out_chunk[i]);
BasePar::apply_chunk_x(qubits_out_chunk[i] +
(Base::num_qubits_ - BasePar::chunk_bits_));
@ -1225,13 +1225,13 @@ std::vector<reg_t> Executor<densmat_t>::sample_measure(const reg_t &qubits,
rnds.push_back(rng.rand(0, 1));
reg_t allbit_samples(shots, 0);
int_t i, j;
uint_t i, j;
std::vector<double> chunkSum(Base::states_.size() + 1, 0);
double sum, localSum;
// calculate per chunk sum
if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
#pragma omp parallel for private(i)
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
for (i = Base::top_state_of_group_[ig];
i < Base::top_state_of_group_[ig + 1]; i++) {
uint_t irow, icol;
@ -1348,7 +1348,7 @@ Executor<state_t>::sample_measure_with_prob(CircuitExecutor::Branch &root,
uint_t nshots = root.num_shots();
reg_t shot_branch(nshots);
for (int_t i = 0; i < nshots; i++) {
for (uint_t i = 0; i < nshots; i++) {
shot_branch[i] = root.rng_shots()[i].rand_int(probs);
}
@ -1382,11 +1382,11 @@ void Executor<state_t>::measure_reset_update(CircuitExecutor::Branch &root,
root.branches()[i]->add_op_after_branch(op);
if (final_state >= 0 && final_state != i) {
- Operations::Op op;
- op.type = OpType::gate;
- op.name = "x";
- op.qubits = qubits;
- root.branches()[i]->add_op_after_branch(op);
+ Operations::Op op2;
+ op2.type = OpType::gate;
+ op2.name = "x";
+ op2.qubits = qubits;
+ root.branches()[i]->add_op_after_branch(op2);
}
}
}
@ -1394,7 +1394,7 @@ void Executor<state_t>::measure_reset_update(CircuitExecutor::Branch &root,
else {
// Diagonal matrix for projecting and renormalizing to measurement outcome
const size_t dim = 1ULL << qubits.size();
for (int_t i = 0; i < dim; i++) {
for (uint_t i = 0; i < dim; i++) {
cvector_t mdiag(dim, 0.);
mdiag[i] = 1. / std::sqrt(meas_probs[i]);
@ -1404,20 +1404,20 @@ void Executor<state_t>::measure_reset_update(CircuitExecutor::Branch &root,
op.params = mdiag;
root.branches()[i]->add_op_after_branch(op);
if (final_state >= 0 && final_state != i) {
if (final_state >= 0 && final_state != (int_t)i) {
// build vectorized permutation matrix
cvector_t perm(dim * dim, 0.);
perm[final_state * dim + i] = 1.;
perm[i * dim + final_state] = 1.;
for (size_t j = 0; j < dim; j++) {
if (j != final_state && j != i)
if ((int_t)j != final_state && j != i)
perm[j * dim + j] = 1.;
}
- Operations::Op op;
- op.type = OpType::matrix;
- op.qubits = qubits;
- op.mats.push_back(Utils::devectorize_matrix(perm));
- root.branches()[i]->add_op_after_branch(op);
+ Operations::Op op2;
+ op2.type = OpType::matrix;
+ op2.qubits = qubits;
+ op2.mats.push_back(Utils::devectorize_matrix(perm));
+ root.branches()[i]->add_op_after_branch(op2);
}
}
}
@ -1430,41 +1430,23 @@ void Executor<state_t>::apply_measure(CircuitExecutor::Branch &root,
rvector_t probs = sample_measure_with_prob(root, qubits);
// save result to cregs
for (int_t i = 0; i < probs.size(); i++) {
for (uint_t i = 0; i < probs.size(); i++) {
const reg_t outcome = Utils::int2reg(i, 2, qubits.size());
root.branches()[i]->creg().store_measure(outcome, cmemory, cregister);
}
measure_reset_update(root, qubits, -1, probs);
}
/*
template <class state_t>
void Executor<state_t>::apply_reset(CircuitExecutor::Branch& root, const
reg_t &qubits)
{
rvector_t probs = sample_measure_with_prob(root, qubits);
measure_reset_update(root, qubits, 0, probs);
}
*/
template <class state_t>
std::vector<reg_t>
Executor<state_t>::sample_measure(state_t &state, const reg_t &qubits,
uint_t shots,
std::vector<RngEngine> &rng) const {
int_t i, j;
uint_t i;
std::vector<double> rnds;
rnds.reserve(shots);
/*
double norm = std::real( state.qreg().trace() );
std::cout << " trace = " << norm<<std::endl;
for (i = 0; i < shots; ++i)
rnds.push_back(rng[i].rand(0, norm));
*/
for (i = 0; i < shots; ++i)
rnds.push_back(rng[i].rand(0, 1));
@ -1496,15 +1478,15 @@ void Executor<densmat_t>::apply_kraus(const reg_t &qubits,
const std::vector<cmatrix_t> &kmats) {
if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
#pragma omp parallel for
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
for (int_t iChunk = Base::top_state_of_group_[ig];
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
for (uint_t iChunk = Base::top_state_of_group_[ig];
iChunk < Base::top_state_of_group_[ig + 1]; iChunk++) {
Base::states_[iChunk].qreg().apply_superop_matrix(
qubits, Utils::vectorize_matrix(Utils::kraus_superop(kmats)));
}
}
} else {
for (int_t i = 0; i < Base::states_.size(); i++)
for (uint_t i = 0; i < Base::states_.size(); i++)
Base::states_[i].qreg().apply_superop_matrix(
qubits, Utils::vectorize_matrix(Utils::kraus_superop(kmats)));
}
@ -1549,7 +1531,7 @@ template <class densmat_t>
void Executor<densmat_t>::apply_multi_chunk_swap(const reg_t &qubits) {
reg_t qubits_density;
for (int_t i = 0; i < qubits.size(); i += 2) {
for (uint_t i = 0; i < qubits.size(); i += 2) {
uint_t q0, q1;
q0 = qubits[i * 2];
q1 = qubits[i * 2 + 1];

View File

@ -362,7 +362,6 @@ void State<densmat_t>::initialize_qreg(uint_t num_qubits, densmat_t &&state) {
template <class densmat_t>
void State<densmat_t>::initialize_omp() {
uint_t i;
BaseState::qreg_.set_omp_threshold(omp_qubit_threshold_);
if (BaseState::threads_ > 0)
BaseState::qreg_.set_omp_threads(
@ -404,7 +403,6 @@ void State<densmat_t>::set_config(const Config &config) {
// Set threshold for truncating snapshots
json_chop_threshold_ = config.chop_threshold;
uint_t i;
BaseState::qreg_.set_json_chop_threshold(json_chop_threshold_);
// Set OMP threshold for state update functions
@ -650,7 +648,7 @@ void State<densmat_t>::apply_gate(const Operations::Op &op) {
}
if (qubits_out.size() > 0) {
uint_t mask = 0;
for (int i = 0; i < qubits_out.size(); i++) {
for (uint_t i = 0; i < qubits_out.size(); i++) {
mask |= (1ull << (qubits_out[i] - BaseState::qreg_.num_qubits()));
}
if ((BaseState::qreg_.chunk_index() & mask) != mask) {
@ -670,7 +668,7 @@ void State<densmat_t>::apply_gate(const Operations::Op &op) {
else if (ctrl_chunk)
apply_gate_statevector(new_op);
else {
for (int i = 0; i < new_op.qubits.size(); i++)
for (uint_t i = 0; i < new_op.qubits.size(); i++)
new_op.qubits[i] += BaseState::qreg_.num_qubits();
apply_gate_statevector(new_op);
}
@ -861,7 +859,7 @@ void State<densmat_t>::apply_diagonal_unitary_matrix(const reg_t &qubits,
if (qubits_in.size() == qubits.size()) {
BaseState::qreg_.apply_diagonal_unitary_matrix(qubits, diag);
} else {
for (int_t i = 0; i < qubits.size(); i++) {
for (uint_t i = 0; i < qubits.size(); i++) {
if (qubits[i] >= BaseState::qreg_.num_qubits())
qubits_row[i] = qubits[i] + BaseState::num_global_qubits_ -
BaseState::qreg_.num_qubits();
@ -871,7 +869,7 @@ void State<densmat_t>::apply_diagonal_unitary_matrix(const reg_t &qubits,
diag_row);
reg_t qubits_chunk(qubits_in.size() * 2);
for (int_t i = 0; i < qubits_in.size(); i++) {
for (uint_t i = 0; i < qubits_in.size(); i++) {
qubits_chunk[i] = qubits_in[i];
qubits_chunk[i + qubits_in.size()] =
qubits_in[i] + BaseState::qreg_.num_qubits();

View File

@ -38,6 +38,7 @@ public:
// Parent class aliases
using BaseVector = QubitVectorThrust<data_t>;
using BaseMatrix = UnitaryMatrixThrust<data_t>;
using BaseVector::probabilities;
//-----------------------------------------------------------------------
// Constructors and Destructor
@ -449,9 +450,9 @@ public:
template <typename data_t>
class DensityDiagMatMultNxN : public Chunk::GateFuncBase<data_t> {
protected:
int nqubits_;
int total_bits_;
int chunk_bits_;
uint_t nqubits_;
uint_t total_bits_;
uint_t chunk_bits_;
public:
DensityDiagMatMultNxN(const reg_t &qb, int total, int chunk) {
@ -541,7 +542,7 @@ public:
offset_ = 1ull << qubits[qubits.size() - 1];
offset_sp_ = 1ull << (qubits[qubits.size() - 1] + chunk_qubits_);
cmask_ = 0;
for (int i = 0; i < qubits.size() - 1; i++)
for (uint_t i = 0; i < qubits.size() - 1; i++)
cmask_ |= (1ull << qubits[i]);
enable_batch_ = batch;
}
@ -629,7 +630,7 @@ public:
offset_ = 1ull << qubits[qubits.size() - 1];
offset_sp_ = 1ull << (qubits[qubits.size() - 1] + chunk_qubits_);
cmask_ = 0;
for (int i = 0; i < qubits.size() - 1; i++)
for (uint_t i = 0; i < qubits.size() - 1; i++)
cmask_ |= (1ull << qubits[i]);
enable_batch_ = batch;
}
@ -1081,7 +1082,7 @@ public:
vec = this->data_;
idx_vec = ((i << 1) & mask_u_) | (i & mask_l_);
idx_mat = idx_vec ^ x_mask_ + rows_ * idx_vec;
idx_mat = (idx_vec ^ x_mask_) + rows_ * idx_vec;
q0 = vec[idx_mat];
q0 = 2 * phase_ * q0;
@ -1158,7 +1159,7 @@ public:
vec = this->data_;
idx_mat = i ^ x_mask_ + rows_ * i;
idx_mat = (i ^ x_mask_) + rows_ * i;
q0 = vec[idx_mat];
q0 = phase_ * q0;
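Note: the added parentheses in these two hunks are a behavioral fix, not only a warning fix. In C++, + binds tighter than ^, so the old expression parsed as idx_vec ^ (x_mask_ + rows_ * idx_vec). A quick self-contained check, with small arbitrary values chosen so the two readings differ:

#include <cassert>
int main() {
  unsigned idx_vec = 3, x_mask = 1, rows = 1;
  unsigned without_parens = idx_vec ^ x_mask + rows * idx_vec;  // parses as idx_vec ^ (x_mask + rows * idx_vec) == 7
  unsigned with_parens = (idx_vec ^ x_mask) + rows * idx_vec;   // intended matrix index == 5
  assert(without_parens != with_parens);
  return 0;
}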
@ -1353,7 +1354,7 @@ template <typename data_t>
void DensityMatrixThrust<data_t>::apply_batched_measure(
const reg_t &qubits, std::vector<RngEngine> &rng, const reg_t &cmemory,
const reg_t &cregs) {
const int_t DIM = 1 << qubits.size();
const uint_t DIM = 1 << qubits.size();
uint_t i, count = 1;
if (BaseVector::enable_batch_) {
if (BaseVector::chunk_.pos() != 0) {
@ -1503,7 +1504,7 @@ void DensityMatrixThrust<data_t>::apply_reset(const reg_t &qubits) {
auto qubits_sorted = qubits;
std::sort(qubits_sorted.begin(), qubits_sorted.end());
for (int_t i = 0; i < qubits.size(); i++) {
for (uint_t i = 0; i < qubits.size(); i++) {
qubits_sorted.push_back(qubits[i]);
}
BaseVector::chunk_.StoreUintParams(qubits_sorted);

View File

@ -426,8 +426,8 @@ scalar_t StabilizerState::ProposeFlip(unsigned flip_pos) {
scalar_t amp;
amp.e = 2 * Q.e;
amp.p = -1 *
(AER::Utils::popcount(v)); // each Hadamard gate contributes 1/sqrt(2)
// each Hadamard gate contributes 1/sqrt(2)
amp.p = -1 * (int)(AER::Utils::popcount(v));
bool isNonZero = true;
for (unsigned q = 0; q < n; q++) {

View File

@ -743,7 +743,7 @@ void State::apply_measure(const reg_t &qubits, const reg_t &cmemory,
const reg_t &cregister, RngEngine &rng) {
rvector_t rands;
rands.reserve(qubits.size());
for (int_t i = 0; i < qubits.size(); ++i)
for (uint_t i = 0; i < qubits.size(); ++i)
rands.push_back(rng.rand(0., 1.));
reg_t outcome = qreg_.apply_measure(qubits, rands);
creg().store_measure(outcome, cmemory, cregister);
@ -777,10 +777,10 @@ State::sample_measure_using_apply_measure(const reg_t &qubits, uint_t shots,
all_samples.resize(shots);
std::vector<rvector_t> rnds_list;
rnds_list.reserve(shots);
for (int_t i = 0; i < shots; ++i) {
for (uint_t i = 0; i < shots; ++i) {
rvector_t rands;
rands.reserve(qubits.size());
for (int_t j = 0; j < qubits.size(); ++j)
for (uint_t j = 0; j < qubits.size(); ++j)
rands.push_back(rng.rand(0., 1.));
rnds_list.push_back(rands);
}

View File

@ -53,7 +53,7 @@ void MPSSizeEstimator::initialize(uint_t nq) {
qubit_map_.resize(nq);
qubit_order_.resize(nq);
for (int_t i = 0; i < nq; i++) {
for (uint_t i = 0; i < nq; i++) {
tensor_size_[i].first = 1;
tensor_size_[i].second = 1;
@ -66,7 +66,7 @@ void MPSSizeEstimator::initialize(uint_t nq) {
uint_t MPSSizeEstimator::estimate(const std::vector<Operations::Op> &ops) {
uint_t n = ops.size();
for (int_t i = 0; i < n; i++) {
for (uint_t i = 0; i < n; i++) {
switch (ops[i].type) {
case Operations::OpType::gate:
case Operations::OpType::matrix:
@ -79,7 +79,7 @@ uint_t MPSSizeEstimator::estimate(const std::vector<Operations::Op> &ops) {
}
}
uint_t max_bond = 0;
for (int_t i = 0; i < num_qubits_ - 1; i++) {
for (uint_t i = 0; i < num_qubits_ - 1; i++) {
if (max_bond < bond_dimensions_[i])
max_bond = bond_dimensions_[i];
}
@ -89,16 +89,16 @@ uint_t MPSSizeEstimator::estimate(const std::vector<Operations::Op> &ops) {
void MPSSizeEstimator::apply_qubits(const reg_t &qubits) {
reg_t sorted(qubits.size());
for (int_t i = 0; i < qubits.size(); i++) {
for (uint_t i = 0; i < qubits.size(); i++) {
sorted[i] = qubit_map_[qubits[i]];
}
std::sort(sorted.begin(), sorted.end());
for (int_t i = 1; i < qubits.size(); i++) {
for (uint_t i = 1; i < qubits.size(); i++) {
reorder_qubit(sorted[i - 1], sorted[i]);
}
for (int_t i = 0; i < qubits.size() - 1; i++) {
for (uint_t i = 0; i < qubits.size() - 1; i++) {
update(sorted[i]);
}
}

View File

@ -139,14 +139,6 @@ protected:
void measure_sampler(InputIterator first_meas, InputIterator last_meas,
Branch &branch, ResultItr result_it);
// sampling measure
virtual std::vector<reg_t> sample_measure(state_t &state, const reg_t &qubits,
uint_t shots,
std::vector<RngEngine> &rng) const {
// this is for single rng, implement in sub-class for multi-shots case
return state.sample_measure(qubits, shots, rng[0]);
}
void apply_save_expval(Branch &root, const Operations::Op &op,
ResultItr result);
};
@ -192,7 +184,7 @@ void MultiStateExecutor<state_t>::set_distribution(uint_t num_states) {
state_index_begin_.resize(Base::distributed_procs_);
state_index_end_.resize(Base::distributed_procs_);
for (int_t i = 0; i < Base::distributed_procs_; i++) {
for (uint_t i = 0; i < Base::distributed_procs_; i++) {
state_index_begin_[i] = num_global_states_ * i / Base::distributed_procs_;
state_index_end_[i] =
num_global_states_ * (i + 1) / Base::distributed_procs_;
@ -212,7 +204,7 @@ void MultiStateExecutor<state_t>::set_parallelization(
template <class state_t>
bool MultiStateExecutor<state_t>::allocate_states(uint_t num_shots,
const Config &config) {
int_t i;
uint_t i;
bool ret = true;
states_.resize(num_shots);
@ -281,12 +273,11 @@ void MultiStateExecutor<state_t>::run_circuit_shots(
Noise::NoiseModel dummy_noise;
state_t dummy_state;
RngEngine dummy_rng;
dummy_rng.set_seed(circ.seed); // this is not used actually
Circuit circ_opt;
if (sample_noise) {
RngEngine dummy_rng;
dummy_rng.set_seed(circ.seed);
circ_opt = noise.sample_noise(circ, dummy_rng,
Noise::NoiseModel::Method::circuit, true);
auto fusion_pass = Base::transpile_fusion(circ_opt.opset(), config);
@ -385,12 +376,12 @@ void MultiStateExecutor<state_t>::run_circuit_shots(
#endif
for (auto &res : par_results) {
for (int_t i = 0; i < Base::num_bind_params_; i++) {
for (uint_t i = 0; i < Base::num_bind_params_; i++) {
(result_it + i)->combine(std::move(res[i]));
}
}
for (int_t i = 0; i < Base::num_bind_params_; i++) {
for (uint_t i = 0; i < Base::num_bind_params_; i++) {
(result_it + i)->metadata.add(true, "shot_branching_enabled");
(result_it + i)
->metadata.add(sample_noise, "runtime_noise_sampling_enabled");
@ -413,7 +404,7 @@ void MultiStateExecutor<state_t>::run_circuit_with_shot_branching(
bool can_sample = false;
OpItr measure_seq = last;
OpItr it = last - 1;
int_t num_measure = 0;
uint_t num_measure = 0;
if (shot_branching_sampling_enable_) {
do {
@ -445,14 +436,14 @@ void MultiStateExecutor<state_t>::run_circuit_with_shot_branching(
if (Base::num_bind_params_ > 1) {
if (par_shots > 1) {
#pragma omp parallel for num_threads(par_shots)
for (int_t i = 0; i < nshots; i++) {
for (int_t i = 0; i < (int_t)nshots; i++) {
uint_t gid = global_state_index_ + ishot + i;
uint_t ip = gid / Base::num_shots_per_bind_param_;
shots_storage[i].set_seed(circ.seed_for_params[ip] +
(gid % Base::num_shots_per_bind_param_));
}
} else {
for (int_t i = 0; i < nshots; i++) {
for (uint_t i = 0; i < nshots; i++) {
uint_t gid = global_state_index_ + ishot + i;
uint_t ip = gid / Base::num_shots_per_bind_param_;
shots_storage[i].set_seed(circ.seed_for_params[ip] +
@ -466,10 +457,10 @@ void MultiStateExecutor<state_t>::run_circuit_with_shot_branching(
shots_storage[0].set_seed(circ.seed + global_state_index_ + ishot);
if (par_shots > 1) {
#pragma omp parallel for num_threads(par_shots)
for (int_t i = 1; i < nshots; i++)
for (int_t i = 1; i < (int_t)nshots; i++)
shots_storage[i].set_seed(circ.seed + global_state_index_ + ishot + i);
} else {
for (int_t i = 1; i < nshots; i++)
for (uint_t i = 1; i < nshots; i++)
shots_storage[i].set_seed(circ.seed + global_state_index_ + ishot + i);
}
}
@ -498,7 +489,7 @@ void MultiStateExecutor<state_t>::run_circuit_with_shot_branching(
uint_t num_active_states = 1;
// set branches
for (int_t i = 0; i < waiting_branches.size(); i++) {
for (uint_t i = 0; i < waiting_branches.size(); i++) {
if (i > num_states)
break;
uint_t sid = top_state + i;
@ -547,9 +538,9 @@ void MultiStateExecutor<state_t>::run_circuit_with_shot_branching(
if (branches[istate]->num_branches() > 0) {
// if there are additional ops remaining, queue them on new
// branches
for (int_t k = iadd + 1;
for (uint_t k = iadd + 1;
k < branches[istate]->additional_ops().size(); k++) {
for (int_t l = 0; l < branches[istate]->num_branches();
for (uint_t l = 0; l < branches[istate]->num_branches();
l++)
branches[istate]->branches()[l]->add_op_after_branch(
branches[istate]->additional_ops()[k]);
@ -631,10 +622,10 @@ void MultiStateExecutor<state_t>::run_circuit_with_shot_branching(
// repeat until new branch is available
if (nbranch > 0) {
uint_t num_states_prev = branches.size();
for (int_t i = 0; i < num_states_prev; i++) {
for (uint_t i = 0; i < num_states_prev; i++) {
// add new branches
if (branches[i]->num_branches() > 0) {
for (int_t j = 0; j < branches[i]->num_branches(); j++) {
for (uint_t j = 0; j < branches[i]->num_branches(); j++) {
if (branches[i]->branches()[j]->num_shots() > 0) {
// add new branched state
uint_t pos = branches.size();
@ -680,7 +671,7 @@ void MultiStateExecutor<state_t>::run_circuit_with_shot_branching(
// check if there are remaining ops
num_active_states = 0;
for (int_t i = 0; i < branches.size(); i++) {
for (uint_t i = 0; i < branches.size(); i++) {
if (branches[i]->op_iterator() != measure_seq ||
branches[i]->additional_ops().size() > 0)
num_active_states++;
@ -707,7 +698,7 @@ void MultiStateExecutor<state_t>::run_circuit_with_shot_branching(
Utils::apply_omp_parallel_for(can_parallel, 0, par_shots,
sampling_measure_func, par_shots);
for (int_t i = 0; i < Base::num_bind_params_; i++)
for (uint_t i = 0; i < Base::num_bind_params_; i++)
(result_it + i)->metadata.add(true, "shot_branching_sampling_enabled");
} else {
// save cregs to result
@ -718,7 +709,7 @@ void MultiStateExecutor<state_t>::run_circuit_with_shot_branching(
for (; istate < state_end; istate++) {
if (Base::num_process_per_experiment_ > 1) {
for (int_t j = 0; j < branches[istate]->num_shots(); j++) {
for (uint_t j = 0; j < branches[istate]->num_shots(); j++) {
uint_t idx = branches[istate]->rng_shots()[j].initial_seed();
uint_t ip = branches[istate]->param_index(j);
idx += ip * Base::num_shots_per_bind_param_;
@ -728,13 +719,13 @@ void MultiStateExecutor<state_t>::run_circuit_with_shot_branching(
} else {
std::string memory_hex =
states_[branches[istate]->state_index()].creg().memory_hex();
for (int_t j = 0; j < branches[istate]->num_shots(); j++) {
for (uint_t j = 0; j < branches[istate]->num_shots(); j++) {
uint_t ip = branches[istate]->param_index(j);
par_results[i][ip].data.add_accum(static_cast<uint_t>(1ULL),
"counts", memory_hex);
}
if (Base::save_creg_memory_) {
for (int_t j = 0; j < branches[istate]->num_shots(); j++) {
for (uint_t j = 0; j < branches[istate]->num_shots(); j++) {
uint_t ip = branches[istate]->param_index(j);
par_results[i][ip].data.add_list(memory_hex, "memory");
}
@ -748,14 +739,14 @@ void MultiStateExecutor<state_t>::run_circuit_with_shot_branching(
}
// clear
for (int_t i = 0; i < branches.size(); i++) {
for (uint_t i = 0; i < branches.size(); i++) {
branches[i].reset();
}
branches.clear();
}
for (auto &res : par_results) {
for (int_t i = 0; i < Base::num_bind_params_; i++) {
for (uint_t i = 0; i < Base::num_bind_params_; i++) {
(result_it + i)->combine(std::move(res[i]));
}
}
@ -777,7 +768,7 @@ void MultiStateExecutor<state_t>::apply_runtime_parameterization(
root.branch_shots_by_params();
// add binded op after branch
for (int_t i = 0; i < nparams; i++) {
for (uint_t i = 0; i < nparams; i++) {
uint_t ip = root.branches()[i]->param_index(0);
Operations::Op bind_op =
Operations::bind_parameter(op, ip, Base::num_bind_params_);
@ -799,7 +790,7 @@ void MultiStateExecutor<state_t>::measure_sampler(InputIterator first_meas,
// Check if meas_circ is empty, and if so return initial creg
if (first_meas == last_meas) {
if (Base::num_process_per_experiment_ > 1) {
for (int_t i = 0; i < shots; i++) {
for (uint_t i = 0; i < shots; i++) {
uint_t idx = branch.rng_shots()[i].initial_seed();
uint_t ip = branch.param_index(i);
idx += ip * Base::num_shots_per_bind_param_;
@ -807,7 +798,7 @@ void MultiStateExecutor<state_t>::measure_sampler(InputIterator first_meas,
cregs_[idx] = state.creg();
}
} else {
for (int_t i = 0; i < shots; i++) {
for (uint_t i = 0; i < shots; i++) {
uint_t ip = branch.param_index(i);
(result + ip)->save_count_data(state.creg(), Base::save_creg_memory_);
}
@ -837,7 +828,7 @@ void MultiStateExecutor<state_t>::measure_sampler(InputIterator first_meas,
// Generate the samples
std::vector<reg_t> all_samples;
all_samples = sample_measure(state, meas_qubits, shots, rng);
all_samples = this->sample_measure(state, meas_qubits, shots, rng);
// Make qubit map of position in vector of measured qubits
std::unordered_map<uint_t, uint_t> qubit_map;
@ -859,10 +850,6 @@ void MultiStateExecutor<state_t>::measure_sampler(InputIterator first_meas,
}
// Process samples
uint_t num_memory =
(memory_map.empty()) ? 0ULL : 1 + memory_map.rbegin()->first;
uint_t num_registers =
(register_map.empty()) ? 0ULL : 1 + register_map.rbegin()->first;
for (int_t i = all_samples.size() - 1; i >= 0; i--) {
ClassicalRegister creg = state.creg();
@ -929,7 +916,7 @@ void MultiStateExecutor<state_t>::apply_save_expval(Branch &root,
std::vector<double> expval_var(2);
expval_var[0] = expval; // mean
expval_var[1] = sq_expval - expval * expval; // variance
for (int_t i = 0; i < root.num_shots(); i++) {
for (uint_t i = 0; i < root.num_shots(); i++) {
uint_t ip = root.param_index(i);
if (!copied[ip]) {
(result + ip)
@ -940,7 +927,7 @@ void MultiStateExecutor<state_t>::apply_save_expval(Branch &root,
}
}
} else {
for (int_t i = 0; i < root.num_shots(); i++) {
for (uint_t i = 0; i < root.num_shots(); i++) {
uint_t ip = root.param_index(i);
if (!copied[ip]) {
(result + ip)

View File

@ -136,14 +136,6 @@ protected:
// Apply a save expectation value instruction
void apply_save_expval(const Operations::Op &op, ExperimentResult &result);
// Sample n-measurement outcomes without applying the measure operation
// to the system state
virtual std::vector<reg_t> sample_measure(const reg_t &qubits, uint_t shots,
RngEngine &rng) const {
std::vector<reg_t> ret;
return ret;
};
// swap between chunks
virtual void apply_chunk_swap(const reg_t &qubits);
@ -270,7 +262,7 @@ ParallelStateExecutor<state_t>::transpile_cache_blocking(
template <class state_t>
bool ParallelStateExecutor<state_t>::allocate(uint_t num_qubits,
const Config &config) {
int_t i;
uint_t i;
Base::num_qubits_ = num_qubits;
chunk_bits_ = cache_block_qubit_;
@ -312,9 +304,9 @@ bool ParallelStateExecutor<state_t>::allocate(uint_t num_qubits,
template <class state_t>
bool ParallelStateExecutor<state_t>::allocate_states(uint_t num_states,
const Config &config) {
int_t i;
uint_t i;
bool init_states = true;
uint_t num_states_allocated;
uint_t num_states_allocated = num_states;
// deallocate qregs before reallocation
if (Base::states_.size() > 0) {
if (Base::states_.size() == num_states)
@ -532,7 +524,7 @@ void ParallelStateExecutor<state_t>::run_circuit_shots(
result.metadata.copy(fusion_result.metadata);
}
for (int_t ishot = 0; ishot < circ.shots; ishot++) {
for (uint_t ishot = 0; ishot < circ.shots; ishot++) {
RngEngine rng;
if (iparam == 0 && ishot == 0)
rng = init_rng;
@ -616,7 +608,7 @@ void ParallelStateExecutor<state_t>::measure_sampler(InputIterator first_meas,
// Generate the samples
auto timer_start = myclock_t::now();
auto all_samples = sample_measure(meas_qubits, shots, rng);
auto all_samples = this->sample_measure(meas_qubits, shots, rng);
auto time_taken =
std::chrono::duration<double>(myclock_t::now() - timer_start).count();
result.metadata.add(time_taken, "sample_measure_time");
@ -792,11 +784,11 @@ void ParallelStateExecutor<state_t>::apply_ops_chunks(
uint_t iOpBegin = iOp + 1;
if (Base::num_groups_ > 1 && chunk_omp_parallel_) {
#pragma omp parallel for num_threads(Base::num_groups_)
for (int_t ig = 0; ig < Base::num_groups_; ig++)
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++)
apply_cache_blocking_ops(ig, first + iOpBegin, first + iOpEnd, result,
rng, iparam);
} else {
for (int_t ig = 0; ig < Base::num_groups_; ig++)
for (uint_t ig = 0; ig < Base::num_groups_; ig++)
apply_cache_blocking_ops(ig, first + iOpBegin, first + iOpEnd, result,
rng, iparam);
}
@ -810,11 +802,11 @@ void ParallelStateExecutor<state_t>::apply_ops_chunks(
final_ops && nOp == iOp + 1)) {
if (Base::num_groups_ > 1 && chunk_omp_parallel_) {
#pragma omp parallel for num_threads(Base::num_groups_)
for (int_t ig = 0; ig < Base::num_groups_; ig++)
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++)
apply_cache_blocking_ops(ig, bind_op.cbegin(), bind_op.cend(),
result, rng, iparam);
} else {
for (int_t ig = 0; ig < Base::num_groups_; ig++)
for (uint_t ig = 0; ig < Base::num_groups_; ig++)
apply_cache_blocking_ops(ig, bind_op.cbegin(), bind_op.cend(),
result, rng, iparam);
}
@ -824,11 +816,11 @@ void ParallelStateExecutor<state_t>::apply_ops_chunks(
final_ops && nOp == iOp + 1)) {
if (Base::num_groups_ > 1 && chunk_omp_parallel_) {
#pragma omp parallel for num_threads(Base::num_groups_)
for (int_t ig = 0; ig < Base::num_groups_; ig++)
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++)
apply_cache_blocking_ops(ig, first + iOp, first + iOp + 1, result,
rng, iparam);
} else {
for (int_t ig = 0; ig < Base::num_groups_; ig++)
for (uint_t ig = 0; ig < Base::num_groups_; ig++)
apply_cache_blocking_ops(ig, first + iOp, first + iOp + 1, result,
rng, iparam);
}
@ -843,10 +835,10 @@ void ParallelStateExecutor<state_t>::apply_ops_chunks(
if (Base::num_groups_ > 1 && chunk_omp_parallel_) {
#pragma omp parallel for num_threads(Base::num_groups_)
for (int_t ig = 0; ig < Base::num_groups_; ig++)
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++)
Base::states_[Base::top_state_of_group_[ig]].qreg().synchronize();
} else {
for (int_t ig = 0; ig < Base::num_groups_; ig++)
for (uint_t ig = 0; ig < Base::num_groups_; ig++)
Base::states_[Base::top_state_of_group_[ig]].qreg().synchronize();
}
@ -881,7 +873,7 @@ void ParallelStateExecutor<state_t>::apply_cache_blocking_ops(
const int_t iGroup, InputIterator first, InputIterator last,
ExperimentResult &result, RngEngine &rng, uint_t iparam) {
// for each chunk in group
for (int_t iChunk = Base::top_state_of_group_[iGroup];
for (uint_t iChunk = Base::top_state_of_group_[iGroup];
iChunk < Base::top_state_of_group_[iGroup + 1]; iChunk++) {
// fetch chunk in cache
if (Base::states_[iChunk].qreg().fetch_chunk()) {
@ -901,15 +893,15 @@ void ParallelStateExecutor<state_t>::apply_cache_blocking_ops(
template <class state_t>
template <typename list_t>
void ParallelStateExecutor<state_t>::initialize_from_vector(const list_t &vec) {
int_t iChunk;
uint_t iChunk;
if (chunk_omp_parallel_ && Base::num_groups_ > 1) {
#pragma omp parallel for private(iChunk)
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
for (iChunk = Base::top_state_of_group_[ig];
iChunk < Base::top_state_of_group_[ig + 1]; iChunk++) {
list_t tmp(1ull << (chunk_bits_ * qubit_scale()));
for (int_t i = 0; i < (1ull << (chunk_bits_ * qubit_scale())); i++) {
for (uint_t i = 0; i < (1ull << (chunk_bits_ * qubit_scale())); i++) {
tmp[i] = vec[((Base::global_state_index_ + iChunk)
<< (chunk_bits_ * qubit_scale())) +
i];
@ -920,7 +912,7 @@ void ParallelStateExecutor<state_t>::initialize_from_vector(const list_t &vec) {
} else {
for (iChunk = 0; iChunk < Base::num_local_states_; iChunk++) {
list_t tmp(1ull << (chunk_bits_ * qubit_scale()));
for (int_t i = 0; i < (1ull << (chunk_bits_ * qubit_scale())); i++) {
for (uint_t i = 0; i < (1ull << (chunk_bits_ * qubit_scale())); i++) {
tmp[i] = vec[((Base::global_state_index_ + iChunk)
<< (chunk_bits_ * qubit_scale())) +
i];
@ -933,10 +925,10 @@ void ParallelStateExecutor<state_t>::initialize_from_vector(const list_t &vec) {
template <class state_t>
template <typename list_t>
void ParallelStateExecutor<state_t>::initialize_from_matrix(const list_t &mat) {
int_t iChunk;
uint_t iChunk;
if (chunk_omp_parallel_ && Base::num_groups_ > 1) {
#pragma omp parallel for private(iChunk)
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
for (iChunk = Base::top_state_of_group_[ig];
iChunk < Base::top_state_of_group_[ig + 1]; iChunk++) {
list_t tmp(1ull << (chunk_bits_), 1ull << (chunk_bits_));
@ -949,7 +941,7 @@ void ParallelStateExecutor<state_t>::initialize_from_matrix(const list_t &mat) {
<< (chunk_bits_);
// copy part of state for this chunk
uint_t i, row, col;
uint_t i;
for (i = 0; i < (1ull << (chunk_bits_ * qubit_scale())); i++) {
uint_t icol = i & ((1ull << chunk_bits_) - 1);
uint_t irow = i >> chunk_bits_;
@ -970,7 +962,7 @@ void ParallelStateExecutor<state_t>::initialize_from_matrix(const list_t &mat) {
<< (chunk_bits_);
// copy part of state for this chunk
uint_t i, row, col;
uint_t i;
for (i = 0; i < (1ull << (chunk_bits_ * qubit_scale())); i++) {
uint_t icol = i & ((1ull << chunk_bits_) - 1);
uint_t irow = i >> chunk_bits_;
@ -985,7 +977,7 @@ void ParallelStateExecutor<state_t>::initialize_from_matrix(const list_t &mat) {
template <class state_t>
auto ParallelStateExecutor<state_t>::apply_to_matrix(bool copy) {
// this function is used to collect states over chunks
int_t iChunk;
uint_t iChunk;
uint_t size = 1ull << (chunk_bits_ * qubit_scale());
uint_t mask = (1ull << (chunk_bits_)) - 1;
uint_t num_threads = Base::states_[0].qreg().get_omp_threads();
@ -1024,7 +1016,7 @@ auto ParallelStateExecutor<state_t>::apply_to_matrix(bool copy) {
recv_data(tmp.data(), size, 0, iChunk);
#endif
#pragma omp parallel for if (num_threads > 1) num_threads(num_threads)
for (i = 0; i < size; i++) {
for (i = 0; i < (int_t)size; i++) {
uint_t irow = i >> (chunk_bits_);
uint_t icol = i & mask;
uint_t idx =
@ -1093,9 +1085,7 @@ void ParallelStateExecutor<state_t>::apply_save_expval(
template <class state_t>
void ParallelStateExecutor<state_t>::apply_chunk_swap(const reg_t &qubits) {
uint_t nLarge = 1;
uint_t q0, q1;
int_t iChunk;
q0 = qubits[qubits.size() - 2];
q1 = qubits[qubits.size() - 1];
@ -1112,14 +1102,14 @@ void ParallelStateExecutor<state_t>::apply_chunk_swap(const reg_t &qubits) {
// inside chunk
if (chunk_omp_parallel_ && Base::num_groups_ > 1) {
#pragma omp parallel for num_threads(Base::num_groups_)
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
for (int_t iChunk = Base::top_state_of_group_[ig];
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
for (uint_t iChunk = Base::top_state_of_group_[ig];
iChunk < Base::top_state_of_group_[ig + 1]; iChunk++)
Base::states_[iChunk].qreg().apply_mcswap(qubits);
}
} else {
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
for (int_t iChunk = Base::top_state_of_group_[ig];
for (uint_t ig = 0; ig < Base::num_groups_; ig++) {
for (uint_t iChunk = Base::top_state_of_group_[ig];
iChunk < Base::top_state_of_group_[ig + 1]; iChunk++)
Base::states_[iChunk].qreg().apply_mcswap(qubits);
}
@ -1139,7 +1129,7 @@ void ParallelStateExecutor<state_t>::apply_chunk_swap(const reg_t &qubits) {
// processes
// is needed
auto apply_chunk_swap_1qubit = [this, mask1, qubits](int_t iGroup) {
for (int_t ic = Base::top_state_of_group_[iGroup];
for (uint_t ic = Base::top_state_of_group_[iGroup];
ic < Base::top_state_of_group_[iGroup + 1]; ic++) {
uint_t baseChunk;
baseChunk = ic & (~mask1);
@ -1150,7 +1140,7 @@ void ParallelStateExecutor<state_t>::apply_chunk_swap(const reg_t &qubits) {
};
auto apply_chunk_swap_2qubits = [this, mask0, mask1,
qubits](int_t iGroup) {
for (int_t ic = Base::top_state_of_group_[iGroup];
for (uint_t ic = Base::top_state_of_group_[iGroup];
ic < Base::top_state_of_group_[iGroup + 1]; ic++) {
uint_t baseChunk;
baseChunk = ic & (~(mask0 | mask1));
@ -1172,7 +1162,8 @@ void ParallelStateExecutor<state_t>::apply_chunk_swap(const reg_t &qubits) {
}
#ifdef AER_MPI
else {
int_t iPair;
uint_t nLarge = 1;
uint_t iPair;
uint_t nPair;
uint_t baseChunk, iChunk1, iChunk2;
@ -1343,14 +1334,14 @@ void ParallelStateExecutor<state_t>::apply_multi_chunk_swap(
// swap inside chunks to prepare for all-to-all shuffle
if (chunk_omp_parallel_ && Base::num_groups_ > 1) {
#pragma omp parallel for
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
for (int_t iChunk = Base::top_state_of_group_[ig];
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
for (uint_t iChunk = Base::top_state_of_group_[ig];
iChunk < Base::top_state_of_group_[ig + 1]; iChunk++)
Base::states_[iChunk].qreg().apply_multi_swaps(local_swaps);
}
} else {
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
for (int_t iChunk = Base::top_state_of_group_[ig];
for (uint_t ig = 0; ig < Base::num_groups_; ig++) {
for (uint_t iChunk = Base::top_state_of_group_[ig];
iChunk < Base::top_state_of_group_[ig + 1]; iChunk++)
Base::states_[iChunk].qreg().apply_multi_swaps(local_swaps);
}
@ -1366,7 +1357,7 @@ void ParallelStateExecutor<state_t>::apply_multi_chunk_swap(
for (uint_t i = 0; i < nchunk; i++) {
chunk_offset[i] = 0;
for (uint_t k = 0; k < nswap; k++) {
for (int_t k = 0; k < nswap; k++) {
if (((i >> k) & 1) != 0)
chunk_offset[i] += (1ull << chunk_shuffle_qubits[k]);
}
@ -1381,7 +1372,7 @@ void ParallelStateExecutor<state_t>::apply_multi_chunk_swap(
uint_t i1, i2, k, ii, t;
baseChunk = 0;
ii = iPair;
for (k = 0; k < nswap; k++) {
for (k = 0; k < (uint_t)nswap; k++) {
t = ii & ((1ull << chunk_shuffle_qubits_sorted[k]) - 1);
baseChunk += t;
ii = (ii - t) << 1;
@ -1395,7 +1386,6 @@ void ParallelStateExecutor<state_t>::apply_multi_chunk_swap(
// all-to-all
// send data
for (uint_t iswap = 1; iswap < nchunk; iswap++) {
uint_t sizeRecv, sizeSend;
uint_t num_local_swap = 0;
for (i1 = 0; i1 < nchunk; i1++) {
i2 = i1 ^ iswap;
@ -1412,6 +1402,7 @@ void ParallelStateExecutor<state_t>::apply_multi_chunk_swap(
continue; // swap while data is exchanged between processes
}
#ifdef AER_MPI
uint_t sizeRecv, sizeSend;
uint_t offset1 = i1 << (chunk_bits_ * qubit_scale() - nswap);
uint_t offset2 = i2 << (chunk_bits_ * qubit_scale() - nswap);
uint_t iChunk1 =
@ -1419,7 +1410,7 @@ void ParallelStateExecutor<state_t>::apply_multi_chunk_swap(
uint_t iChunk2 =
baseChunk + chunk_offset[i2] - Base::global_state_index_;
int_t tid = (iPair << nswap) + iswap;
uint_t tid = (iPair << nswap) + iswap;
if (iProc1 == Base::distributed_rank_) {
auto pRecv = Base::states_[iChunk1].qreg().recv_buffer(sizeRecv);
@ -1499,14 +1490,14 @@ void ParallelStateExecutor<state_t>::apply_multi_chunk_swap(
// restore qubits order
if (chunk_omp_parallel_ && Base::num_groups_ > 1) {
#pragma omp parallel for
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
for (int_t iChunk = Base::top_state_of_group_[ig];
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
for (uint_t iChunk = Base::top_state_of_group_[ig];
iChunk < Base::top_state_of_group_[ig + 1]; iChunk++)
Base::states_[iChunk].qreg().apply_multi_swaps(local_swaps);
}
} else {
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
for (int_t iChunk = Base::top_state_of_group_[ig];
for (uint_t ig = 0; ig < Base::num_groups_; ig++) {
for (uint_t iChunk = Base::top_state_of_group_[ig];
iChunk < Base::top_state_of_group_[ig + 1]; iChunk++)
Base::states_[iChunk].qreg().apply_multi_swaps(local_swaps);
}
@ -1515,13 +1506,10 @@ void ParallelStateExecutor<state_t>::apply_multi_chunk_swap(
template <class state_t>
void ParallelStateExecutor<state_t>::apply_chunk_x(const uint_t qubit) {
int_t iChunk;
uint_t nLarge = 1;
if (qubit < chunk_bits_ * qubit_scale()) {
auto apply_mcx = [this, qubit](int_t ig) {
reg_t qubits(1, qubit);
for (int_t iChunk = Base::top_state_of_group_[ig];
for (uint_t iChunk = Base::top_state_of_group_[ig];
iChunk < Base::top_state_of_group_[ig + 1]; iChunk++)
Base::states_[iChunk].qreg().apply_mcx(qubits);
};
@ -1529,9 +1517,7 @@ void ParallelStateExecutor<state_t>::apply_chunk_x(const uint_t qubit) {
(chunk_omp_parallel_ && Base::num_groups_ > 1), 0, Base::num_groups_,
apply_mcx);
} else { // exchange over chunks
int_t iPair;
uint_t nPair, mask;
uint_t baseChunk, iChunk1, iChunk2;
reg_t qubits(2);
qubits[0] = qubit;
qubits[1] = qubit;
@ -1547,7 +1533,7 @@ void ParallelStateExecutor<state_t>::apply_chunk_x(const uint_t qubit) {
nPair = Base::num_local_states_ >> 1;
auto apply_chunk_swap = [this, mask, qubits](int_t iGroup) {
for (int_t ic = Base::top_state_of_group_[iGroup];
for (uint_t ic = Base::top_state_of_group_[iGroup];
ic < Base::top_state_of_group_[iGroup + 1]; ic++) {
uint_t pairChunk;
pairChunk = ic ^ mask;
@ -1562,6 +1548,9 @@ void ParallelStateExecutor<state_t>::apply_chunk_x(const uint_t qubit) {
}
#ifdef AER_MPI
else {
uint_t iPair;
uint_t baseChunk, iChunk1, iChunk2;
// chunk scheduler that supports any number of processes
uint_t nu[3];
uint_t ub[3];
@ -1570,7 +1559,6 @@ void ParallelStateExecutor<state_t>::apply_chunk_x(const uint_t qubit) {
uint_t iLocalChunk, iRemoteChunk, iProc;
int i;
nLarge = 1;
nu[0] = 1ull << (qubit - chunk_bits_ * qubit_scale());
ub[0] = 0;
iu[0] = 0;
@ -1864,8 +1852,8 @@ void ParallelStateExecutor<state_t>::gather_state(
AER::Vector<std::complex<data_t>> &state) {
#ifdef AER_MPI
if (Base::distributed_procs_ > 1) {
uint_t size, local_size, global_size, offset;
int i;
uint_t global_size;
uint_t i;
std::vector<int> recv_counts(Base::distributed_procs_);
std::vector<int> recv_offset(Base::distributed_procs_);

View File

@ -83,7 +83,7 @@ public:
void set_shots(std::vector<RngEngine> &shots) { shots_ = shots; }
void initialize_shots(const uint_t nshots, const uint_t seed) {
shots_.resize(nshots);
for (int_t i = 0; i < nshots; i++) {
for (uint_t i = 0; i < nshots; i++) {
shots_[i].set_seed(seed + i);
}
}
@ -151,7 +151,7 @@ public:
if (param_index_.size() == 1) {
return param_index_[0];
}
for (int_t i = 0; i < param_index_.size(); i++) {
for (uint_t i = 0; i < param_index_.size(); i++) {
if (param_shots_[i] > ishot) {
return param_index_[i];
}
@ -174,13 +174,13 @@ void Branch::branch_shots(reg_t &shots, int_t nbranch) {
if (param_index_.size() > 1) {
branches_[i]->param_index_ = param_index_;
branches_[i]->param_shots_.resize(param_index_.size());
for (int_t j = 0; j < param_index_.size(); j++)
for (uint_t j = 0; j < param_index_.size(); j++)
branches_[i]->param_shots_[j] = 0;
}
}
uint_t pos = 0;
for (int_t i = 0; i < shots.size(); i++) {
for (uint_t i = 0; i < shots.size(); i++) {
branches_[shots[i]]->shots_.push_back(shots_[i]);
if (param_index_.size() > 1) {
@ -193,19 +193,19 @@ void Branch::branch_shots(reg_t &shots, int_t nbranch) {
// set parameter indices
if (param_index_.size() > 1) {
for (int_t i = 0; i < nbranch; i++) {
uint_t pos = 0;
while (pos < branches_[i]->param_index_.size()) {
if (branches_[i]->param_shots_[pos] == 0) {
uint_t ppos = 0;
while (ppos < branches_[i]->param_index_.size()) {
if (branches_[i]->param_shots_[ppos] == 0) {
branches_[i]->param_index_.erase(branches_[i]->param_index_.begin() +
pos);
ppos);
branches_[i]->param_shots_.erase(branches_[i]->param_index_.begin() +
pos);
ppos);
} else {
if (pos > 0) {
branches_[i]->param_shots_[pos] +=
branches_[i]->param_shots_[pos - 1];
if (ppos > 0) {
branches_[i]->param_shots_[ppos] +=
branches_[i]->param_shots_[ppos - 1];
}
pos++;
ppos++;
}
}
}
@ -218,27 +218,27 @@ void Branch::branch_shots(reg_t &shots, int_t nbranch) {
void Branch::branch_shots_by_params(void) {
branches_.resize(param_index_.size());
for (int_t i = 0; i < param_index_.size(); i++) {
for (uint_t i = 0; i < param_index_.size(); i++) {
branches_[i] = std::make_shared<Branch>();
branches_[i]->creg_ = creg_;
branches_[i]->iter_ = iter_;
branches_[i]->flow_marks_ = flow_marks_;
}
uint_t pos = 0;
for (int_t i = 0; i < shots_.size(); i++) {
for (uint_t i = 0; i < shots_.size(); i++) {
if (i >= param_shots_[pos])
pos++;
branches_[pos]->shots_.push_back(shots_[i]);
}
for (int_t i = 0; i < param_index_.size(); i++) {
for (uint_t i = 0; i < param_index_.size(); i++) {
branches_[i]->set_param_index(param_index_[i], 0);
}
}
void Branch::advance_iterator(void) {
iter_++;
for (int_t i = 0; i < branches_.size(); i++) {
for (uint_t i = 0; i < branches_.size(); i++) {
branches_[i]->iter_++;
}
}
@ -253,24 +253,24 @@ bool Branch::apply_runtime_noise_sampling(const ClassicalRegister &creg,
reg_t shot_map(nshots);
std::vector<std::vector<Operations::Op>> noises;
for (int_t i = 0; i < nshots; i++) {
for (uint_t i = 0; i < nshots; i++) {
std::vector<Operations::Op> noise_ops =
noise.sample_noise_loc(op, shots_[i]);
// search for identical noise ops
int_t pos = -1;
for (int_t j = 0; j < noises.size(); j++) {
for (uint_t j = 0; j < noises.size(); j++) {
if (noise_ops.size() != noises[j].size())
continue;
bool same = true;
for (int_t k = 0; k < noise_ops.size(); k++) {
for (uint_t k = 0; k < noise_ops.size(); k++) {
if (noise_ops[k].type != noises[j][k].type ||
noise_ops[k].name != noises[j][k].name)
same = false;
else if (noise_ops[k].qubits.size() != noises[j][k].qubits.size())
same = false;
else {
for (int_t l = 0; l < noise_ops[k].qubits.size(); l++) {
for (uint_t l = 0; l < noise_ops[k].qubits.size(); l++) {
if (noise_ops[k].qubits[l] != noises[j][k].qubits[l]) {
same = false;
break;
@ -286,7 +286,7 @@ bool Branch::apply_runtime_noise_sampling(const ClassicalRegister &creg,
} else if (noise_ops[k].params.size() != noises[j][k].params.size())
same = false;
else {
for (int_t l = 0; l < noise_ops[k].params.size(); l++) {
for (uint_t l = 0; l < noise_ops[k].params.size(); l++) {
if (noise_ops[k].params[l] != noises[j][k].params[l]) {
same = false;
break;
@ -298,12 +298,12 @@ bool Branch::apply_runtime_noise_sampling(const ClassicalRegister &creg,
if (noise_ops[k].mats.size() != noises[j][k].mats.size())
same = false;
else {
for (int_t l = 0; l < noise_ops[k].mats.size(); l++) {
for (uint_t l = 0; l < noise_ops[k].mats.size(); l++) {
if (noise_ops[k].mats[l].size() != noises[j][k].mats[l].size()) {
same = false;
break;
}
for (int_t m = 0; m < noise_ops[k].mats[l].size(); m++) {
for (uint_t m = 0; m < noise_ops[k].mats[l].size(); m++) {
if (noise_ops[k].mats[l][m] != noises[j][k].mats[l][m]) {
same = false;
break;
@ -333,7 +333,7 @@ bool Branch::apply_runtime_noise_sampling(const ClassicalRegister &creg,
creg_ = creg;
branch_shots(shot_map, noises.size());
for (int_t i = 0; i < noises.size(); i++) {
for (uint_t i = 0; i < noises.size(); i++) {
branches_[i]->copy_ops_after_branch(noises[i]);
}
@ -342,7 +342,7 @@ bool Branch::apply_runtime_noise_sampling(const ClassicalRegister &creg,
void Branch::remove_empty_branches(void) {
int_t istart = 0;
for (int_t j = 0; j < branches_.size(); j++) {
for (uint_t j = 0; j < branches_.size(); j++) {
if (branches_[j]->num_shots() > 0) {
// copy shots to the root
shots_ = branches_[j]->rng_shots();
@ -359,7 +359,7 @@ void Branch::remove_empty_branches(void) {
std::vector<std::shared_ptr<Branch>> new_branches;
for (int_t j = istart; j < branches_.size(); j++) {
for (uint_t j = istart; j < branches_.size(); j++) {
if (branches_[j]->num_shots() > 0)
new_branches.push_back(branches_[j]);
else
@ -370,7 +370,7 @@ void Branch::remove_empty_branches(void) {
void Branch::reset_branch(void) {
// reset random seeds
for (int_t i = 0; i < shots_.size(); i++) {
for (uint_t i = 0; i < shots_.size(); i++) {
shots_[i].set_seed(shots_[i].initial_seed());
}
additional_ops_.clear();
@ -390,7 +390,7 @@ void Branch::set_param_index(uint_t ishot, uint_t nshots_per_param) {
param_shots_.clear();
param_index_.push_back(ishot / nshots_per_param);
for (int_t i = 1; i < shots_.size(); i++) {
for (uint_t i = 1; i < shots_.size(); i++) {
uint_t ip = (ishot + i) / nshots_per_param;
if (ip != param_index_[pos]) {
param_shots_.push_back(i);

View File

@ -431,7 +431,7 @@ bool Clifford::measure_and_update(const uint64_t qubit,
auto measure_non_determinisitic_func = [this, rS, row,
qubit](AER::int_t i) {
uint64_t row_mask = ~0ull;
if ((row >> destabilizer_phases_.BLOCK_BITS) == i)
if ((row >> destabilizer_phases_.BLOCK_BITS) == (uint_t)i)
row_mask ^= (1ull << (row & destabilizer_phases_.BLOCK_MASK));
uint64_t d_mask = row_mask & destabilizer_table_[qubit].X(i);
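Note: this cast is the -Wsign-compare half of the cleanup. The lambda parameter stays signed (AER::int_t, presumably because the parallel dispatcher also feeds it to OpenMP loops), so the one comparison against an unsigned shift result gets an explicit cast instead of a signature change. Sketch of the pattern:

#include <cstdint>
using int_t = int64_t;
using uint_t = uint64_t;

bool same_block(uint_t row, unsigned block_bits, int_t i) {
  return (row >> block_bits) == (uint_t)i; // cast at the comparison, signature unchanged
}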

View File

@ -255,7 +255,7 @@ void State::set_config(const Config &config) {
}
bool State::validate_parameters(const std::vector<Operations::Op> &ops) const {
for (int_t i = 0; i < ops.size(); i++) {
for (uint_t i = 0; i < ops.size(); i++) {
if (ops[i].type == OpType::gate) {
// check parameter of R gates
if (ops[i].name == "rx" || ops[i].name == "ry" || ops[i].name == "rz") {
@ -639,7 +639,7 @@ template <typename T>
void State::get_probabilities_auxiliary(const reg_t &qubits,
std::string outcome,
double outcome_prob, T &probs) {
uint_t qubit_for_branching = -1;
int_t qubit_for_branching = -1;
for (uint_t i = 0; i < qubits.size(); ++i) {
uint_t qubit = qubits[qubits.size() - i - 1];
if (outcome[i] == 'X') {
@ -690,7 +690,7 @@ void State::get_probability_helper(const reg_t &qubits,
const std::string &outcome,
std::string &outcome_carry,
double &prob_carry) {
uint_t qubit_for_branching = -1;
int_t qubit_for_branching = -1;
for (uint_t i = 0; i < qubits.size(); ++i) {
uint_t qubit = qubits[qubits.size() - i - 1];
if (outcome_carry[i] == 'X') {

View File

@ -395,7 +395,7 @@ void ChunkContainer<data_t>::UnmapBuffer(Chunk<data_t> &buf) {
template <typename data_t>
void ChunkContainer<data_t>::unmap_all(void) {
int_t i;
uint_t i;
for (i = 0; i < chunks_map_.size(); i++)
chunks_map_[i] = false;
num_chunk_mapped_ = 0;
@ -804,14 +804,8 @@ void ChunkContainer<data_t>::ExecuteSum2(double *pSum, Function func,
#endif
}
void host_func_launcher(void *pParam) {
HostFuncBase *func = reinterpret_cast<HostFuncBase *>(pParam);
func->execute();
}
template <typename data_t>
void ChunkContainer<data_t>::allocate_chunks(void) {
uint_t i;
chunks_map_.resize(num_chunks_, false);
reduced_queue_begin_.resize(num_chunks_, 0);
@ -855,7 +849,7 @@ void ChunkContainer<data_t>::apply_matrix(
#else
if (N <= 10) {
#endif
int i;
uint_t i;
for (i = 0; i < N; i++) {
qubits_sorted.push_back(qubits[i]);
}
@ -918,7 +912,7 @@ void ChunkContainer<data_t>::apply_batched_matrix(
} else {
auto qubits_sorted = qubits;
std::sort(qubits_sorted.begin(), qubits_sorted.end());
for (int i = 0; i < N; i++) {
for (uint_t i = 0; i < N; i++) {
qubits_sorted.push_back(qubits[i]);
}
StoreUintParams(qubits_sorted, iChunk);
@ -971,8 +965,8 @@ void ChunkContainer<data_t>::apply_phase(const uint_t iChunk,
const int_t control_bits,
const std::complex<double> phase,
const uint_t gid, const uint_t count) {
Execute(phase_func<data_t>(qubits, *(thrust::complex<double> *)&phase),
iChunk, gid, count);
thrust::complex<double> p(phase);
Execute(phase_func<data_t>(qubits, p), iChunk, gid, count);
}
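Note: this corresponds to the "fix Thrust seg fault" item in the commit message. The old code type-punned the const std::complex<double> argument through a pointer cast; constructing a real thrust::complex temporary uses the documented converting constructor and drops any reliance on layout and aliasing assumptions. A sketch, assuming only that the functor takes a thrust::complex<double> by value:

#include <complex>
#include <thrust/complex.h>

void pass_phase(const std::complex<double> &phase) {
  // before: *(thrust::complex<double> *)&phase  (type punning, aliasing risk)
  thrust::complex<double> p(phase); // thrust::complex converts from std::complex
  // hand p to the kernel functor here
  (void)p;
}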
template <typename data_t>
@ -989,8 +983,8 @@ void ChunkContainer<data_t>::apply_multi_swaps(const uint_t iChunk,
const uint_t gid,
const uint_t count) {
// max 5 swaps can be applied at once using GPU's shared memory
for (int_t i = 0; i < qubits.size(); i += 10) {
int_t n = 10;
for (uint_t i = 0; i < qubits.size(); i += 10) {
uint_t n = 10;
if (i + n > qubits.size())
n = qubits.size() - i;
@ -1009,7 +1003,6 @@ void ChunkContainer<data_t>::apply_permutation(
const uint_t iChunk, const reg_t &qubits,
const std::vector<std::pair<uint_t, uint_t>> &pairs, const uint_t gid,
const uint_t count) {
const size_t N = qubits.size();
auto qubits_sorted = qubits;
std::sort(qubits_sorted.begin(), qubits_sorted.end());
@ -1080,7 +1073,7 @@ void ChunkContainer<data_t>::probabilities(std::vector<double> &probs,
template <typename data_t>
double ChunkContainer<data_t>::norm(uint_t iChunk, uint_t count) const {
double ret;
double ret = 0.0;
ExecuteSum(&ret, norm_func<data_t>(), iChunk, count);
return ret;
@ -1089,7 +1082,7 @@ double ChunkContainer<data_t>::norm(uint_t iChunk, uint_t count) const {
template <typename data_t>
double ChunkContainer<data_t>::trace(uint_t iChunk, uint_t row,
uint_t count) const {
double ret;
double ret = 0.0;
ExecuteSum(&ret, trace_func<data_t>(row), iChunk, count);
return ret;
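Note: the = 0.0 initializers guard the reduction outputs. If ExecuteSum returns without writing on some path (an empty chunk range, for example), the function now returns a well-defined zero instead of an indeterminate value, and -Wmaybe-uninitialized stays quiet. Minimal illustration with a hypothetical stand-in for the reduction:

double norm_of(bool have_data) {
  double ret = 0.0;   // was: double ret;  (indeterminate if never written)
  if (have_data)
    ret = 42.0;       // the real reduction writes only on some paths
  return ret;
}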
@ -1108,7 +1101,7 @@ double ChunkContainer<data_t>::expval_matrix(const uint_t iChunk,
else {
auto qubits_sorted = qubits;
std::sort(qubits_sorted.begin(), qubits_sorted.end());
for (int_t i = 0; i < N; i++) {
for (uint_t i = 0; i < N; i++) {
qubits_sorted.push_back(qubits[i]);
}
@ -1166,7 +1159,6 @@ void ChunkContainer<data_t>::batched_expval_pauli(
count, first);
return;
}
double ret;
// specialize x_max == 0
if (x_mask == 0) {
ExecuteSum2(nullptr,

View File

@ -35,11 +35,11 @@ protected:
std::vector<std::shared_ptr<ChunkContainer<data_t>>>
chunks_; // chunk containers for each device and host
int num_devices_; // number of devices
int num_places_; // number of places (devices + host)
uint_t num_devices_; // number of devices
uint_t num_places_; // number of places (devices + host)
int chunk_bits_; // number of qubits of chunk
int num_qubits_; // number of global qubits
uint_t chunk_bits_; // number of qubits of chunk
uint_t num_qubits_; // number of global qubits
uint_t num_chunks_; // number of chunks on this process
uint_t chunk_index_; // global chunk index for the first chunk
@ -105,7 +105,6 @@ public:
template <typename data_t>
ChunkManager<data_t>::ChunkManager() {
int i, j;
num_places_ = 1;
chunk_bits_ = 0;
num_chunks_ = 0;
@ -126,7 +125,9 @@ ChunkManager<data_t>::ChunkManager() {
#else
#ifdef AER_THRUST_GPU
if (cudaGetDeviceCount(&num_devices_) == cudaSuccess) {
int ndev;
if (cudaGetDeviceCount(&ndev) == cudaSuccess) {
num_devices_ = ndev;
num_places_ = num_devices_;
} else {
cudaGetLastError();
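Note: with num_devices_ widened to uint_t, its address no longer matches cudaGetDeviceCount(int *), so the count is read into a local int first. Sketch of the pattern on the AER_THRUST_GPU path:

#include <cuda_runtime.h>
#include <cstdint>

uint64_t query_device_count() {
  int ndev = 0;                        // cudaGetDeviceCount expects int*
  if (cudaGetDeviceCount(&ndev) != cudaSuccess) {
    cudaGetLastError();                // clear the sticky error state
    return 0;
  }
  return (uint64_t)ndev;               // then store into the unsigned member
}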
@ -168,19 +169,21 @@ uint_t ChunkManager<data_t>::Allocate(int chunk_bits, int nqubits,
bool density_mat, reg_t &gpus,
bool enable_cuStatevec) {
uint_t num_buffers;
int iDev;
uint_t iDev;
uint_t is, ie, nc;
int i;
uint_t i;
char *str;
bool multi_gpu = false;
bool hybrid = false;
bool hybrid = false;
#ifdef AER_THRUST_GPU
bool multi_gpu = false;
//--- for test
str = getenv("AER_MULTI_GPU");
if (str) {
multi_gpu = true;
num_places_ = num_devices_;
}
#endif
str = getenv("AER_HYBRID");
if (str) {
hybrid = true;
@ -192,8 +195,10 @@ uint_t ChunkManager<data_t>::Allocate(int chunk_bits, int nqubits,
target_gpus_ = gpus;
if (target_gpus_.size() > 0) {
num_devices_ = target_gpus_.size();
#ifdef AER_THRUST_GPU
if (num_devices_ > 1)
multi_gpu = true;
#endif
} else {
target_gpus_.resize(num_devices_);
for (iDev = 0; iDev < num_devices_; iDev++) {
@ -203,7 +208,7 @@ uint_t ChunkManager<data_t>::Allocate(int chunk_bits, int nqubits,
chunk_index_ = chunk_index;
if (num_qubits_ != nqubits || chunk_bits_ != chunk_bits ||
if (num_qubits_ != (uint_t)nqubits || chunk_bits_ != (uint_t)chunk_bits ||
nchunks > num_chunks_) {
// free previous allocation
Free();
@ -224,7 +229,6 @@ uint_t ChunkManager<data_t>::Allocate(int chunk_bits, int nqubits,
multi_shots_ = true;
#ifdef AER_THRUST_CPU
multi_gpu = false;
num_places_ = 1;
#else
if (chunk_distribution_enable_) {
@ -260,7 +264,9 @@ uint_t ChunkManager<data_t>::Allocate(int chunk_bits, int nqubits,
#endif
} else { // single chunk
num_buffers = 0;
#ifdef AER_THRUST_GPU
multi_gpu = false;
#endif
num_places_ = 1;
num_chunks_ = nchunks;
multi_shots_ = false;
@ -346,7 +352,7 @@ uint_t ChunkManager<data_t>::Allocate(int chunk_bits, int nqubits,
}
}
if (chunks_allocated < num_chunks_) {
int nplaces_add = num_places_;
uint_t nplaces_add = num_places_;
if ((num_chunks_ - chunks_allocated) < nplaces_add)
nplaces_add = (num_chunks_ - chunks_allocated);
// rest of chunks are stored on host
@ -391,7 +397,7 @@ uint_t ChunkManager<data_t>::Allocate(int chunk_bits, int nqubits,
template <typename data_t>
void ChunkManager<data_t>::Free(void) {
int i;
uint_t i;
for (i = 0; i < chunks_.size(); i++) {
chunks_[i]->Deallocate();
@ -408,7 +414,7 @@ void ChunkManager<data_t>::Free(void) {
template <typename data_t>
bool ChunkManager<data_t>::MapChunk(Chunk<data_t> &chunk, int iplace) {
int i;
uint_t i;
for (i = 0; i < num_places_; i++) {
if (chunks_[(iplace + i) % num_places_]->MapChunk(chunk)) {
@ -422,7 +428,7 @@ bool ChunkManager<data_t>::MapChunk(Chunk<data_t> &chunk, int iplace) {
template <typename data_t>
bool ChunkManager<data_t>::MapBufferChunk(Chunk<data_t> &out, int idev) {
if (idev < 0) {
int i;
uint_t i;
for (i = 0; i < num_devices_; i++) {
if (chunks_[i]->MapBufferChunk(out))
break;

View File

@ -377,9 +377,6 @@ void cuStateVecChunkContainer<data_t>::apply_diagonal_matrix(
qubits32[i] = qubits[i];
int32_t *pQubits = &qubits32[control_bits];
int32_t *pControl = nullptr;
if (control_bits > 0)
pControl = &qubits32[0];
uint_t bits;
uint_t nc;
@ -686,7 +683,6 @@ void cuStateVecChunkContainer<data_t>::apply_rotation(
const uint_t iChunk, const reg_t &qubits, const Rotation r,
const double theta, const uint_t gid, const uint_t count) {
custatevecPauli_t pauli[2];
int nPauli = 1;
BaseContainer::set_device();
@ -705,25 +701,21 @@ void cuStateVecChunkContainer<data_t>::apply_rotation(
case Rotation::xx:
pauli[0] = CUSTATEVEC_PAULI_X;
pauli[1] = CUSTATEVEC_PAULI_X;
nPauli = 2;
control_bits--;
break;
case Rotation::yy:
pauli[0] = CUSTATEVEC_PAULI_Y;
pauli[1] = CUSTATEVEC_PAULI_Y;
nPauli = 2;
control_bits--;
break;
case Rotation::zz:
pauli[0] = CUSTATEVEC_PAULI_Z;
pauli[1] = CUSTATEVEC_PAULI_Z;
nPauli = 2;
control_bits--;
break;
case Rotation::zx:
pauli[0] = CUSTATEVEC_PAULI_Z;
pauli[1] = CUSTATEVEC_PAULI_X;
nPauli = 2;
control_bits--;
break;
default:
@ -911,7 +903,7 @@ double cuStateVecChunkContainer<data_t>::expval_pauli(
const custatevecPauli_t *pauliOperatorsArray[] = {pauliOps};
const int32_t *basisBitsArray[] = {qubits32};
double ret[1];
const uint32_t nBasisBitsArray[] = {qubits.size()};
const uint32_t nBasisBitsArray[] = {(uint32_t)qubits.size()};
custatevecStatus_t err;
err = custatevecComputeExpectationsOnPauliBasis(

View File

@ -220,7 +220,7 @@ public:
void allocate_creg(uint_t num_mem, uint_t num_reg);
int measured_cbit(uint_t iChunk, int qubit) {
uint_t n64, i64, ibit;
if (qubit >= this->num_creg_bits_)
if ((uint_t)qubit >= this->num_creg_bits_)
return -1;
n64 = (this->num_creg_bits_ + 63) >> 6;
i64 = qubit >> 6;
@ -324,7 +324,6 @@ uint_t DeviceChunkContainer<data_t>::Allocate(int idev, int chunk_bits,
bool density_matrix) {
uint_t nc = chunks;
uint_t i;
int mat_bits;
this->chunk_bits_ = chunk_bits;
this->num_qubits_ = num_qubits;
@ -359,13 +358,10 @@ uint_t DeviceChunkContainer<data_t>::Allocate(int idev, int chunk_bits,
if (multi_shots) { // multi-shot parallelization for small qubits
multi_shots_ = true;
mat_bits = AER_DEFAULT_MATRIX_BITS;
nc = chunks;
num_matrices_ = chunks;
} else {
multi_shots_ = false;
mat_bits = AER_DEFAULT_MATRIX_BITS;
num_matrices_ = 1;
nc = chunks;
}
@ -519,7 +515,7 @@ void DeviceChunkContainer<data_t>::calculate_matrix_buffer_size(int bits,
if (shots > AER_MAX_SAMPLING_SHOTS)
shots = AER_MAX_SAMPLING_SHOTS;
uint_t b = this->matrix_bits_;
while ((1ull << (b * 2)) < shots) {
while ((1ull << (b * 2)) < (uint_t)shots) {
b++;
}
this->matrix_bits_ = b;
@ -545,7 +541,7 @@ void DeviceChunkContainer<data_t>::calculate_matrix_buffer_size(int bits,
}
params_buffer_size_ = size;
if (shots > 1 && params_buffer_size_ < shots) {
if (shots > 1 && params_buffer_size_ < (uint_t)shots) {
params_buffer_size_ = shots;
}
}
@ -553,10 +549,9 @@ void DeviceChunkContainer<data_t>::calculate_matrix_buffer_size(int bits,
template <typename data_t>
void DeviceChunkContainer<data_t>::ResizeMatrixBuffers(int bits,
int max_shots) {
uint_t size;
uint_t n = num_matrices_ + this->num_buffers_;
if (bits != this->matrix_bits_) {
if ((uint_t)bits != this->matrix_bits_) {
calculate_matrix_buffer_size(bits, max_shots);
}
@ -941,7 +936,7 @@ void DeviceChunkContainer<data_t>::set_blocked_qubits(uint_t iChunk,
auto qubits_sorted = qubits;
std::sort(qubits_sorted.begin(), qubits_sorted.end());
int i;
uint_t i;
for (i = 0; i < qubits.size(); i++) {
blocked_qubits_holder_[iBlock * QV_MAX_REGISTERS + i] = qubits_sorted[i];
}
@ -1010,8 +1005,7 @@ void DeviceChunkContainer<data_t>::queue_blocked_gate(
}
cvector_t<double> mat(4, 0.0);
int i;
uint_t idx, idxParam, iBlock;
uint_t iBlock;
if (iChunk >= this->num_chunks_) { // for buffer chunks
iBlock = num_matrices_ + iChunk - this->num_chunks_;
} else {
@ -1028,7 +1022,7 @@ void DeviceChunkContainer<data_t>::queue_blocked_gate(
params.mask_ = mask;
params.gate_ = gate;
params.qubit_ = 0;
for (i = 0; i < num_blocked_qubits_[iBlock]; i++) {
for (uint_t i = 0; i < num_blocked_qubits_[iBlock]; i++) {
if (blocked_qubits_holder_[iBlock * QV_MAX_REGISTERS + i] == qubit) {
params.qubit_ = i;
break;
@ -1408,8 +1402,8 @@ void DeviceChunkContainer<data_t>::copy_reduce_buffer(std::vector<double> &ret,
count * reduce_buffer_size_, tmp.begin());
#endif
for (int_t i = 0; i < count; i++) {
for (int_t j = 0; j < num_val; j++)
for (uint_t i = 0; i < count; i++) {
for (uint_t j = 0; j < num_val; j++)
ret[i * num_val + j] = tmp[i * reduce_buffer_size_ + j];
}
}

View File

@ -121,7 +121,6 @@ uint_t HostChunkContainer<data_t>::Allocate(int idev, int chunk_bits,
int matrix_bit, int max_shots,
bool density_matrix) {
uint_t nc = chunks;
uint_t i;
ChunkContainer<data_t>::chunk_bits_ = chunk_bits;
ChunkContainer<data_t>::num_qubits_ = num_qubits;

View File

@ -69,7 +69,10 @@ protected:
public:
GateFuncBase() {
data_ = NULL;
matrix_ = NULL;
params_ = NULL;
base_index_ = 0;
chunk_bits_ = 0;
cregs_ = NULL;
num_creg_bits_ = 0;
conditional_bit_ = -1;
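Note: initializing every member in the GateFuncBase constructor removes maybe-uninitialized reports when a functor is inspected before set-up. In C++11 code the same effect is available with default member initializers; a sketch as an alternative shape, not the committed change (member types are guesses for illustration only):

template <typename data_t>
class GateFuncBaseSketch {
protected:
  data_t *data_ = nullptr;
  void *matrix_ = nullptr;
  unsigned long long *params_ = nullptr;
  unsigned long long base_index_ = 0;
  unsigned chunk_bits_ = 0;
  int conditional_bit_ = -1;
};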
@ -147,7 +150,7 @@ public:
template <typename data_t>
class GateFuncWithCache : public GateFuncBase<data_t> {
protected:
int nqubits_;
uint_t nqubits_;
public:
GateFuncWithCache(uint_t nq) { nqubits_ = nq; }
@ -210,7 +213,7 @@ public:
template <typename data_t>
class GateFuncSumWithCache : public GateFuncBase<data_t> {
protected:
int nqubits_;
uint_t nqubits_;
public:
GateFuncSumWithCache(uint_t nq) { nqubits_ = nq; }
@ -276,7 +279,7 @@ public:
: public thrust::unary_function<difference_type, difference_type> {
difference_type stride;
stride_functor(difference_type stride) : stride(stride) {}
stride_functor(difference_type _stride) : stride(_stride) {}
__host__ __device__ difference_type
operator()(const difference_type &i) const {
@ -301,8 +304,8 @@ public:
typedef PermutationIterator iterator;
// construct strided_range for the range [first,last)
strided_range(Iterator first, Iterator last, difference_type stride)
: first(first), last(last), stride(stride) {}
strided_range(Iterator _first, Iterator _last, difference_type _stride)
: first(_first), last(_last), stride(_stride) {}
iterator begin(void) const {
return PermutationIterator(
@ -409,7 +412,7 @@ public:
template <typename data_t>
class initialize_component_func : public GateFuncBase<data_t> {
protected:
int nqubits;
uint_t nqubits;
uint_t offset;
uint_t mat_pos;
uint_t mat_num;
@ -825,7 +828,7 @@ public:
int qubits_count(void) { return 4; }
__host__ __device__ void operator()(const uint_t &i) const {
uint_t i0, i1, i2, i3, i4, offset, f0, f1, f2;
uint_t i0, i1, i2, i3, i4, offset;
thrust::complex<data_t> *vec;
thrust::complex<data_t> q0, q1, q2, q3, q4, q5, q6, q7;
thrust::complex<data_t> q8, q9, q10, q11, q12, q13, q14, q15;
@ -865,9 +868,6 @@ public:
q15 = vec[i0 + offset3 + offset2 + offset1 + offset0];
offset = 0;
f0 = 0;
f1 = 0;
f2 = 0;
for (j = 0; j < 16; j++) {
r = pMat[0 + j] * q0;
r += pMat[16 + j] * q1;
@ -936,9 +936,9 @@ public:
template <typename data_t>
class MatrixMultNxN_LU : public GateFuncBase<data_t> {
protected:
int nqubits;
uint_t nqubits;
uint_t matSize;
int nswap;
uint_t nswap;
public:
MatrixMultNxN_LU(const cvector_t<double> &mat, const reg_t &qb,
@ -978,7 +978,7 @@ public:
params[nqubits + i] = j;
}
if (dmax != 0) {
if (dmax > 0) {
c0 = matLU[(i << nqubits) + params[nqubits + i]];
for (j = i + 1; j < matSize; j++) {
@ -1211,7 +1211,7 @@ protected:
public:
BatchedMatrixMult2x2(const reg_t &qubits, uint_t imat,
uint_t nshots_per_mat) {
int i;
uint_t i;
nqubits_ = qubits.size();
offset_ = 1ull << qubits[nqubits_ - 1];
@ -1402,7 +1402,7 @@ public:
template <typename data_t>
class DiagonalMultNxN : public GateFuncBase<data_t> {
protected:
int nqubits;
uint_t nqubits;
public:
DiagonalMultNxN(const reg_t &qb) { nqubits = qb.size(); }
@ -1504,7 +1504,7 @@ protected:
public:
BatchedDiagonalMatrixMult2x2(const reg_t &qubits, uint_t imat,
uint_t nshots_per_mat) {
int i;
uint_t i;
nqubits_ = qubits.size();
mask_ = (1ull << qubits[nqubits_ - 1]);
@ -1557,7 +1557,6 @@ protected:
public:
BatchedDiagonalMatrixMultNxN(const uint_t nq, uint_t imat,
uint_t nshots_per_mat) {
int i;
nqubits_ = nq;
matrix_begin_ = imat;
@ -1894,9 +1893,8 @@ public:
CSwapChunk_func(const reg_t &qubits, uint_t block_bits,
thrust::complex<data_t> *pVec0,
thrust::complex<data_t> *pVec1, bool wb) {
int i;
int nqubits;
int qubit_t;
uint_t nqubits;
uint_t qubit_t;
nqubits = qubits.size();
if (qubits[nqubits - 2] < qubits[nqubits - 1]) {
@ -2078,10 +2076,8 @@ public:
thrust::complex<data_t> q, r;
thrust::complex<double> m;
uint_t mat_size, irow;
thrust::complex<data_t> *vec;
thrust::complex<double> *pMat;
vec = this->data_;
pMat = this->matrix_;
mat_size = 1ull << this->nqubits_;
@ -2492,7 +2488,7 @@ public:
operator()(const uint_t &i) const {
thrust::complex<data_t> q;
thrust::complex<data_t> *vec;
double d, dv;
double d, dv = 0.0;
vec = this->data_;
q = vec[i];
@ -2529,7 +2525,7 @@ public:
operator()(const uint_t &i) const {
thrust::complex<data_t> *vec;
thrust::complex<data_t> q0;
double d, dv;
double d, dv = 0.0;
vec = this->data_;
@ -2585,7 +2581,7 @@ public:
thrust::complex<data_t> q1;
thrust::complex<data_t> q0p;
thrust::complex<data_t> q1p;
double d0, d1, ret, ret_v;
double d0, d1, ret, ret_v = 0.0;
uint_t idx0, idx1;
vec = this->data_;
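The `dv = 0.0` and `ret_v = 0.0` initializers added in the three hunks above quiet -Wmaybe-uninitialized: those accumulators are written only on the variance branch, so the compiler cannot prove every read follows a write. The shape of the problem in miniature (a sketch, not from the commit):

double expval_term(bool variance, double p) {
  double d, dv = 0.0; // dv is assigned only when variance is requested
  d = p;
  if (variance)
    dv = p * p;
  return d + dv; // without the initializer this read can warn
}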

View File

@ -955,7 +955,9 @@ void QubitVector<data_t>::allocate_mem(size_t data_size) {
if (data_ == nullptr) {
#if !defined(_WIN64) && !defined(_WIN32)
void *data = nullptr;
posix_memalign(&data, 64, sizeof(std::complex<data_t>) * data_size);
if (posix_memalign(&data, 64, sizeof(std::complex<data_t>) * data_size) !=
0)
throw std::runtime_error("Cannot allocate memory by posix_memalign");
data_ = reinterpret_cast<std::complex<data_t> *>(data);
#else
data_ = reinterpret_cast<std::complex<data_t> *>(
@ -969,7 +971,8 @@ void QubitVector<data_t>::allocate_checkpoint(size_t data_size) {
free_checkpoint();
#if !defined(_WIN64) && !defined(_WIN32)
void *data = nullptr;
posix_memalign(&data, 64, sizeof(std::complex<data_t>) * data_size);
if (posix_memalign(&data, 64, sizeof(std::complex<data_t>) * data_size) != 0)
throw std::runtime_error("Cannot allocate memory by posix_memalign");
checkpoint_ = reinterpret_cast<std::complex<data_t> *>(data);
#else
checkpoint_ = reinterpret_cast<std::complex<data_t> *>(
@ -1765,13 +1768,13 @@ void QubitVector<data_t>::apply_chunk_swap(const reg_t &qubits,
if (write_back) {
#pragma omp parallel for if (num_qubits_ > omp_threshold_ && omp_threads_ > 1) \
num_threads(omp_threads_)
for (int_t k = 0; k < data_size_; ++k) {
for (int_t k = 0; k < (int_t)data_size_; ++k) {
std::swap(data_[k], src.data_[k]);
}
} else {
#pragma omp parallel for if (num_qubits_ > omp_threshold_ && omp_threads_ > 1) \
num_threads(omp_threads_)
for (int_t k = 0; k < data_size_; ++k) {
for (int_t k = 0; k < (int_t)data_size_; ++k) {
data_[k] = src.data_[k];
}
}
@ -1803,7 +1806,7 @@ void QubitVector<data_t>::apply_chunk_swap(const reg_t &qubits,
if (q0 >= num_qubits_) { // exchange whole of chunk each other
#pragma omp parallel for if (num_qubits_ > omp_threshold_ && omp_threads_ > 1) \
num_threads(omp_threads_)
for (int_t k = 0; k < data_size_; ++k) {
for (int_t k = 0; k < (int_t)data_size_; ++k) {
data_[k] = recv_buffer_[k];
}
} else {
@ -1824,13 +1827,13 @@ void QubitVector<data_t>::apply_chunk_swap(QubitVector<data_t> &src,
if (src.chunk_index_ == chunk_index_) {
#pragma omp parallel for if (num_qubits_ > omp_threshold_ && omp_threads_ > 1) \
num_threads(omp_threads_)
for (int_t k = 0; k < size; ++k) {
for (int_t k = 0; k < (int_t)size; ++k) {
data_[dest_offset + k] = src.recv_buffer_[src_offset + k];
}
} else {
#pragma omp parallel for if (num_qubits_ > omp_threshold_ && omp_threads_ > 1) \
num_threads(omp_threads_)
for (int_t k = 0; k < size; ++k) {
for (int_t k = 0; k < (int_t)size; ++k) {
std::swap(data_[dest_offset + k], src.data_[src_offset + k]);
}
}
@ -1838,8 +1841,8 @@ void QubitVector<data_t>::apply_chunk_swap(QubitVector<data_t> &src,
template <typename data_t>
void QubitVector<data_t>::apply_multi_swaps(const reg_t &qubits) {
for (int_t i = 0; i < qubits.size(); i += 10) {
int_t n = 10;
for (uint_t i = 0; i < qubits.size(); i += 10) {
uint_t n = 10;
if (i + n > qubits.size())
n = qubits.size() - i;
@ -1850,17 +1853,17 @@ void QubitVector<data_t>::apply_multi_swaps(const reg_t &qubits) {
auto lambda = [&](const indexes_t &inds) -> void {
cvector_t<data_t> cache(size);
for (int_t i = 0; i < size; i++)
cache[i] = data_[inds[i]];
for (uint_t ii = 0; ii < size; ii++)
cache[ii] = data_[inds[ii]];
for (int_t i = 0; i < size; i++) {
uint_t pos = i;
for (int_t j = 0; j < nq; j += 2) {
for (uint_t ii = 0; ii < size; ii++) {
uint_t pos = ii;
for (uint_t j = 0; j < nq; j += 2) {
if ((((pos >> j) & 1) ^ ((pos >> (j + 1)) & 1)) != 0) {
pos ^= ((1ull << j) | (1ull << (j + 1)));
}
}
data_[inds[i]] = cache[pos];
data_[inds[ii]] = cache[pos];
}
};
apply_lambda(lambda, qubits_swap);
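glibc declares posix_memalign with warn_unused_result, so discarding its return value produced -Wunused-result; the checks added above also turn a silent allocation failure into an exception instead of a later null-pointer dereference. A self-contained sketch of the non-Windows branch (the _WIN32 path is omitted here):

#include <complex>
#include <cstdlib>
#include <stdexcept>

template <typename data_t>
std::complex<data_t> *alloc_aligned(size_t n) {
  void *data = nullptr;
  // posix_memalign returns 0 on success; any other value is a failure.
  if (posix_memalign(&data, 64, sizeof(std::complex<data_t>) * n) != 0)
    throw std::runtime_error("Cannot allocate memory by posix_memalign");
  return reinterpret_cast<std::complex<data_t> *>(data);
}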

View File

@ -819,17 +819,17 @@ void QubitVectorThrust<data_t>::initialize_component(
std::sort(qubits_sorted.begin(), qubits_sorted.end());
auto qubits_param = qubits;
int i;
uint_t i;
for (i = 0; i < qubits.size(); i++)
qubits_param.push_back(qubits_sorted[i]);
int nbit = chunk_.container()->matrix_bits();
uint_t nbit = chunk_.container()->matrix_bits();
if (nbit > qubits.size())
nbit = qubits.size();
uint_t dim = 1ull << qubits.size();
uint_t sub_dim = 1ull << nbit;
for (uint_t i = 0; i < dim; i += sub_dim) {
for (i = 0; i < dim; i += sub_dim) {
cvector_t<double> state(sub_dim);
for (uint_t j = 0; j < sub_dim; j++)
state[j] = state0[dim - sub_dim - i + j];
@ -872,7 +872,7 @@ uint_t QubitVectorThrust<data_t>::chunk_setup(int chunk_bits, int num_qubits,
if (chunk_manager_->chunk_bits() == chunk_bits &&
chunk_manager_->num_qubits() == num_qubits) {
bool mapped = chunk_manager_->MapChunk(chunk_, 0);
chunk_manager_->MapChunk(chunk_, 0);
chunk_.set_chunk_index(chunk_index_);
return num_local_chunks;
}
@ -903,8 +903,8 @@ uint_t QubitVectorThrust<data_t>::chunk_setup(int chunk_bits, int num_qubits,
recv_chunk_.unmap();
// mapping/setting chunk
bool mapped = chunk_manager_->MapChunk(chunk_, 0);
chunk_.set_chunk_index(chunk_index_);
chunk_manager_->MapChunk(chunk_, 0);
return num_chunks_allocated;
}
@ -932,7 +932,7 @@ QubitVectorThrust<data_t>::chunk_setup(const QubitVectorThrust<data_t> &base,
// mapping/setting chunk
chunk_manager_ = base.chunk_manager_;
bool mapped = chunk_manager_->MapChunk(chunk_, 0);
chunk_manager_->MapChunk(chunk_, 0);
return 0;
}
@ -1260,7 +1260,7 @@ void QubitVectorThrust<data_t>::initialize_from_vector(const list_t &statevec) {
int_t i;
#pragma omp parallel for if (num_qubits_ > omp_threshold_ && omp_threads_ > 1) \
num_threads(omp_threads_)
for (i = 0; i < data_size_; i++) {
for (i = 0; i < (int_t)data_size_; i++) {
tmp[i] = statevec[i];
}
initialize_from_data(&tmp[0], tmp.size());
@ -1322,7 +1322,7 @@ void QubitVectorThrust<data_t>::initialize_creg(
if (chunk_.pos() == 0) {
chunk_.container()->allocate_creg(num_cmem_bits_, num_creg_bits_);
int_t i;
uint_t i;
for (i = 0; i < num_register; i++) {
if (register_hex[register_hex.size() - 1 - i] == '0') {
store_cregister(i, 0);
@ -1528,7 +1528,6 @@ void QubitVectorThrust<data_t>::apply_multiplexer(
for (const auto &q : control_qubits) {
qubits.push_back(q);
}
size_t N = qubits.size();
cvector_t<double> matMP(DIM * DIM, 0.0);
uint_t b, i, j;
@ -1627,7 +1626,7 @@ void QubitVectorThrust<data_t>::apply_mcx(const reg_t &qubits) {
return;
if (register_blocking_) {
int i;
uint_t i;
uint_t mask = 0;
for (i = 0; i < qubits.size() - 1; i++) {
mask |= (1ull << qubits[i]);
@ -1645,7 +1644,7 @@ void QubitVectorThrust<data_t>::apply_mcy(const reg_t &qubits) {
return;
if (register_blocking_) {
int i;
uint_t i;
uint_t mask = 0;
for (i = 0; i < qubits.size() - 1; i++) {
mask |= (1ull << qubits[i]);
@ -1678,7 +1677,7 @@ template <typename data_t>
void QubitVectorThrust<data_t>::apply_chunk_swap(const reg_t &qubits,
QubitVectorThrust<data_t> &src,
bool write_back) {
int q0, q1, t;
uint_t q0, q1, t;
q0 = qubits[0];
q1 = qubits[1];
@ -1759,7 +1758,7 @@ void QubitVectorThrust<data_t>::apply_chunk_swap(const reg_t &qubits,
template <typename data_t>
void QubitVectorThrust<data_t>::apply_chunk_swap(const reg_t &qubits,
uint_t remote_chunk_index) {
int q0, q1, t;
uint_t q0, q1, t;
q0 = qubits[qubits.size() - 2];
q1 = qubits[qubits.size() - 1];
@ -1840,7 +1839,7 @@ void QubitVectorThrust<data_t>::apply_mcphase(
return;
if (register_blocking_) {
int i;
uint_t i;
uint_t mask = 0;
for (i = 0; i < qubits.size() - 1; i++) {
mask |= (1ull << qubits[i]);
@ -1875,7 +1874,7 @@ void QubitVectorThrust<data_t>::apply_mcu(const reg_t &qubits,
return;
} else {
if (register_blocking_) {
int i;
uint_t i;
uint_t mask = 0;
for (i = 0; i < qubits.size() - 1; i++) {
mask |= (1ull << qubits[i]);
@ -1897,7 +1896,7 @@ void QubitVectorThrust<data_t>::apply_mcu(const reg_t &qubits,
return;
} else {
if (register_blocking_) {
int i;
uint_t i;
uint_t mask = 0;
for (i = 0; i < qubits.size() - 1; i++) {
mask |= (1ull << qubits[i]);
@ -2252,7 +2251,7 @@ template <typename data_t>
void QubitVectorThrust<data_t>::apply_batched_measure(
const reg_t &qubits, std::vector<RngEngine> &rng, const reg_t &cmemory,
const reg_t &cregs) {
const int_t DIM = 1 << qubits.size();
const uint_t DIM = 1 << qubits.size();
uint_t i, count = 1;
if (enable_batch_) {
if (chunk_.pos() != 0) {
@ -2386,7 +2385,7 @@ public:
template <typename data_t>
void QubitVectorThrust<data_t>::apply_batched_reset(
const reg_t &qubits, std::vector<RngEngine> &rng) {
const int_t DIM = 1 << qubits.size();
const uint_t DIM = 1 << qubits.size();
uint_t i, count = 1;
if (enable_batch_) {
if (chunk_.pos() != 0) {
@ -2547,7 +2546,6 @@ public:
uint_t *mask;
uint_t val = 1;
n64 = (this->num_creg_bits_ + 63) >> 6;
int j;
mask = this->params_;
@ -2686,7 +2684,7 @@ void QubitVectorThrust<data_t>::batched_expval_pauli(
std::vector<double> &val, const reg_t &qubits, const std::string &pauli,
bool variance, std::complex<double> param, bool last,
const complex_t initial_phase) const {
uint_t i, count = 1;
uint_t count = 1;
if (enable_batch_) {
if (chunk_.pos() != 0) {
return; // first chunk execute all in batch
@ -2898,12 +2896,11 @@ void QubitVectorThrust<data_t>::apply_batched_pauli_ops(
}
uint_t count = ops.size();
int num_inner_threads = omp_get_max_threads() / num_threads_per_group_;
int_t i;
reg_t params(4 * count);
auto count_paulis = [this, &params, ops](int_t i) {
int_t j;
uint_t j;
uint_t x_max = 0;
uint_t num_y = 0;
uint_t x_mask = 0;
@ -2975,7 +2972,6 @@ public:
thrust::complex<data_t> q0, q1;
thrust::complex<data_t> *vec0;
thrust::complex<data_t> *vec1;
double p, p0, p1, rnd;
uint_t iChunk = i >> this->chunk_bits_;
double scale =
@ -3012,7 +3008,7 @@ public:
__host__ __device__ void
run_with_cache(uint_t _tid, uint_t _idx,
thrust::complex<data_t> *_cache) const {
uint_t j, threadID;
uint_t j;
thrust::complex<data_t> q, r;
thrust::complex<double> m;
uint_t mat_size, irow;
@ -3066,7 +3062,6 @@ public:
__host__ __device__ void operator()(const uint_t &i) const {
uint_t iChunk = i;
double p0, p1, rnd;
bool mult = false;
p0 = reduce_[iChunk * reduce_buf_size_];
probs_[iChunk + QV_RESET_CURRENT_PROB * prob_buf_size_] = p0;
@ -3103,7 +3098,6 @@ void QubitVectorThrust<data_t>::apply_batched_kraus(
std::vector<RngEngine> &rng) {
const size_t N = qubits.size();
uint_t i, count;
double ret;
count = chunk_.container()->num_chunks();
@ -3266,7 +3260,7 @@ void QubitVectorThrust<data_t>::apply_bfunc(const Operations::Op &op) {
return; // first chunk execute all in batch
reg_t params;
int_t i, n64, n, iparam;
uint_t i, n64, n, iparam;
// registers to be updated
for (i = 0; i < op.registers.size(); i++)
@ -3377,7 +3371,7 @@ void QubitVectorThrust<data_t>::apply_roerror(const Operations::Op &op,
reg_t params;
std::vector<double> probs;
int_t i, j, offset;
uint_t i, offset;
for (i = 0; i < op.memory.size(); i++)
params.push_back(op.memory[i]);
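Several hunks above drop `bool mapped =` bindings whose value was never read, fixing -Wunused-but-set-variable; the call is kept for its side effect alone. An illustration with a simplified stand-in for the chunk manager (not Aer's actual class):

#include <vector>

struct ChunkManagerSketch {
  std::vector<int> mapped;
  bool MapChunk(int idx) { mapped.push_back(idx); return true; }
};

void setup(ChunkManagerSketch &mgr) {
  // bool ok = mgr.MapChunk(0); // warns: `ok` is set but never used
  mgr.MapChunk(0);              // keep the side effect only
  (void)mgr.MapChunk(1);        // or discard the result explicitly
}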

View File

@ -43,7 +43,7 @@ namespace {
/** Remember we cannot use STL (or memcpy) **/
template <typename T, typename U>
void copy(T dest, const U orig, size_t size) {
for (auto i = 0; i < size; ++i)
for (size_t i = 0; i < size; ++i)
dest[i] = orig[i];
}
@ -1114,7 +1114,8 @@ Avx apply_diagonal_matrix_avx<double>(
#endif
#if !defined(_WIN64) && !defined(_WIN32)
void *data = nullptr;
posix_memalign(&data, 64, sizeof(std::complex<double>) * 2);
if (posix_memalign(&data, 64, sizeof(std::complex<double>) * 2) != 0)
throw std::runtime_error("Cannot allocate memory by posix_memalign");
auto double_tmp = reinterpret_cast<std::complex<double> *>(data);
#else
auto double_tmp = reinterpret_cast<std::complex<double> *>(
@ -1122,7 +1123,7 @@ Avx apply_diagonal_matrix_avx<double>(
#endif
size_t q0_mask_ = 0;
for (int i = 0; i < qregs_size; ++i) {
for (size_t i = 0; i < qregs_size; ++i) {
if (qregs[i] == 0) {
q0_mask_ = 1UL << i;
break;
@ -1135,9 +1136,9 @@ Avx apply_diagonal_matrix_avx<double>(
#pragma omp for
for (int64_t k = 0; k < END; k += 1) {
const auto base = k << (batch + 1);
const auto until = base + (1UL << (batch + 1));
for (auto i = base; i < until; i += 2) {
const int64_t base = k << (batch + 1);
const int64_t until = base + (1UL << (batch + 1));
for (int64_t i = base; i < until; i += 2) {
auto tgt_qv_data =
_mm256_load(reinterpret_cast<double *>(&(qv_data[i])));
auto input_data = _load_diagonal_input(input_vec, double_tmp, i, qregs,
@ -1171,7 +1172,8 @@ Avx apply_diagonal_matrix_avx<float>(float *qv_data_, const uint64_t data_size,
{
#if !defined(_WIN64) && !defined(_WIN32)
void *data = nullptr;
posix_memalign(&data, 64, sizeof(std::complex<float>) * 4);
if (posix_memalign(&data, 64, sizeof(std::complex<float>) * 4) != 0)
throw std::runtime_error("Cannot allocate memory by posix_memalign");
auto float_tmp = reinterpret_cast<std::complex<float> *>(data);
#else
auto float_tmp = reinterpret_cast<std::complex<float> *>(
@ -1199,9 +1201,9 @@ Avx apply_diagonal_matrix_avx<float>(float *qv_data_, const uint64_t data_size,
#pragma omp for
for (int64_t k = 0; k < END; k += 1) {
const auto base = k << (batch + 2);
const auto until = base + (1UL << (batch + 2));
for (auto i = base; i < until; i += 4) {
const int64_t base = k << (batch + 2);
const int64_t until = base + (1UL << (batch + 2));
for (int64_t i = base; i < until; i += 4) {
m256_t<float> tgt_qv_data =
_mm256_load(reinterpret_cast<float *>(&(qv_data[i])));
auto input_data = _load_diagonal_input(input_vec, float_tmp, i, qregs,

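The int64_t counters with casts on the bounds, here and in the `(int_t)` casts of the QubitVector hunks earlier, are the portable way to keep these loops OpenMP-friendly: MSVC implements OpenMP 2.0, whose canonical loop form requires a signed integral counter, while GCC and Clang warn under -Wsign-compare when that signed counter meets an unsigned bound. Keeping the counter signed and casting the bound satisfies both. A sketch:

#include <cstdint>
#include <vector>

using int_t = int64_t;

void scale(std::vector<double> &v, double a) {
#pragma omp parallel for
  for (int_t k = 0; k < (int_t)v.size(); ++k) // signed counter, cast bound
    v[k] *= a;
}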
View File

@ -41,6 +41,7 @@ class Executor : public CircuitExecutor::ParallelStateExecutor<state_t>,
using Base = CircuitExecutor::MultiStateExecutor<state_t>;
using BasePar = CircuitExecutor::ParallelStateExecutor<state_t>;
using BaseBatch = CircuitExecutor::BatchShotsExecutor<state_t>;
using Base::sample_measure;
protected:
public:
@ -434,7 +435,7 @@ bool Executor<state_t>::apply_branching_op(CircuitExecutor::Branch &root,
template <class state_t>
void Executor<state_t>::initialize_qreg(uint_t num_qubits) {
int_t i;
uint_t i;
for (i = 0; i < Base::states_.size(); i++) {
Base::states_[i].qreg().set_num_qubits(BasePar::chunk_bits_);
@ -442,8 +443,8 @@ void Executor<state_t>::initialize_qreg(uint_t num_qubits) {
if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
#pragma omp parallel for
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
for (int_t iChunk = Base::top_state_of_group_[ig];
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
for (uint_t iChunk = Base::top_state_of_group_[ig];
iChunk < Base::top_state_of_group_[ig + 1]; iChunk++) {
if (Base::global_state_index_ + iChunk == 0 ||
this->num_qubits_ == this->chunk_bits_) {
@ -482,7 +483,7 @@ auto Executor<state_t>::move_to_vector(void) {
state.resize(Base::num_local_states_ << BasePar::chunk_bits_);
#pragma omp parallel for if (BasePar::chunk_omp_parallel_) private(iChunk)
for (iChunk = 1; iChunk < Base::states_.size(); iChunk++) {
for (iChunk = 1; iChunk < (int_t)Base::states_.size(); iChunk++) {
auto tmp = Base::states_[iChunk].qreg().move_to_vector();
uint_t j, offset = iChunk << BasePar::chunk_bits_;
for (j = 0; j < tmp.size(); j++) {
@ -511,7 +512,7 @@ auto Executor<state_t>::copy_to_vector(void) {
state.resize(Base::num_local_states_ << BasePar::chunk_bits_);
#pragma omp parallel for if (BasePar::chunk_omp_parallel_) private(iChunk)
for (iChunk = 1; iChunk < Base::states_.size(); iChunk++) {
for (iChunk = 1; iChunk < (int_t)Base::states_.size(); iChunk++) {
auto tmp = Base::states_[iChunk].qreg().copy_to_vector();
uint_t j, offset = iChunk << BasePar::chunk_bits_;
for (j = 0; j < tmp.size(); j++) {
@ -553,12 +554,12 @@ double Executor<state_t>::expval_pauli(const reg_t &qubits,
reg_t qubits_out_chunk;
std::string pauli_in_chunk;
std::string pauli_out_chunk;
int_t i, n;
uint_t n;
double expval(0.);
// get inner/outer chunk pauli string
n = pauli.size();
for (i = 0; i < n; i++) {
for (uint_t i = 0; i < n; i++) {
if (qubits[i] < BasePar::chunk_bits_) {
qubits_in_chunk.push_back(qubits[i]);
pauli_in_chunk.push_back(pauli[n - i - 1]);
@ -583,18 +584,18 @@ double Executor<state_t>::expval_pauli(const reg_t &qubits,
if (x_mask != 0) { // pairing state is out of chunk
bool on_same_process = true;
#ifdef AER_MPI
int proc_bits = 0;
uint_t proc_bits = 0;
uint_t procs = Base::distributed_procs_;
while (procs > 1) {
if ((procs & 1) != 0) {
proc_bits = -1;
proc_bits = 0;
break;
}
proc_bits++;
procs >>= 1;
}
if (x_mask & (~((1ull << (Base::num_qubits_ - proc_bits)) - 1)) !=
0) { // data exchange between processes is required
if ((x_mask & (~((1ull << (Base::num_qubits_ - proc_bits)) - 1))) !=
0) { // data exchange between processes is required
on_same_process = false;
}
#endif
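The added parentheses fix an actual precedence bug surfaced by -Wparentheses: `!=` binds tighter than `&`, so the old expression AND-ed x_mask with the boolean result of the comparison rather than comparing the masked value. A demonstration:

#include <cstdint>

bool needs_exchange(uint64_t x_mask, uint64_t mask) {
  // x_mask & ~mask != 0   parses as   x_mask & ((~mask) != 0)
  return (x_mask & ~mask) != 0; // intended: mask first, then compare
}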
@ -609,8 +610,8 @@ double Executor<state_t>::expval_pauli(const reg_t &qubits,
auto apply_expval_pauli_chunk = [this, x_mask, z_mask, x_max, mask_u,
mask_l, qubits_in_chunk,
pauli_in_chunk, phase](int_t iGroup) {
double expval = 0.0;
for (int_t iChunk = Base::top_state_of_group_[iGroup];
double expval_t = 0.0;
for (uint_t iChunk = Base::top_state_of_group_[iGroup];
iChunk < Base::top_state_of_group_[iGroup + 1]; iChunk++) {
uint_t pair_chunk = iChunk ^ x_mask;
if (iChunk < pair_chunk) {
@ -618,20 +619,20 @@ double Executor<state_t>::expval_pauli(const reg_t &qubits,
z_count = AER::Utils::popcount(iChunk & z_mask);
z_count_pair = AER::Utils::popcount(pair_chunk & z_mask);
expval += Base::states_[iChunk - Base::global_state_index_]
.qreg()
.expval_pauli(qubits_in_chunk, pauli_in_chunk,
Base::states_[pair_chunk].qreg(),
z_count, z_count_pair, phase);
expval_t += Base::states_[iChunk - Base::global_state_index_]
.qreg()
.expval_pauli(qubits_in_chunk, pauli_in_chunk,
Base::states_[pair_chunk].qreg(),
z_count, z_count_pair, phase);
}
}
return expval;
return expval_t;
};
expval += Utils::apply_omp_parallel_for_reduction(
(BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1), 0,
Base::num_global_states_ / 2, apply_expval_pauli_chunk);
} else {
for (int_t i = 0; i < Base::num_global_states_ / 2; i++) {
for (uint_t i = 0; i < Base::num_global_states_ / 2; i++) {
uint_t iChunk = ((i << 1) & mask_u) | (i & mask_l);
uint_t pair_chunk = iChunk ^ x_mask;
uint_t iProc = BasePar::get_process_by_chunk(pair_chunk);
@ -675,9 +676,9 @@ double Executor<state_t>::expval_pauli(const reg_t &qubits,
z_mask >>= BasePar::chunk_bits_;
if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
#pragma omp parallel for reduction(+ : expval)
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
double e_tmp = 0.0;
for (int_t iChunk = Base::top_state_of_group_[ig];
for (uint_t iChunk = Base::top_state_of_group_[ig];
iChunk < Base::top_state_of_group_[ig + 1]; iChunk++) {
double sign = 1.0;
if (z_mask && (AER::Utils::popcount(
@ -690,7 +691,7 @@ double Executor<state_t>::expval_pauli(const reg_t &qubits,
expval += e_tmp;
}
} else {
for (i = 0; i < Base::states_.size(); i++) {
for (uint_t i = 0; i < Base::states_.size(); i++) {
double sign = 1.0;
if (z_mask &&
(AER::Utils::popcount((i + Base::global_state_index_) & z_mask) &
@ -704,15 +705,15 @@ double Executor<state_t>::expval_pauli(const reg_t &qubits,
} else { // all bits are inside chunk
if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
#pragma omp parallel for reduction(+ : expval)
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
double e_tmp = 0.0;
for (int_t iChunk = Base::top_state_of_group_[ig];
for (uint_t iChunk = Base::top_state_of_group_[ig];
iChunk < Base::top_state_of_group_[ig + 1]; iChunk++)
e_tmp += Base::states_[iChunk].qreg().expval_pauli(qubits, pauli);
expval += e_tmp;
}
} else {
for (i = 0; i < Base::states_.size(); i++)
for (uint_t i = 0; i < Base::states_.size(); i++)
expval += Base::states_[i].qreg().expval_pauli(qubits, pauli);
}
}
@ -777,10 +778,10 @@ void Executor<state_t>::apply_save_density_matrix(const Operations::Op &op,
double sum = 0.0;
if (BasePar::chunk_omp_parallel_) {
#pragma omp parallel for reduction(+ : sum)
for (int_t i = 0; i < Base::states_.size(); i++)
for (int_t i = 0; i < (int_t)Base::states_.size(); i++)
sum += Base::states_[i].qreg().norm();
} else {
for (int_t i = 0; i < Base::states_.size(); i++)
for (uint_t i = 0; i < Base::states_.size(); i++)
sum += Base::states_[i].qreg().norm();
}
#ifdef AER_MPI
@ -906,7 +907,7 @@ template <class state_t>
rvector_t Executor<state_t>::measure_probs(const reg_t &qubits) const {
uint_t dim = 1ull << qubits.size();
rvector_t sum(dim, 0.0);
int_t i, j, k;
uint_t i, j, k;
reg_t qubits_in_chunk;
reg_t qubits_out_chunk;
@ -916,8 +917,8 @@ rvector_t Executor<state_t>::measure_probs(const reg_t &qubits) const {
if (qubits_in_chunk.size() > 0) {
if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
#pragma omp parallel for private(i, j, k)
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
for (int_t i = Base::top_state_of_group_[ig];
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
for (i = Base::top_state_of_group_[ig];
i < Base::top_state_of_group_[ig + 1]; i++) {
auto chunkSum =
Base::states_[i].qreg().probabilities(qubits_in_chunk);
@ -983,8 +984,8 @@ rvector_t Executor<state_t>::measure_probs(const reg_t &qubits) const {
} else { // there is no bit in chunk
if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
#pragma omp parallel for private(i, j, k)
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
for (int_t i = Base::top_state_of_group_[ig];
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
for (i = Base::top_state_of_group_[ig];
i < Base::top_state_of_group_[ig + 1]; i++) {
auto nr = std::real(Base::states_[i].qreg().norm());
int idx = 0;
@ -1002,7 +1003,7 @@ rvector_t Executor<state_t>::measure_probs(const reg_t &qubits) const {
} else {
for (i = 0; i < Base::states_.size(); i++) {
auto nr = std::real(Base::states_[i].qreg().norm());
int idx = 0;
uint_t idx = 0;
for (k = 0; k < qubits_out_chunk.size(); k++) {
if ((((i + Base::global_state_index_) << (BasePar::chunk_bits_)) >>
qubits_out_chunk[k]) &
@ -1058,14 +1059,14 @@ void Executor<state_t>::measure_reset_update(const std::vector<uint_t> &qubits,
if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
#pragma omp parallel for
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
for (int_t ic = Base::top_state_of_group_[ig];
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
for (uint_t ic = Base::top_state_of_group_[ig];
ic < Base::top_state_of_group_[ig + 1]; ic++)
Base::states_[ic].apply_diagonal_matrix(qubits, mdiag);
}
} else {
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
for (int_t ic = Base::top_state_of_group_[ig];
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
for (uint_t ic = Base::top_state_of_group_[ig];
ic < Base::top_state_of_group_[ig + 1]; ic++)
Base::states_[ic].apply_diagonal_matrix(qubits, mdiag);
}
@ -1085,14 +1086,14 @@ void Executor<state_t>::measure_reset_update(const std::vector<uint_t> &qubits,
if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
#pragma omp parallel for
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
for (int_t ic = Base::top_state_of_group_[ig];
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
for (uint_t ic = Base::top_state_of_group_[ig];
ic < Base::top_state_of_group_[ig + 1]; ic++)
Base::states_[ic].apply_diagonal_matrix(qubits, mdiag);
}
} else {
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
for (int_t ic = Base::top_state_of_group_[ig];
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
for (uint_t ic = Base::top_state_of_group_[ig];
ic < Base::top_state_of_group_[ig + 1]; ic++)
Base::states_[ic].apply_diagonal_matrix(qubits, mdiag);
}
@ -1120,20 +1121,20 @@ void Executor<state_t>::measure_reset_update(const std::vector<uint_t> &qubits,
// apply permutation to swap state
if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
#pragma omp parallel for
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
for (int_t ic = Base::top_state_of_group_[ig];
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
for (uint_t ic = Base::top_state_of_group_[ig];
ic < Base::top_state_of_group_[ig + 1]; ic++)
Base::states_[ic].qreg().apply_matrix(qubits, perm);
}
} else {
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
for (int_t ic = Base::top_state_of_group_[ig];
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
for (uint_t ic = Base::top_state_of_group_[ig];
ic < Base::top_state_of_group_[ig + 1]; ic++)
Base::states_[ic].qreg().apply_matrix(qubits, perm);
}
}
} else {
for (int_t i = 0; i < qubits.size(); i++) {
for (int_t i = 0; i < (int_t)qubits.size(); i++) {
if (((final_state >> i) & 1) != ((meas_state >> i) & 1)) {
BasePar::apply_chunk_x(qubits[i]);
}
@ -1147,7 +1148,7 @@ template <class state_t>
std::vector<reg_t> Executor<state_t>::sample_measure(const reg_t &qubits,
uint_t shots,
RngEngine &rng) const {
int_t i, j;
uint_t i, j;
// Generate flat register for storing
std::vector<double> rnds;
rnds.reserve(shots);
@ -1162,8 +1163,8 @@ std::vector<reg_t> Executor<state_t>::sample_measure(const reg_t &qubits,
// calculate per chunk sum
if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
#pragma omp parallel for
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
for (int_t ic = Base::top_state_of_group_[ig];
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
for (uint_t ic = Base::top_state_of_group_[ig];
ic < Base::top_state_of_group_[ig + 1]; ic++) {
bool batched = Base::states_[ic].qreg().enable_batch(
true); // return sum of all chunks in group
@ -1172,8 +1173,8 @@ std::vector<reg_t> Executor<state_t>::sample_measure(const reg_t &qubits,
}
}
} else {
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
for (int_t ic = Base::top_state_of_group_[ig];
for (uint_t ig = 0; ig < Base::num_groups_; ig++) {
for (uint_t ic = Base::top_state_of_group_[ig];
ic < Base::top_state_of_group_[ig + 1]; ic++) {
bool batched = Base::states_[ic].qreg().enable_batch(
true); // return sum of all chunks in group
@ -1271,9 +1272,9 @@ void Executor<state_t>::apply_initialize(const reg_t &qubits,
auto apply_global_phase = [&tmp, &params_in, global_phase](int_t i) {
tmp[i] = params_in[i] * global_phase;
};
Utils::apply_omp_parallel_for((qubits.size() > Base::omp_qubit_threshold_),
0, params_in.size(), apply_global_phase,
Base::parallel_state_update_);
Utils::apply_omp_parallel_for(
(qubits.size() > (uint_t)Base::omp_qubit_threshold_), 0,
params_in.size(), apply_global_phase, Base::parallel_state_update_);
}
const cvector_t &params = tmp.empty() ? params_in : tmp;
if (qubits.size() == Base::num_qubits_) {
@ -1296,13 +1297,13 @@ void Executor<state_t>::apply_initialize(const reg_t &qubits,
if (qubits_out_chunk.size() == 0) { // no qubits outside of chunk
if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
#pragma omp parallel for
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
for (int_t i = Base::top_state_of_group_[ig];
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
for (uint_t i = Base::top_state_of_group_[ig];
i < Base::top_state_of_group_[ig + 1]; i++)
Base::states_[i].qreg().initialize_component(qubits, params);
}
} else {
for (int_t i = 0; i < Base::states_.size(); i++)
for (uint_t i = 0; i < Base::states_.size(); i++)
Base::states_[i].qreg().initialize_component(qubits, params);
}
} else {
@ -1311,16 +1312,16 @@ void Executor<state_t>::apply_initialize(const reg_t &qubits,
// scatter inside chunks
const size_t dim = 1ULL << qubits_in_chunk.size();
cvector_t perm(dim * dim, 0.);
for (int_t i = 0; i < dim; i++) {
for (uint_t i = 0; i < dim; i++) {
perm[i] = 1.0;
}
if (BasePar::chunk_omp_parallel_) {
#pragma omp parallel for
for (int_t i = 0; i < Base::states_.size(); i++)
for (int_t i = 0; i < (int_t)Base::states_.size(); i++)
Base::states_[i].qreg().apply_matrix(qubits_in_chunk, perm);
} else {
for (int_t i = 0; i < Base::states_.size(); i++)
for (uint_t i = 0; i < Base::states_.size(); i++)
Base::states_[i].qreg().apply_matrix(qubits_in_chunk, perm);
}
}
@ -1329,8 +1330,9 @@ void Executor<state_t>::apply_initialize(const reg_t &qubits,
auto sorted_qubits_out = qubits_out_chunk;
std::sort(sorted_qubits_out.begin(), sorted_qubits_out.end());
for (int_t i = 0; i < (1ull << (Base::num_qubits_ - BasePar::chunk_bits_ -
qubits_out_chunk.size()));
for (uint_t i = 0;
i < (1ull << (Base::num_qubits_ - BasePar::chunk_bits_ -
qubits_out_chunk.size()));
i++) {
uint_t baseChunk = 0;
uint_t j, ii, t;
@ -1344,7 +1346,7 @@ void Executor<state_t>::apply_initialize(const reg_t &qubits,
baseChunk >>= BasePar::chunk_bits_;
for (j = 1; j < (1ull << qubits_out_chunk.size()); j++) {
int_t ic = baseChunk;
uint_t ic = baseChunk;
for (t = 0; t < qubits_out_chunk.size(); t++) {
if ((j >> t) & 1)
ic += (1ull << (qubits_out_chunk[t] - BasePar::chunk_bits_));
@ -1385,13 +1387,13 @@ void Executor<state_t>::apply_initialize(const reg_t &qubits,
// initialize by params
if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
#pragma omp parallel for
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
for (int_t i = Base::top_state_of_group_[ig];
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
for (uint_t i = Base::top_state_of_group_[ig];
i < Base::top_state_of_group_[ig + 1]; i++)
Base::states_[i].qreg().apply_diagonal_matrix(qubits, params);
}
} else {
for (int_t i = 0; i < Base::states_.size(); i++)
for (uint_t i = 0; i < Base::states_.size(); i++)
Base::states_[i].qreg().apply_diagonal_matrix(qubits, params);
}
}
@ -1402,7 +1404,7 @@ void Executor<state_t>::initialize_from_vector(const cvector_t &params) {
uint_t local_offset = Base::global_state_index_ << BasePar::chunk_bits_;
#pragma omp parallel for if (BasePar::chunk_omp_parallel_)
for (int_t i = 0; i < Base::states_.size(); i++) {
for (int_t i = 0; i < (int_t)Base::states_.size(); i++) {
// copy part of state for this chunk
cvector_t tmp(1ull << BasePar::chunk_bits_);
std::copy(params.begin() + local_offset + (i << BasePar::chunk_bits_),
@ -1443,13 +1445,13 @@ void Executor<state_t>::apply_kraus(const reg_t &qubits,
p = 0.0;
if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
#pragma omp parallel for reduction(+ : p)
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
for (int_t i = Base::top_state_of_group_[ig];
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
for (uint_t i = Base::top_state_of_group_[ig];
i < Base::top_state_of_group_[ig + 1]; i++)
p += Base::states_[i].qreg().norm(qubits, vmat);
}
} else {
for (int_t i = 0; i < Base::states_.size(); i++)
for (uint_t i = 0; i < Base::states_.size(); i++)
p += Base::states_[i].qreg().norm(qubits, vmat);
}
@ -1465,14 +1467,14 @@ void Executor<state_t>::apply_kraus(const reg_t &qubits,
// apply Kraus projection operator
if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
#pragma omp parallel for
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
for (int_t ic = Base::top_state_of_group_[ig];
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
for (uint_t ic = Base::top_state_of_group_[ig];
ic < Base::top_state_of_group_[ig + 1]; ic++)
Base::states_[ic].qreg().apply_matrix(qubits, vmat);
}
} else {
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
for (int_t ic = Base::top_state_of_group_[ig];
for (uint_t ig = 0; ig < Base::num_groups_; ig++) {
for (uint_t ic = Base::top_state_of_group_[ig];
ic < Base::top_state_of_group_[ig + 1]; ic++)
Base::states_[ic].qreg().apply_matrix(qubits, vmat);
}
@ -1489,14 +1491,14 @@ void Executor<state_t>::apply_kraus(const reg_t &qubits,
auto vmat = Utils::vectorize_matrix(renorm * kmats.back());
if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
#pragma omp parallel for
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
for (int_t ic = Base::top_state_of_group_[ig];
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
for (uint_t ic = Base::top_state_of_group_[ig];
ic < Base::top_state_of_group_[ig + 1]; ic++)
Base::states_[ic].qreg().apply_matrix(qubits, vmat);
}
} else {
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
for (int_t ic = Base::top_state_of_group_[ig];
for (uint_t ig = 0; ig < Base::num_groups_; ig++) {
for (uint_t ic = Base::top_state_of_group_[ig];
ic < Base::top_state_of_group_[ig + 1]; ic++)
Base::states_[ic].qreg().apply_matrix(qubits, vmat);
}
@ -1513,7 +1515,7 @@ Executor<state_t>::sample_measure_with_prob(CircuitExecutor::Branch &root,
uint_t nshots = root.num_shots();
reg_t shot_branch(nshots);
for (int_t i = 0; i < nshots; i++) {
for (uint_t i = 0; i < nshots; i++) {
shot_branch[i] = root.rng_shots()[i].rand_int(probs);
}
@ -1547,11 +1549,11 @@ void Executor<state_t>::measure_reset_update(CircuitExecutor::Branch &root,
root.branches()[i]->add_op_after_branch(op);
if (final_state >= 0 && final_state != i) {
Operations::Op op;
op.type = OpType::gate;
op.name = "mcx";
op.qubits = qubits;
root.branches()[i]->add_op_after_branch(op);
Operations::Op op2;
op2.type = OpType::gate;
op2.name = "mcx";
op2.qubits = qubits;
root.branches()[i]->add_op_after_branch(op2);
}
}
}
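Renaming the inner object to op2 resolves -Wshadow: the nested `Operations::Op op` hid the one declared a few lines earlier in the same block, so later references to `op` could silently bind to the wrong object. The pattern in miniature:

struct Op { int kind = 0; };
void queue(const Op &) {}

void branch_example(bool flip) {
  Op op; // outer op, already queued above in the real code
  queue(op);
  if (flip) {
    Op op2; // was `Op op;` -- shadowed the outer declaration
    op2.kind = 1;
    queue(op2);
  }
}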
@ -1559,7 +1561,7 @@ void Executor<state_t>::measure_reset_update(CircuitExecutor::Branch &root,
else {
// Diagonal matrix for projecting and renormalizing to measurement outcome
const size_t dim = 1ULL << qubits.size();
for (int_t i = 0; i < dim; i++) {
for (uint_t i = 0; i < dim; i++) {
cvector_t mdiag(dim, 0.);
mdiag[i] = 1. / std::sqrt(meas_probs[i]);
@ -1569,20 +1571,20 @@ void Executor<state_t>::measure_reset_update(CircuitExecutor::Branch &root,
op.params = mdiag;
root.branches()[i]->add_op_after_branch(op);
if (final_state >= 0 && final_state != i) {
if (final_state >= 0 && final_state != (int_t)i) {
// build vectorized permutation matrix
cvector_t perm(dim * dim, 0.);
perm[final_state * dim + i] = 1.;
perm[i * dim + final_state] = 1.;
for (size_t j = 0; j < dim; j++) {
if (j != final_state && j != i)
for (uint_t j = 0; j < dim; j++) {
if ((int_t)j != final_state && j != i)
perm[j * dim + j] = 1.;
}
Operations::Op op;
op.type = OpType::matrix;
op.qubits = qubits;
op.mats.push_back(Utils::devectorize_matrix(perm));
root.branches()[i]->add_op_after_branch(op);
Operations::Op op2;
op2.type = OpType::matrix;
op2.qubits = qubits;
op2.mats.push_back(Utils::devectorize_matrix(perm));
root.branches()[i]->add_op_after_branch(op2);
}
}
}
@ -1595,7 +1597,7 @@ void Executor<state_t>::apply_measure(CircuitExecutor::Branch &root,
rvector_t probs = sample_measure_with_prob(root, qubits);
// save result to cregs
for (int_t i = 0; i < probs.size(); i++) {
for (uint_t i = 0; i < probs.size(); i++) {
const reg_t outcome = Utils::int2reg(i, 2, qubits.size());
root.branches()[i]->creg().store_measure(outcome, cmemory, cregister);
}
@ -1624,9 +1626,9 @@ void Executor<state_t>::apply_initialize(CircuitExecutor::Branch &root,
auto apply_global_phase = [&tmp, params_in, global_phase](int_t i) {
tmp[i] = params_in[i] * global_phase;
};
Utils::apply_omp_parallel_for((qubits.size() > Base::omp_qubit_threshold_),
0, params_in.size(), apply_global_phase,
Base::parallel_state_update_);
Utils::apply_omp_parallel_for(
(qubits.size() > (uint_t)Base::omp_qubit_threshold_), 0,
params_in.size(), apply_global_phase, Base::parallel_state_update_);
}
const cvector_t &params = tmp.empty() ? params_in : tmp;
if (qubits.size() == Base::num_qubits_) {
@ -1648,7 +1650,7 @@ void Executor<state_t>::apply_initialize(CircuitExecutor::Branch &root,
op.name = "initialize";
op.qubits = qubits;
op.params = params;
for (int_t i = 0; i < root.num_branches(); i++) {
for (uint_t i = 0; i < root.num_branches(); i++) {
root.branches()[i]->add_op_after_branch(op);
}
return; // initialization will be done in next call because of shot
@ -1672,10 +1674,8 @@ void Executor<state_t>::apply_kraus(CircuitExecutor::Branch &root,
// So we only compute probabilities for the first N-1 kraus operators
// and infer the probability of the last one from 1 - sum of the previous
double r;
double accum = 0.;
double p;
bool complete = false;
reg_t shot_branch;
uint_t nshots;
@ -1685,7 +1685,7 @@ void Executor<state_t>::apply_kraus(CircuitExecutor::Branch &root,
nshots = root.num_shots();
shot_branch.resize(nshots);
rshots.resize(nshots);
for (int_t i = 0; i < nshots; i++) {
for (uint_t i = 0; i < nshots; i++) {
shot_branch[i] = kmats.size() - 1;
rshots[i] = root.rng_shots()[i].rand(0., 1.);
}
@ -1701,7 +1701,7 @@ void Executor<state_t>::apply_kraus(CircuitExecutor::Branch &root,
// check if we need to apply this operator
pmats[j] = p;
for (int_t i = 0; i < nshots; i++) {
for (uint_t i = 0; i < nshots; i++) {
if (shot_branch[i] >= kmats.size() - 1) {
if (accum > rshots[i]) {
shot_branch[i] = j;
@ -1710,23 +1710,21 @@ void Executor<state_t>::apply_kraus(CircuitExecutor::Branch &root,
}
}
if (nshots_multiplied >= nshots) {
complete = true;
break;
}
}
// check if we haven't applied a kraus operator yet
pmats[pmats.size() - 1] = 1. - accum;
root.creg() = Base::states_[root.state_index()].creg();
root.branch_shots(shot_branch, kmats.size());
for (int_t i = 0; i < kmats.size(); i++) {
for (uint_t i = 0; i < kmats.size(); i++) {
Operations::Op op;
op.type = OpType::matrix;
op.qubits = qubits;
op.mats.push_back(kmats[i]);
p = 1 / std::sqrt(pmats[i]);
for (int_t j = 0; j < op.mats[0].size(); j++)
for (uint_t j = 0; j < op.mats[0].size(); j++)
op.mats[0][j] *= p;
root.branches()[i]->add_op_after_branch(op);
}
@ -1748,7 +1746,7 @@ void Executor<state_t>::apply_save_density_matrix(CircuitExecutor::Branch &root,
}
std::vector<bool> copied(Base::num_bind_params_, false);
for (int_t i = 0; i < root.num_shots(); i++) {
for (uint_t i = 0; i < root.num_shots(); i++) {
uint_t ip = root.param_index(i);
if (!copied[ip]) {
(result + ip)
@ -1771,7 +1769,7 @@ void Executor<state_t>::apply_save_probs(CircuitExecutor::Branch &root,
std::vector<bool> copied(Base::num_bind_params_, false);
if (op.type == Operations::OpType::save_probs_ket) {
// Convert to ket dict
for (int_t i = 0; i < root.num_shots(); i++) {
for (uint_t i = 0; i < root.num_shots(); i++) {
uint_t ip = root.param_index(i);
if (!copied[ip]) {
(result + ip)
@ -1783,7 +1781,7 @@ void Executor<state_t>::apply_save_probs(CircuitExecutor::Branch &root,
}
}
} else {
for (int_t i = 0; i < root.num_shots(); i++) {
for (uint_t i = 0; i < root.num_shots(); i++) {
uint_t ip = root.param_index(i);
if (!copied[ip]) {
(result + ip)
@ -1810,7 +1808,7 @@ void Executor<state_t>::apply_save_statevector(CircuitExecutor::Branch &root,
if (last_op) {
const auto v = Base::states_[root.state_index()].move_to_vector();
for (int_t i = 0; i < root.num_shots(); i++) {
for (uint_t i = 0; i < root.num_shots(); i++) {
uint_t ip = root.param_index(i);
(result + ip)
->save_data_pershot(Base::states_[root.state_index()].creg(), key, v,
@ -1818,7 +1816,7 @@ void Executor<state_t>::apply_save_statevector(CircuitExecutor::Branch &root,
}
} else {
const auto v = Base::states_[root.state_index()].copy_to_vector();
for (int_t i = 0; i < root.num_shots(); i++) {
for (uint_t i = 0; i < root.num_shots(); i++) {
uint_t ip = root.param_index(i);
(result + ip)
->save_data_pershot(Base::states_[root.state_index()].creg(), key, v,
@ -1841,7 +1839,7 @@ void Executor<state_t>::apply_save_statevector_dict(
for (auto const &it : state_ket) {
result_state_ket[it.first] = it.second;
}
for (int_t i = 0; i < root.num_shots(); i++) {
for (uint_t i = 0; i < root.num_shots(); i++) {
uint_t ip = root.param_index(i);
(result + ip)
->save_data_pershot(
@ -1866,7 +1864,7 @@ void Executor<state_t>::apply_save_amplitudes(CircuitExecutor::Branch &root,
amps[i] =
Base::states_[root.state_index()].qreg().get_state(op.int_params[i]);
}
for (int_t i = 0; i < root.num_shots(); i++) {
for (uint_t i = 0; i < root.num_shots(); i++) {
uint_t ip = root.param_index(i);
(result + ip)
->save_data_pershot(
@ -1880,7 +1878,7 @@ void Executor<state_t>::apply_save_amplitudes(CircuitExecutor::Branch &root,
op.int_params[i]);
}
std::vector<bool> copied(Base::num_bind_params_, false);
for (int_t i = 0; i < root.num_shots(); i++) {
for (uint_t i = 0; i < root.num_shots(); i++) {
uint_t ip = root.param_index(i);
if (!copied[ip]) {
(result + ip)
@ -1898,7 +1896,7 @@ std::vector<reg_t>
Executor<state_t>::sample_measure(state_t &state, const reg_t &qubits,
uint_t shots,
std::vector<RngEngine> &rng) const {
int_t i, j;
uint_t i;
std::vector<double> rnds;
rnds.reserve(shots);
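Narrowing `int_t i, j;` to `uint_t i;` here (and deleting similar declarations elsewhere) removes counters that -Wunused-variable flags as soon as one loop goes away. Declaring the counter in the for header sidesteps the issue entirely, though that is not the style this file uses; a sketch:

#include <vector>

void fill_zeros(std::vector<double> &v) {
  for (size_t i = 0; i < v.size(); ++i) // counter scoped to the loop
    v[i] = 0.0;
}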

View File

@ -402,7 +402,6 @@ const stringmap_t<Gates> State<statevec_t>::gateset_(
template <class statevec_t>
void State<statevec_t>::initialize_qreg(uint_t num_qubits) {
int_t i;
initialize_omp();
BaseState::qreg_.set_num_qubits(num_qubits);
@ -426,8 +425,6 @@ void State<statevec_t>::initialize_statevector(uint_t num_qubits,
template <class statevec_t>
void State<statevec_t>::initialize_omp() {
uint_t i;
BaseState::qreg_.set_omp_threshold(omp_qubit_threshold_);
if (BaseState::threads_ > 0) // set allowed OMP threads in qubitvector
BaseState::qreg_.set_omp_threads(BaseState::threads_);
@ -701,7 +698,7 @@ cmatrix_t State<statevec_t>::vec2density(const reg_t &qubits, const T &vec) {
cmatrix_t densmat(DIM, DIM);
if ((N == BaseState::qreg_.num_qubits()) && (qubits == qubits_sorted)) {
const int_t mask = QV::MASKS[N];
#pragma omp parallel for if (2 * N > omp_qubit_threshold_ && \
#pragma omp parallel for if (2 * N > (size_t)omp_qubit_threshold_ && \
BaseState::threads_ > 1) \
num_threads(BaseState::threads_)
for (int_t rowcol = 0; rowcol < int_t(DIM * DIM); ++rowcol) {
@ -750,7 +747,7 @@ void State<statevec_t>::apply_gate(const Operations::Op &op) {
}
if (qubits_out.size() > 0) {
uint_t mask = 0;
for (int i = 0; i < qubits_out.size(); i++) {
for (uint_t i = 0; i < qubits_out.size(); i++) {
mask |= (1ull << (qubits_out[i] - BaseState::qreg_.num_qubits()));
}
if ((BaseState::qreg_.chunk_index() & mask) == mask) {
@ -1026,7 +1023,7 @@ template <class statevec_t>
std::vector<reg_t> State<statevec_t>::sample_measure(const reg_t &qubits,
uint_t shots,
RngEngine &rng) {
int_t i, j;
uint_t i;
// Generate flat register for storing
std::vector<double> rnds;
rnds.reserve(shots);
@ -1066,9 +1063,9 @@ void State<statevec_t>::apply_initialize(const reg_t &qubits,
auto apply_global_phase = [&tmp, &params_in, this](int_t i) {
tmp[i] = params_in[i] * BaseState::global_phase_;
};
Utils::apply_omp_parallel_for((qubits.size() > omp_qubit_threshold_), 0,
params_in.size(), apply_global_phase,
BaseState::threads_);
Utils::apply_omp_parallel_for(
(qubits.size() > (uint_t)omp_qubit_threshold_), 0, params_in.size(),
apply_global_phase, BaseState::threads_);
}
const cvector_t &params = tmp.empty() ? params_in : tmp;
if (qubits.size() == BaseState::qreg_.num_qubits()) {

View File

@ -244,9 +244,9 @@ void Transformer<Container, data_t>::apply_diagonal_matrix(
auto func = [&](const areg_t<2> &inds,
const cvector_t<data_t> &_diag) -> void {
for (int_t i = 0; i < 2; ++i) {
const int_t k = inds[i];
const uint_t k = inds[i];
int_t iv = 0;
for (int_t j = 0; j < N; j++)
for (uint_t j = 0; j < N; j++)
if ((k & (1ULL << qubits[j])) != 0)
iv += (1ULL << j);
if (_diag[iv] != (data_t)1.0)

View File

@ -177,7 +177,7 @@ void Tensor<data_t>::set_conj(const reg_t &qubits,
std::vector<std::complex<data_t>> &mat) {
set(qubits, mat);
for (int i = 0; i < tensor_.size(); i++)
for (uint_t i = 0; i < tensor_.size(); i++)
tensor_[i] = std::conj(tensor_[i]);
sp_tensor_ = true;
}

View File

@ -374,7 +374,7 @@ template <typename data_t>
TensorNet<data_t>::TensorNet(const TensorNet &obj) {}
template <typename data_t>
TensorNet<data_t>::~TensorNet() {
int i;
uint_t i;
for (i = 0; i < tensors_.size(); i++) {
tensors_[i].reset();
}
@ -417,7 +417,7 @@ void TensorNet<data_t>::buffer_statevector(void) const {
std::vector<int64_t> extents_out(num_qubits_);
// output tensor
for (int_t i = 0; i < num_qubits_; i++) {
for (uint_t i = 0; i < num_qubits_; i++) {
modes_out[i] = modes_qubits_[i];
extents_out[i] = 2;
}
@ -464,9 +464,9 @@ TensorNet<data_t>::reduced_density_matrix(const reg_t &qubits) {
uint_t nqubits = qubits.size();
// connect qubits not to be reduced
for (int_t i = 0; i < num_qubits_; i++) {
for (uint_t i = 0; i < num_qubits_; i++) {
bool check = false;
for (int_t j = 0; j < qubits.size(); j++) {
for (uint_t j = 0; j < qubits.size(); j++) {
if (i == qubits[j]) {
check = true;
break;
@ -491,7 +491,7 @@ TensorNet<data_t>::reduced_density_matrix(const reg_t &qubits) {
std::vector<std::complex<data_t>> trace;
// output tensor
for (int_t i = 0; i < nqubits; i++) {
for (uint_t i = 0; i < nqubits; i++) {
modes_out[i] = modes_qubits_[qubits[i]];
modes_out[i + nqubits] = modes_qubits_sp_[qubits[i]];
extents_out[i] = 2;
@ -505,9 +505,9 @@ TensorNet<data_t>::reduced_density_matrix(const reg_t &qubits) {
delete contractor;
// recover connected qubits
for (int_t i = 0; i < num_qubits_; i++) {
for (uint_t i = 0; i < num_qubits_; i++) {
bool check = false;
for (int_t j = 0; j < qubits.size(); j++) {
for (uint_t j = 0; j < qubits.size(); j++) {
if (i == qubits[j]) {
check = true;
break;
@ -538,7 +538,7 @@ void TensorNet<data_t>::initialize_component(const reg_t &qubits,
statevector_.clear(); // invalidate statevector buffer
cvector_t<data_t> state(state0.size());
for (int_t i = 0; i < state0.size(); i++)
for (uint_t i = 0; i < state0.size(); i++)
state[i] = (std::complex<data_t>)state0[i];
tensors_.push_back(std::make_shared<Tensor<data_t>>());
@ -547,7 +547,7 @@ void TensorNet<data_t>::initialize_component(const reg_t &qubits,
tensors_.push_back(std::make_shared<Tensor<data_t>>());
tensors_[last + 1]->set_conj(qubits, state);
for (int i = 0; i < qubits.size(); i++) {
for (uint_t i = 0; i < qubits.size(); i++) {
modes_qubits_[qubits[i]] = mode_index_;
tensors_[last]->modes()[i] = mode_index_++;
qubits_[qubits[i]] = tensors_[last];
@ -584,7 +584,7 @@ void TensorNet<data_t>::add_tensor(const reg_t &qubits,
tensors_.push_back(std::make_shared<Tensor<data_t>>());
uint_t last = tensors_.size() - 1;
tensors_[last]->set(qubits, mat);
for (int i = 0; i < qubits.size(); i++) {
for (uint_t i = 0; i < qubits.size(); i++) {
tensors_[last]->modes()[i] = modes_qubits_[qubits[i]];
modes_qubits_[qubits[i]] = mode_index_;
tensors_[last]->modes()[qubits.size() + i] = mode_index_++;
@ -594,7 +594,7 @@ void TensorNet<data_t>::add_tensor(const reg_t &qubits,
tensors_.push_back(std::make_shared<Tensor<data_t>>());
last++;
tensors_[last]->set_conj(qubits, mat);
for (int i = 0; i < qubits.size(); i++) {
for (uint_t i = 0; i < qubits.size(); i++) {
tensors_[last]->modes()[i] = modes_qubits_sp_[qubits[i]];
modes_qubits_sp_[qubits[i]] = mode_index_;
tensors_[last]->modes()[qubits.size() + i] = mode_index_++;
@ -614,13 +614,13 @@ void TensorNet<data_t>::add_superop_tensor(
uint_t last = tensors_.size() - 1;
tensors_[last]->set(qubits, mat);
for (int i = 0; i < size; i++) {
for (uint_t i = 0; i < size; i++) {
tensors_[last]->modes()[i] = modes_qubits_[qubits[i]];
modes_qubits_[qubits[i]] = mode_index_;
tensors_[last]->modes()[size * 2 + i] = mode_index_++;
qubits_[qubits[i]] = tensors_[last];
}
for (int i = 0; i < size; i++) {
for (uint_t i = 0; i < size; i++) {
tensors_[last]->modes()[size + i] = modes_qubits_sp_[qubits[i]];
modes_qubits_sp_[qubits[i]] = mode_index_;
tensors_[last]->modes()[size * 3 + i] = mode_index_++;
@ -636,7 +636,7 @@ void TensorNet<data_t>::add_superop_tensor(
template <typename data_t>
void TensorNet<data_t>::initialize() {
int i;
uint_t i;
if (statevector_.size() > 0)
statevector_.clear(); // invalidate statevector buffer
@ -658,7 +658,7 @@ void TensorNet<data_t>::initialize() {
for (i = 0; i < num_qubits_; i++) {
tensors_.push_back(std::make_shared<Tensor<data_t>>());
uint_t last = tensors_.size() - 1;
tensors_[last]->set({i}, init);
tensors_[last]->set({(int)i}, init);
modes_qubits_[i] = mode_index_;
tensors_[last]->modes()[0] = mode_index_++;
@ -667,7 +667,7 @@ void TensorNet<data_t>::initialize() {
for (i = 0; i < num_qubits_; i++) { // for super qubits
tensors_.push_back(std::make_shared<Tensor<data_t>>());
uint_t last = tensors_.size() - 1;
tensors_[last]->set({i}, init);
tensors_[last]->set({(int)i}, init);
modes_qubits_sp_[i] = mode_index_;
tensors_[last]->modes()[0] = mode_index_++;
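With `i` now unsigned, the braced arguments above need the explicit `(int)i`: list-initialization rejects implicit narrowing conversions, including unsigned-to-signed, so the cast is mandatory wherever the callee's element type is signed (that Tensor::set takes a signed element here is inferred from the cast, not confirmed). Sketch:

#include <cstdint>
#include <vector>

void takes_ints(const std::vector<int> &qs) { (void)qs; }

void caller(uint64_t i) {
  // takes_ints({i});    // error: narrowing conversion inside a braced list
  takes_ints({(int)i});  // the cast makes the conversion explicit
}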
@ -700,19 +700,19 @@ void TensorNet<data_t>::initialize(const TensorNet<data_t> &obj) {
template <typename data_t>
void TensorNet<data_t>::initialize_from_matrix(const cmatrix_t &matrix0) {
cvector_t<data_t> matrix(matrix0.size());
for (int_t i = 0; i < matrix0.size(); i++)
for (uint_t i = 0; i < matrix0.size(); i++)
matrix[i] = (std::complex<data_t>)matrix0[i];
tensors_.push_back(std::make_shared<Tensor<data_t>>());
uint_t last = tensors_.size() - 1;
tensors_[last]->set(num_qubits_, matrix);
for (int i = 0; i < num_qubits_; i++) {
for (uint_t i = 0; i < num_qubits_; i++) {
modes_qubits_[i] = mode_index_++;
tensors_[last]->modes()[i] = modes_qubits_[i];
qubits_[i] = tensors_[last];
}
for (int i = 0; i < num_qubits_; i++) {
for (uint_t i = 0; i < num_qubits_; i++) {
modes_qubits_sp_[i] = mode_index_++;
tensors_[last]->modes()[i + num_qubits_] = modes_qubits_sp_[i];
qubits_sp_[i] = tensors_[last];
@ -772,7 +772,6 @@ void TensorNet<data_t>::apply_multiplexer(const reg_t &control_qubits,
for (const auto &q : control_qubits) {
qubits.push_back(q);
}
size_t N = qubits.size();
cvector_t<double> matMP(DIM * DIM, 0.0);
uint_t b, i, j;
@ -794,11 +793,10 @@ template <typename data_t>
void TensorNet<data_t>::apply_diagonal_matrix(const reg_t &qubits,
const cvector_t<double> &diag) {
cvector_t<data_t> mat(diag.size() * diag.size(), 0.0);
for (int_t i = 0; i < diag.size(); i++) {
for (uint_t i = 0; i < diag.size(); i++) {
mat[i * (diag.size() + 1)] = diag[i];
}
Tensor<data_t> *t = new Tensor<data_t>;
add_tensor(qubits, mat);
}
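The deleted `Tensor<data_t> *t = new Tensor<data_t>;` was dead code twice over: an unused variable and a memory leak, since the pointer was never used or freed, while the tensor itself is already built through the make_shared call inside add_tensor. Were a local handle ever needed, an RAII pointer would prevent the leak by construction:

#include <memory>

struct TensorSketch { /* fields elided */ };

void build_example() {
  auto t = std::make_shared<TensorSketch>(); // released automatically
  (void)t;
}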
@ -806,7 +804,7 @@ template <typename data_t>
void TensorNet<data_t>::apply_diagonal_superop_matrix(
const reg_t &qubits, const cvector_t<double> &diag) {
cvector_t<data_t> mat(diag.size() * diag.size(), 0.0);
for (int_t i = 0; i < diag.size(); i++) {
for (uint_t i = 0; i < diag.size(); i++) {
mat[i * (diag.size() + 1)] = diag[i];
}
add_superop_tensor(qubits, mat);
@ -833,7 +831,7 @@ void TensorNet<data_t>::apply_mcx(const reg_t &qubits) {
reg_t qubits_t;
qubits_t.push_back(qubits[qubits.size() - 1]);
for (int i = 0; i < qubits.size() - 1; i++)
for (uint_t i = 0; i < qubits.size() - 1; i++)
qubits_t.push_back(qubits[i]);
add_tensor(qubits_t, mat);
@ -850,7 +848,7 @@ void TensorNet<data_t>::apply_mcy(const reg_t &qubits) {
reg_t qubits_t;
qubits_t.push_back(qubits[qubits.size() - 1]);
for (int i = 0; i < qubits.size() - 1; i++)
for (uint_t i = 0; i < qubits.size() - 1; i++)
qubits_t.push_back(qubits[i]);
add_tensor(qubits_t, mat);
@ -869,7 +867,7 @@ void TensorNet<data_t>::apply_mcswap(const reg_t &qubits) {
reg_t qubits_t;
qubits_t.push_back(qubits[qubits.size() - 2]);
qubits_t.push_back(qubits[qubits.size() - 1]);
for (int i = 0; i < qubits.size() - 2; i++)
for (uint_t i = 0; i < qubits.size() - 2; i++)
qubits_t.push_back(qubits[i]);
add_tensor(qubits_t, mat);
@ -886,7 +884,7 @@ void TensorNet<data_t>::apply_mcphase(const reg_t &qubits,
reg_t qubits_t;
qubits_t.push_back(qubits[qubits.size() - 1]);
for (int i = 0; i < qubits.size() - 1; i++)
for (uint_t i = 0; i < qubits.size() - 1; i++)
qubits_t.push_back(qubits[i]);
add_tensor(qubits_t, mat);
@ -907,7 +905,7 @@ void TensorNet<data_t>::apply_mcu(const reg_t &qubits,
reg_t qubits_t;
qubits_t.push_back(qubits[qubits.size() - 1]);
for (int i = 0; i < qubits.size() - 1; i++)
for (uint_t i = 0; i < qubits.size() - 1; i++)
qubits_t.push_back(qubits[i]);
add_tensor(qubits_t, matR);
@ -951,7 +949,7 @@ void TensorNet<data_t>::apply_rotation(const reg_t &qubits, const Rotation r,
template <typename data_t>
double TensorNet<data_t>::norm() const {
// connect qubits not used for trace
for (int_t i = 1; i < num_qubits_; i++) {
for (uint_t i = 1; i < num_qubits_; i++) {
for (int_t j = 0; j < qubits_sp_[i]->rank(); j++) {
if (qubits_sp_[i]->modes()[j] == modes_qubits_sp_[i]) {
qubits_sp_[i]->modes()[j] = modes_qubits_[i];
@ -980,7 +978,7 @@ double TensorNet<data_t>::norm() const {
delete contractor;
// restore connected qubits
for (int_t i = 1; i < num_qubits_; i++) {
for (uint_t i = 1; i < num_qubits_; i++) {
for (int_t j = 0; j < qubits_sp_[i]->rank(); j++) {
if (qubits_sp_[i]->modes()[j] == modes_qubits_[i]) {
qubits_sp_[i]->modes()[j] = modes_qubits_sp_[i];
@ -1002,26 +1000,26 @@ double TensorNet<data_t>::norm(const reg_t &qubits,
// additional matrix
std::vector<std::complex<data_t>> mat_t(mat.size());
for (int_t i = 0; i < mat.size(); i++)
for (uint_t i = 0; i < mat.size(); i++)
mat_t[i] = mat[i];
mat_tensors[0] = std::make_shared<Tensor<data_t>>();
mat_tensors[0]->set(qubits, mat_t);
for (int i = 0; i < qubits.size(); i++) {
for (uint_t i = 0; i < qubits.size(); i++) {
mat_tensors[0]->modes()[i] = tmp_modes[qubits[i]];
tmp_modes[qubits[i]] = tmp_index;
mat_tensors[0]->modes()[qubits.size() + i] = tmp_index++;
}
mat_tensors[1] = std::make_shared<Tensor<data_t>>();
mat_tensors[1]->set_conj(qubits, mat_t);
for (int i = 0; i < qubits.size(); i++) {
for (uint_t i = 0; i < qubits.size(); i++) {
mat_tensors[1]->modes()[i] = tmp_modes_sp[qubits[i]];
tmp_modes_sp[qubits[i]] = tmp_index;
mat_tensors[1]->modes()[qubits.size() + i] = tmp_index++;
}
// connect qubits not used for trace
for (int_t i = 0; i < num_qubits_; i++) {
for (uint_t i = 0; i < num_qubits_; i++) {
if (i != qubits[0]) {
for (int_t j = 0; j < qubits_sp_[i]->rank(); j++) {
if (qubits_sp_[i]->modes()[j] == modes_qubits_sp_[i]) {
@ -1054,7 +1052,7 @@ double TensorNet<data_t>::norm(const reg_t &qubits,
delete contractor;
// restore connected qubits
for (int_t i = 1; i < num_qubits_; i++) {
for (uint_t i = 1; i < num_qubits_; i++) {
if (i != qubits[0]) {
for (int_t j = 0; j < qubits_sp_[i]->rank(); j++) {
if (qubits_sp_[i]->modes()[j] == tmp_modes[i]) {
@ -1085,7 +1083,7 @@ double TensorNet<data_t>::probability(const uint_t outcome) const {
template <typename data_t>
std::vector<double> TensorNet<data_t>::probabilities() const {
reg_t qubits(num_qubits_);
for (int_t i = 0; i < num_qubits_; i++)
for (uint_t i = 0; i < num_qubits_; i++)
qubits[i] = i;
return probabilities(qubits);
}
@ -1099,9 +1097,9 @@ TensorNet<data_t>::probabilities(const reg_t &qubits) const {
std::vector<int64_t> extents_out(nqubits * 2);
std::vector<std::complex<data_t>> trace;
// connect qubits not to be measured
for (int_t i = 0; i < num_qubits_; i++) {
for (uint_t i = 0; i < num_qubits_; i++) {
bool check = false;
for (int_t j = 0; j < qubits.size(); j++) {
for (uint_t j = 0; j < qubits.size(); j++) {
if (i == qubits[j]) {
check = true;
break;
@ -1122,7 +1120,7 @@ TensorNet<data_t>::probabilities(const reg_t &qubits) const {
contractor->set_network(tensors_);
// output tensor
for (int_t i = 0; i < nqubits; i++) {
for (uint_t i = 0; i < nqubits; i++) {
modes_out[i] = modes_qubits_[qubits[i]];
modes_out[i + nqubits] = modes_qubits_sp_[qubits[i]];
extents_out[i] = 2;
@ -1147,9 +1145,9 @@ TensorNet<data_t>::probabilities(const reg_t &qubits) const {
delete contractor;
// recover connected qubits
for (int_t i = 0; i < num_qubits_; i++) {
for (uint_t i = 0; i < num_qubits_; i++) {
bool check = false;
for (int_t j = 0; j < qubits.size(); j++) {
for (uint_t j = 0; j < qubits.size(); j++) {
if (i == qubits[j]) {
check = true;
break;
@ -1201,7 +1199,7 @@ void TensorNet<data_t>::sample_measure_branch(std::vector<reg_t> &samples,
const reg_t &input_shot_index,
const reg_t &input_measured_probs,
const uint_t pos_measured) const {
const int_t SHOTS = rnds.size();
const uint_t SHOTS = rnds.size();
/*---------------------------------------------------------------------------
| cccccccccccc | oooooooooooooo | ************** | xxxxxxxxxxxxxx |
@ -1233,7 +1231,7 @@ void TensorNet<data_t>::sample_measure_branch(std::vector<reg_t> &samples,
// output tensor
std::vector<int32_t> modes_out(nqubits * 2);
std::vector<int64_t> extents_out(nqubits * 2);
for (int_t i = 0; i < nqubits; i++) {
for (uint_t i = 0; i < nqubits; i++) {
modes_out[i] = modes_qubits_[pos_measured - nqubits + i];
modes_out[i + nqubits] = modes_qubits_sp_[pos_measured - nqubits + i];
extents_out[i] = 2;
@ -1245,7 +1243,7 @@ void TensorNet<data_t>::sample_measure_branch(std::vector<reg_t> &samples,
// connect qubits not to be measured
if (pos_measured - nqubits > 0) {
for (int_t i = 0; i < pos_measured - nqubits; i++) {
for (uint_t i = 0; i < pos_measured - nqubits; i++) {
for (int_t j = 0; j < qubits_sp_[i]->rank(); j++) {
if (qubits_sp_[i]->modes()[j] == modes_qubits_sp_[i]) {
qubits_sp_[i]->modes()[j] = modes_qubits_[i];
@ -1266,7 +1264,7 @@ void TensorNet<data_t>::sample_measure_branch(std::vector<reg_t> &samples,
shots[0] = rnds;
shot_index[0] = input_shot_index;
} else {
for (int_t i = 0; i < SHOTS; i++) {
for (uint_t i = 0; i < SHOTS; i++) {
shots[input_sample_index[i]].push_back(rnds[i]);
shot_index[input_sample_index[i]].push_back(input_shot_index[i]);
}
@ -1276,7 +1274,7 @@ void TensorNet<data_t>::sample_measure_branch(std::vector<reg_t> &samples,
std::vector<std::shared_ptr<Tensor<data_t>>> measured_tensors;
if (measured_qubits > 0) {
measured_tensors.resize(measured_qubits * 2);
for (int_t i = 0; i < measured_qubits; i++) {
for (uint_t i = 0; i < measured_qubits; i++) {
std::vector<std::complex<data_t>> prob(2, 0.0);
prob[input_measured_probs[pos_measured + i]] = 1.0;
measured_tensors[i * 2] = std::make_shared<Tensor<data_t>>();
@ -1293,11 +1291,11 @@ void TensorNet<data_t>::sample_measure_branch(std::vector<reg_t> &samples,
// 1st loop, sampling each branch before traversing branches to reuse tensor
// network
for (int_t ib = 0; ib < num_branches; ib++) {
for (uint_t ib = 0; ib < num_branches; ib++) {
if (shots[ib].size() > 0) {
if (nqubits_branch > 0) {
// tensors for measured probabilities
for (int_t i = 0; i < nqubits_branch; i++) {
for (uint_t i = 0; i < nqubits_branch; i++) {
std::vector<std::complex<data_t>> prob(2, 0.0);
if (((ib >> i) & 1) == 0)
prob[0] = 1.0;
@ -1317,7 +1315,7 @@ void TensorNet<data_t>::sample_measure_branch(std::vector<reg_t> &samples,
// recover connected qubits
if (pos_measured - nqubits > 0) {
for (int_t i = 0; i < pos_measured - nqubits; i++) {
for (uint_t i = 0; i < pos_measured - nqubits; i++) {
for (int_t j = 0; j < qubits_sp_[i]->rank(); j++) {
if (qubits_sp_[i]->modes()[j] == modes_qubits_[i]) {
qubits_sp_[i]->modes()[j] = modes_qubits_sp_[i];
@ -1326,16 +1324,16 @@ void TensorNet<data_t>::sample_measure_branch(std::vector<reg_t> &samples,
}
}
}
for (int_t i = 0; i < measured_tensors.size(); i++)
for (uint_t i = 0; i < measured_tensors.size(); i++)
measured_tensors[i].reset();
delete contractor;
// 2nd loop: traverse branches
if (pos_measured - nqubits > 0) {
for (int_t ib = 0; ib < num_branches; ib++) {
for (uint_t ib = 0; ib < num_branches; ib++) {
if (shots[ib].size() > 0) {
reg_t measured_probs = input_measured_probs;
for (int_t i = 0; i < nqubits_branch; i++)
for (uint_t i = 0; i < nqubits_branch; i++)
measured_probs[pos_measured + i] = ((ib >> i) & 1);
sample_measure_branch(samples, shots[ib], sample_index[ib],
@ -1345,15 +1343,15 @@ void TensorNet<data_t>::sample_measure_branch(std::vector<reg_t> &samples,
}
} else {
// save samples
for (int_t ib = 0; ib < num_branches; ib++) {
for (uint_t ib = 0; ib < num_branches; ib++) {
if (shots[ib].size() > 0) {
reg_t sample = input_measured_probs;
for (int_t i = 0; i < nqubits_branch; i++)
for (uint_t i = 0; i < nqubits_branch; i++)
sample[pos_measured + i] = ((ib >> i) & 1);
for (int_t i = 0; i < shots[ib].size(); i++) {
for (uint_t i = 0; i < shots[ib].size(); i++) {
uint_t shot_id = shot_index[ib][i];
samples[shot_id] = sample;
for (int_t j = 0; j < nqubits; j++) {
for (uint_t j = 0; j < nqubits; j++) {
samples[shot_id][j] = ((sample_index[ib][i] >> j) & 1);
}
}
@ -1385,7 +1383,7 @@ double TensorNet<data_t>::expval_pauli(const reg_t &qubits,
mat_phase[3] = initial_phase;
// add Pauli ops to qubits
for (int_t i = 0; i < size; i++) {
for (uint_t i = 0; i < size; i++) {
cvector_t<data_t> mat(4, 0.0);
switch (pauli[size - 1 - i]) {
@ -1421,7 +1419,7 @@ double TensorNet<data_t>::expval_pauli(const reg_t &qubits,
}
// connect qubits not used for trace
for (int_t i = 0; i < num_qubits_; i++) {
for (uint_t i = 0; i < num_qubits_; i++) {
if (i != qubits[0]) {
for (int_t j = 0; j < qubits_sp_[i]->rank(); j++) {
if (qubits_sp_[i]->modes()[j] == modes_qubits_sp_[i]) {
@ -1454,7 +1452,7 @@ double TensorNet<data_t>::expval_pauli(const reg_t &qubits,
delete contractor;
// restore connected qubits
for (int_t i = 0; i < num_qubits_; i++) {
for (uint_t i = 0; i < num_qubits_; i++) {
if (i != qubits[0]) {
for (int_t j = 0; j < qubits_sp_[i]->rank(); j++) {
if (qubits_sp_[i]->modes()[j] == tmp_modes[i]) {
@ -1465,7 +1463,7 @@ double TensorNet<data_t>::expval_pauli(const reg_t &qubits,
}
}
for (int_t i = 0; i < pauli_tensors.size(); i++) {
for (uint_t i = 0; i < pauli_tensors.size(); i++) {
pauli_tensors[i].reset();
}
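
Note: the loops above are the core of this commit. Container sizes (`.size()`, `num_qubits_`) are unsigned, so a signed `int_t` counter makes every loop comparison mix signedness and trips -Wsign-compare. A minimal before/after sketch, assuming the project-wide `uint_t`/`int_t` aliases for 64-bit integers:

#include <cstdint>
#include <vector>

using uint_t = uint64_t; // assumed to match the repo's framework typedefs
using int_t = int64_t;

double sum(const std::vector<double> &v) {
  double total = 0.0;
  // for (int_t i = 0; i < v.size(); ++i)  // warns: signed vs unsigned
  for (uint_t i = 0; i < v.size(); ++i) // counter matches size_type width
    total += v[i];
  return total;
}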

View File

@ -84,6 +84,7 @@ protected:
uint_t tensor_size_;
uint_t additional_tensor_size_;
uint_t out_size_;
uint_t work_size_limit_;
uint_t work_size_;
uint_t sampling_buffer_size_;
@ -484,6 +485,12 @@ uint_t RawTensorData<data_t>::optimize_contraction(void) {
cutensornetStatus_t err;
cudaSetDevice(device_id_);
size_t freeMem, totalMem;
int nid = omp_get_num_threads();
HANDLE_CUDA_ERROR(cudaMemGetInfo(&freeMem, &totalMem));
work_size_limit_ = (freeMem / nid) * 0.9;
/*******************************
* Find "optimal" contraction order and slicing
*******************************/
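
`work_size_limit_` is now computed once here and handed to the optimizer below, rather than sizing the buffer ad hoc at setup time. A standalone sketch of the same budget arithmetic (the wrapper function is illustrative only; the per-thread split and the 0.9 safety margin mirror the values in this hunk):

#include <cuda_runtime.h>
#include <omp.h>
#include <cstdint>

// Budget the cuTensorNet workspace from currently free device memory,
// split across the OpenMP threads that may each run a contractor.
uint64_t workspace_budget(int device_id) {
  cudaSetDevice(device_id);
  size_t freeMem = 0, totalMem = 0;
  cudaMemGetInfo(&freeMem, &totalMem);
  int nid = omp_get_num_threads(); // 1 when called outside a parallel region
  return static_cast<uint64_t>((freeMem / nid) * 0.9);
}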
@ -510,7 +517,7 @@ uint_t RawTensorData<data_t>::optimize_contraction(void) {
cutensornetGetErrorString(err));
err = cutensornetContractionOptimize(hTensorNet_, tn_desc_, optimizer_config_,
work_size_, optimizer_info_);
work_size_limit_, optimizer_info_);
if (err != CUTENSORNET_STATUS_SUCCESS)
assert_error("cutensornetContractionOptimize",
cutensornetGetErrorString(err));
@ -540,27 +547,26 @@ void RawTensorData<data_t>::create_contraction_plan(bool use_autotune) {
assert_error("cutensornetCreateWorkspaceDescriptor",
cutensornetGetErrorString(err));
uint64_t requiredWorkspaceSize = 0;
err = cutensornetWorkspaceComputeSizes(hTensorNet_, tn_desc_, optimizer_info_,
work_desc_);
int64_t requiredWorkspaceSize = 0;
err = cutensornetWorkspaceComputeContractionSizes(
hTensorNet_, tn_desc_, optimizer_info_, work_desc_);
if (err != CUTENSORNET_STATUS_SUCCESS)
assert_error("cutensornetWorkspaceComputeSizes",
cutensornetGetErrorString(err));
err = cutensornetWorkspaceGetSize(
err = cutensornetWorkspaceGetMemorySize(
hTensorNet_, work_desc_, CUTENSORNET_WORKSIZE_PREF_MIN,
CUTENSORNET_MEMSPACE_DEVICE, &requiredWorkspaceSize);
CUTENSORNET_MEMSPACE_DEVICE, CUTENSORNET_WORKSPACE_SCRATCH,
&requiredWorkspaceSize);
if (err != CUTENSORNET_STATUS_SUCCESS)
assert_error("cutensornetWorkspaceGetSize", cutensornetGetErrorString(err));
if (work_size_ < requiredWorkspaceSize) {
throw std::runtime_error("ERROR : TensorNet::contractor required memory "
"size for workspace is not enough");
}
allocate_work(requiredWorkspaceSize);
err = cutensornetWorkspaceSet(
err = cutensornetWorkspaceSetMemory(
hTensorNet_, work_desc_, CUTENSORNET_MEMSPACE_DEVICE,
thrust::raw_pointer_cast(dev_work_.data()), work_size_);
CUTENSORNET_WORKSPACE_SCRATCH, thrust::raw_pointer_cast(dev_work_.data()),
work_size_);
if (err != CUTENSORNET_STATUS_SUCCESS)
assert_error("cutensornetWorkspaceSet", cutensornetGetErrorString(err));
@ -967,8 +973,6 @@ void TensorNetContractor_cuTensorNet<data_t>::allocate_additional_tensors(
template <typename data_t>
void TensorNetContractor_cuTensorNet<data_t>::set_additional_tensors(
const std::vector<std::shared_ptr<Tensor<data_t>>> &tensors) {
uint_t size = 0;
remove_additional_tensors();
num_additional_tensors_ = tensors.size();
@ -1021,10 +1025,6 @@ void TensorNetContractor_cuTensorNet<data_t>::set_output(
template <typename data_t>
void TensorNetContractor_cuTensorNet<data_t>::setup_contraction(
bool use_autotune) {
int nid = omp_get_num_threads();
cutensornetStatus_t err;
size_t freeMem, totalMem;
uint_t work_size;
// for MPI distribution
#ifdef AER_MPI
@ -1032,14 +1032,6 @@ void TensorNetContractor_cuTensorNet<data_t>::setup_contraction(
MPI_Comm_rank(MPI_COMM_WORLD, &myrank_);
#endif
// allocate work buffer on GPU
if (!tensor_data_[0].work_allocated()) {
cudaSetDevice(target_gpus_[0]);
HANDLE_CUDA_ERROR(cudaMemGetInfo(&freeMem, &totalMem));
work_size = (freeMem / nid) * 0.9;
tensor_data_[0].allocate_work(work_size);
}
num_devices_used_ = 1;
// setup first device
@ -1060,12 +1052,6 @@ void TensorNetContractor_cuTensorNet<data_t>::setup_contraction(
if (ns > 0) {
// setup for the device
if (!tensor_data_[i].work_allocated()) {
cudaSetDevice(target_gpus_[i]);
HANDLE_CUDA_ERROR(cudaMemGetInfo(&freeMem, &totalMem));
work_size = (freeMem / nid) * 0.9;
tensor_data_[i].allocate_work(work_size);
}
tensor_data_[i].copy_tensors_from_device(
tensor_data_[0]); // copy data from the first device
tensor_data_[i].create_contraction_descriptor(

View File

@ -37,6 +37,7 @@ using ResultItr = std::vector<ExperimentResult>::iterator;
template <class state_t>
class Executor : public CircuitExecutor::MultiStateExecutor<state_t> {
using Base = CircuitExecutor::MultiStateExecutor<state_t>;
using Base::sample_measure;
protected:
public:
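
The added using-declaration matters because this executor defines its own `sample_measure` overload further down: in C++, a derived-class declaration hides every base-class function with the same name, whatever the signature, and compilers flag the hidden virtuals (-Woverloaded-virtual). A minimal sketch of the rule with hypothetical names:

struct Base {
  virtual ~Base() = default;
  virtual void sample_measure(int shots) {}
};

struct Derived : Base {
  using Base::sample_measure; // re-expose the hidden base overload
  void sample_measure(double seed, int shots) {} // hides Base's otherwise
};

int main() {
  Derived d;
  d.sample_measure(10); // resolves only because of the using-declaration
}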
@ -148,7 +149,7 @@ Executor<state_t>::sample_measure_with_prob(CircuitExecutor::Branch &root,
uint_t nshots = root.num_shots();
reg_t shot_branch(nshots);
for (int_t i = 0; i < nshots; i++) {
for (uint_t i = 0; i < nshots; i++) {
shot_branch[i] = root.rng_shots()[i].rand_int(probs);
}
@ -182,11 +183,11 @@ void Executor<state_t>::measure_reset_update(CircuitExecutor::Branch &root,
root.branches()[i]->add_op_after_branch(op);
if (final_state >= 0 && final_state != i) {
Operations::Op op;
op.type = OpType::gate;
op.name = "mcx";
op.qubits = qubits;
root.branches()[i]->add_op_after_branch(op);
Operations::Op op2;
op2.type = OpType::gate;
op2.name = "mcx";
op2.qubits = qubits;
root.branches()[i]->add_op_after_branch(op2);
}
}
}
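
Renaming the inner `op` to `op2` clears a shadowing warning: the second `Operations::Op op;` was declared in a nested scope where an `op` was already live, so -Wshadow fired and the code was harder to read. A distilled sketch with a hypothetical `Op` type:

struct Op { const char *name = ""; };

void queue_reset(bool flip_needed) {
  Op op; // outer declaration, the measurement op
  op.name = "measure";
  if (flip_needed) {
    // Op op;   // -Wshadow: would hide the outer 'op'
    Op op2;     // distinct name keeps both objects unambiguous
    op2.name = "mcx";
  }
}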
@ -194,7 +195,7 @@ void Executor<state_t>::measure_reset_update(CircuitExecutor::Branch &root,
else {
// Diagonal matrix for projecting and renormalizing to measurement outcome
const size_t dim = 1ULL << qubits.size();
for (int_t i = 0; i < dim; i++) {
for (uint_t i = 0; i < dim; i++) {
cvector_t<double> mdiag(dim, 0.);
mdiag[i] = 1. / std::sqrt(meas_probs[i]);
@ -204,20 +205,20 @@ void Executor<state_t>::measure_reset_update(CircuitExecutor::Branch &root,
op.params = mdiag;
root.branches()[i]->add_op_after_branch(op);
if (final_state >= 0 && final_state != i) {
if (final_state >= 0 && final_state != (int_t)i) {
// build vectorized permutation matrix
cvector_t<double> perm(dim * dim, 0.);
perm[final_state * dim + i] = 1.;
perm[i * dim + final_state] = 1.;
for (size_t j = 0; j < dim; j++) {
if (j != final_state && j != i)
if (j != (size_t)final_state && j != i)
perm[j * dim + j] = 1.;
}
Operations::Op op;
op.type = OpType::matrix;
op.qubits = qubits;
op.mats.push_back(Utils::devectorize_matrix(perm));
root.branches()[i]->add_op_after_branch(op);
Operations::Op op2;
op2.type = OpType::matrix;
op2.qubits = qubits;
op2.mats.push_back(Utils::devectorize_matrix(perm));
root.branches()[i]->add_op_after_branch(op2);
}
}
}
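
Where a value is genuinely signed because -1 is a sentinel (here `final_state`, meaning "no deterministic outcome"), the counters stay unsigned and the comparison gets an explicit cast instead. A sketch of the guarded pattern, assuming the repo's `int_t` alias:

#include <cstdint>
using int_t = int64_t;

// final_state < 0 means no fixed outcome; i indexes measurement branches.
bool needs_permutation(int_t final_state, uint64_t i) {
  // test the sign first, then compare like with like
  return final_state >= 0 && final_state != (int_t)i;
}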
@ -230,7 +231,7 @@ void Executor<state_t>::apply_measure(CircuitExecutor::Branch &root,
rvector_t probs = sample_measure_with_prob(root, qubits);
// save result to cregs
for (int_t i = 0; i < probs.size(); i++) {
for (uint_t i = 0; i < probs.size(); i++) {
const reg_t outcome = Utils::int2reg(i, 2, qubits.size());
root.branches()[i]->creg().store_measure(outcome, cmemory, cregister);
}
@ -259,9 +260,9 @@ void Executor<state_t>::apply_initialize(CircuitExecutor::Branch &root,
auto apply_global_phase = [&tmp, params_in, global_phase](int_t i) {
tmp[i] = params_in[i] * global_phase;
};
Utils::apply_omp_parallel_for((qubits.size() > Base::omp_qubit_threshold_),
0, params_in.size(), apply_global_phase,
Base::parallel_state_update_);
Utils::apply_omp_parallel_for(
(qubits.size() > (uint_t)Base::omp_qubit_threshold_), 0,
params_in.size(), apply_global_phase, Base::parallel_state_update_);
}
const cvector_t<double> &params = tmp.empty() ? params_in : tmp;
if (qubits.size() == Base::num_qubits_) {
@ -283,7 +284,7 @@ void Executor<state_t>::apply_initialize(CircuitExecutor::Branch &root,
op.name = "initialize";
op.qubits = qubits;
op.params = params;
for (int_t i = 0; i < root.num_branches(); i++) {
for (uint_t i = 0; i < root.num_branches(); i++) {
root.branches()[i]->add_op_after_branch(op);
}
return; // initialization will be done in next call because of shot
@ -307,10 +308,8 @@ void Executor<state_t>::apply_kraus(CircuitExecutor::Branch &root,
// So we only compute probabilities for the first N-1 kraus operators
// and infer the probability of the last one from 1 - sum of the previous
double r;
double accum = 0.;
double p;
bool complete = false;
reg_t shot_branch;
uint_t nshots;
@ -320,7 +319,7 @@ void Executor<state_t>::apply_kraus(CircuitExecutor::Branch &root,
nshots = root.num_shots();
shot_branch.resize(nshots);
rshots.resize(nshots);
for (int_t i = 0; i < nshots; i++) {
for (uint_t i = 0; i < nshots; i++) {
shot_branch[i] = kmats.size() - 1;
rshots[i] = root.rng_shots()[i].rand(0., 1.);
}
@ -336,7 +335,7 @@ void Executor<state_t>::apply_kraus(CircuitExecutor::Branch &root,
// check if we need to apply this operator
pmats[j] = p;
for (int_t i = 0; i < nshots; i++) {
for (uint_t i = 0; i < nshots; i++) {
if (shot_branch[i] >= kmats.size() - 1) {
if (accum > rshots[i]) {
shot_branch[i] = j;
@ -345,7 +344,6 @@ void Executor<state_t>::apply_kraus(CircuitExecutor::Branch &root,
}
}
if (nshots_multiplied >= nshots) {
complete = true;
break;
}
}
@ -355,13 +353,13 @@ void Executor<state_t>::apply_kraus(CircuitExecutor::Branch &root,
root.creg() = Base::states_[root.state_index()].creg();
root.branch_shots(shot_branch, kmats.size());
for (int_t i = 0; i < kmats.size(); i++) {
for (uint_t i = 0; i < kmats.size(); i++) {
Operations::Op op;
op.type = OpType::matrix;
op.qubits = qubits;
op.mats.push_back(kmats[i]);
p = 1 / std::sqrt(pmats[i]);
for (int_t j = 0; j < op.mats[0].size(); j++)
for (uint_t j = 0; j < op.mats[0].size(); j++)
op.mats[0][j] *= p;
root.branches()[i]->add_op_after_branch(op);
}
@ -385,7 +383,7 @@ void Executor<state_t>::apply_save_density_matrix(CircuitExecutor::Branch &root,
}
std::vector<bool> copied(Base::num_bind_params_, false);
for (int_t i = 0; i < root.num_shots(); i++) {
for (uint_t i = 0; i < root.num_shots(); i++) {
uint_t ip = root.param_index(i);
if (!copied[ip]) {
(result + ip)
@ -408,7 +406,7 @@ void Executor<state_t>::apply_save_probs(CircuitExecutor::Branch &root,
std::vector<bool> copied(Base::num_bind_params_, false);
if (op.type == Operations::OpType::save_probs_ket) {
// Convert to ket dict
for (int_t i = 0; i < root.num_shots(); i++) {
for (uint_t i = 0; i < root.num_shots(); i++) {
uint_t ip = root.param_index(i);
if (!copied[ip]) {
(result + ip)
@ -420,7 +418,7 @@ void Executor<state_t>::apply_save_probs(CircuitExecutor::Branch &root,
}
}
} else {
for (int_t i = 0; i < root.num_shots(); i++) {
for (uint_t i = 0; i < root.num_shots(); i++) {
uint_t ip = root.param_index(i);
if (!copied[ip]) {
(result + ip)
@ -447,7 +445,7 @@ void Executor<state_t>::apply_save_statevector(CircuitExecutor::Branch &root,
if (last_op) {
const auto v = Base::states_[root.state_index()].move_to_vector();
for (int_t i = 0; i < root.num_shots(); i++) {
for (uint_t i = 0; i < root.num_shots(); i++) {
uint_t ip = root.param_index(i);
(result + ip)
->save_data_pershot(Base::states_[root.state_index()].creg(), key, v,
@ -455,7 +453,7 @@ void Executor<state_t>::apply_save_statevector(CircuitExecutor::Branch &root,
}
} else {
const auto v = Base::states_[root.state_index()].copy_to_vector();
for (int_t i = 0; i < root.num_shots(); i++) {
for (uint_t i = 0; i < root.num_shots(); i++) {
uint_t ip = root.param_index(i);
(result + ip)
->save_data_pershot(Base::states_[root.state_index()].creg(), key, v,
@ -478,7 +476,7 @@ void Executor<state_t>::apply_save_statevector_dict(
for (auto const &it : state_ket) {
result_state_ket[it.first] = it.second;
}
for (int_t i = 0; i < root.num_shots(); i++) {
for (uint_t i = 0; i < root.num_shots(); i++) {
uint_t ip = root.param_index(i);
(result + ip)
->save_data_pershot(
@ -496,14 +494,14 @@ void Executor<state_t>::apply_save_amplitudes(CircuitExecutor::Branch &root,
throw std::invalid_argument(
"Invalid save_amplitudes instructions (empty params).");
}
const int_t size = op.int_params.size();
const uint_t size = op.int_params.size();
if (op.type == Operations::OpType::save_amps) {
Vector<complex_t> amps(size, false);
for (int_t i = 0; i < size; ++i) {
for (uint_t i = 0; i < size; ++i) {
amps[i] =
Base::states_[root.state_index()].qreg().get_state(op.int_params[i]);
}
for (int_t i = 0; i < root.num_shots(); i++) {
for (uint_t i = 0; i < root.num_shots(); i++) {
uint_t ip = root.param_index(i);
(result + ip)
->save_data_pershot(
@ -512,12 +510,12 @@ void Executor<state_t>::apply_save_amplitudes(CircuitExecutor::Branch &root,
}
} else {
rvector_t amps_sq(size, 0);
for (int_t i = 0; i < size; ++i) {
for (uint_t i = 0; i < size; ++i) {
amps_sq[i] = Base::states_[root.state_index()].qreg().probability(
op.int_params[i]);
}
std::vector<bool> copied(Base::num_bind_params_, false);
for (int_t i = 0; i < root.num_shots(); i++) {
for (uint_t i = 0; i < root.num_shots(); i++) {
uint_t ip = root.param_index(i);
if (!copied[ip]) {
(result + ip)
@ -539,23 +537,23 @@ Executor<state_t>::sample_measure(state_t &state, const reg_t &qubits,
std::vector<double> rnds;
rnds.reserve(shots);
for (i = 0; i < shots; ++i)
for (i = 0; i < (int_t)shots; ++i)
rnds.push_back(rng[i].rand(0, 1));
std::vector<reg_t> samples = state.qreg().sample_measure(rnds);
std::vector<reg_t> ret(shots);
if (omp_get_num_threads() > 1) {
for (i = 0; i < shots; ++i) {
for (i = 0; i < (int_t)shots; ++i) {
ret[i].resize(qubits.size());
for (j = 0; j < qubits.size(); j++)
for (j = 0; j < (int_t)qubits.size(); j++)
ret[i][j] = samples[i][qubits[j]];
}
} else {
#pragma omp parallel for private(j)
for (i = 0; i < shots; ++i) {
for (i = 0; i < (int_t)shots; ++i) {
ret[i].resize(qubits.size());
for (j = 0; j < qubits.size(); j++)
for (j = 0; j < (int_t)qubits.size(); j++)
ret[i][j] = samples[i][qubits[j]];
}
}
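
The `(int_t)` casts in these loops are deliberate rather than leftover: MSVC implements OpenMP 2.0, which requires the index of a `#pragma omp parallel for` loop to be a signed integer, so an unsigned `shots` cannot drive the loop directly. A minimal sketch, assuming the repo's aliases:

#include <cstdint>
#include <vector>

using uint_t = uint64_t;
using int_t = int64_t;

std::vector<double> draw(uint_t shots) {
  std::vector<double> rnds(shots);
  // OpenMP 2.0 (MSVC) demands a signed loop index, hence the cast
#pragma omp parallel for
  for (int_t i = 0; i < (int_t)shots; ++i)
    rnds[i] = 0.5; // stand-in for the per-shot RNG draw
  return rnds;
}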

View File

@ -899,27 +899,26 @@ template <class tensor_net_t>
std::vector<reg_t> State<tensor_net_t>::sample_measure(const reg_t &qubits,
uint_t shots,
RngEngine &rng) {
int_t i, j;
// Generate flat register for storing
std::vector<double> rnds(shots);
for (i = 0; i < shots; ++i)
for (uint_t i = 0; i < shots; ++i)
rnds[i] = rng.rand(0, 1);
std::vector<reg_t> samples = BaseState::qreg_.sample_measure(rnds);
std::vector<reg_t> ret(shots);
if (omp_get_num_threads() > 1) {
for (i = 0; i < shots; ++i) {
for (uint_t i = 0; i < shots; ++i) {
ret[i].resize(qubits.size());
for (j = 0; j < qubits.size(); j++)
for (uint_t j = 0; j < qubits.size(); j++)
ret[i][j] = samples[i][qubits[j]];
}
} else {
#pragma omp parallel for private(j)
for (i = 0; i < shots; ++i) {
#pragma omp parallel for
for (int_t i = 0; i < (int_t)shots; ++i) {
ret[i].resize(qubits.size());
for (j = 0; j < qubits.size(); j++)
for (uint_t j = 0; j < qubits.size(); j++)
ret[i][j] = samples[i][qubits[j]];
}
}
@ -963,7 +962,7 @@ void State<tensor_net_t>::initialize_from_vector(
BaseState::qreg_.initialize();
reg_t qubits(BaseState::qreg_.num_qubits());
for (int_t i = 0; i < BaseState::qreg_.num_qubits(); i++)
for (uint_t i = 0; i < BaseState::qreg_.num_qubits(); i++)
qubits[i] = i;
BaseState::qreg_.initialize_component(qubits, params);
}

View File

@ -84,14 +84,14 @@ void Executor<state_t>::set_config(const Config &config) {
template <class state_t>
void Executor<state_t>::initialize_qreg(uint_t num_qubits) {
int_t iChunk;
uint_t iChunk;
for (iChunk = 0; iChunk < Base::states_.size(); iChunk++) {
Base::states_[iChunk].qreg().set_num_qubits(Base::chunk_bits_);
}
if (Base::chunk_omp_parallel_ && Base::num_groups_ > 1) {
#pragma omp parallel for private(iChunk)
for (int_t ig = 0; ig < Base::num_groups_; ig++) {
for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
for (iChunk = Base::top_state_of_group_[ig];
iChunk < Base::top_state_of_group_[ig + 1]; iChunk++) {
uint_t irow, icol;

View File

@ -369,7 +369,6 @@ void State<unitary_matrix_t>::initialize_qreg(uint_t num_qubits,
template <class unitary_matrix_t>
void State<unitary_matrix_t>::initialize_omp() {
uint_t i;
BaseState::qreg_.set_omp_threshold(omp_qubit_threshold_);
if (BaseState::threads_ > 0)
BaseState::qreg_.set_omp_threads(
@ -414,7 +413,7 @@ void State<unitary_matrix_t>::apply_gate(const Operations::Op &op) {
}
if (qubits_out.size() > 0) {
uint_t mask = 0;
for (int i = 0; i < qubits_out.size(); i++) {
for (uint_t i = 0; i < qubits_out.size(); i++) {
mask |= (1ull << (qubits_out[i] - BaseState::qreg_.num_qubits()));
}
if ((BaseState::qreg_.chunk_index() & mask) == mask) {

View File

@ -237,7 +237,6 @@ void UnitaryMatrix<data_t>::initialize() {
// Zero the underlying vector
BaseVector::zero();
// Set to be identity matrix
const int_t nrows = rows_; // end for k loop
auto initialize_proc = [this](int_t i) {
BaseVector::data_[i * (rows_ + 1)] = 1.0;
};
@ -261,7 +260,7 @@ void UnitaryMatrix<data_t>::initialize_from_matrix(
").");
}
auto initialize_proc = [this, &mat](int_t row) {
for (int_t col = 0; col < rows_; ++col) {
for (uint_t col = 0; col < rows_; ++col) {
BaseVector::data_[row + rows_ * col] = mat(row, col);
}
};

View File

@ -212,13 +212,11 @@ UnitaryMatrixThrust<data_t>::copy_to_matrix() const {
cvector_t<data_t> qreg = BaseVector::vector();
int_t i;
uint_t irow, icol;
#pragma omp parallel for private( \
i, irow, icol) if (BaseVector::num_qubits_ > BaseVector::omp_threshold_ && \
BaseVector::omp_threads_ > 1) \
#pragma omp parallel for if (BaseVector::num_qubits_ > \
BaseVector::omp_threshold_ && \
BaseVector::omp_threads_ > 1) \
num_threads(BaseVector::omp_threads_)
for (i = 0; i < csize; i++) {
for (int_t i = 0; i < (int_t)csize; i++) {
ret[i] = qreg[i];
}
return ret;
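
Declaring the counter inside the `for` statement makes it implicitly private to each thread, which lets the `private(i, irow, icol)` clause go away entirely, along with the warning about listing variables the loop no longer uses. The two equivalent forms, sketched:

void copy_row(const double *src, double *dst, long long n) {
  // old: shared counter plus an explicit data-sharing clause
  //   long long i;
  //   #pragma omp parallel for private(i)
  // new: a loop-scoped declaration is private by construction
#pragma omp parallel for
  for (long long i = 0; i < n; ++i)
    dst[i] = src[i];
}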

View File

@ -100,7 +100,7 @@ void BatchConverter::optimize_circuit(Circuit &circ, Noise::NoiseModel &noise,
const opset_t &allowed_opset,
ExperimentResult &result) const {
// convert operations for batch shots execution
for (int_t i = 0; i < circ.ops.size(); i++) {
for (uint_t i = 0; i < circ.ops.size(); i++) {
if (circ.ops[i].has_bind_params) {
if (circ.ops[i].type == Operations::OpType::gate) {
gate_to_matrix(circ.ops[i], circ.num_bind_params);
@ -108,8 +108,8 @@ void BatchConverter::optimize_circuit(Circuit &circ, Noise::NoiseModel &noise,
// convert matrix to cvector_t in params
uint_t matrix_size = circ.ops[i].mats[0].size();
circ.ops[i].params.resize(matrix_size * circ.num_bind_params);
for (int_t j = 0; j < circ.num_bind_params; j++) {
for (int_t k = 0; k < matrix_size; k++)
for (uint_t j = 0; j < circ.num_bind_params; j++) {
for (uint_t k = 0; k < matrix_size; k++)
circ.ops[i].params[j * matrix_size + k] = circ.ops[i].mats[j][k];
}
circ.ops[i].mats.clear();
@ -120,7 +120,7 @@ void BatchConverter::optimize_circuit(Circuit &circ, Noise::NoiseModel &noise,
// convert global phase to diagonal matrix
if (circ.global_phase_for_params.size() == circ.num_bind_params) {
bool has_global_phase = false;
for (int_t j = 0; j < circ.num_bind_params; j++) {
for (uint_t j = 0; j < circ.num_bind_params; j++) {
if (!Linalg::almost_equal(circ.global_phase_for_params[j], 0.0)) {
has_global_phase = true;
break;
@ -132,7 +132,7 @@ void BatchConverter::optimize_circuit(Circuit &circ, Noise::NoiseModel &noise,
phase_op.type = Operations::OpType::diagonal_matrix;
phase_op.has_bind_params = true;
phase_op.params.resize(2 * circ.num_bind_params);
for (int_t j = 0; j < circ.num_bind_params; j++) {
for (uint_t j = 0; j < circ.num_bind_params; j++) {
auto t = std::exp(complex_t(0.0, circ.global_phase_for_params[j]));
phase_op.params[j * 2] = t;
phase_op.params[j * 2 + 1] = t;
@ -173,64 +173,64 @@ void BatchConverter::gate_to_matrix(Operations::Op &op,
auto store_matrix = [&matrix_array, matrix_size](int_t iparam,
cvector_t mat) {
for (int_t j = 0; j < matrix_size; j++)
for (uint_t j = 0; j < matrix_size; j++)
matrix_array[iparam * matrix_size + j] = mat[j];
};
switch (it->second) {
case ParamGates::mcr:
for (int_t i = 0; i < num_params; i++)
for (uint_t i = 0; i < num_params; i++)
store_matrix(i,
Linalg::VMatrix::r(op.params[i * 2], op.params[i * 2 + 1]));
break;
case ParamGates::mcrx:
for (int_t i = 0; i < num_params; i++)
for (uint_t i = 0; i < num_params; i++)
store_matrix(i, Linalg::VMatrix::rx(std::real(op.params[i])));
break;
case ParamGates::mcry:
for (int_t i = 0; i < num_params; i++)
for (uint_t i = 0; i < num_params; i++)
store_matrix(i, Linalg::VMatrix::ry(std::real(op.params[i])));
break;
case ParamGates::mcrz:
for (int_t i = 0; i < num_params; i++)
for (uint_t i = 0; i < num_params; i++)
store_matrix(i, Linalg::VMatrix::rz_diag(std::real(op.params[i])));
break;
case ParamGates::rxx:
for (int_t i = 0; i < num_params; i++)
for (uint_t i = 0; i < num_params; i++)
store_matrix(i, Linalg::VMatrix::rxx(std::real(op.params[i])));
break;
case ParamGates::ryy:
for (int_t i = 0; i < num_params; i++)
for (uint_t i = 0; i < num_params; i++)
store_matrix(i, Linalg::VMatrix::ryy(std::real(op.params[i])));
break;
case ParamGates::rzz:
for (int_t i = 0; i < num_params; i++)
for (uint_t i = 0; i < num_params; i++)
store_matrix(i, Linalg::VMatrix::rzz_diag(std::real(op.params[i])));
break;
case ParamGates::rzx:
for (int_t i = 0; i < num_params; i++)
for (uint_t i = 0; i < num_params; i++)
store_matrix(i, Linalg::VMatrix::rzx(std::real(op.params[i])));
break;
case ParamGates::mcu3:
for (int_t i = 0; i < num_params; i++)
for (uint_t i = 0; i < num_params; i++)
store_matrix(i, Linalg::VMatrix::u3(std::real(op.params[i * 3]),
std::real(op.params[i * 3 + 1]),
std::real(op.params[i * 3 + 2])));
break;
case ParamGates::mcu:
for (int_t i = 0; i < num_params; i++)
for (uint_t i = 0; i < num_params; i++)
store_matrix(i, Linalg::VMatrix::u4(std::real(op.params[i * 4]),
std::real(op.params[i * 4 + 1]),
std::real(op.params[i * 4 + 2]),
std::real(op.params[i * 4 + 3])));
break;
case ParamGates::mcu2:
for (int_t i = 0; i < num_params; i++)
for (uint_t i = 0; i < num_params; i++)
store_matrix(i, Linalg::VMatrix::u2(std::real(op.params[i * 2]),
std::real(op.params[i * 2 + 1])));
break;
case ParamGates::mcp:
for (int_t i = 0; i < num_params; i++)
for (uint_t i = 0; i < num_params; i++)
store_matrix(i, Linalg::VMatrix::phase_diag(std::real(op.params[i])));
break;
default:

View File

@ -68,16 +68,16 @@ public:
void set_num_processes(int np) { num_processes_ = np; }
protected:
mutable int block_bits_; // qubits less than this will be blocked
mutable int qubits_;
mutable uint_t block_bits_; // qubits less than this will be blocked
mutable uint_t qubits_;
mutable reg_t qubitMap_;
mutable reg_t qubitSwapped_;
mutable bool blocking_enabled_;
mutable bool sample_measure_ = false;
mutable bool restore_qubit_map_ = false;
int memory_blocking_bits_ = 0;
uint_t memory_blocking_bits_ = 0;
bool density_matrix_ = false;
int num_processes_ = 1;
uint_t num_processes_ = 1;
bool block_circuit(Circuit &circ, bool doSwap) const;
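
The same signedness discipline is applied to data members here: quantities that are never negative and are compared against unsigned sizes (`block_bits_`, `qubits_`, `memory_blocking_bits_`, `num_processes_`) become `uint_t`. A sketch of the effect on a hypothetical reduced class:

#include <cstdint>
#include <vector>
using uint_t = uint64_t;

struct Blocking {
  uint_t block_bits_ = 0; // was int: size() comparisons mixed signedness
  bool enough(const std::vector<uint_t> &blockedQubits) const {
    return blockedQubits.size() >= block_bits_; // now like-for-like
  }
};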
@ -150,7 +150,6 @@ void CacheBlocking::set_blocking(int bits, size_t min_memory, uint_t n_place,
size_t complex_size, bool is_matrix) {
int chunk_bits = bits;
uint_t scale = is_matrix ? 2 : 1;
size_t size;
// get largest possible chunk bits
while ((complex_size << (scale * chunk_bits)) > min_memory) {
@ -215,7 +214,7 @@ void CacheBlocking::optimize_circuit(Circuit &circ, Noise::NoiseModel &noise,
// loop over operations to find max number of parameters for cross-qubits
// operations
int_t max_params = 1;
uint_t max_params = 1;
for (uint_t i = 0; i < circ.ops.size(); i++) {
if (is_blockable_operation(circ.ops[i]) &&
is_cross_qubits_op(circ.ops[i])) {
@ -302,7 +301,7 @@ void CacheBlocking::define_blocked_qubits(std::vector<Operations::Op> &ops,
reg_t &blockedQubits,
bool crossQubitOnly) const {
uint_t i, j, iq;
int nq, nb;
uint_t nq;
bool exist;
for (i = 0; i < ops.size(); i++) {
if (blockedQubits.size() >= block_bits_)
@ -384,7 +383,7 @@ bool CacheBlocking::can_reorder(
}
bool CacheBlocking::block_circuit(Circuit &circ, bool doSwap) const {
uint_t i, n;
uint_t n;
std::vector<Operations::Op> out;
std::vector<Operations::Op> queue;
std::vector<Operations::Op> queue_next;
@ -523,11 +522,8 @@ uint_t CacheBlocking::add_ops(std::vector<Operations::Op> &ops,
std::vector<Operations::Op> &queue, bool doSwap,
bool first, bool crossQubitOnly) const {
uint_t i, j, iq;
int nqubitUsed = 0;
reg_t blockedQubits;
int nq;
bool exist;
uint_t nq;
uint_t pos_begin, num_gates_added;
bool end_block_inserted;
@ -807,7 +803,7 @@ bool CacheBlocking::split_pauli(const Operations::Op &op,
reg_t qubits_out_chunk;
std::string pauli_in_chunk;
std::string pauli_out_chunk;
int_t i, j, n;
uint_t i, j, n;
bool inside;
// get inner/outer chunk pauli string
@ -857,7 +853,7 @@ bool CacheBlocking::split_op(const Operations::Op &op,
std::vector<Operations::Op> &queue) const {
reg_t qubits_in_chunk;
reg_t qubits_out_chunk;
int_t i, j, n;
uint_t i, j, n;
bool inside;
n = op.qubits.size();

View File

@ -67,7 +67,7 @@ public:
}
} else {
// loop for runtime parameter binding
for (int_t p = 0; p < num_params_; p++) {
for (uint_t p = 0; p < num_params_; p++) {
std::vector<op_t> ops;
ops.reserve(fusioned_ops.size());
for (auto &op : fusioned_ops) {
@ -449,18 +449,18 @@ bool NQubitFusion<N>::aggregate_operations(oplist_t &ops,
std::vector<std::pair<uint_t, std::vector<op_t>>> targets;
bool fused = false;
for (uint_t op_idx = fusion_start; op_idx < fusion_end; ++op_idx) {
for (int op_idx = fusion_start; op_idx < fusion_end; ++op_idx) {
// skip operations to be ignored
if (!method.can_apply(ops[op_idx], max_fused_qubits) ||
ops[op_idx].type == optype_t::nop)
continue;
// 1. find a N-qubit operation
if (ops[op_idx].qubits.size() != N)
if (ops[op_idx].qubits.size() != N) {
continue;
}
std::vector<uint_t> fusing_op_idxs = {op_idx};
std::vector<uint_t> fusing_op_idxs = {(uint_t)op_idx};
std::vector<uint_t> fusing_qubits;
fusing_qubits.insert(fusing_qubits.end(), ops[op_idx].qubits.begin(),
ops[op_idx].qubits.end());
@ -895,14 +895,14 @@ void Fusion::optimize_circuit(Circuit &circ, Noise::NoiseModel &noise,
if (parallelization_ > 1) {
#pragma omp parallel for num_threads(parallelization_)
for (int_t i = 0; i < parallelization_; i++) {
for (int_t i = 0; i < (int_t)parallelization_; i++) {
int_t start = unit * i;
int_t end = std::min(start + unit, (int_t)circ.ops.size());
optimize_circuit(circ, noise, allowed_opset, start, end, fuser,
method);
}
} else {
for (int_t i = 0; i < parallelization_; i++) {
for (uint_t i = 0; i < parallelization_; i++) {
int_t start = unit * i;
int_t end = std::min(start + unit, (int_t)circ.ops.size());
optimize_circuit(circ, noise, allowed_opset, start, end, fuser,