Handle custom operations with overlapping names in QPY (#11646)

* Handle custom operations with overlapping names in QPY

This commit fixes a longstanding bug and limitation in QPY around the
use of custom operations. When QPY encounters a non-standard (meaning not
in Qiskit itself, or a direct instance of Gate or Instruction)
instruction it is not able to represent the exact instance perfectly in
that payload. This is because it would require more code that represents
those classes or data not contained inside Qiskit itself, which would be
an opportunity for arbitrary code execution which is something QPY was
designed to prevent (as it's a limitation with pickle serialization). So
to serialize these objects in a circuit, QPY stores as much data as it
can extract from the data model from the Instruction and Gate classes
and builds a table of custom instructions that will be recreated as
Instruction or Gate instances on deserialization. In all previous QPY
format versions that table was keyed on the .name attribute of the
custom instructions in the circuit, the thinking being the name is a
unique op code during circuit compilation and if there are multiple
circuit elements with the same name they should be the same object. With
this assumption it enabled the QPY payloads generated with repeated
custom instructions to be smaller and more efficient because we don't
need to store potentially repeated data. While that assumption is true
during most of compilation it was ignoring that for a bare quantum
circuit (or during the initial stages of compilation) this isn't
necessarily true and there are compiler passes that canonicalize custom
operations prior to that being true. To make it worse this assumption
was causing conflicts in the QPY payload and cause an inaccurate
reproduction of the original circuit when deserializing a QPY payload in
many cases when custom gates were used.

This commit fixes this limitation by introducing a new QPY format
version 11, which changes the key used in the custom instruction table
to instead of being just the name to `{name}_{uuid}` where each instance
of a custom instruction has a different uuid value. This means that
there are never any overlapping definitions in the table and we don't
have to worry about the case where two custom instructions have the same
name.

Fixes #8941

* Connect version option from interface to writers

* Add negative test for bug in qpy version 10

* Unify uuid logic in version 11
This commit is contained in:
Matthew Treinish 2024-01-31 09:16:43 -05:00 committed by GitHub
parent e0b435ec19
commit 588f2df46b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 153 additions and 17 deletions

View File

@ -162,6 +162,16 @@ There is a circuit payload for each circuit (where the total number is dictated
by ``num_circuits`` in the file header). There is no padding between the
circuits in the data.
.. _qpy_version_11:
Version 11
==========
Version 11 is identical to Version 10 except that for names in the CUSTOM_INSTRUCTION blocks
have a suffix of the form ``"_{uuid_hex}"`` where ``uuid_hex`` is a uuid
hexadecimal string such as returned by :attr:`.UUID.hex`. For example:
``"b3ecab5b4d6a4eb6bc2b2dbf18d83e1e"``.
.. _qpy_version_10:
Version 10

View File

@ -373,6 +373,9 @@ def _parse_custom_operation(
) = custom_operations[gate_name]
else:
type_str, num_qubits, num_clbits, definition = custom_operations[gate_name]
# Strip the trailing "_{uuid}" from the gate name if the version >=11
if version >= 11:
gate_name = "_".join(gate_name.split("_")[:-1])
type_key = type_keys.CircuitInstruction(type_str)
if type_key == type_keys.CircuitInstruction.INSTRUCTION:
@ -572,7 +575,7 @@ def _dumps_instruction_parameter(param, index_map, use_symengine):
# pylint: disable=too-many-boolean-expressions
def _write_instruction(file_obj, instruction, custom_operations, index_map, use_symengine):
def _write_instruction(file_obj, instruction, custom_operations, index_map, use_symengine, version):
if isinstance(instruction.operation, Instruction):
gate_class_name = instruction.operation.base_class.__name__
else:
@ -591,15 +594,17 @@ def _write_instruction(file_obj, instruction, custom_operations, index_map, use_
or isinstance(instruction.operation, library.BlueprintCircuit)
):
gate_class_name = instruction.operation.name
if version >= 11:
# Assign a uuid to each instance of a custom operation
gate_class_name = f"{gate_class_name}_{uuid.uuid4().hex}"
# ucr*_dg gates can have different numbers of parameters,
# the uuid is appended to avoid storing a single definition
# in circuits with multiple ucr*_dg gates.
if instruction.operation.name in ["ucrx_dg", "ucry_dg", "ucrz_dg"]:
gate_class_name += "_" + str(uuid.uuid4())
elif instruction.operation.name in {"ucrx_dg", "ucry_dg", "ucrz_dg"}:
gate_class_name = f"{gate_class_name}_{uuid.uuid4()}"
if gate_class_name not in custom_operations:
custom_operations[gate_class_name] = instruction.operation
custom_operations_list.append(gate_class_name)
custom_operations[gate_class_name] = instruction.operation
custom_operations_list.append(gate_class_name)
elif gate_class_name == "ControlledGate":
# controlled gates can have the same name but different parameter
@ -724,7 +729,7 @@ def _write_pauli_evolution_gate(file_obj, evolution_gate):
file_obj.write(synth_data)
def _write_custom_operation(file_obj, name, operation, custom_operations, use_symengine):
def _write_custom_operation(file_obj, name, operation, custom_operations, use_symengine, version):
type_key = type_keys.CircuitInstruction.assign(operation)
has_definition = False
size = 0
@ -768,6 +773,7 @@ def _write_custom_operation(file_obj, name, operation, custom_operations, use_sy
custom_operations,
{},
use_symengine,
version,
)
base_gate_raw = base_gate_buffer.getvalue()
name_raw = name.encode(common.ENCODE)
@ -1002,7 +1008,9 @@ def _read_layout_v2(file_obj, circuit):
circuit._layout._output_qubit_list = circuit.qubits
def write_circuit(file_obj, circuit, metadata_serializer=None, use_symengine=False):
def write_circuit(
file_obj, circuit, metadata_serializer=None, use_symengine=False, version=common.QPY_VERSION
):
"""Write a single QuantumCircuit object in the file like object.
Args:
@ -1016,6 +1024,7 @@ def write_circuit(file_obj, circuit, metadata_serializer=None, use_symengine=Fal
native mechanism. This is a faster serialization alternative, but not supported in all
platforms. Please check that your target platform is supported by the symengine library
before setting this option, as it will be required by qpy to deserialize the payload.
version (int): The QPY format version to use for serializing this circuit
"""
metadata_raw = json.dumps(
circuit.metadata, separators=(",", ":"), cls=metadata_serializer
@ -1056,7 +1065,7 @@ def write_circuit(file_obj, circuit, metadata_serializer=None, use_symengine=Fal
index_map["c"] = {bit: index for index, bit in enumerate(circuit.clbits)}
for instruction in circuit.data:
_write_instruction(
instruction_buffer, instruction, custom_operations, index_map, use_symengine
instruction_buffer, instruction, custom_operations, index_map, use_symengine, version
)
with io.BytesIO() as custom_operations_buffer:
@ -1068,7 +1077,12 @@ def write_circuit(file_obj, circuit, metadata_serializer=None, use_symengine=Fal
operation = custom_operations[name]
new_custom_operations.extend(
_write_custom_operation(
custom_operations_buffer, name, operation, custom_operations, use_symengine
custom_operations_buffer,
name,
operation,
custom_operations,
use_symengine,
version,
)
)

View File

@ -574,7 +574,9 @@ def read_schedule_block(file_obj, version, metadata_deserializer=None, use_symen
return block
def write_schedule_block(file_obj, block, metadata_serializer=None, use_symengine=False):
def write_schedule_block(
file_obj, block, metadata_serializer=None, use_symengine=False, version=common.QPY_VERSION
): # pylint: disable=unused-argument
"""Write a single ScheduleBlock object in the file like object.
Args:
@ -588,6 +590,7 @@ def write_schedule_block(file_obj, block, metadata_serializer=None, use_symengin
native mechanism. This is a faster serialization alternative, but not supported in all
platforms. Please check that your target platform is supported by the symengine library
before setting this option, as it will be required by qpy to deserialize the payload.
version (int): The QPY format version to use for serializing this circuit block
Raises:
TypeError: If any of the instructions is invalid data format.
"""

View File

@ -20,7 +20,7 @@ import struct
from qiskit.qpy import formats
QPY_VERSION = 10
QPY_VERSION = 11
QPY_COMPATIBILITY_VERSION = 10
ENCODE = "utf8"

View File

@ -199,7 +199,11 @@ def dump(
for program in programs:
writer(
file_obj, program, metadata_serializer=metadata_serializer, use_symengine=use_symengine
file_obj,
program,
metadata_serializer=metadata_serializer,
use_symengine=use_symengine,
version=version,
)

View File

@ -0,0 +1,19 @@
---
upgrade:
- |
The latest format version of QPY is now :ref:`qpy_version_11` and this
is what is emitted by default when running :func:`.qpy.dump`.
fixes:
- |
Fixed an issue with the QPY serialization when a :class:`.QuantumCircuit`
contained multiple custom instructions instances that have the same
:attr:`~.Instruction.name` attribute. In QPY format versions before
:ref:`qpy_version_11` the QPY payload did not differentiate between
these instances and would only serialize the properties of the first
instance in a circuit. This could potentially cause an incorrect
deserialization if the other properties of the custom instruction were
different but the names were the same. This has been fixed in
QPY :ref:`qpy_version_11` so that each instance of a custom instruction
is serialized individually and there will no longer be a potential
conflict with overlapping names.
Fixes `#8941 <https://github.com/Qiskit/qiskit/issues/8941>`__

View File

@ -20,7 +20,6 @@ import pathlib
import pickle
import shutil
import tempfile
import unittest
import ddt
@ -657,8 +656,6 @@ class TestGateDefinition(QiskitTestCase):
loaded = qpy.load(fptr)[0]
self.assertEqual(loaded, qc)
# See https://github.com/Qiskit/qiskit-terra/issues/8941
@unittest.expectedFailure
def test_qpy_double_call_roundtrip(self):
program = """
include "qelib1.inc";

View File

@ -17,7 +17,7 @@ import struct
from ddt import ddt, data
from qiskit.circuit import QuantumCircuit, QuantumRegister, Qubit
from qiskit.circuit import QuantumCircuit, QuantumRegister, Qubit, Parameter, Gate
from qiskit.providers.fake_provider import FakeHanoi, FakeSherbrooke
from qiskit.exceptions import QiskitError
from qiskit.qpy import dump, load, formats, QPY_COMPATIBILITY_VERSION
@ -146,6 +146,45 @@ class TestLayout(QpyCircuitTestCase):
qc._layout = TranspileLayout(None, None, None)
self.assert_roundtrip_equal(qc)
def test_overlapping_definitions(self):
"""Test serialization of custom gates with overlapping definitions."""
class MyParamGate(Gate):
"""Custom gate class with a parameter."""
def __init__(self, phi):
super().__init__("my_gate", 1, [phi])
def _define(self):
qc = QuantumCircuit(1)
qc.rx(self.params[0], 0)
self.definition = qc
theta = Parameter("theta")
two_theta = 2 * theta
qc = QuantumCircuit(1)
qc.append(MyParamGate(1.1), [0])
qc.append(MyParamGate(1.2), [0])
qc.append(MyParamGate(3.14159), [0])
qc.append(MyParamGate(theta), [0])
qc.append(MyParamGate(two_theta), [0])
with io.BytesIO() as qpy_file:
dump(qc, qpy_file)
qpy_file.seek(0)
new_circ = load(qpy_file)[0]
# Custom gate classes are lowered to Gate to avoid arbitrary code
# execution on deserialization. To compare circuit equality we
# need to go instruction by instruction and check that they're
# equivalent instead of doing a circuit equality check
for new_inst, old_inst in zip(new_circ.data, qc.data):
new_gate = new_inst.operation
old_gate = old_inst.operation
self.assertIsInstance(new_gate, Gate)
self.assertEqual(new_gate.name, old_gate.name)
self.assertEqual(new_gate.params, old_gate.params)
self.assertEqual(new_gate.definition, old_gate.definition)
@data(0, 1, 2, 3)
def test_custom_register_name(self, opt_level):
"""Test layout preserved with custom register names."""
@ -194,6 +233,56 @@ class TestLayout(QpyCircuitTestCase):
)
self.assertEqual(tqc.layout.final_layout, new_circuit.layout.final_layout)
class TestVersionArg(QpyCircuitTestCase):
"""Test explicitly setting a qpy version in dump()."""
def test_custom_gate_name_overlap_persists_with_minimum_version(self):
"""Assert the fix in version 11 doesn't get used if an older version is request."""
class MyParamGate(Gate):
"""Custom gate class with a parameter."""
def __init__(self, phi):
super().__init__("my_gate", 1, [phi])
def _define(self):
qc = QuantumCircuit(1)
qc.rx(self.params[0], 0)
self.definition = qc
theta = Parameter("theta")
two_theta = 2 * theta
qc = QuantumCircuit(1)
qc.append(MyParamGate(1.1), [0])
qc.append(MyParamGate(1.2), [0])
qc.append(MyParamGate(3.14159), [0])
qc.append(MyParamGate(theta), [0])
qc.append(MyParamGate(two_theta), [0])
with io.BytesIO() as qpy_file:
dump(qc, qpy_file, version=10)
qpy_file.seek(0)
new_circ = load(qpy_file)[0]
# Custom gate classes are lowered to Gate to avoid arbitrary code
# execution on deserialization. To compare circuit equality we
# need to go instruction by instruction and check that they're
# equivalent instead of doing a circuit equality check
first_gate = None
for new_inst, old_inst in zip(new_circ.data, qc.data):
new_gate = new_inst.operation
old_gate = old_inst.operation
self.assertIsInstance(new_gate, Gate)
self.assertEqual(new_gate.name, old_gate.name)
self.assertEqual(new_gate.params, old_gate.params)
if first_gate is None:
first_gate = new_gate
continue
# This is incorrect behavior. This test is explicitly validating
# that the version kwarg being set to 10 causes the buggy behavior
# on that version of qpy
self.assertEqual(new_gate.definition, first_gate.definition)
def test_invalid_version_value(self):
"""Assert we raise an error with an invalid version request."""
qc = QuantumCircuit(2)