// This code is a Qiskit project.
// (C) Copyright IBM 2024.
// This code is licensed under the Apache License, Version 2.0. You may
// obtain a copy of this license in the LICENSE file in the root directory
// of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
// Any modifications or derivative works of this code must retain this
// copyright notice, and modified files need to carry a notice indicating
// that they have been altered from the originals.
import { CheerioAPI, Cheerio, Element } from "cheerio";
import { unified } from "unified";
import rehypeParse from "rehype-parse";
import rehypeRemark from "rehype-remark";
import remarkStringify from "remark-stringify";
import { ApiType } from "./Metadata";
import {
} from "../stringUtils";
/**
 * Props that can be set on a generated MDX API component. Every field is
 * optional; createOpeningTag reads several of them when building the opening tag.
 */
export type ComponentProps = {
// NOTE(review): presumably the identifier of the documented API object - confirm.
id?: string;
// Type hint of an attribute or property. prepareAttributeOrPropertyProps derives a
// value of this name from the signature text between the colon and the equals sign.
attributeTypeHint?: string;
// Default value of an attribute or property. prepareAttributeOrPropertyProps derives
// a value of this name from the signature text after the equals sign.
attributeValue?: string;
// NOTE(review): presumably the link returned by prepareGitHubLink - confirm.
githubSourceLink?: string;
// Signature as an HTML string; createOpeningTag converts it to markdown.
rawSignature?: string;
// NOTE(review): presumably the modifiers text returned by getAndRemoveModifiers - confirm.
modifiers?: string;
// Raw signatures of any additional signatures, filled in by addExtraSignatures.
extraRawSignatures?: string[];
// Set to true by the prepare helpers when the page is dedicated to this API object.
isDedicatedPage?: boolean;
// Maps an API type to the tag name of the MDX component used for it (looked up in
// processMdxComponent). Several API types share one tag: exceptions use the class
// tag, properties use the attribute tag and methods use the function tag.
const APITYPE_TO_TAG: Record<string, string> = {
class: "class",
exception: "class",
attribute: "attribute",
property: "attribute",
function: "function",
method: "function",
/**
 * Builds the opening and closing tags of the MDX component for one API object.
 *
 * The first entry of `signatures` is treated as the main signature. The remaining
 * entries are prepared as well and attached to the main props through
 * addExtraSignatures.
 *
 * NOTE: `signatures` is mutated, its first element is removed with shift().
 *
 * @returns a tuple with the opening tag produced by createOpeningTag and the
 *   closing tag for the tag name that APITYPE_TO_TAG holds for `apiType`.
 */
export async function processMdxComponent(
$: CheerioAPI,
$main: Cheerio<any>,
signatures: Cheerio<Element>[],
$dl: Cheerio<any>,
priorApiType: ApiType | undefined,
apiType: ApiType,
id: string,
): Promise<[string, string]> {
const tagName = APITYPE_TO_TAG[apiType];
// The non-null assertion assumes the caller passes at least one signature.
const $firstSignature = signatures.shift()!;
const componentProps = prepareProps(
// Whatever is left in `signatures` after the shift is handled as an extra signature.
const extraProps = signatures.flatMap(
($overloadedSignature) =>
// Extra signatures receive `apiType` in the priorApiType position as well.
prepareProps($, $main, $overloadedSignature, $dl, apiType, apiType, id) ??
addExtraSignatures(componentProps, extraProps);
return [await createOpeningTag(tagName, componentProps), `</${tagName}>`];
// ------------------------------------------------------------------
// Prepare props for MDX components
// ------------------------------------------------------------------
/**
 * Prepares the props of one signature by dispatching on `apiType` to the
 * matching prepare function.
 *
 * @returns the props produced by the function registered for `apiType`.
 * @throws Error when `apiType` has no entry in the dispatch table.
 */
function prepareProps(
$: CheerioAPI,
$main: Cheerio<any>,
$child: Cheerio<Element>,
$dl: Cheerio<any>,
priorApiType: ApiType | undefined,
apiType: ApiType,
id: string,
): ComponentProps {
// These closures capture githubSourceLink, which is declared further down. They
// are only invoked by the final return statement of this function.
const prepClassOrException = () =>
prepareClassOrExceptionProps($, $child, $dl, githubSourceLink, id);
const prepFunction = () =>
prepareFunctionProps($, $child, $dl, githubSourceLink, id);
const prepMethod = () =>
prepareMethodProps($, $child, $dl, priorApiType, githubSourceLink, id);
const prepAttributeOrProperty = () =>
// Dispatch table from API type to the closure that prepares its props.
const preparePropsPerApiType: Record<string, () => ComponentProps> = {
class: prepClassOrException,
exception: prepClassOrException,
property: prepAttributeOrProperty,
attribute: prepAttributeOrProperty,
method: prepMethod,
function: prepFunction,
// The second argument tells prepareGitHubLink that the link belongs to a method,
// in which case it only returns links that contain a line-number anchor.
const githubSourceLink = prepareGitHubLink($child, apiType === "method");
// Remove the attributes and properties modifiers as we don't show their signatures,
// but we still use them to create their headers
if (apiType == "attribute" || apiType == "property") {
// Only em.property elements whose trimmed text equals the API type are removed.
findByText($, $child, "em.property", apiType).remove();
if (!(apiType in preparePropsPerApiType)) {
throw new Error(`Unhandled Python type: ${apiType}`);
return preparePropsPerApiType[apiType]();
/**
 * Prepares the props of a class or exception.
 *
 * When `id` ends with the text of the sibling h1 of `$dl` the page is treated as
 * dedicated to the class. Otherwise a header element marked with
 * data-header-type="class-header" is created from the last part of `id`.
 */
function prepareClassOrExceptionProps(
$: CheerioAPI,
$child: Cheerio<any>,
$dl: Cheerio<any>,
githubSourceLink: string | undefined,
id: string,
): ComponentProps {
// Modifiers are read before the signature HTML is captured below.
const modifiers = getAndRemoveModifiers($child);
const props = {
rawSignature: $child.html()!,
let pageHeading = $dl.siblings("h1").text();
// Manually created class pages like Qiskit 1.1+'s `QuantumCircuit`
// sometimes have ' class' in their h1.
pageHeading = removeSuffix(pageHeading, " class");
// The empty-heading check keeps a page without an h1 from counting as dedicated,
// since every string ends with the empty string.
if (id.endsWith(pageHeading) && pageHeading != "") {
// Page is already dedicated to the class
return {
isDedicatedPage: true,
// Otherwise a header is generated; its level comes from getHeaderLevel.
const headerLevel = getHeaderLevel($, $dl);
const name = getLastPartFromFullIdentifier(id);
const htag = `h${headerLevel}`;
$(`<${htag} data-header-type="class-header">${name}</${htag}>`).insertBefore(
return props;
/**
 * Prepares the props of a method.
 *
 * When `id` is set and there is no prior API type, the method is flagged as
 * having a dedicated page. When instead the signature element has an id
 * attribute, a header marked with data-header-type="method-header" is built
 * from the last part of `id`.
 */
function prepareMethodProps(
$: CheerioAPI,
$child: Cheerio<any>,
$dl: Cheerio<any>,
priorApiType: string | undefined,
githubSourceLink: string | undefined,
id: string,
): ComponentProps {
// Modifiers are read before the signature HTML is captured below.
const modifiers = getAndRemoveModifiers($child);
const props = {
rawSignature: $child.html()!,
const name = getLastPartFromFullIdentifier(id);
// An empty id skips both the dedicated-page check and the header creation.
if (id) {
if (!priorApiType) {
return {
isDedicatedPage: true,
} else if ($child.attr("id")) {
// The header level comes from getHeaderLevel.
const headerLevel = getHeaderLevel($, $dl);
const htag = `h${headerLevel}`;
`<${htag} data-header-type="method-header">${name}</${htag}>`,
return props;
/**
 * Prepares the props of an attribute or property.
 *
 * The text of the signature element, shaped as name [: type] [= value], is split
 * into the name, the type hint and the default value. When there is no prior API
 * type and `id` is set, the attribute is flagged as having a dedicated page;
 * otherwise a header marked with data-header-type="attribute-header" is built
 * from the parsed name.
 */
function prepareAttributeOrPropertyProps(
$: CheerioAPI,
$child: Cheerio<any>,
$dl: Cheerio<any>,
priorApiType: string | undefined,
githubSourceLink: string | undefined,
id: string,
): ComponentProps {
const text = $child.text();
// Index of the default value of the attribute
let equalIndex = text.indexOf("=");
// Without an equals sign the index points past the end, so the value slice is empty.
if (equalIndex == -1) {
equalIndex = text.length;
// Index of the attribute's type. The type should be
// found before the default value
let colonIndex = text.slice(0, equalIndex).indexOf(":");
// Without a colon the index points past the end, so the type hint slice is empty.
if (colonIndex == -1) {
colonIndex = text.length;
// The attributes have the following shape: name [: type] [= value]
// We skip the first character to leave off the `:` and the `=` in
// both type hint and default value
const name = text.slice(0, Math.min(colonIndex, equalIndex)).trim();
const attributeTypeHint = text
.slice(Math.min(colonIndex + 1, equalIndex), equalIndex)
const attributeValue = text.slice(equalIndex + 1, text.length).trim();
const props = {
if (!priorApiType && id) {
return {
isDedicatedPage: true,
// Else, the attribute is embedded on the class
const headerLevel = getHeaderLevel($, $dl);
const htag = `h${headerLevel}`;
`<${htag} data-header-type="attribute-header">${name}</${htag}>`,
return props;
/**
 * Prepares the props of a function.
 *
 * When `id` ends with the text of the sibling h1 of `$dl` the page is treated as
 * dedicated to the function. Otherwise a header element is created from the last
 * part of `id`.
 */
function prepareFunctionProps(
$: CheerioAPI,
$child: Cheerio<any>,
$dl: Cheerio<any>,
githubSourceLink: string | undefined,
id: string,
): ComponentProps {
// Modifiers are read before the signature HTML is captured below.
const modifiers = getAndRemoveModifiers($child);
const props = {
rawSignature: $child.html()!,
const pageHeading = $dl.siblings("h1").text();
// The empty-heading check keeps a page without an h1 from counting as dedicated,
// since every string ends with the empty string.
if (id.endsWith(pageHeading) && pageHeading != "") {
// Page is already dedicated to apiType; no heading needed
return {
isDedicatedPage: true,
const headerLevel = getHeaderLevel($, $dl);
const name = getLastPartFromFullIdentifier(id);
const htag = `h${headerLevel}`;
// NOTE(review): function headers are tagged with the "method-header" type, the
// same value prepareMethodProps uses - confirm this is intended.
$(`<${htag} data-header-type="method-header">${name}</${htag}>`).insertBefore(
return props;
// ------------------------------------------------------------------
// Generate MDX components
// ------------------------------------------------------------------
/**
 * Creates the opening tag of the API components. The function sets all possible
 * props values even if they are empty or undefined. All the props without a value
 * will be removed when generating the markdown file in `htmlToMd.ts`.
 */
export async function createOpeningTag(
tagName: string,
props: ComponentProps,
): Promise<string> {
// Both attribute props are optional, hence the optional chaining before replaceAll.
const attributeTypeHint = props.attributeTypeHint?.replaceAll(
const attributeValue = props.attributeValue?.replaceAll(
// rawSignature is asserted non-null here; htmlSignatureToMd returns an empty
// string when it receives an empty value.
const signature = await htmlSignatureToMd(props.rawSignature!);
// Each extra signature is converted to markdown one after the other and wrapped
// in double quotes so the entries can be joined into the extraSignatures prop.
const extraSignatures: string[] = [];
for (const sig of props.extraRawSignatures ?? []) {
extraSignatures.push(`"${await htmlSignatureToMd(sig!)}"`);
return `<${tagName}
extraSignatures='[${extraSignatures.join(", ")}]'
/**
 * Removes the original link from sphinx.ext.viewcode and returns the URL (the
 * `href` of that link) to use for our own link.
 *
 * This returns the URL, rather than directly inserting into the HTML, because the
 * insertion logic is most easily handled by the calling code.
 *
 * This function works the same regardless of whether the Sphinx build used
 * `sphinx.ext.viewcode` or `sphinx.ext.linkcode` because they have the same HTML
 * structure.
 *
 * If the link corresponds to a method, we only return a link if it has line
 * numbers included, which implies that the link came from `sphinx.ext.linkcode`
 * rather than `sphinx.ext.viewcode`. That's because the owning class will already
 * have a link to the relevant file; it's noisy and not helpful to repeat the same
 * link without line numbers for the individual methods.
 *
 * @param $child - element searched for a `.viewcode-link`; the enclosing anchor
 *   is removed from it when found.
 * @param isMethod - whether the link belongs to a method.
 * @returns the link target, or undefined when there is no link, the anchor has
 *   no href, or a method link carries no line-number anchor.
 */
export function prepareGitHubLink(
  $child: Cheerio<any>,
  isMethod: boolean,
): string | undefined {
  const originalLink = $child.find(".viewcode-link").closest("a");
  if (originalLink.length === 0) {
    return undefined;
  }

  // attr() reads the first matched anchor, so that same anchor is the one detached.
  const href: string | undefined = originalLink.attr("href");
  // Detach the Sphinx link so it does not end up in the signature HTML that the
  // callers capture afterwards.
  originalLink.first().remove();

  if (href === undefined) {
    return undefined;
  }
  return !isMethod || href.includes(".py#") ? href : undefined;
}
/**
 * Returns the descendants of `$main` that match `selector` and whose trimmed
 * text content is exactly `text`.
 */
export function findByText(
  $: CheerioAPI,
  $main: Cheerio<any>,
  selector: string,
  text: string,
): Cheerio<any> {
  const candidates = $main.find(selector);
  return candidates.filter((_index, element) => {
    const content = $(element).text().trim();
    return content === text;
  });
}
/**
 * Reads the modifiers of a signature and removes them from it.
 *
 * The modifiers are the `em.property` elements found inside `$child`.
 *
 * @param $child - signature element; its `em.property` descendants are removed.
 * @returns the trimmed text of the removed elements.
 */
function getAndRemoveModifiers($child: Cheerio<any>): string {
  const rawModifiers = $child.find("em.property");
  const modifiers = rawModifiers.text().trim();
  // Detach the modifier nodes so they are not part of the HTML the callers
  // capture from $child right after this call.
  rawModifiers.remove();
  return modifiers;
}
/**
 * Stores on `componentProps.extraRawSignatures` the `rawSignature` of the
 * entries in `extraRawSignatures`. Mutates `componentProps`.
 */
export function addExtraSignatures(
componentProps: ComponentProps,
extraRawSignatures: ComponentProps[],
): void {
componentProps.extraRawSignatures = [
// Entries without a rawSignature contribute nothing: the callback returns an
// empty array for them, which flatMap flattens away.
...extraRawSignatures.flatMap((sigProps) => sigProps.rawSignature ?? []),
/**
 * Converts the given signature HTML into markdown.
 */
export async function htmlSignatureToMd(
signatureHtml: string,
): Promise<string> {
// An empty signature short-circuits to an empty string.
if (!signatureHtml) {
return "";
// The signature is wrapped in a code element before it is converted.
const html = `<code>${signatureHtml}</code>`;
const file = await unified()
// Post-processing of the converted text: drop newlines, replace apostrophes with
// APOSTROPHE_HEX_CODE, escape double quotes, and strip one leading and one
// trailing backtick.
return String(file)
.replaceAll("\n", "")
.replaceAll("'", APOSTROPHE_HEX_CODE)
.replaceAll('"', '\\"')
.replace(/^`/, "")
.replace(/`$/, "");
/**
 * Computes the header level to use for the generated header of an API component.
 *
 * The level is one deeper than the prior header reported by getPriorHeaderLevel,
 * but never shallower than h3. When no usable prior header is found, h3 is used.
 *
 * @param $ - Cheerio API, forwarded to getPriorHeaderLevel.
 * @param $dl - component element, forwarded to getPriorHeaderLevel.
 * @returns the header level, between 3 and 6.
 * @throws Error when the prior header is an h6, since an h7 does not exist.
 */
function getHeaderLevel($: CheerioAPI, $dl: Cheerio<any>): number {
  // We don't allow the header to be h1 or h2 because it's too large design-wise for API components.
  // We try to ensure that the API docs are set up so there is always at least an h2 above the API
  // component, but this is not always the case, especially with historical API docs. That means that
  // we sometimes jump from h1 to h3. That's bad, but the tradeoff we're making to avoid using h2 for
  // API components.
  const minLevel = 3;

  const priorHeaderLevel = getPriorHeaderLevel($, $dl);
  if (!priorHeaderLevel) {
    return minLevel;
  }

  const priorLevel = Number(priorHeaderLevel);
  // The prior level can come from the tag name of an element that is not a header
  // (for example "iv" from a div). Treat that as having no prior header instead of
  // letting NaN leak into the generated tag name.
  if (!Number.isInteger(priorLevel)) {
    return minLevel;
  }
  if (priorLevel === 6) {
    throw new Error("API component cannot set non-existent header: <h7>");
  }
  return Math.max(minLevel, priorLevel + 1);
}
/**
 * Looks for the level of the header that precedes an API component.
 *
 * The siblings of `$dl` are inspected first; when none of them provides a header,
 * the element before the parent of the closest `class` ancestor is used.
 *
 * @returns the tag name without its first character (for an hN tag, the digit N
 *   as a string), or undefined when no element is found.
 */
function getPriorHeaderLevel(
$: CheerioAPI,
$dl: Cheerio<any>,
): string | undefined {
const siblings = $dl.siblings();
for (const sibling of siblings) {
const $sibling = $(sibling);
// NOTE(review): the comment below says generated headers are skipped, but no
// skip statement is visible at this point - confirm the intended control flow.
if ($sibling.data("header-type")) {
// A component usually has other components as siblings in the API docs with their respective
// headers previously created by this script. We need to skip the generated headers to avoid cases
// where we have multiple methods, attributes, or classes at the same level. Components nested in
// a class should search for the previous header in a parent node.
const tagName = $sibling.get(0)?.tagName;
// NOTE(review): the pattern is not anchored, so it matches any tag name that
// contains an h followed by a digit from 1 to 6.
if (tagName?.match(/h[1-6]/)) {
return tagName.substring(1);
// If there's no header among the siblings, we look for the closest inline class in some ancestor node.
// The parent of a component is always a <div>, and the previous element of that <div> is the header
// we are looking for.
// NOTE(review): substring(1) is applied to whatever tag precedes the parent, so a
// tag that is not a header yields a non-numeric string - confirm callers cope.
return $dl.closest("class").parent().prev().get(0)?.tagName.substring(1);