// qiskit-documentation/scripts/js/lib/api/updateLinks.ts
//
// 219 lines
// 7.8 KiB
// TypeScript

// This code is a Qiskit project.
//
// (C) Copyright IBM 2023.
//
// This code is licensed under the Apache License, Version 2.0. You may
// obtain a copy of this license in the LICENSE file in the root directory
// of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
//
// Any modifications or derivative works of this code must retain this
// copyright notice, and modified files need to carry a notice indicating
// that they have been altered from the originals.
import { join } from "path";
import { initial, keyBy, keys, last } from "lodash-es";
import { Root } from "mdast";
import { visit } from "unist-util-visit";
import isAbsoluteUrl from "is-absolute-url";
import { unified } from "unified";
import remarkParse from "remark-parse";
import remarkMath from "remark-math";
import remarkGfm from "remark-gfm";
import remarkMdx from "remark-mdx";
import remarkStringify from "remark-stringify";
import { removePart, removePrefix, removeSuffix } from "../stringUtils.js";
import { HtmlToMdResultWithUrl } from "./HtmlToMdResult.js";
import { remarkStringifyOptions } from "./commonParserConfig.js";
import { ObjectsInv } from "./objectsInv.js";
import { transformSpecialCaseUrl } from "./specialCaseResults.js";
import { kebabCaseAndShortenPage } from "./normalizeResultUrls.js";
import { DOCS_BASE_PATH } from "./conversionPipeline.js";
/**
 * A hyperlink as handled by `relativizeLink`: its target plus, optionally,
 * the text displayed for it.
 */
export interface Link {
url: string; // Where the link goes
text?: string; // What the user sees
}
/**
 * Lower-cases the anchor of `url` when it looks like a markdown-heading anchor.
 *
 * Anchors generated from markdown headings are always lower case but, if these
 * headings are API references, Sphinx sometimes expects them to include
 * uppercase characters.
 *
 * As a heuristic, we assume urls containing periods are anchors to HTML id
 * tags (which preserve Sphinx's original casing), and anchors with no periods
 * are from markdown headings (which must be lower-cased). This seems to work
 * ok.
 *
 * The anchor is everything after the FIRST `#`; any further `#` characters are
 * kept as part of the anchor rather than being dropped.
 *
 * @param url - The URL to inspect; it is returned unchanged when it has no `#`
 *   or when its anchor contains a period.
 * @returns The URL with its anchor lower-cased, or the original URL.
 */
function lowerCaseIfMarkdownAnchor(url: string): string {
  const hashIndex = url.indexOf("#");
  if (hashIndex === -1) {
    return url;
  }
  // Slice instead of `split("#")` so a second `#` does not truncate the URL.
  const base = url.slice(0, hashIndex);
  const anchor = url.slice(hashIndex + 1);
  if (anchor.includes(".")) {
    return url;
  }
  return `${base}#${anchor.toLowerCase()}`;
}
/**
 * Normalizes a link target found in the generated API docs.
 *
 * Steps, in order:
 * 1. URLs that `isAbsoluteUrl` reports as absolute are returned untouched.
 * 2. Links starting with `/` get `DOCS_BASE_PATH` prepended unless they already
 *    start (or end) with it.
 * 3. Links starting with `/` that do not point under
 *    `${DOCS_BASE_PATH}/api/${kwargs.pkgName}` are returned as-is.
 * 4. Everything else is passed through `transformSpecialCaseUrl` and
 *    `removePart`, its last path segment is optionally kebab-cased/shortened,
 *    and its hash is rewritten for a few known cases (see inline comments).
 * 5. The result goes through `lowerCaseIfMarkdownAnchor`.
 *
 * @param url - The link target to normalize.
 * @param resultsByName - Conversion results keyed by name; used to detect
 *   whether the dotted parent of a page has `meta.apiType === "class"`.
 * @param itemNames - Names of known pages; hash rewriting only applies when
 *   the linked page is in this set.
 * @param kwargs - `kebabCaseAndShorten` toggles `kebabCaseAndShortenPage` on
 *   page names; `pkgName` identifies the package whose API docs are being
 *   processed.
 * @returns The normalized link target.
 */
export function normalizeUrl(
  url: string,
  resultsByName: { [key: string]: HtmlToMdResultWithUrl },
  itemNames: Set<string>,
  kwargs: { kebabCaseAndShorten: boolean; pkgName: string },
): string {
  if (isAbsoluteUrl(url)) {
    return url;
  }

  // We add the base path to the internal links if needed.
  // NOTE(review): the `endsWith(DOCS_BASE_PATH)` exclusion looks unusual — confirm intent.
  const needsBasePath =
    url.startsWith("/") &&
    !(url.startsWith(DOCS_BASE_PATH) || url.endsWith(DOCS_BASE_PATH));
  if (needsBasePath) {
    url = `${DOCS_BASE_PATH}${url}`;
  }

  // Links starting with "/" are already normalized, except those pointing to
  // the same API docs. For those cases, we need to transform them to kebab-case.
  // Todo: Transform URLs pointing to other APIs, when they all use kebab-case.
  const ownApiRoot = `${DOCS_BASE_PATH}/api/${kwargs.pkgName}`;
  if (url.startsWith("/") && !url.startsWith(ownApiRoot)) {
    return url;
  }

  // Applies the optional kebab-case/shorten transformation to a page name.
  const toPageSlug = (name: string): string =>
    kwargs.kebabCaseAndShorten
      ? kebabCaseAndShortenPage(name, kwargs.pkgName)
      : name;

  const cleanedUrl = removePart(transformSpecialCaseUrl(url), "/", [
    "stubs",
    "apidocs",
    "apidoc",
    "..",
  ]);
  const segments = cleanedUrl.split("/");
  const directorySegments = initial(segments);
  const [page, hash] = last(segments)!.split("#") as [
    string,
    string | undefined,
  ];
  const pageUrl = [...directorySegments, toPageSlug(page)].join("/");

  // Default case. The hash may still be rewritten below for a few edge cases.
  let result = hash ? `${pageUrl}#${hash}` : pageUrl;

  // qiskit_ibm_runtime.RuntimeJob
  // qiskit_ibm_runtime.RuntimeJob#qiskit_ibm_runtime.RuntimeJob
  if (itemNames.has(page)) {
    // A hash that merely repeats the page name is redundant.
    if (hash === page) {
      result = pageUrl;
    }

    // Rather than linking to the component like `Function` or `Attribute`, we link to the header.
    // This is necessary because until we implement https://github.com/Qiskit/documentation/issues/1395, the
    // anchor for the component would take you too low in the page, given that the header is above the component.
    // qiskit_ibm_runtime.RuntimeJob#qiskit_ibm_runtime.RuntimeJob.job -> qiskit_ibm_runtime.RuntimeJob#job
    //
    // TODO(#2217): Remove this special case and use the full ID instead.
    if (hash?.startsWith(`${page}.`)) {
      const qualifiedMember = removePrefix(hash, `${page}.`);
      // Also check for inline classes, which often show up on module pages.
      // qiskit_addon_obp.utils.truncating#qiskit_addon_obp.utils.truncating.TruncationErrorBudget.p_norm
      // -> qiskit_addon_obp.utils.truncating#p_norm, whereas without this check
      // it would be qiskit_addon_obp.utils.truncating#TruncationErrorBudget.p_norm.
      const anchor = qualifiedMember.includes(".")
        ? qualifiedMember.split(".", 2)[1]
        : qualifiedMember;
      result = `${pageUrl}#${anchor}`;
    }
  }

  // qiskit_ibm_runtime.QiskitRuntimeService.job -> qiskit_ibm_runtime.QiskitRuntimeService#job
  const dottedParts = page.split(".");
  const parentName = initial(dottedParts).join(".");
  if (resultsByName[parentName]?.meta.apiType === "class") {
    const memberName = last(dottedParts);
    const parentUrl = [...directorySegments, toPageSlug(parentName)].join("/");
    result = `${parentUrl}#${memberName}`;
  }

  return lowerCaseIfMarkdownAnchor(result);
}
/**
 * Converts a link to one of the known documentation hosts into a
 * root-relative link.
 *
 * The URL's known prefix is swapped for its replacement, a trailing `.html` is
 * removed from the path, and an anchor identical to the remaining path is
 * dropped. When the visible text was the URL itself, the returned `text` holds
 * the shortened path (without the new prefix); otherwise `text` is `undefined`.
 *
 * @param link - The link to convert.
 * @returns The relativized link, or `undefined` when `link.url` does not start
 *   with any known prefix.
 */
export function relativizeLink(link: Link): Link | undefined {
  const priorPrefixToNewPrefix = new Map([
    ["https://qiskit.org/documentation/apidoc/", "/api/qiskit"],
    ["https://qiskit.org/documentation/stubs/", "/api/qiskit"],
    ["https://docs.quantum.ibm.com/", ""],
    ["https://docs.quantum-computing.ibm.com/", ""],
    ["https://quantum.cloud.ibm.com/docs", "/docs"],
    ["https://quantum.cloud.ibm.com/learning", "/learning"],
  ]);
  // Look up the prefix and its replacement together; the first match in
  // insertion order wins.
  const match = Array.from(priorPrefixToNewPrefix).find(([prefix]) =>
    link.url.startsWith(prefix),
  );
  if (!match) {
    return undefined;
  }
  const [priorPrefix, newPrefix] = match;

  const [rawPath, anchor] = link.url.split("#");
  let url = removeSuffix(removePrefix(rawPath, priorPrefix), ".html");
  if (anchor && anchor !== url) {
    url = `${url}#${anchor}`;
  }
  const newText = link.url === link.text ? url : undefined;

  // `join` returns "." when every segment is empty (e.g. the link was exactly
  // "https://docs.quantum.ibm.com/"); map that to the site root instead of
  // emitting "/.".
  // NOTE(review): `join` uses the platform path separator, so this assumes a
  // POSIX host — confirm, or switch to `posix.join`.
  const joined = join(newPrefix, url);
  const relativeUrl = joined === "." ? "" : removePrefix(joined, "/");
  return { url: `/${relativeUrl}`, text: newText };
}
/**
 * Rewrites every markdown link in `results` and, when provided, every URI in
 * the objects inventory.
 *
 * Each result's markdown is parsed (with the math, GFM and MDX remark
 * plugins), every `link` node is first passed through `relativizeLink` and
 * then through `normalizeUrl`, and the document is stringified back into
 * `result.markdown`. Results are processed one at a time, in order, and are
 * mutated in place.
 *
 * @param results - Converted pages whose `markdown` is rewritten in place.
 * @param kwargs - Options forwarded unchanged to `normalizeUrl`.
 * @param maybeObjectsInv - Optional inventory whose URIs are normalized with
 *   the same `normalizeUrl` settings via `updateUris`.
 */
export async function updateLinks(
  results: HtmlToMdResultWithUrl[],
  kwargs: { kebabCaseAndShorten: boolean; pkgName: string },
  maybeObjectsInv?: ObjectsInv,
): Promise<void> {
  const resultsByName = keyBy(results, (result) => result.meta.apiName!);
  const itemNames = new Set(keys(resultsByName));

  // `normalizeUrl` bound to the lookup tables of this run.
  const normalize = (target: string): string =>
    normalizeUrl(target, resultsByName, itemNames, kwargs);

  // remark plugin that rewrites the target (and, where applicable, the text)
  // of every link node in the tree.
  const rewriteLinks = () => async (tree: Root) => {
    visit(tree, "link", (node) => {
      const firstChild = node.children?.[0];
      const textNode = firstChild?.type === "text" ? firstChild : undefined;

      const relativized = relativizeLink({
        url: node.url,
        text: textNode?.value,
      });
      if (relativized) {
        node.url = relativized.url;
        // Only replace the visible text when a non-empty replacement exists.
        if (textNode && relativized.text) {
          textNode.value = relativized.text;
        }
      }

      node.url = normalize(node.url);
    });
  };

  for (const result of results) {
    const processor = unified()
      .use(remarkParse)
      .use(remarkMath)
      .use(remarkGfm)
      .use(remarkMdx)
      .use(rewriteLinks)
      .use(remarkStringify, remarkStringifyOptions);
    const output = await processor.process(result.markdown);
    result.markdown = output?.toString();
  }

  maybeObjectsInv?.updateUris((uri: string) => normalize(uri));
}