Add internal link checker for other repositories (#1759)

The Functions repositories need to check that their internal links are valid.

To keep things clean, I added a new command file specifically for other
repos, rather than modifying `checkInternalLinks.ts`.

The main challenge is that they cannot tell which other docs pages
exist, like `/guides/hello-world`, since those docs won't live in their
own repository. To solve this, we save and distribute a snapshot of the
current state of links in `scripts/config/internal-links.json`. This
snapshot does risk falling out of date, which means we will occasionally
want to regenerate it. I went with bundling the file—rather than
something like fetching it from GitHub—to keep things simple (e.g. no
network flakes) and to keep the build deterministic.

Example:

```markdown
# Page in another repo

[Another page](/guides/my-function)     (this is a file in the other repo)
[Another page](/guides/hello-world)
[Another page](/guides/hello-world2)
```

```
❯ npm run check:internal-links

> my-function-docs@0.1.0 check:internal-links
> node node_modules/qiskit-documentation/dist/commands/checkInternalLinksFromOtherRepo.js



Checking internal links for docs
 Could not find link '/guides/hello-world2'. Appears in:
    docs/guides/index.mdx     Did you mean '/guides/hello-world'?

Some internal links appear broken 💔
```

---------

Co-authored-by: Frank Harkins <frankharkins@hotmail.co.uk>
This commit is contained in:
Eric Arellano 2024-07-25 09:15:18 -04:00 committed by GitHub
parent 2bcb6aa2d9
commit ae20a0ca34
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 26512 additions and 2 deletions

View File

@ -7,7 +7,8 @@
"type": "module",
"files": [
"dist",
"scripts/config/cspell"
"scripts/config/cspell",
"scripts/config/internal-links.json"
],
"scripts": {
"prepare": "tsc",
@ -27,7 +28,8 @@
"check:stale-images": "tsx scripts/js/commands/checkStaleImages.ts",
"regen-apis": "tsx scripts/js/commands/api/regenerateApiDocs.ts",
"gen-api": "tsx scripts/js/commands/api/updateApiDocs.ts",
"make-historical": "tsx scripts/js/commands/api/convertApiDocsToHistorical.ts"
"make-historical": "tsx scripts/js/commands/api/convertApiDocsToHistorical.ts",
"save-internal-links": "tsx scripts/js/commands/saveInternalLinks.ts"
},
"dependencies": {
"cheerio": "^1.0.0-rc.12",

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,78 @@
// This code is a Qiskit project.
//
// (C) Copyright IBM 2024.
//
// This code is licensed under the Apache License, Version 2.0. You may
// obtain a copy of this license in the LICENSE file in the root directory
// of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
//
// Any modifications or derivative works of this code must retain this
// copyright notice, and modified files need to carry a notice indicating
// that they have been altered from the originals.
/**
* This file is like checkInternalLinks.ts, but meant to work in other repositories reusing
* our script infrastructure.
*
* Whereas normally checkInternalLinks.ts has real-time access to what files are present in docs/
* and public/, the other repositories use a snapshot.
*/
import { readFile } from "fs/promises";
import yargs from "yargs/yargs";
import { hideBin } from "yargs/helpers";
import { globby } from "globby";
import { File } from "../lib/links/InternalLink.js";
import { FileBatch } from "../lib/links/FileBatch.js";
/**
 * Command-line arguments of this script, as returned by `readArgs`.
 */
interface Arguments {
  /** Path to the JSON file with saved links (set via `--saved-links`). */
  savedLinks: string;
  /** Any other key present on the parsed yargs result. */
  [x: string]: unknown;
}
/**
 * Parse the command-line options of this script from `process.argv`.
 *
 * The only option is `--saved-links`, the path to the JSON file with links
 * from the qiskit/documentation repo. It defaults to the copy under
 * `node_modules/qiskit-documentation`.
 *
 * @returns The parsed arguments.
 */
function readArgs(): Arguments {
  // Start from the user-supplied arguments, with yargs' --version turned off.
  const cli = yargs(hideBin(process.argv)).version(false);

  const cliWithSavedLinks = cli.option("saved-links", {
    type: "string",
    default:
      "node_modules/qiskit-documentation/scripts/config/internal-links.json",
    description:
      "Path to JSON file with links from qiskit/documentation repo.",
  });

  return cliWithSavedLinks.parseSync();
}
/**
 * Turn the text of a saved-links snapshot into `File` objects.
 *
 * The snapshot is expected to be a JSON array of `{ path, anchors }` objects.
 * A missing or null `anchors` is treated as "no anchors".
 *
 * @param content - Raw JSON text of the snapshot.
 * @param source - Where the text came from; only used in error messages.
 * @returns One `File` per snapshot entry.
 * @throws Error if the text is not valid JSON, is not an array, or contains an
 *   entry without a string `path` or with `anchors` that is not a string array.
 */
function parseSavedFiles(content: string, source: string): File[] {
  let parsed: unknown;
  try {
    parsed = JSON.parse(content);
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    throw new Error(`Could not parse '${source}' as JSON: ${reason}`);
  }

  if (!Array.isArray(parsed)) {
    throw new Error(
      `Expected '${source}' to contain a JSON array of saved links.`,
    );
  }

  return parsed.map((entry: unknown, index: number) => {
    if (typeof entry !== "object" || entry === null) {
      throw new Error(`Entry ${index} of '${source}' is not an object.`);
    }
    const { path, anchors } = entry as { path?: unknown; anchors?: unknown };
    if (typeof path !== "string") {
      throw new Error(`Entry ${index} of '${source}' has no string 'path'.`);
    }
    const anchorList = anchors ?? [];
    if (
      !Array.isArray(anchorList) ||
      !anchorList.every((anchor) => typeof anchor === "string")
    ) {
      throw new Error(
        `Entry ${index} of '${source}' has 'anchors' that is not an array of strings.`,
      );
    }
    return new File(path, new Set<string>(anchorList));
  });
}

/**
 * Check the internal links of the `docs/` folder of the current repository.
 *
 * Loads `docs/**\/*.{ipynb,mdx}`, then checks its internal links with two extra
 * sets of files passed to `FileBatch.checkInternalLinks`: everything under
 * `public/images` and `public/videos`, and the entries of the saved-links
 * snapshot given by `--saved-links`.
 *
 * Exits the process with status 1 when the check reports broken links.
 *
 * @throws Error if the snapshot cannot be read or is malformed.
 */
async function main(): Promise<void> {
  const args = readArgs();

  const fileBatch = await FileBatch.fromGlobs(
    ["docs/**/*.{ipynb,mdx}"],
    [],
    "docs",
  );

  // Files in public/ carry no anchors, so each gets an empty anchor set.
  const publicFiles = (await globby("public/{images,videos}/**/*")).map(
    (fp) => new File(fp, new Set()),
  );

  const savedFilesContent = await readFile(args.savedLinks, {
    encoding: "utf-8",
  });
  const savedFiles = parseSavedFiles(savedFilesContent, args.savedLinks);

  const allValidLinks = await fileBatch.checkInternalLinks([
    ...publicFiles,
    ...savedFiles,
  ]);

  if (!allValidLinks) {
    console.error("\nSome internal links appear broken 💔\n");
    process.exit(1);
  }
  console.log("\nNo internal links appear broken ✅\n");
}
// Run the command. Exit explicitly once main() resolves; if it rejects, print
// the error and exit with status 1 rather than leaving the rejection unhandled.
main()
  .then(() => process.exit())
  .catch((error: unknown) => {
    console.error(error);
    process.exit(1);
  });

View File

@ -0,0 +1,40 @@
// This code is a Qiskit project.
//
// (C) Copyright IBM 2024.
//
// This code is licensed under the Apache License, Version 2.0. You may
// obtain a copy of this license in the LICENSE file in the root directory
// of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
//
// Any modifications or derivative works of this code must retain this
// copyright notice, and modified files need to carry a notice indicating
// that they have been altered from the originals.
import { writeFile } from "fs/promises";
import { FileBatch } from "../lib/links/FileBatch.js";
/**
 * Save the path and anchors of the docs files to
 * `scripts/config/internal-links.json`.
 *
 * The files come from `docs/**\/*.{ipynb,mdx}`, minus the negated patterns
 * below: API folders whose name starts with a digit, API `dev` folders,
 * everything under `docs/api/qiskit-ibm-provider`, and the Qiskit release
 * notes.
 */
async function main() {
  const docsGlobs = [
    "docs/**/*.{ipynb,mdx}",
    "!docs/api/*/[0-9]*/*",
    "!docs/api/*/dev/*",
    "!docs/api/qiskit-ibm-provider/**/*",
    "!docs/api/qiskit/release-notes/*",
  ];
  const batch = await FileBatch.fromGlobs(docsGlobs, [], "docs");

  // Only the first element of what load() returns is needed here.
  const [docsFiles] = await batch.load();

  // Anchors are sorted, presumably to keep the written file stable between runs.
  const snapshot = docsFiles.map((docsFile) => {
    const sortedAnchors = Array.from(docsFile.anchors).sort();
    return { path: docsFile.path, anchors: sortedAnchors };
  });

  const outputPath = "scripts/config/internal-links.json";
  const serialized = JSON.stringify(snapshot, null, 2);
  await writeFile(outputPath, serialized);
  console.log(`✅ Wrote internal links to ${outputPath}`);
}
// Run the command. Exit explicitly once main() resolves; if it rejects, print
// the error and exit with status 1 rather than leaving the rejection unhandled.
main()
  .then(() => process.exit())
  .catch((error: unknown) => {
    console.error(error);
    process.exit(1);
  });