Rollup merge of #129426 - notriddle:smaller-index-2024-08-22, r=GuillaumeGomez

rustdoc-search: use tighter json for names and parents

File size
---------

```console
$ du -hs doc.old/search-index1.82.0.js doc/search-index1.82.0.js
3.2M    doc.old/search-index1.82.0.js
2.8M    doc/search-index1.82.0.js
$ gzip doc/search-index1.82.0.js
$ gzip doc.old/search-index1.82.0.js
$ du -hs doc.old/search-index1.82.0.js.gz doc/search-index1.82.0.js.gz
464K    doc.old/search-index1.82.0.js.gz
456K    doc/search-index1.82.0.js.gz
$ du -hs compiler-doc.old/search-index.js compiler-doc/search-index.js
8.5M    compiler-doc.old/search-index.js
6.5M    compiler-doc/search-index.js
$ gzip compiler-doc/search-index.js
$ gzip compiler-doc.old/search-index.js
$ du -hs compiler-doc.old/search-index.js.gz compiler-doc/search-index.js.gz
1.4M    compiler-doc.old/search-index.js.gz
1.4M    compiler-doc/search-index.js.gz
```

Performance
-----------

Firefox profile: [before](https://profiler.firefox.com/public/jf1741wycma0n38asdf7kdtw8egs0pqakbr03jg/calltree/?globalTrackOrder=0w3&implementation=js&thread=3&v=10), [after](https://profiler.firefox.com/public/p4fptad7vncsfgrgk9a18yx7m6w8kdpgfy15f8r/calltree/?globalTrackOrder=0w3&implementation=js&thread=3&v=10)

CLI profiler scripts comparison: https://notriddle.com/rustdoc-html-demo-9/smaller-index-2024-08-22/index.html

| Benchmark | Before     | After      | % Diff
| --------- | ----------:| ----------:| ------:
| arti      | 225692 KiB | 218744 KiB | 3%
| cortex-m  | 58276 KiB  | 57852 KiB  | 0%
| sqlx      | 123132 KiB | 125448 KiB | -2%
| stm32f4   | 556828 KiB | 548996 KiB | 1%
| ripgrep   | 86964 KiB  | 86180 KiB  | 1%

This commit is contained in:
Guillaume Gomez 2024-08-23 12:32:17 +02:00 committed by GitHub
commit db65b6c544
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 40 additions and 13 deletions

View File

@ -579,12 +579,14 @@ pub(crate) fn build_index<'tcx>(
let mut names = Vec::with_capacity(self.items.len());
let mut types = String::with_capacity(self.items.len());
let mut full_paths = Vec::with_capacity(self.items.len());
let mut parents = Vec::with_capacity(self.items.len());
let mut parents = String::with_capacity(self.items.len());
let mut parents_backref_queue = VecDeque::new();
let mut functions = String::with_capacity(self.items.len());
let mut deprecated = Vec::with_capacity(self.items.len());
let mut backref_queue = VecDeque::new();
let mut type_backref_queue = VecDeque::new();
let mut last_name = None;
for (index, item) in self.items.iter().enumerate() {
let n = item.ty as u8;
let c = char::try_from(n + b'A').expect("item types must fit in ASCII");
@ -597,17 +599,39 @@ pub(crate) fn build_index<'tcx>(
"`{}` is missing idx",
item.name
);
// 0 is a sentinel, everything else is one-indexed
parents.push(item.parent_idx.map(|x| x + 1).unwrap_or(0));
assert!(
parents_backref_queue.len() <= 16,
"the string encoding only supports 16 slots of lookback"
);
let parent: i32 = item.parent_idx.map(|x| x + 1).unwrap_or(0).try_into().unwrap();
if let Some(idx) = parents_backref_queue.iter().position(|p: &i32| *p == parent) {
parents.push(
char::try_from('0' as u32 + u32::try_from(idx).unwrap())
.expect("last possible value is '?'"),
);
} else if parent == 0 {
write_vlqhex_to_string(parent, &mut parents);
} else {
parents_backref_queue.push_front(parent);
write_vlqhex_to_string(parent, &mut parents);
if parents_backref_queue.len() > 16 {
parents_backref_queue.pop_back();
}
}
names.push(item.name.as_str());
if Some(item.name.as_str()) == last_name {
names.push("");
} else {
names.push(item.name.as_str());
last_name = Some(item.name.as_str());
}
if !item.path.is_empty() {
full_paths.push((index, &item.path));
}
match &item.search_type {
Some(ty) => ty.write_to_string(&mut functions, &mut backref_queue),
Some(ty) => ty.write_to_string(&mut functions, &mut type_backref_queue),
None => functions.push('`'),
}

View File

@ -3546,7 +3546,7 @@ ${item.displayPath}<span class="${type}">${name}</span>\
// Used to de-duplicate inlined and re-exported stuff
const itemReexports = new Map(crateCorpus.r);
// a VLQ-hex encoded string; each decoded (Number) is the parent path index + 1 to `paths`, or 0 if none
const itemParentIdxs = crateCorpus.i;
const itemParentIdxDecoder = new VlqHexDecoder(crateCorpus.i, noop => noop);
// a map Number, string for impl disambiguators
const implDisambiguator = new Map(crateCorpus.b);
// an array of [(Number) item type,
@ -3593,6 +3593,8 @@ ${item.displayPath}<span class="${type}">${name}</span>\
// faster analysis operations
lastPath = "";
len = itemTypes.length;
let lastName = "";
let lastWord = "";
for (let i = 0; i < len; ++i) {
const bitIndex = i + 1;
if (descIndex >= descShard.len &&
@ -3608,10 +3610,8 @@ ${item.displayPath}<span class="${type}">${name}</span>\
descIndex = 0;
descShardList.push(descShard);
}
let word = "";
if (typeof itemNames[i] === "string") {
word = itemNames[i].toLowerCase();
}
const name = itemNames[i] === "" ? lastName : itemNames[i];
const word = itemNames[i] === "" ? lastWord : itemNames[i].toLowerCase();
const path = itemPaths.has(i) ? itemPaths.get(i) : lastPath;
const type = itemFunctionDecoder.next();
if (type !== null) {
@ -3633,15 +3633,16 @@ ${item.displayPath}<span class="${type}">${name}</span>\
}
// This object should have exactly the same set of fields as the "crateRow"
// object defined above.
const itemParentIdx = itemParentIdxDecoder.next();
const row = {
crate,
ty: itemTypes.charCodeAt(i) - 65, // 65 = "A"
name: itemNames[i],
name,
path,
descShard,
descIndex,
exactPath: itemReexports.has(i) ? itemPaths.get(itemReexports.get(i)) : path,
parent: itemParentIdxs[i] > 0 ? paths[itemParentIdxs[i] - 1] : undefined,
parent: itemParentIdx > 0 ? paths[itemParentIdx - 1] : undefined,
type,
id,
word,
@ -3655,6 +3656,8 @@ ${item.displayPath}<span class="${type}">${name}</span>\
if (!searchIndexEmptyDesc.get(crate).contains(bitIndex)) {
descIndex += 1;
}
lastName = name;
lastWord = word;
}
if (aliases) {