rustdoc-search: fix bugs when unboxing and reordering combine

This commit is contained in:
Michael Howell 2023-09-09 16:49:29 -07:00
parent 9ccb217422
commit 269cb57947
6 changed files with 393 additions and 253 deletions

View File

@ -3,6 +3,17 @@
"use strict";
// polyfill
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/toSpliced
if (!Array.prototype.toSpliced) {
// Can't use arrow functions, because we want `this`
Array.prototype.toSpliced = function() {
const me = this.slice();
Array.prototype.splice.apply(me, arguments);
return me;
};
}
(function() {
// This mapping table should match the discriminants of
// `rustdoc::formats::item_type::ItemType` type in Rust.
@ -1301,278 +1312,311 @@ function initSearch(rawSearchIndex) {
* This function checks generics in search query `queryElem` can all be found in the
* search index (`fnType`),
*
* @param {FunctionType} fnType - The object to check.
* @param {QueryElement} queryElem - The element from the parsed query.
* @param {[FunctionType]} whereClause - Trait bounds for generic items.
* @param {Map<number,number>|null} mgensIn - Map functions generics to query generics.
* This function returns `true` if it matches, and also writes the results to mgensInout.
* It returns `false` if no match is found, and leaves mgensInout untouched.
*
* @param {FunctionType} fnType - The object to check.
* @param {QueryElement} queryElem - The element from the parsed query.
* @param {[FunctionType]} whereClause - Trait bounds for generic items.
* @param {Map<number,number>|null} mgensInout - Map functions generics to query generics.
*
* @return {boolean} - Returns true if a match, false otherwise.
*/
function checkGenerics(fnType, queryElem, whereClause, mgensIn) {
return unifyFunctionTypes(fnType.generics, queryElem.generics, whereClause, mgensIn);
function checkGenerics(fnType, queryElem, whereClause, mgensInout) {
return unifyFunctionTypes(
fnType.generics,
queryElem.generics,
whereClause,
mgensInout,
mgens => {
if (mgensInout) {
for (const [fid, qid] of mgens.entries()) {
mgensInout.set(fid, qid);
}
}
return true;
}
);
}
/**
* This function checks if a list of search query `queryElems` can all be found in the
* search index (`fnTypes`).
*
* @param {Array<FunctionType>} fnTypes - The objects to check.
* @param {Array<QueryElement>} queryElems - The elements from the parsed query.
* @param {[FunctionType]} whereClause - Trait bounds for generic items.
* @param {Map<number,number>|null} mgensIn - Map function generics to query generics.
* This function returns `true` on a match, or `false` if none. If `solutionCb` is
* supplied, it will call that function with mgens, and that callback can accept or
* reject the result bu returning `true` or `false`. If the callback returns false,
* then this function will try with a different solution, or bail with false if it
* runs out of candidates.
*
* @param {Array<FunctionType>} fnTypes - The objects to check.
* @param {Array<QueryElement>} queryElems - The elements from the parsed query.
* @param {[FunctionType]} whereClause - Trait bounds for generic items.
* @param {Map<number,number>|null} mgensIn
* - Map functions generics to query generics (never modified).
* @param {null|Map<number,number> -> bool} solutionCb - Called for each `mgens` solution.
*
* @return {boolean} - Returns true if a match, false otherwise.
*/
function unifyFunctionTypes(fnTypes, queryElems, whereClause, mgensIn) {
// This search engine implements order-agnostic unification. There
// should be no missing duplicates (generics have "bag semantics"),
// and the row is allowed to have extras.
function unifyFunctionTypes(fnTypesIn, queryElems, whereClause, mgensIn, solutionCb) {
/**
* @type Map<integer, integer>
*/
let mgens = new Map(mgensIn);
if (queryElems.length === 0) {
return true;
return !solutionCb || solutionCb(mgens);
}
if (!fnTypes || fnTypes.length === 0) {
if (!fnTypesIn || fnTypesIn.length === 0) {
return false;
}
const ql = queryElems.length;
let fl = fnTypesIn.length;
/**
* @type Map<integer, QueryElement[]>
* @type Array<FunctionType>
*/
const queryElemSet = new Map();
const mgens = new Map(mgensIn);
const addQueryElemToQueryElemSet = queryElem => {
let currentQueryElemList;
const qid = queryElem.id;
if (queryElemSet.has(qid)) {
currentQueryElemList = queryElemSet.get(qid);
} else {
currentQueryElemList = [];
queryElemSet.set(qid, currentQueryElemList);
}
currentQueryElemList.push(queryElem);
};
for (const queryElem of queryElems) {
addQueryElemToQueryElemSet(queryElem);
}
let fnTypes = fnTypesIn.slice();
/**
* @type Map<integer, FunctionType[]>
* loop works by building up a solution set in the working arrays
* fnTypes gets mutated in place to make this work, while queryElems
* is left alone
*
* vvvvvvv `i` points here
* queryElems = [ good, good, good, unknown, unknown ],
* fnTypes = [ good, good, good, unknown, unknown ],
* ---------------- ^^^^^^^^^^^^^^^^ `j` iterates after `i`,
* | looking for candidates
* everything before `i` is the
* current working solution
*
* Everything in the current working solution is known to be a good
* match, but it might not be the match we wind up going with, because
* there might be more than one candidate match, and we need to try them all
* before giving up. So, to handle this, it backtracks on failure.
*
* @type Array<{
* "fnTypesScratch": Array<FunctionType>,
* "queryElemsOffset": integer,
* "fnTypesOffset": integer
* }>
*/
const fnTypeSet = new Map();
/**
* If `fnType` is a concrete type with generics, replace it with its generics.
*
* If `fnType` is a generic type parameter, replace it with any traits that it's
* constrained by. This converts `T where T: Trait` into the same representation
* that's directly used by return-position `impl Trait` and by `dyn Trait` in
* all positions, so you can directly write `-> Trait` and get all three.
*
* This seems to correspond to two transforms described in
* http://ndmitchell.com/downloads/slides-hoogle_fast_type_searching-09_aug_2008.pdf
* One of them is actual unboxing (turning `Maybe a` into `a`), while the other is
* equivalent to converting a type parameter into an existential, which Hoogle doesn't
* seem to actually do? I notice that these two searches produce the same result:
* https://hoogle.haskell.org/?hoogle=Eq+-%3E+Set+Eq+-%3E+Set+Eq
* https://hoogle.haskell.org/?hoogle=Ord+-%3E+Set+Ord+-%3E+Set+Ord
*
* Haskell is a bit of a foreign language to me. I just want to be able to look up
* Option combinators without having to actually remember their (mostly arbitrary)
* names, and Haskell claims to already have that problem solved...
*
* @type Map<integer, FunctionType[]>
*/
const unbox = function unbox(fnType) {
if (fnType.id < 0) {
const genid = (-fnType.id) - 1;
if (whereClause && whereClause[genid]) {
for (const trait of whereClause[genid]) {
addFnTypeToFnTypeSet(trait);
const backtracking = [];
let i = 0;
let j = 0;
const backtrack = () => {
while (backtracking.length !== 0) {
// this session failed, but there are other possible solutions
// to backtrack, reset to (a copy of) the old array, do the swap or unboxing
const {
fnTypesScratch,
mgensScratch,
queryElemsOffset,
fnTypesOffset,
unbox,
} = backtracking.pop();
mgens = new Map(mgensScratch);
const fnType = fnTypesScratch[fnTypesOffset];
const queryElem = queryElems[queryElemsOffset];
if (unbox) {
if (fnType.id < 0) {
if (mgens.has(fnType.id) && mgens.get(fnType.id) !== 0) {
continue;
}
mgens.set(fnType.id, 0);
}
const generics = fnType.id < 0 ?
whereClause[(-fnType.id) - 1] :
fnType.generics;
fnTypes = fnTypesScratch.toSpliced(fnTypesOffset, 1, ...generics);
fl = fnTypes.length;
// re-run the matching algorithm on this item
i = queryElemsOffset - 1;
} else {
if (fnType.id < 0) {
if (mgens.has(fnType.id) && mgens.get(fnType.id) !== queryElem.id) {
continue;
}
mgens.set(fnType.id, queryElem.id);
}
fnTypes = fnTypesScratch.slice();
fl = fnTypes.length;
const tmp = fnTypes[queryElemsOffset];
fnTypes[queryElemsOffset] = fnTypes[fnTypesOffset];
fnTypes[fnTypesOffset] = tmp;
// this is known as a good match; go to the next one
i = queryElemsOffset;
}
mgens.set(fnType.id, 0);
} else {
for (const innerFnType of fnType.generics) {
addFnTypeToFnTypeSet(innerFnType);
return true;
}
return false;
};
for (i = 0; i !== ql; ++i) {
const queryElem = queryElems[i];
/**
* list of potential function types that go with the current query element.
* @type Array<integer>
*/
const matchCandidates = [];
let fnTypesScratch = null;
let mgensScratch = null;
// don't try anything before `i`, because they've already been
// paired off with the other query elements
for (j = i; j !== fl; ++j) {
const fnType = fnTypes[j];
if (unifyFunctionTypeIsMatchCandidate(fnType, queryElem, whereClause, mgens)) {
if (!fnTypesScratch) {
fnTypesScratch = fnTypes.slice();
}
unifyFunctionTypes(
fnType.generics,
queryElem.generics,
whereClause,
mgens,
mgensScratch => {
matchCandidates.push({
fnTypesScratch,
mgensScratch,
queryElemsOffset: i,
fnTypesOffset: j,
unbox: false,
});
return false; // "reject" all candidates to gather all of them
}
);
}
if (unifyFunctionTypeIsUnboxCandidate(fnType, queryElem, whereClause, mgens)) {
if (!fnTypesScratch) {
fnTypesScratch = fnTypes.slice();
}
if (!mgensScratch) {
mgensScratch = new Map(mgens);
}
backtracking.push({
fnTypesScratch,
mgensScratch,
queryElemsOffset: i,
fnTypesOffset: j,
unbox: true,
});
}
}
};
/**
* @param {FunctionType} fnType
*/
const addFnTypeToFnTypeSet = function addFnTypeToFnTypeSet(fnType) {
const queryContainsArrayOrSliceElem = queryElemSet.has(typeNameIdOfArrayOrSlice);
// qsid is an index into the `queryElemSet`, while `fnType.id` is an index into
// the `fnTypeSet`. These are the same, except for generics, where they get mapped.
// If qsid = 0, it means the generic type parameter has undergone instance
// substitution, and
let qsid = fnType.id;
if (fnType.id < 0) {
if (mgens.has(fnType.id)) {
qsid = mgens.get(fnType.id);
if (matchCandidates.length === 0) {
if (backtrack()) {
continue;
} else {
qsid = null;
searching: for (const qid of queryElemSet.keys()) {
if (qid < 0) {
for (const qidMapped of mgens.values()) {
if (qid === qidMapped) {
continue searching;
}
}
mgens.set(fnType.id, qid);
qsid = qid;
return false;
}
}
// use the current candidate
const {fnTypesOffset: candidate, mgensScratch: mgensNew} = matchCandidates.pop();
if (fnTypes[candidate].id < 0 && queryElems[i].id < 0) {
mgens.set(fnTypes[candidate].id, queryElems[i].id);
}
for (const [fid, qid] of mgensNew) {
mgens.set(fid, qid);
}
// `i` and `j` are paired off
// `queryElems[i]` is left in place
// `fnTypes[j]` is swapped with `fnTypes[i]` to pair them off
const tmp = fnTypes[candidate];
fnTypes[candidate] = fnTypes[i];
fnTypes[i] = tmp;
// write other candidates to backtracking queue
for (const otherCandidate of matchCandidates) {
backtracking.push(otherCandidate);
}
// If we're on the last item, check the solution with the callback
// backtrack if the callback says its unsuitable
while (i === (ql - 1) && solutionCb && !solutionCb(mgens)) {
if (!backtrack()) {
return false;
}
}
}
return true;
}
function unifyFunctionTypeIsMatchCandidate(fnType, queryElem, whereClause, mgens) {
// type filters look like `trait:Read` or `enum:Result`
if (!typePassesFilter(queryElem.typeFilter, fnType.ty)) {
return false;
}
// fnType.id < 0 means generic
// queryElem.id < 0 does too
// mgens[fnType.id] = queryElem.id
// or, if mgens[fnType.id] = 0, then we've matched this generic with a bare trait
// and should make that same decision everywhere it appears
if (fnType.id < 0 && queryElem.id < 0) {
if (mgens.has(fnType.id) && mgens.get(fnType.id) !== queryElem.id) {
return false;
}
for (const [fid, qid] of mgens.entries()) {
if (fnType.id !== fid && queryElem.id === qid) {
return false;
}
if (fnType.id === fid && queryElem.id !== qid) {
return false;
}
}
} else if (fnType.id !== null) {
if (queryElem.id === typeNameIdOfArrayOrSlice &&
(fnType.id === typeNameIdOfSlice || fnType.id === typeNameIdOfArray)
) {
// [] matches primitive:array or primitive:slice
// if it matches, then we're fine, and this is an appropriate match candidate
} else if (fnType.id !== queryElem.id) {
return false;
}
// If the query elem has generics, and the function doesn't,
// it can't match.
if (fnType.generics.length === 0 && queryElem.generics.length !== 0) {
return false;
}
// If the query element is a path (it contains `::`), we need to check if this
// path is compatible with the target type.
const queryElemPathLength = queryElem.pathWithoutLast.length;
if (queryElemPathLength > 0) {
const fnTypePath = fnType.path !== undefined && fnType.path !== null ?
fnType.path.split("::") : [];
// If the path provided in the query element is longer than this type,
// no need to check it since it won't match in any case.
if (queryElemPathLength > fnTypePath.length) {
return false;
}
let i = 0;
for (const path of fnTypePath) {
if (path === queryElem.pathWithoutLast[i]) {
i += 1;
if (i >= queryElemPathLength) {
break;
}
}
}
}
if (qsid === null || !(
queryElemSet.has(qsid) ||
(qsid === typeNameIdOfSlice && queryContainsArrayOrSliceElem) ||
(qsid === typeNameIdOfArray && queryContainsArrayOrSliceElem)
)) {
unbox(fnType);
return;
}
let currentQueryElemList = queryElemSet.get(qsid) || [];
let matchIdx = currentQueryElemList.findIndex(queryElem => {
return typePassesFilter(queryElem.typeFilter, fnType.ty) &&
checkGenerics(fnType, queryElem, whereClause, mgens);
});
if (matchIdx === -1 &&
(qsid === typeNameIdOfSlice || qsid === typeNameIdOfArray) &&
queryContainsArrayOrSliceElem
) {
currentQueryElemList = queryElemSet.get(typeNameIdOfArrayOrSlice) || [];
matchIdx = currentQueryElemList.findIndex(queryElem => {
return typePassesFilter(queryElem.typeFilter, fnType.ty) &&
checkGenerics(fnType, queryElem, whereClause, mgens);
});
}
// None of the query elems match the function type.
if (matchIdx === -1) {
unbox(fnType);
return;
}
let currentFnTypeList;
if (fnTypeSet.has(fnType.id)) {
currentFnTypeList = fnTypeSet.get(fnType.id);
} else {
currentFnTypeList = [];
fnTypeSet.set(fnType.id, currentFnTypeList);
}
currentFnTypeList.push(fnType);
};
for (const fnType of fnTypes) {
addFnTypeToFnTypeSet(fnType);
}
const doHandleQueryElemList = (currentFnTypeList, queryElemList) => {
if (queryElemList.length === 0) {
return true;
}
// Multiple items in one list might match multiple items in another.
// Since an item with fewer generics can match an item with more, we
// need to check all combinations for a potential match.
const queryElem = queryElemList.pop();
const l = currentFnTypeList.length;
for (let i = 0; i < l; i += 1) {
const fnType = currentFnTypeList[i];
if (!typePassesFilter(queryElem.typeFilter, fnType.ty)) {
continue;
}
const queryElemPathLength = queryElem.pathWithoutLast.length;
// If the query element is a path (it contains `::`), we need to check if this
// path is compatible with the target type.
if (queryElemPathLength > 0) {
const fnTypePath = fnType.path !== undefined && fnType.path !== null ?
fnType.path.split("::") : [];
// If the path provided in the query element is longer than this type,
// no need to check it since it won't match in any case.
if (queryElemPathLength > fnTypePath.length) {
continue;
}
let i = 0;
for (const path of fnTypePath) {
if (path === queryElem.pathWithoutLast[i]) {
i += 1;
if (i >= queryElemPathLength) {
break;
}
}
}
if (i < queryElemPathLength) {
// If we didn't find all parts of the path of the query element inside
// the fn type, then it's not the right one.
continue;
}
}
if (queryElem.generics.length === 0
|| checkGenerics(fnType, queryElem, whereClause, mgens)
) {
currentFnTypeList.splice(i, 1);
const result = doHandleQueryElemList(currentFnTypeList, queryElemList);
if (result) {
return true;
}
currentFnTypeList.splice(i, 0, fnType);
}
}
return false;
};
/**
* @param {number} id
* @param {[QueryElement]} queryElemList
*/
const handleQueryElemList = (id, queryElemList) => {
let fsid = id;
if (fsid < 0) {
fsid = null;
if (!mgens) {
if (i < queryElemPathLength) {
// If we didn't find all parts of the path of the query element inside
// the fn type, then it's not the right one.
return false;
}
for (const [fid, qsid] of mgens) {
if (id === qsid) {
fsid = fid;
break;
}
}
}
if (fsid === null || !fnTypeSet.has(fsid)) {
if (fsid === typeNameIdOfArrayOrSlice) {
return handleQueryElemList(typeNameIdOfSlice, queryElemList) ||
handleQueryElemList(typeNameIdOfArray, queryElemList);
}
return false;
}
const currentFnTypeList = fnTypeSet.get(fsid);
if (currentFnTypeList.length < queryElemList.length) {
// It's not possible for all the query elems to find a match.
return false;
}
const result = doHandleQueryElemList(currentFnTypeList, queryElemList);
if (result) {
// Found a solution.
// Any items that weren't used for it can be unboxed, and might form
// part of the solution for another item.
for (const innerFnType of currentFnTypeList) {
unbox(innerFnType);
}
fnTypeSet.delete(fsid);
}
return result;
};
let queryElemSetSize = Number.MAX_VALUE;
while (queryElemSetSize > queryElemSet.size) {
queryElemSetSize = queryElemSet.size;
for (const [id, queryElemList] of queryElemSet) {
if (handleQueryElemList(id, queryElemList)) {
queryElemSet.delete(id);
}
}
}
if (queryElemSetSize === 0) {
for (const [fid, qid] of mgens) {
mgensIn.set(fid, qid);
return true;
}
function unifyFunctionTypeIsUnboxCandidate(fnType, queryElem, whereClause, mgens) {
if (fnType.id < 0 && queryElem.id >= 0) {
if (!whereClause) {
return false;
}
return true;
} else {
return false;
// mgens[fnType.id] === 0 indicates that we committed to unboxing this generic
// mgens[fnType.id] === null indicates that we haven't decided yet
if (mgens.has(fnType.id) && mgens.get(fnType.id) !== 0) {
return false;
}
// This is only a potential unbox if the search query appears in the where clause
// for example, searching `Read -> usize` should find
// `fn read_all<R: Read>(R) -> Result<usize>`
// generic `R` is considered "unboxed"
return checkIfInList(whereClause[(-fnType.id) - 1], queryElem, whereClause);
} else if (fnType.generics && fnType.generics.length > 0) {
return checkIfInList(fnType.generics, queryElem, whereClause);
}
return false;
}
/**
@ -1907,23 +1951,19 @@ function initSearch(rawSearchIndex) {
}
// If the result is too "bad", we return false and it ends this search.
let mgens;
if (row.type.where_clause && row.type.where_clause.length > 0) {
mgens = new Map();
}
if (!unifyFunctionTypes(
row.type.inputs,
parsedQuery.elems,
row.type.where_clause,
mgens
)) {
return;
}
if (!unifyFunctionTypes(
row.type.output,
parsedQuery.returned,
row.type.where_clause,
mgens
null,
mgens => {
return unifyFunctionTypes(
row.type.output,
parsedQuery.returned,
row.type.where_clause,
mgens
);
}
)) {
return;
}
@ -2060,7 +2100,7 @@ function initSearch(rawSearchIndex) {
in_returned = row.type && unifyFunctionTypes(
row.type.output,
parsedQuery.returned,
row.type.where_clause, new Map()
row.type.where_clause
);
if (in_returned) {
addIntoResults(

View File

@ -1,3 +1,5 @@
// ignore-order
const FILTER_CRATE = "std";
const EXPECTED = [
@ -36,8 +38,9 @@ const EXPECTED = [
],
},
{
'query': 'option<t>, option<u> -> option<t, u>',
'query': 'option<t>, option<u> -> option<t>',
'others': [
{ 'path': 'std::option::Option', 'name': 'and' },
{ 'path': 'std::option::Option', 'name': 'zip' },
],
},

View File

@ -0,0 +1,22 @@
// ignore-order
const FILTER_CRATE = "std";
const EXPECTED = [
{
'query': 'vec::intoiter<T> -> [T]',
'others': [
{ 'path': 'std::vec::IntoIter', 'name': 'as_slice' },
{ 'path': 'std::vec::IntoIter', 'name': 'as_mut_slice' },
{ 'path': 'std::vec::IntoIter', 'name': 'next_chunk' },
],
},
{
'query': 'vec::intoiter<T> -> []',
'others': [
{ 'path': 'std::vec::IntoIter', 'name': 'as_slice' },
{ 'path': 'std::vec::IntoIter', 'name': 'as_mut_slice' },
{ 'path': 'std::vec::IntoIter', 'name': 'next_chunk' },
],
},
];

View File

@ -79,6 +79,7 @@ const EXPECTED = [
{ 'path': 'generics_match_ambiguity', 'name': 'baac' },
{ 'path': 'generics_match_ambiguity', 'name': 'baaf' },
{ 'path': 'generics_match_ambiguity', 'name': 'baag' },
{ 'path': 'generics_match_ambiguity', 'name': 'baah' },
],
},
{

View File

@ -0,0 +1,38 @@
// exact-check
const EXPECTED = [
{
'query': 'Inside<T> -> Out1<T>',
'others': [
{ 'path': 'generics_unbox', 'name': 'alpha' },
],
},
{
'query': 'Inside<T> -> Out3<T>',
'others': [
{ 'path': 'generics_unbox', 'name': 'beta' },
{ 'path': 'generics_unbox', 'name': 'gamma' },
],
},
{
'query': 'Inside<T> -> Out4<T>',
'others': [
{ 'path': 'generics_unbox', 'name': 'beta' },
{ 'path': 'generics_unbox', 'name': 'gamma' },
],
},
{
'query': 'Inside<T> -> Out3<U, T>',
'others': [
{ 'path': 'generics_unbox', 'name': 'beta' },
{ 'path': 'generics_unbox', 'name': 'gamma' },
],
},
{
'query': 'Inside<T> -> Out4<U, T>',
'others': [
{ 'path': 'generics_unbox', 'name': 'beta' },
{ 'path': 'generics_unbox', 'name': 'gamma' },
],
},
];

View File

@ -0,0 +1,36 @@
pub struct Out<A, B = ()> {
a: A,
b: B,
}
pub struct Out1<A, const N: usize> {
a: [A; N],
}
pub struct Out2<A, const N: usize> {
a: [A; N],
}
pub struct Out3<A, B> {
a: A,
b: B,
}
pub struct Out4<A, B> {
a: A,
b: B,
}
pub struct Inside<T>(T);
pub fn alpha<const N: usize, T>(_: Inside<T>) -> Out<Out1<T, N>, Out2<T, N>> {
loop {}
}
pub fn beta<T, U>(_: Inside<T>) -> Out<Out3<T, U>, Out4<U, T>> {
loop {}
}
pub fn gamma<T, U>(_: Inside<T>) -> Out<Out3<U, T>, Out4<T, U>> {
loop {}
}