rustdoc-search: parse and search with ML-style HOF

Option::map, for example, looks like this:

    option<t>, (t -> u) -> option<u>

This syntax searches for every kind of higher-order function (HOF) in Rust:
the traits Fn, FnOnce, and FnMut, and bare fn primitives.
This commit is contained in:
Michael Howell 2024-01-06 13:17:51 -07:00
parent 5aad51d015
commit 7f427f86bd
7 changed files with 649 additions and 49 deletions

View File

@ -63,11 +63,12 @@ Before describing the syntax in more detail, here's a few sample searches of
the standard library and functions that are included in the results list:
| Query | Results |
|-------|--------|
|-------|---------|
| [`usize -> vec`][] | `slice::repeat` and `Vec::with_capacity` |
| [`vec, vec -> bool`][] | `Vec::eq` |
| [`option<T>, fnonce -> option<U>`][] | `Option::map` and `Option::and_then` |
| [`option<T>, fnonce -> option<T>`][] | `Option::filter` and `Option::inspect` |
| [`option<T>, (fnonce (T) -> bool) -> option<T>`][optionfilter] | `Option::filter` |
| [`option<T>, (T -> bool) -> option<T>`][optionfilter2] | `Option::filter` |
| [`option -> default`][] | `Option::unwrap_or_default` |
| [`stdout, [u8]`][stdoutu8] | `Stdout::write` |
| [`any -> !`][] | `panic::panic_any` |
@ -77,7 +78,8 @@ the standard library and functions that are included in the results list:
[`usize -> vec`]: ../../std/vec/struct.Vec.html?search=usize%20-%3E%20vec&filter-crate=std
[`vec, vec -> bool`]: ../../std/vec/struct.Vec.html?search=vec,%20vec%20-%3E%20bool&filter-crate=std
[`option<T>, fnonce -> option<U>`]: ../../std/vec/struct.Vec.html?search=option<T>%2C%20fnonce%20->%20option<U>&filter-crate=std
[`option<T>, fnonce -> option<T>`]: ../../std/vec/struct.Vec.html?search=option<T>%2C%20fnonce%20->%20option<T>&filter-crate=std
[optionfilter]: ../../std/vec/struct.Vec.html?search=option<T>%2C+(fnonce+(T)+->+bool)+->+option<T>&filter-crate=std
[optionfilter2]: ../../std/vec/struct.Vec.html?search=option<T>%2C+(T+->+bool)+->+option<T>&filter-crate=std
[`option -> default`]: ../../std/vec/struct.Vec.html?search=option%20-%3E%20default&filter-crate=std
[`any -> !`]: ../../std/vec/struct.Vec.html?search=any%20-%3E%20!&filter-crate=std
[stdoutu8]: ../../std/vec/struct.Vec.html?search=stdout%2C%20[u8]&filter-crate=std

View File

@ -4,6 +4,7 @@ use std::collections::{BTreeMap, VecDeque};
use rustc_data_structures::fx::{FxHashMap, FxIndexMap};
use rustc_middle::ty::TyCtxt;
use rustc_span::def_id::DefId;
use rustc_span::sym;
use rustc_span::symbol::Symbol;
use serde::ser::{Serialize, SerializeSeq, SerializeStruct, Serializer};
use thin_vec::ThinVec;
@ -566,6 +567,7 @@ fn get_index_type_id(
// The type parameters are converted to generics in `simplify_fn_type`
clean::Slice(_) => Some(RenderTypeId::Primitive(clean::PrimitiveType::Slice)),
clean::Array(_, _) => Some(RenderTypeId::Primitive(clean::PrimitiveType::Array)),
clean::BareFunction(_) => Some(RenderTypeId::Primitive(clean::PrimitiveType::Fn)),
clean::Tuple(ref n) if n.is_empty() => {
Some(RenderTypeId::Primitive(clean::PrimitiveType::Unit))
}
@ -584,7 +586,7 @@ fn get_index_type_id(
}
}
// Not supported yet
clean::BareFunction(_) | clean::Generic(_) | clean::ImplTrait(_) | clean::Infer => None,
clean::Generic(_) | clean::ImplTrait(_) | clean::Infer => None,
}
}
@ -785,6 +787,42 @@ fn simplify_fn_type<'tcx, 'a>(
);
}
res.push(get_index_type(arg, ty_generics, rgen));
} else if let Type::BareFunction(ref bf) = *arg {
let mut ty_generics = Vec::new();
for ty in bf.decl.inputs.values.iter().map(|arg| &arg.type_) {
simplify_fn_type(
self_,
generics,
ty,
tcx,
recurse + 1,
&mut ty_generics,
rgen,
is_return,
cache,
);
}
// The search index, for simplicity's sake, represents fn pointers and closures
// the same way: as a tuple for the parameters, and an associated type for the
// return type.
let mut ty_output = Vec::new();
simplify_fn_type(
self_,
generics,
&bf.decl.output,
tcx,
recurse + 1,
&mut ty_output,
rgen,
is_return,
cache,
);
let ty_bindings = vec![(RenderTypeId::AssociatedType(sym::Output), ty_output)];
res.push(RenderType {
id: get_index_type_id(&arg, rgen),
bindings: Some(ty_bindings),
generics: Some(ty_generics),
});
} else {
// This is not a type parameter. So for example if we have `T, U: Option<T>`, and we're
// looking at `Option`, we enter this "else" condition, otherwise if it's `T`, we don't.

View File

@ -272,6 +272,22 @@ function initSearch(rawSearchIndex) {
* Special type name IDs for searching by both tuple and unit (`()` syntax).
*/
let typeNameIdOfTupleOrUnit;
/**
* Type name ID for `fn`, assigned from `buildTypeMapIndex("fn")`.
* Used when searching `fn`.
*/
let typeNameIdOfFn;
/**
* Type name ID for `fnmut`, assigned from `buildTypeMapIndex("fnmut")`.
* Used when searching `fnmut`.
*/
let typeNameIdOfFnMut;
/**
* Type name ID for `fnonce`, assigned from `buildTypeMapIndex("fnonce")`.
* Used when searching `fnonce`.
*/
let typeNameIdOfFnOnce;
/**
* Type name ID for `->`, assigned from `buildTypeMapIndex("->")`.
* Used when searching higher order functions (`->` syntax); a query element
* with this ID is accepted against `fn`, `fnmut`, and `fnonce`.
*/
let typeNameIdOfHof;
/**
* Add an item to the type Name->ID map, or, if one already exists, use it.
@ -464,6 +480,21 @@ function initSearch(rawSearchIndex) {
}
}
/**
 * Build a query element with `typeFilter` set to `"primitive"`.
 *
 * Every path-related field is derived from `name`; `generics` and `bindings`
 * start out empty, and `id` and `bindingName` start out `null`. Any property
 * present in `extra` replaces the corresponding default.
 *
 * @param {string} name - Name of the element, e.g. `"never"`, `"()"` or `"->"`.
 * @param {Object=} extra - Optional properties copied over the defaults.
 * @returns {Object} The freshly created element.
 */
function makePrimitiveElement(name, extra) {
    const element = {
        name: name,
        id: null,
        fullPath: [name],
        pathWithoutLast: [],
        pathLast: name,
        normalizedPathLast: name,
        generics: [],
        bindings: new Map(),
        typeFilter: "primitive",
        bindingName: null,
    };
    // Overrides are applied last so that callers can replace any default.
    return Object.assign(element, extra);
}
/**
* @param {ParsedQuery} query
* @param {ParserState} parserState
@ -501,18 +532,7 @@ function initSearch(rawSearchIndex) {
}
const bindingName = parserState.isInBinding;
parserState.isInBinding = null;
return {
name: "never",
id: null,
fullPath: ["never"],
pathWithoutLast: [],
pathLast: "never",
normalizedPathLast: "never",
generics: [],
bindings: new Map(),
typeFilter: "primitive",
bindingName,
};
return makePrimitiveElement("never", { bindingName });
}
const quadcolon = /::\s*::/.exec(path);
if (path.startsWith("::")) {
@ -671,28 +691,19 @@ function initSearch(rawSearchIndex) {
let start = parserState.pos;
let end;
if ("[(".indexOf(parserState.userQuery[parserState.pos]) !== -1) {
let endChar = ")";
let name = "()";
let friendlyName = "tuple";
let endChar = ")";
let name = "()";
let friendlyName = "tuple";
if (parserState.userQuery[parserState.pos] === "[") {
endChar = "]";
name = "[]";
friendlyName = "slice";
}
if (parserState.userQuery[parserState.pos] === "[") {
endChar = "]";
name = "[]";
friendlyName = "slice";
}
parserState.pos += 1;
const { foundSeparator } = getItemsBefore(query, parserState, generics, endChar);
const typeFilter = parserState.typeFilter;
const isInBinding = parserState.isInBinding;
if (typeFilter !== null && typeFilter !== "primitive") {
throw [
"Invalid search type: primitive ",
name,
" and ",
typeFilter,
" both specified",
];
}
const bindingName = parserState.isInBinding;
parserState.typeFilter = null;
parserState.isInBinding = null;
for (const gen of generics) {
@ -702,23 +713,26 @@ if (parserState.userQuery[parserState.pos] === "[") {
}
if (name === "()" && !foundSeparator && generics.length === 1 && typeFilter === null) {
elems.push(generics[0]);
} else if (name === "()" && generics.length === 1 && generics[0].name === "->") {
// `primitive:(a -> b)` parses to `primitive:"->"<output=b, (a,)>`
// not `primitive:"()"<"->"<output=b, (a,)>>`
generics[0].typeFilter = typeFilter;
elems.push(generics[0]);
} else {
if (typeFilter !== null && typeFilter !== "primitive") {
throw [
"Invalid search type: primitive ",
name,
" and ",
typeFilter,
" both specified",
];
}
parserState.totalElems += 1;
if (isInGenerics) {
parserState.genericsElems += 1;
}
elems.push({
name: name,
id: null,
fullPath: [name],
pathWithoutLast: [],
pathLast: name,
normalizedPathLast: name,
generics,
bindings: new Map(),
typeFilter: "primitive",
bindingName: isInBinding,
});
elems.push(makePrimitiveElement(name, { bindingName, generics }));
}
} else {
const isStringElem = parserState.userQuery[start] === "\"";
@ -805,6 +819,19 @@ if (parserState.userQuery[parserState.pos] === "[") {
const oldIsInBinding = parserState.isInBinding;
parserState.isInBinding = null;
// ML-style Higher Order Function notation
//
// a way to search for any closure or fn pointer regardless of
// which closure trait is used
//
// Looks like this:
//
// `option<t>, (t -> u) -> option<u>`
// ^^^^^^
//
// The Rust-style closure notation is implemented in getNextElem
let hofParameters = null;
let extra = "";
if (endChar === ">") {
extra = "<";
@ -825,6 +852,21 @@ if (parserState.userQuery[parserState.pos] === "[") {
throw ["Unexpected ", endChar, " after ", "="];
}
break;
} else if (endChar !== "" && isReturnArrow(parserState)) {
// ML-style HOF notation only works when delimited in something;
// otherwise, a function arrow starts the return type of the
// top-level query.
if (parserState.isInBinding) {
throw ["Unexpected ", "->", " after ", "="];
}
hofParameters = [...elems];
elems.length = 0;
parserState.pos += 2;
foundStopChar = true;
foundSeparator = false;
continue;
} else if (c === " ") {
parserState.pos += 1;
continue;
} else if (isSeparatorCharacter(c)) {
parserState.pos += 1;
foundStopChar = true;
@ -904,6 +946,27 @@ if (parserState.userQuery[parserState.pos] === "[") {
// in any case.
parserState.pos += 1;
if (hofParameters) {
// Commas in a HOF don't cause wrapping parens to become a tuple.
// If you want a one-tuple with a HOF in it, write `((a -> b),)`.
foundSeparator = false;
// HOFs can't have directly nested bindings.
if ([...elems, ...hofParameters].some(x => x.bindingName) || parserState.isInBinding) {
throw ["Unexpected ", "=", " within ", "->"];
}
// HOFs are represented the same way closures are.
// The arguments are wrapped in a tuple, and the output
// is a binding, even though the compiler doesn't technically
// represent fn pointers that way.
const hofElem = makePrimitiveElement("->", {
generics: hofParameters,
bindings: new Map([["output", [...elems]]]),
typeFilter: null,
});
elems.length = 0;
elems[0] = hofElem;
}
parserState.typeFilter = oldTypeFilter;
parserState.isInBinding = oldIsInBinding;
@ -1635,6 +1698,12 @@ if (parserState.userQuery[parserState.pos] === "[") {
) {
// () matches primitive:tuple or primitive:unit
// if it matches, then we're fine, and this is an appropriate match candidate
} else if (queryElem.id === typeNameIdOfHof &&
(fnType.id === typeNameIdOfFn || fnType.id === typeNameIdOfFnMut ||
fnType.id === typeNameIdOfFnOnce)
) {
// -> matches fn, fnonce, and fnmut
// if it matches, then we're fine, and this is an appropriate match candidate
} else if (fnType.id !== queryElem.id || queryElem.id === null) {
return false;
}
@ -1829,6 +1898,7 @@ if (parserState.userQuery[parserState.pos] === "[") {
typePassesFilter(elem.typeFilter, row.ty) && elem.generics.length === 0 &&
// special case
elem.id !== typeNameIdOfArrayOrSlice && elem.id !== typeNameIdOfTupleOrUnit
&& elem.id !== typeNameIdOfHof
) {
return row.id === elem.id || checkIfInList(
row.generics,
@ -2991,7 +3061,7 @@ ${item.displayPath}<span class="${type}">${name}</span>\
*/
function buildFunctionTypeFingerprint(type, output, fps) {
let input = type.id;
// All forms of `[]`/`()` get collapsed down to one thing in the bloom filter.
// All forms of `[]`/`()`/`->` get collapsed down to one thing in the bloom filter.
// Differentiating between arrays and slices, if the user asks for it, is
// still done in the matching algorithm.
if (input === typeNameIdOfArray || input === typeNameIdOfSlice) {
@ -3000,6 +3070,10 @@ ${item.displayPath}<span class="${type}">${name}</span>\
if (input === typeNameIdOfTuple || input === typeNameIdOfUnit) {
input = typeNameIdOfTupleOrUnit;
}
if (input === typeNameIdOfFn || input === typeNameIdOfFnMut ||
input === typeNameIdOfFnOnce) {
input = typeNameIdOfHof;
}
// http://burtleburtle.net/bob/hash/integer.html
// ~~ is toInt32. It's used before adding, so
// the number stays in safe integer range.
@ -3103,6 +3177,10 @@ ${item.displayPath}<span class="${type}">${name}</span>\
typeNameIdOfUnit = buildTypeMapIndex("unit");
typeNameIdOfArrayOrSlice = buildTypeMapIndex("[]");
typeNameIdOfTupleOrUnit = buildTypeMapIndex("()");
typeNameIdOfFn = buildTypeMapIndex("fn");
typeNameIdOfFnMut = buildTypeMapIndex("fnmut");
typeNameIdOfFnOnce = buildTypeMapIndex("fnonce");
typeNameIdOfHof = buildTypeMapIndex("->");
// Function type fingerprints are 128-bit bloom filters that are used to
// estimate the distance between function and query.

View File

@ -114,7 +114,7 @@ const PARSED = [
original: "(p -> p",
returned: [],
userQuery: "(p -> p",
error: "Unexpected `-` after `(`",
error: "Unclosed `(`",
},
{
query: "::a::b",
@ -330,7 +330,7 @@ const PARSED = [
original: 'a<->',
returned: [],
userQuery: 'a<->',
error: 'Unexpected `-` after `<`',
error: 'Unclosed `<`',
},
{
query: "a<a>:",

View File

@ -0,0 +1,376 @@
// Parser expectations for ML-style higher-order function queries, written as
// `(params -> output)`. In every case below the expected result is an element
// named `->` whose `generics` hold the parameters and whose `output` binding
// holds the return type. `userQuery` is the lowercased form of `original`.
const PARSED = [
// No parameters; the output has generics of its own.
{
query: "(-> F<P>)",
elems: [{
name: "->",
fullPath: ["->"],
pathWithoutLast: [],
pathLast: "->",
generics: [],
bindings: [
[
"output",
[{
name: "f",
fullPath: ["f"],
pathWithoutLast: [],
pathLast: "f",
generics: [
{
name: "p",
fullPath: ["p"],
pathWithoutLast: [],
pathLast: "p",
generics: [],
},
],
typeFilter: -1,
}],
],
],
typeFilter: -1,
}],
foundElems: 1,
original: "(-> F<P>)",
returned: [],
userQuery: "(-> f<p>)",
error: null,
},
// No parameters; plain output.
{
query: "(-> P)",
elems: [{
name: "->",
fullPath: ["->"],
pathWithoutLast: [],
pathLast: "->",
generics: [],
bindings: [
[
"output",
[{
name: "p",
fullPath: ["p"],
pathWithoutLast: [],
pathLast: "p",
generics: [],
typeFilter: -1,
}],
],
],
typeFilter: -1,
}],
foundElems: 1,
original: "(-> P)",
returned: [],
userQuery: "(-> p)",
error: null,
},
// A comma directly after the arrow: expected to give the same shape as `(-> a)`.
{
query: "(->,a)",
elems: [{
name: "->",
fullPath: ["->"],
pathWithoutLast: [],
pathLast: "->",
generics: [],
bindings: [
[
"output",
[{
name: "a",
fullPath: ["a"],
pathWithoutLast: [],
pathLast: "a",
generics: [],
typeFilter: -1,
}],
],
],
typeFilter: -1,
}],
foundElems: 1,
original: "(->,a)",
returned: [],
userQuery: "(->,a)",
error: null,
},
// One parameter with generics; the output list is empty.
{
query: "(F<P> ->)",
elems: [{
name: "->",
fullPath: ["->"],
pathWithoutLast: [],
pathLast: "->",
generics: [{
name: "f",
fullPath: ["f"],
pathWithoutLast: [],
pathLast: "f",
generics: [
{
name: "p",
fullPath: ["p"],
pathWithoutLast: [],
pathLast: "p",
generics: [],
},
],
typeFilter: -1,
}],
bindings: [
[
"output",
[],
],
],
typeFilter: -1,
}],
foundElems: 1,
original: "(F<P> ->)",
returned: [],
userQuery: "(f<p> ->)",
error: null,
},
// One plain parameter; the output list is empty.
{
query: "(P ->)",
elems: [{
name: "->",
fullPath: ["->"],
pathWithoutLast: [],
pathLast: "->",
generics: [{
name: "p",
fullPath: ["p"],
pathWithoutLast: [],
pathLast: "p",
generics: [],
typeFilter: -1,
}],
bindings: [
[
"output",
[],
],
],
typeFilter: -1,
}],
foundElems: 1,
original: "(P ->)",
returned: [],
userQuery: "(p ->)",
error: null,
},
// A leading comma before the parameter: expected to give the same shape as `(a ->)`.
{
query: "(,a->)",
elems: [{
name: "->",
fullPath: ["->"],
pathWithoutLast: [],
pathLast: "->",
generics: [{
name: "a",
fullPath: ["a"],
pathWithoutLast: [],
pathLast: "a",
generics: [],
typeFilter: -1,
}],
bindings: [
[
"output",
[],
],
],
typeFilter: -1,
}],
foundElems: 1,
original: "(,a->)",
returned: [],
userQuery: "(,a->)",
error: null,
},
// No whitespace around the arrow.
{
query: "(aaaaa->a)",
elems: [{
name: "->",
fullPath: ["->"],
pathWithoutLast: [],
pathLast: "->",
generics: [{
name: "aaaaa",
fullPath: ["aaaaa"],
pathWithoutLast: [],
pathLast: "aaaaa",
generics: [],
typeFilter: -1,
}],
bindings: [
[
"output",
[{
name: "a",
fullPath: ["a"],
pathWithoutLast: [],
pathLast: "a",
generics: [],
typeFilter: -1,
}],
],
],
typeFilter: -1,
}],
foundElems: 1,
original: "(aaaaa->a)",
returned: [],
userQuery: "(aaaaa->a)",
error: null,
},
// Two comma-separated parameters and one output.
{
query: "(aaaaa, b -> a)",
elems: [{
name: "->",
fullPath: ["->"],
pathWithoutLast: [],
pathLast: "->",
generics: [
{
name: "aaaaa",
fullPath: ["aaaaa"],
pathWithoutLast: [],
pathLast: "aaaaa",
generics: [],
typeFilter: -1,
},
{
name: "b",
fullPath: ["b"],
pathWithoutLast: [],
pathLast: "b",
generics: [],
typeFilter: -1,
},
],
bindings: [
[
"output",
[{
name: "a",
fullPath: ["a"],
pathWithoutLast: [],
pathLast: "a",
generics: [],
typeFilter: -1,
}],
],
],
typeFilter: -1,
}],
foundElems: 1,
original: "(aaaaa, b -> a)",
returned: [],
userQuery: "(aaaaa, b -> a)",
error: null,
},
// A `primitive:` filter in front of the parens: the expected `typeFilter`
// (1 here) sits on the `->` element itself, with no wrapping tuple element.
{
query: "primitive:(aaaaa, b -> a)",
elems: [{
name: "->",
fullPath: ["->"],
pathWithoutLast: [],
pathLast: "->",
generics: [
{
name: "aaaaa",
fullPath: ["aaaaa"],
pathWithoutLast: [],
pathLast: "aaaaa",
generics: [],
typeFilter: -1,
},
{
name: "b",
fullPath: ["b"],
pathWithoutLast: [],
pathLast: "b",
generics: [],
typeFilter: -1,
},
],
bindings: [
[
"output",
[{
name: "a",
fullPath: ["a"],
pathWithoutLast: [],
pathLast: "a",
generics: [],
typeFilter: -1,
}],
],
],
typeFilter: 1,
}],
foundElems: 1,
original: "primitive:(aaaaa, b -> a)",
returned: [],
userQuery: "primitive:(aaaaa, b -> a)",
error: null,
},
// A HOF following another argument, with a `trait:` filter: the expected
// `typeFilter` (10 here) again sits on the `->` element.
{
query: "x, trait:(aaaaa, b -> a)",
elems: [
{
name: "x",
fullPath: ["x"],
pathWithoutLast: [],
pathLast: "x",
generics: [],
typeFilter: -1,
},
{
name: "->",
fullPath: ["->"],
pathWithoutLast: [],
pathLast: "->",
generics: [
{
name: "aaaaa",
fullPath: ["aaaaa"],
pathWithoutLast: [],
pathLast: "aaaaa",
generics: [],
typeFilter: -1,
},
{
name: "b",
fullPath: ["b"],
pathWithoutLast: [],
pathLast: "b",
generics: [],
typeFilter: -1,
},
],
bindings: [
[
"output",
[{
name: "a",
fullPath: ["a"],
pathWithoutLast: [],
pathLast: "a",
generics: [],
typeFilter: -1,
}],
],
],
typeFilter: 10,
}
],
foundElems: 2,
original: "x, trait:(aaaaa, b -> a)",
returned: [],
userQuery: "x, trait:(aaaaa, b -> a)",
error: null,
},
];

94
tests/rustdoc-js/hof.js Normal file
View File

@ -0,0 +1,94 @@
// exact-check
// Search expectations for the `hof` fixture crate (hof.rs). Each entry pairs a
// query with the items expected under `others`; an empty list means the query
// is expected to return nothing.
const EXPECTED = [
// ML-style higher-order function notation
// The extra argument type (bool / u8 / i8 / char) differs per fixture
// function, so each of the next four queries expects exactly one result.
{
'query': 'bool, (u32 -> !) -> ()',
'others': [
{"path": "hof", "name": "fn_ptr"},
],
},
{
'query': 'u8, (u32 -> !) -> ()',
'others': [
{"path": "hof", "name": "fn_once"},
],
},
{
'query': 'i8, (u32 -> !) -> ()',
'others': [
{"path": "hof", "name": "fn_mut"},
],
},
{
'query': 'char, (u32 -> !) -> ()',
'others': [
{"path": "hof", "name": "fn_"},
],
},
// The wrapper type around the closure parameter (first / second / third)
// also differs per fixture function.
{
'query': '(first<u32> -> !) -> ()',
'others': [
{"path": "hof", "name": "fn_ptr"},
],
},
{
'query': '(second<u32> -> !) -> ()',
'others': [
{"path": "hof", "name": "fn_once"},
],
},
{
'query': '(third<u32> -> !) -> ()',
'others': [
{"path": "hof", "name": "fn_mut"},
],
},
// With no distinguishing argument or wrapper, all four callable kinds are expected.
{
'query': '(u32 -> !) -> ()',
'others': [
{"path": "hof", "name": "fn_"},
{"path": "hof", "name": "fn_ptr"},
{"path": "hof", "name": "fn_mut"},
{"path": "hof", "name": "fn_once"},
],
},
{
'query': 'u32 -> !',
// not a HOF query
'others': [],
},
// Queries against `multiple`, whose closure takes two `str` parameters and
// returns `i8`; either side of the arrow may be left out of the query.
{
'query': '(str, str -> i8) -> ()',
'others': [
{"path": "hof", "name": "multiple"},
],
},
{
'query': '(str ->) -> ()',
'others': [
{"path": "hof", "name": "multiple"},
],
},
{
'query': '(-> i8) -> ()',
'others': [
{"path": "hof", "name": "multiple"},
],
},
{
'query': '(str -> str) -> ()',
// params and return are not the same
'others': [],
},
{
'query': '(i8 ->) -> ()',
// params and return are not the same
'others': [],
},
{
'query': '(-> str) -> ()',
// params and return are not the same
'others': [],
},
];

12
tests/rustdoc-js/hof.rs Normal file
View File

@ -0,0 +1,12 @@
// Fixture crate for the higher-order-function search test (queries live in hof.js).
// Plain `//` comments are used on purpose so the generated docs stay unchanged.
#![feature(never_type)]
// Wrapper types: each one appears in exactly one function below, so a query
// naming the wrapper can single that function out.
pub struct First<T>(T);
pub struct Second<T>(T);
pub struct Third<T>(T);
// One function per kind of callable: a bare `fn` pointer, `FnOnce`, `FnMut`,
// and `Fn`. Each takes a different second argument type (bool, u8, i8, char)
// and returns nothing.
pub fn fn_ptr(_: fn (First<u32>) -> !, _: bool) {}
pub fn fn_once(_: impl FnOnce (Second<u32>) -> !, _: u8) {}
pub fn fn_mut(_: impl FnMut (Third<u32>) -> !, _: i8) {}
pub fn fn_(_: impl Fn (u32) -> !, _: char) {}
// A closure with two parameters and a non-`!` return type.
pub fn multiple(_: impl Fn(&'static str, &'static str) -> i8) {}