Auto merge of #115286 - saethlin:detangler, r=petrochenkov

Skip rendering metadata strings from include_str!/include_bytes!

The const rendering code in rustdoc completely ignores consts from expansions, but the compiler was rendering all consts. So some consts (namely those from `include_bytes!`) were rendered then ignored.

Most of the diff here is from moving `print_const_expr` from rustdoc into `rustc_hir_pretty` so that it can be used in rustdoc and when building rmeta files.
This commit is contained in:
bors 2023-09-02 04:53:19 +00:00
commit 1fb6947abc
7 changed files with 107 additions and 113 deletions

View File

@ -42,6 +42,6 @@ pub mod locator;
pub use fs::{emit_wrapper_file, METADATA_FILENAME};
pub use native_libs::find_native_static_library;
pub use rmeta::{encode_metadata, EncodedMetadata, METADATA_HEADER};
pub use rmeta::{encode_metadata, rendered_const, EncodedMetadata, METADATA_HEADER};
fluent_messages! { "../messages.ftl" }

View File

@ -17,8 +17,8 @@ use rustc_hir::def_id::{
CrateNum, DefId, DefIndex, LocalDefId, CRATE_DEF_ID, CRATE_DEF_INDEX, LOCAL_CRATE,
};
use rustc_hir::definitions::DefPathData;
use rustc_hir::intravisit;
use rustc_hir::lang_items::LangItem;
use rustc_hir_pretty::id_to_string;
use rustc_middle::middle::debugger_visualizer::DebuggerVisualizerFile;
use rustc_middle::middle::dependency_format::Linkage;
use rustc_middle::middle::exported_symbols::{
@ -1615,7 +1615,7 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
record!(self.tables.mir_const_qualif[def_id.to_def_id()] <- qualifs);
let body_id = tcx.hir().maybe_body_owned_by(def_id);
if let Some(body_id) = body_id {
let const_data = self.encode_rendered_const_for_body(body_id);
let const_data = rendered_const(self.tcx, body_id);
record!(self.tables.rendered_const[def_id.to_def_id()] <- const_data);
}
}
@ -1683,14 +1683,6 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
}
}
fn encode_rendered_const_for_body(&mut self, body_id: hir::BodyId) -> String {
let hir = self.tcx.hir();
let body = hir.body(body_id);
rustc_hir_pretty::to_string(&(&hir as &dyn intravisit::Map<'_>), |s| {
s.print_expr(&body.value)
})
}
#[instrument(level = "debug", skip(self))]
fn encode_info_for_macro(&mut self, def_id: LocalDefId) {
let tcx = self.tcx;
@ -2292,3 +2284,97 @@ pub fn provide(providers: &mut Providers) {
..*providers
}
}
/// Build a textual representation of an unevaluated constant expression.
///
/// If the const expression is too complex, an underscore `_` is returned.
/// For const arguments, it's `{ _ }` to be precise.
/// This means that the output is not necessarily valid Rust code.
///
/// Currently, only
///
/// * literals (optionally with a leading `-`)
/// * unit `()`
/// * blocks (`{ … }`) around simple expressions and
/// * paths without arguments
///
/// are considered simple enough. Simple blocks are included since they are
/// necessary to disambiguate unit from the unit type.
/// This list might get extended in the future.
///
/// Without this censoring, in a lot of cases the output would get too large
/// and verbose. Consider `match` expressions, blocks and deeply nested ADTs.
/// Further, private and `doc(hidden)` fields of structs would get leaked
/// since HIR datatypes like the `body` parameter do not contain enough
/// semantic information for this function to be able to hide them
/// at least not without significant performance overhead.
///
/// Whenever possible, prefer to evaluate the constant first and try to
/// use a different method for pretty-printing. Ideally this function
/// should only ever be used as a fallback.
pub fn rendered_const<'tcx>(tcx: TyCtxt<'tcx>, body: hir::BodyId) -> String {
let hir = tcx.hir();
let value = &hir.body(body).value;
#[derive(PartialEq, Eq)]
enum Classification {
Literal,
Simple,
Complex,
}
use Classification::*;
fn classify(expr: &hir::Expr<'_>) -> Classification {
match &expr.kind {
hir::ExprKind::Unary(hir::UnOp::Neg, expr) => {
if matches!(expr.kind, hir::ExprKind::Lit(_)) { Literal } else { Complex }
}
hir::ExprKind::Lit(_) => Literal,
hir::ExprKind::Tup([]) => Simple,
hir::ExprKind::Block(hir::Block { stmts: [], expr: Some(expr), .. }, _) => {
if classify(expr) == Complex { Complex } else { Simple }
}
// Paths with a self-type or arguments are too “complex” following our measure since
// they may leak private fields of structs (with feature `adt_const_params`).
// Consider: `<Self as Trait<{ Struct { private: () } }>>::CONSTANT`.
// Paths without arguments are definitely harmless though.
hir::ExprKind::Path(hir::QPath::Resolved(_, hir::Path { segments, .. })) => {
if segments.iter().all(|segment| segment.args.is_none()) { Simple } else { Complex }
}
// FIXME: Claiming that those kinds of QPaths are simple is probably not true if the Ty
// contains const arguments. Is there a *concise* way to check for this?
hir::ExprKind::Path(hir::QPath::TypeRelative(..)) => Simple,
// FIXME: Can they contain const arguments and thus leak private struct fields?
hir::ExprKind::Path(hir::QPath::LangItem(..)) => Simple,
_ => Complex,
}
}
let classification = classify(value);
if classification == Literal
&& !value.span.from_expansion()
&& let Ok(snippet) = tcx.sess.source_map().span_to_snippet(value.span) {
// For literals, we avoid invoking the pretty-printer and use the source snippet instead to
// preserve certain stylistic choices the user likely made for the sake legibility like
//
// * hexadecimal notation
// * underscores
// * character escapes
//
// FIXME: This passes through `-/*spacer*/0` verbatim.
snippet
} else if classification == Simple {
// Otherwise we prefer pretty-printing to get rid of extraneous whitespace, comments and
// other formatting artifacts.
id_to_string(&hir, body.hir_id)
} else if tcx.def_kind(hir.body_owner_def_id(body).to_def_id()) == DefKind::AnonConst {
// FIXME: Omit the curly braces if the enclosing expression is an array literal
// with a repeated element (an `ExprKind::Repeat`) as in such case it
// would not actually need any disambiguation.
"{ _ }".to_owned()
} else {
"_".to_owned()
}
}

View File

@ -42,7 +42,7 @@ pub use decoder::provide_extern;
use decoder::DecodeContext;
pub(crate) use decoder::{CrateMetadata, CrateNumMap, MetadataBlob};
use encoder::EncodeContext;
pub use encoder::{encode_metadata, EncodedMetadata};
pub use encoder::{encode_metadata, rendered_const, EncodedMetadata};
use rustc_span::hygiene::SyntaxContextData;
mod decoder;

View File

@ -22,6 +22,7 @@ use rustc_hir::lang_items::LangItem;
use rustc_hir::{BodyId, Mutability};
use rustc_hir_analysis::check::intrinsic::intrinsic_operation_unsafety;
use rustc_index::IndexVec;
use rustc_metadata::rendered_const;
use rustc_middle::ty::fast_reject::SimplifiedType;
use rustc_middle::ty::{self, TyCtxt, Visibility};
use rustc_resolve::rustdoc::{add_doc_fragment, attrs_to_doc_fragments, inner_docs, DocFragment};
@ -35,7 +36,7 @@ use rustc_target::spec::abi::Abi;
use crate::clean::cfg::Cfg;
use crate::clean::external_path;
use crate::clean::inline::{self, print_inlined_const};
use crate::clean::utils::{is_literal_expr, print_const_expr, print_evaluated_const};
use crate::clean::utils::{is_literal_expr, print_evaluated_const};
use crate::core::DocContext;
use crate::formats::cache::Cache;
use crate::formats::item_type::ItemType;
@ -2086,7 +2087,7 @@ impl Discriminant {
/// Will be `None` in the case of cross-crate reexports, and may be
/// simplified
pub(crate) fn expr(&self, tcx: TyCtxt<'_>) -> Option<String> {
self.expr.map(|body| print_const_expr(tcx, body))
self.expr.map(|body| rendered_const(tcx, body))
}
/// Will always be a machine readable number, without underscores or suffixes.
pub(crate) fn value(&self, tcx: TyCtxt<'_>) -> String {
@ -2326,7 +2327,7 @@ impl ConstantKind {
ConstantKind::TyConst { ref expr } => expr.to_string(),
ConstantKind::Extern { def_id } => print_inlined_const(tcx, def_id),
ConstantKind::Local { body, .. } | ConstantKind::Anonymous { body } => {
print_const_expr(tcx, body)
rendered_const(tcx, body)
}
}
}

View File

@ -14,6 +14,7 @@ use rustc_ast::tokenstream::TokenTree;
use rustc_hir as hir;
use rustc_hir::def::{DefKind, Res};
use rustc_hir::def_id::{DefId, LocalDefId, LOCAL_CRATE};
use rustc_metadata::rendered_const;
use rustc_middle::mir;
use rustc_middle::mir::interpret::ConstValue;
use rustc_middle::ty::{self, GenericArgKind, GenericArgsRef, TyCtxt};
@ -255,7 +256,7 @@ pub(crate) fn print_const(cx: &DocContext<'_>, n: ty::Const<'_>) -> String {
match n.kind() {
ty::ConstKind::Unevaluated(ty::UnevaluatedConst { def, args: _ }) => {
let s = if let Some(def) = def.as_local() {
print_const_expr(cx.tcx, cx.tcx.hir().body_owned_by(def))
rendered_const(cx.tcx, cx.tcx.hir().body_owned_by(def))
} else {
inline::print_inlined_const(cx.tcx, def)
};
@ -367,100 +368,6 @@ pub(crate) fn is_literal_expr(tcx: TyCtxt<'_>, hir_id: hir::HirId) -> bool {
false
}
/// Build a textual representation of an unevaluated constant expression.
///
/// If the const expression is too complex, an underscore `_` is returned.
/// For const arguments, it's `{ _ }` to be precise.
/// This means that the output is not necessarily valid Rust code.
///
/// Currently, only
///
/// * literals (optionally with a leading `-`)
/// * unit `()`
/// * blocks (`{ … }`) around simple expressions and
/// * paths without arguments
///
/// are considered simple enough. Simple blocks are included since they are
/// necessary to disambiguate unit from the unit type.
/// This list might get extended in the future.
///
/// Without this censoring, in a lot of cases the output would get too large
/// and verbose. Consider `match` expressions, blocks and deeply nested ADTs.
/// Further, private and `doc(hidden)` fields of structs would get leaked
/// since HIR datatypes like the `body` parameter do not contain enough
/// semantic information for this function to be able to hide them
/// at least not without significant performance overhead.
///
/// Whenever possible, prefer to evaluate the constant first and try to
/// use a different method for pretty-printing. Ideally this function
/// should only ever be used as a fallback.
pub(crate) fn print_const_expr(tcx: TyCtxt<'_>, body: hir::BodyId) -> String {
let hir = tcx.hir();
let value = &hir.body(body).value;
#[derive(PartialEq, Eq)]
enum Classification {
Literal,
Simple,
Complex,
}
use Classification::*;
fn classify(expr: &hir::Expr<'_>) -> Classification {
match &expr.kind {
hir::ExprKind::Unary(hir::UnOp::Neg, expr) => {
if matches!(expr.kind, hir::ExprKind::Lit(_)) { Literal } else { Complex }
}
hir::ExprKind::Lit(_) => Literal,
hir::ExprKind::Tup([]) => Simple,
hir::ExprKind::Block(hir::Block { stmts: [], expr: Some(expr), .. }, _) => {
if classify(expr) == Complex { Complex } else { Simple }
}
// Paths with a self-type or arguments are too “complex” following our measure since
// they may leak private fields of structs (with feature `adt_const_params`).
// Consider: `<Self as Trait<{ Struct { private: () } }>>::CONSTANT`.
// Paths without arguments are definitely harmless though.
hir::ExprKind::Path(hir::QPath::Resolved(_, hir::Path { segments, .. })) => {
if segments.iter().all(|segment| segment.args.is_none()) { Simple } else { Complex }
}
// FIXME: Claiming that those kinds of QPaths are simple is probably not true if the Ty
// contains const arguments. Is there a *concise* way to check for this?
hir::ExprKind::Path(hir::QPath::TypeRelative(..)) => Simple,
// FIXME: Can they contain const arguments and thus leak private struct fields?
hir::ExprKind::Path(hir::QPath::LangItem(..)) => Simple,
_ => Complex,
}
}
let classification = classify(value);
if classification == Literal
&& !value.span.from_expansion()
&& let Ok(snippet) = tcx.sess.source_map().span_to_snippet(value.span) {
// For literals, we avoid invoking the pretty-printer and use the source snippet instead to
// preserve certain stylistic choices the user likely made for the sake legibility like
//
// * hexadecimal notation
// * underscores
// * character escapes
//
// FIXME: This passes through `-/*spacer*/0` verbatim.
snippet
} else if classification == Simple {
// Otherwise we prefer pretty-printing to get rid of extraneous whitespace, comments and
// other formatting artifacts.
rustc_hir_pretty::id_to_string(&hir, body.hir_id)
} else if tcx.def_kind(hir.body_owner_def_id(body).to_def_id()) == DefKind::AnonConst {
// FIXME: Omit the curly braces if the enclosing expression is an array literal
// with a repeated element (an `ExprKind::Repeat`) as in such case it
// would not actually need any disambiguation.
"{ _ }".to_owned()
} else {
"_".to_owned()
}
}
/// Given a type Path, resolve it to a Type using the TyCtxt
pub(crate) fn resolve_type(cx: &mut DocContext<'_>, path: Path) -> Type {
debug!("resolve_type({path:?})");

View File

@ -8,6 +8,7 @@ use std::fmt;
use rustc_ast::ast;
use rustc_hir::{def::CtorKind, def::DefKind, def_id::DefId};
use rustc_metadata::rendered_const;
use rustc_middle::ty::{self, TyCtxt};
use rustc_span::symbol::sym;
use rustc_span::{Pos, Symbol};
@ -15,7 +16,6 @@ use rustc_target::spec::abi::Abi as RustcAbi;
use rustdoc_json_types::*;
use crate::clean::utils::print_const_expr;
use crate::clean::{self, ItemId};
use crate::formats::item_type::ItemType;
use crate::json::JsonRenderer;
@ -805,7 +805,7 @@ impl FromWithTcx<clean::Static> for Static {
Static {
type_: stat.type_.into_tcx(tcx),
mutable: stat.mutability == ast::Mutability::Mut,
expr: stat.expr.map(|e| print_const_expr(tcx, e)).unwrap_or_default(),
expr: stat.expr.map(|e| rendered_const(tcx, e)).unwrap_or_default(),
}
}
}

View File

@ -47,7 +47,7 @@ pub struct MyTypeWithStr(&'static str);
// @!hasraw show_const_contents/constant.MY_TYPE_WITH_STR.html '; //'
pub const MY_TYPE_WITH_STR: MyTypeWithStr = MyTypeWithStr("show this");
// @hasraw show_const_contents/constant.PI.html '= 3.14159265358979323846264338327950288f32;'
// @hasraw show_const_contents/constant.PI.html '= 3.14159265358979323846264338327950288_f32;'
// @hasraw show_const_contents/constant.PI.html '; // 3.14159274f32'
pub use std::f32::consts::PI;