Auto merge of #113542 - saethlin:adaptive-tables, r=b-naber

Adapt table sizes to the contents

This is an implementation of https://github.com/rust-lang/compiler-team/issues/666

The objective of this PR is to permit the rmeta format to accommodate larger crates that need offsets larger than a `u32` can store without compromising performance for crates that do not need such range. The second commit is a number of tiny optimization opportunities I noticed while looking at perf recordings of the first commit.

The rmeta tables need to have fixed-size elements to permit lazy random access. But the size only needs to be fixed _per table_, not per element type. This PR adds another `usize` to the table header which indicates the table element size. As each element of a table is set, we keep track of the widest encoded table value, then don't bother encoding all the unused trailing bytes on each value. When decoding table elements, we copy them to a full-width array if they are not already full-width.

`LazyArray` needs some special treatment. Most other values that are encoded in tables are indexes or offsets, and those tend to be small so we get to drop a lot of zero bytes off the end. But `LazyArray` encodes _two_ small values in a fixed-width table element: A position of the table and the length of the table. The treatment described above could trim zero bytes off the table length, but any nonzero length shields the position bytes from the optimization. To improve this, we interleave the bytes of position and length. This change is responsible for about half of the crate metadata win on many crates.

Fixes https://github.com/rust-lang/rust/issues/112934 (probably)
Fixes https://github.com/rust-lang/rust/issues/103607
This commit is contained in:
bors 2023-08-30 02:40:37 +00:00
commit d64c84562f
4 changed files with 124 additions and 46 deletions

View File

@ -250,6 +250,7 @@ impl<'a, 'tcx> Metadata<'a, 'tcx> for (CrateMetadataRef<'a>, TyCtxt<'tcx>) {
}
impl<T: ParameterizedOverTcx> LazyValue<T> {
#[inline]
fn decode<'a, 'tcx, M: Metadata<'a, 'tcx>>(self, metadata: M) -> T::Value<'tcx>
where
T::Value<'tcx>: Decodable<DecodeContext<'a, 'tcx>>,
@ -294,6 +295,7 @@ unsafe impl<'a, 'tcx, T: Decodable<DecodeContext<'a, 'tcx>>> TrustedLen
}
impl<T: ParameterizedOverTcx> LazyArray<T> {
#[inline]
fn decode<'a, 'tcx, M: Metadata<'a, 'tcx>>(
self,
metadata: M,
@ -360,8 +362,8 @@ impl<'a, 'tcx> DecodeContext<'a, 'tcx> {
self.read_lazy_offset_then(|pos| LazyArray::from_position_and_num_elems(pos, len))
}
fn read_lazy_table<I, T>(&mut self, len: usize) -> LazyTable<I, T> {
self.read_lazy_offset_then(|pos| LazyTable::from_position_and_encoded_size(pos, len))
fn read_lazy_table<I, T>(&mut self, width: usize, len: usize) -> LazyTable<I, T> {
self.read_lazy_offset_then(|pos| LazyTable::from_position_and_encoded_size(pos, width, len))
}
#[inline]
@ -420,6 +422,7 @@ impl<'a, 'tcx> TyDecoder for DecodeContext<'a, 'tcx> {
}
impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for CrateNum {
#[inline]
fn decode(d: &mut DecodeContext<'a, 'tcx>) -> CrateNum {
let cnum = CrateNum::from_u32(d.read_u32());
d.map_encoded_cnum_to_current(cnum)
@ -427,18 +430,21 @@ impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for CrateNum {
}
impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for DefIndex {
#[inline]
fn decode(d: &mut DecodeContext<'a, 'tcx>) -> DefIndex {
DefIndex::from_u32(d.read_u32())
}
}
impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for ExpnIndex {
#[inline]
fn decode(d: &mut DecodeContext<'a, 'tcx>) -> ExpnIndex {
ExpnIndex::from_u32(d.read_u32())
}
}
impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for ast::AttrId {
#[inline]
fn decode(d: &mut DecodeContext<'a, 'tcx>) -> ast::AttrId {
let sess = d.sess.expect("can't decode AttrId without Session");
sess.parse_sess.attr_id_generator.mk_attr_id()
@ -672,6 +678,7 @@ impl<'a, 'tcx, T> Decodable<DecodeContext<'a, 'tcx>> for LazyValue<T> {
}
impl<'a, 'tcx, T> Decodable<DecodeContext<'a, 'tcx>> for LazyArray<T> {
#[inline]
fn decode(decoder: &mut DecodeContext<'a, 'tcx>) -> Self {
let len = decoder.read_usize();
if len == 0 { LazyArray::default() } else { decoder.read_lazy_array(len) }
@ -680,8 +687,9 @@ impl<'a, 'tcx, T> Decodable<DecodeContext<'a, 'tcx>> for LazyArray<T> {
impl<'a, 'tcx, I: Idx, T> Decodable<DecodeContext<'a, 'tcx>> for LazyTable<I, T> {
fn decode(decoder: &mut DecodeContext<'a, 'tcx>) -> Self {
let width = decoder.read_usize();
let len = decoder.read_usize();
decoder.read_lazy_table(len)
decoder.read_lazy_table(width, len)
}
}

View File

@ -131,7 +131,8 @@ impl<'a, 'tcx, T> Encodable<EncodeContext<'a, 'tcx>> for LazyArray<T> {
impl<'a, 'tcx, I, T> Encodable<EncodeContext<'a, 'tcx>> for LazyTable<I, T> {
fn encode(&self, e: &mut EncodeContext<'a, 'tcx>) {
e.emit_usize(self.encoded_size);
e.emit_usize(self.width);
e.emit_usize(self.len);
e.emit_lazy_distance(self.position);
}
}

View File

@ -142,7 +142,11 @@ impl<T> LazyArray<T> {
/// eagerly and in-order.
struct LazyTable<I, T> {
position: NonZeroUsize,
encoded_size: usize,
/// The encoded size of the elements of a table is selected at runtime to drop
/// trailing zeroes. This is the number of bytes used for each table element.
width: usize,
/// How many elements are in the table.
len: usize,
_marker: PhantomData<fn(I) -> T>,
}
@ -153,9 +157,10 @@ impl<I: 'static, T: ParameterizedOverTcx> ParameterizedOverTcx for LazyTable<I,
impl<I, T> LazyTable<I, T> {
fn from_position_and_encoded_size(
position: NonZeroUsize,
encoded_size: usize,
width: usize,
len: usize,
) -> LazyTable<I, T> {
LazyTable { position, encoded_size, _marker: PhantomData }
LazyTable { position, width, len, _marker: PhantomData }
}
}

View File

@ -38,6 +38,12 @@ impl IsDefault for u32 {
}
}
impl IsDefault for u64 {
fn is_default(&self) -> bool {
*self == 0
}
}
impl<T> IsDefault for LazyArray<T> {
fn is_default(&self) -> bool {
self.num_elems == 0
@ -89,6 +95,20 @@ impl FixedSizeEncoding for u32 {
}
}
impl FixedSizeEncoding for u64 {
type ByteArray = [u8; 8];
#[inline]
fn from_bytes(b: &[u8; 8]) -> Self {
Self::from_le_bytes(*b)
}
#[inline]
fn write_to_bytes(self, b: &mut [u8; 8]) {
*b = self.to_le_bytes();
}
}
macro_rules! fixed_size_enum {
($ty:ty { $(($($pat:tt)*))* }) => {
impl FixedSizeEncoding for Option<$ty> {
@ -300,21 +320,21 @@ impl FixedSizeEncoding for UnusedGenericParams {
// generic `LazyValue<T>` impl, but in the general case we might not need / want
// to fit every `usize` in `u32`.
impl<T> FixedSizeEncoding for Option<LazyValue<T>> {
type ByteArray = [u8; 4];
type ByteArray = [u8; 8];
#[inline]
fn from_bytes(b: &[u8; 4]) -> Self {
let position = NonZeroUsize::new(u32::from_bytes(b) as usize)?;
fn from_bytes(b: &[u8; 8]) -> Self {
let position = NonZeroUsize::new(u64::from_bytes(b) as usize)?;
Some(LazyValue::from_position(position))
}
#[inline]
fn write_to_bytes(self, b: &mut [u8; 4]) {
fn write_to_bytes(self, b: &mut [u8; 8]) {
match self {
None => unreachable!(),
Some(lazy) => {
let position = lazy.position.get();
let position: u32 = position.try_into().unwrap();
let position: u64 = position.try_into().unwrap();
position.write_to_bytes(b)
}
}
@ -323,55 +343,75 @@ impl<T> FixedSizeEncoding for Option<LazyValue<T>> {
impl<T> LazyArray<T> {
#[inline]
fn write_to_bytes_impl(self, b: &mut [u8; 8]) {
let ([position_bytes, meta_bytes], []) = b.as_chunks_mut::<4>() else { panic!() };
fn write_to_bytes_impl(self, b: &mut [u8; 16]) {
let position = (self.position.get() as u64).to_le_bytes();
let len = (self.num_elems as u64).to_le_bytes();
let position = self.position.get();
let position: u32 = position.try_into().unwrap();
position.write_to_bytes(position_bytes);
let len = self.num_elems;
let len: u32 = len.try_into().unwrap();
len.write_to_bytes(meta_bytes);
// Element width is selected at runtime on a per-table basis by omitting trailing
// zero bytes in table elements. This works very naturally when table elements are
// simple numbers but `LazyArray` is a pair of integers. If naively encoded, the second
// element would shield the trailing zeroes in the first. Interleaving the bytes
// of the position and length exposes trailing zeroes in both to the optimization.
// We encode length second because we generally expect it to be smaller.
for i in 0..8 {
b[2 * i] = position[i];
b[2 * i + 1] = len[i];
}
}
fn from_bytes_impl(position_bytes: &[u8; 4], meta_bytes: &[u8; 4]) -> Option<LazyArray<T>> {
let position = NonZeroUsize::new(u32::from_bytes(position_bytes) as usize)?;
let len = u32::from_bytes(meta_bytes) as usize;
fn from_bytes_impl(position: &[u8; 8], meta: &[u8; 8]) -> Option<LazyArray<T>> {
let position = NonZeroUsize::new(u64::from_bytes(&position) as usize)?;
let len = u64::from_bytes(&meta) as usize;
Some(LazyArray::from_position_and_num_elems(position, len))
}
}
// Decoding helper for the encoding scheme used by `LazyArray`.
// Interleaving the bytes of the two integers exposes trailing bytes in the first integer
// to the varint scheme that we use for tables.
#[inline]
fn decode_interleaved(encoded: &[u8; 16]) -> ([u8; 8], [u8; 8]) {
let mut first = [0u8; 8];
let mut second = [0u8; 8];
for i in 0..8 {
first[i] = encoded[2 * i];
second[i] = encoded[2 * i + 1];
}
(first, second)
}
impl<T> FixedSizeEncoding for LazyArray<T> {
type ByteArray = [u8; 8];
type ByteArray = [u8; 16];
#[inline]
fn from_bytes(b: &[u8; 8]) -> Self {
let ([position_bytes, meta_bytes], []) = b.as_chunks::<4>() else { panic!() };
if *meta_bytes == [0; 4] {
fn from_bytes(b: &[u8; 16]) -> Self {
let (position, meta) = decode_interleaved(b);
if meta == [0; 8] {
return Default::default();
}
LazyArray::from_bytes_impl(position_bytes, meta_bytes).unwrap()
LazyArray::from_bytes_impl(&position, &meta).unwrap()
}
#[inline]
fn write_to_bytes(self, b: &mut [u8; 8]) {
fn write_to_bytes(self, b: &mut [u8; 16]) {
assert!(!self.is_default());
self.write_to_bytes_impl(b)
}
}
impl<T> FixedSizeEncoding for Option<LazyArray<T>> {
type ByteArray = [u8; 8];
type ByteArray = [u8; 16];
#[inline]
fn from_bytes(b: &[u8; 8]) -> Self {
let ([position_bytes, meta_bytes], []) = b.as_chunks::<4>() else { panic!() };
LazyArray::from_bytes_impl(position_bytes, meta_bytes)
fn from_bytes(b: &[u8; 16]) -> Self {
let (position, meta) = decode_interleaved(b);
LazyArray::from_bytes_impl(&position, &meta)
}
#[inline]
fn write_to_bytes(self, b: &mut [u8; 8]) {
fn write_to_bytes(self, b: &mut [u8; 16]) {
match self {
None => unreachable!(),
Some(lazy) => lazy.write_to_bytes_impl(b),
@ -381,13 +421,14 @@ impl<T> FixedSizeEncoding for Option<LazyArray<T>> {
/// Helper for constructing a table's serialization (also see `Table`).
pub(super) struct TableBuilder<I: Idx, T: FixedSizeEncoding> {
width: usize,
blocks: IndexVec<I, T::ByteArray>,
_marker: PhantomData<T>,
}
impl<I: Idx, T: FixedSizeEncoding> Default for TableBuilder<I, T> {
fn default() -> Self {
TableBuilder { blocks: Default::default(), _marker: PhantomData }
TableBuilder { width: 0, blocks: Default::default(), _marker: PhantomData }
}
}
@ -415,40 +456,63 @@ impl<I: Idx, const N: usize, T: FixedSizeEncoding<ByteArray = [u8; N]>> TableBui
// > store bit-masks of which item in each bucket is actually serialized).
let block = self.blocks.ensure_contains_elem(i, || [0; N]);
value.write_to_bytes(block);
if self.width != N {
let width = N - trailing_zeros(block);
self.width = self.width.max(width);
}
}
}
pub(crate) fn encode(&self, buf: &mut FileEncoder) -> LazyTable<I, T> {
let pos = buf.position();
let width = self.width;
for block in &self.blocks {
buf.emit_raw_bytes(block);
buf.emit_raw_bytes(&block[..width]);
}
let num_bytes = self.blocks.len() * N;
LazyTable::from_position_and_encoded_size(
NonZeroUsize::new(pos as usize).unwrap(),
num_bytes,
width,
self.blocks.len(),
)
}
}
fn trailing_zeros(x: &[u8]) -> usize {
x.iter().rev().take_while(|b| **b == 0).count()
}
impl<I: Idx, const N: usize, T: FixedSizeEncoding<ByteArray = [u8; N]> + ParameterizedOverTcx>
LazyTable<I, T>
where
for<'tcx> T::Value<'tcx>: FixedSizeEncoding<ByteArray = [u8; N]>,
{
/// Given the metadata, extract out the value at a particular index (if any).
#[inline(never)]
pub(super) fn get<'a, 'tcx, M: Metadata<'a, 'tcx>>(&self, metadata: M, i: I) -> T::Value<'tcx> {
trace!("LazyTable::lookup: index={:?} len={:?}", i, self.encoded_size);
trace!("LazyTable::lookup: index={:?} len={:?}", i, self.len);
let start = self.position.get();
let bytes = &metadata.blob()[start..start + self.encoded_size];
let (bytes, []) = bytes.as_chunks::<N>() else { panic!() };
bytes.get(i.index()).map_or_else(Default::default, FixedSizeEncoding::from_bytes)
// Access past the end of the table returns a Default
if i.index() >= self.len {
return Default::default();
}
let width = self.width;
let start = self.position.get() + (width * i.index());
let end = start + width;
let bytes = &metadata.blob()[start..end];
if let Ok(fixed) = bytes.try_into() {
FixedSizeEncoding::from_bytes(fixed)
} else {
let mut fixed = [0u8; N];
fixed[..width].copy_from_slice(bytes);
FixedSizeEncoding::from_bytes(&fixed)
}
}
/// Size of the table in entries, including possible gaps.
pub(super) fn size(&self) -> usize {
self.encoded_size / N
self.len
}
}