Tweak thread names for CGU processing.

For non-incremental builds on Unix, currently all the thread names look like `opt regex.f10ba03eb5ec7975-cgu.0`. But they are truncated by `pthread_setname` to `opt regex.f10ba`, hiding the numeric suffix that distinguishes them. This is really annoying when using a profiler like Samply. This commit changes these thread names to a form like `opt cgu.0`, which is much better.
2023-06-22 08:58:48 +10:00 · 2023-06-22 08:58:48 +10:00 · 666b1b68a7
parent 487bdeb519
commit 666b1b68a7
2 changed files with 45 additions and 21 deletions
--- a/compiler/rustc_codegen_ssa/src/back/write.rs
+++ b/compiler/rustc_codegen_ssa/src/back/write.rs
@ -698,28 +698,49 @@ impl<B: WriteBackendMethods> WorkItem<B> {

    /// Generate a short description of this work item suitable for use as a thread name.
    fn short_description(&self) -> String {
-        // `pthread_setname()` on *nix is limited to 15 characters and longer names are ignored.
-        // Use very short descriptions in this case to maximize the space available for the module name.
-        // Windows does not have that limitation so use slightly more descriptive names there.
+        // `pthread_setname()` on *nix ignores anything beyond the first 15
+        // bytes. Use short descriptions to maximize the space available for
+        // the module name.
+        #[cfg(not(windows))]
+        fn desc(short: &str, _long: &str, name: &str) -> String {
+            // The short label is three bytes, and is followed by a space. That
+            // leaves 11 bytes for the CGU name. How we obtain those 11 bytes
+            // depends on the the CGU name form.
+            //
+            // - Non-incremental, e.g. `regex.f10ba03eb5ec7975-cgu.0`: the part
+            //   before the `-cgu.0` is the same for every CGU, so use the
+            //   `cgu.0` part. The number suffix will be different for each
+            //   CGU.
+            //
+            // - Incremental (normal), e.g. `2i52vvl2hco29us0`: use the whole
+            //   name because each CGU will have a unique ASCII hash, and the
+            //   first 11 bytes will be enough to identify it.
+            //
+            // - Incremental (with `-Zhuman-readable-cgu-names`), e.g.
+            //   `regex.f10ba03eb5ec7975-re_builder.volatile`: use the whole
+            //   name. The first 11 bytes won't be enough to uniquely identify
+            //   it, but no obvious substring will, and this is a rarely used
+            //   option so it doesn't matter much.
+            //
+            assert_eq!(short.len(), 3);
+            let name = if let Some(index) = name.find("-cgu.") {
+                &name[index + 1..] // +1 skips the leading '-'.
+            } else {
+                name
+            };
+            format!("{short} {name}")
+        }
+
+        // Windows has no thread name length limit, so use more descriptive names.
+        #[cfg(windows)]
+        fn desc(_short: &str, long: &str, name: &str) -> String {
+            format!("{long} {name}")
+        }
+
        match self {
-            WorkItem::Optimize(m) => {
-                #[cfg(windows)]
-                return format!("optimize module {}", m.name);
-                #[cfg(not(windows))]
-                return format!("opt {}", m.name);
-            }
-            WorkItem::CopyPostLtoArtifacts(m) => {
-                #[cfg(windows)]
-                return format!("copy LTO artifacts for {}", m.name);
-                #[cfg(not(windows))]
-                return format!("copy {}", m.name);
-            }
-            WorkItem::LTO(m) => {
-                #[cfg(windows)]
-                return format!("LTO module {}", m.name());
-                #[cfg(not(windows))]
-                return format!("LTO {}", m.name());
-            }
+            WorkItem::Optimize(m) => desc("opt", "optimize module {}", &m.name),
+            WorkItem::CopyPostLtoArtifacts(m) => desc("cpy", "copy LTO artifacts for {}", &m.name),
+            WorkItem::LTO(m) => desc("lto", "LTO module {}", m.name()),
        }
    }
 }
--- a/compiler/rustc_monomorphize/src/partitioning.rs
+++ b/compiler/rustc_monomorphize/src/partitioning.rs
@ -431,6 +431,9 @@ fn merge_codegen_units<'tcx>(
        codegen_units.sort_by_key(|cgu| cmp::Reverse(cgu.size_estimate()));
        let num_digits = codegen_units.len().ilog10() as usize + 1;
        for (index, cgu) in codegen_units.iter_mut().enumerate() {
+            // Note: `WorkItem::short_description` depends on this name ending
+            // with `-cgu.` followed by a numeric suffix. Please keep it in
+            // sync with this code.
            let suffix = format!("{index:0num_digits$}");
            let numbered_codegen_unit_name =
                cgu_name_builder.build_cgu_name_no_mangle(LOCAL_CRATE, &["cgu"], Some(suffix));