Merge commit '54cbb6e7531f95e086d5c3dd0d5e73bfbe3545ba' into sync_cg_clif-2024-03-08

This commit is contained in:
bjorn3 2024-03-08 20:41:29 +00:00
parent 5ffd498ca1
commit 5ec45d3d7a
19 changed files with 308 additions and 121 deletions

View File

@ -44,9 +44,10 @@ jobs:
env:
TARGET_TRIPLE: x86_64-apple-darwin
# cross-compile from Linux to Windows using mingw
- os: ubuntu-latest
env:
TARGET_TRIPLE: x86_64-pc-windows-gnu
# FIXME The wine version in Ubuntu 22.04 is missing ProcessPrng
#- os: ubuntu-latest
# env:
# TARGET_TRIPLE: x86_64-pc-windows-gnu
- os: ubuntu-latest
env:
TARGET_TRIPLE: aarch64-unknown-linux-gnu
@ -80,11 +81,11 @@ jobs:
if: matrix.os == 'windows-latest' && matrix.env.TARGET_TRIPLE == 'x86_64-pc-windows-gnu'
run: rustup set default-host x86_64-pc-windows-gnu
- name: Install MinGW toolchain and wine
if: matrix.os == 'ubuntu-latest' && matrix.env.TARGET_TRIPLE == 'x86_64-pc-windows-gnu'
run: |
sudo apt-get update
sudo apt-get install -y gcc-mingw-w64-x86-64 wine-stable
#- name: Install MinGW toolchain and wine
# if: matrix.os == 'ubuntu-latest' && matrix.env.TARGET_TRIPLE == 'x86_64-pc-windows-gnu'
# run: |
# sudo apt-get update
# sudo apt-get install -y gcc-mingw-w64-x86-64 wine-stable
- name: Install AArch64 toolchain and qemu
if: matrix.os == 'ubuntu-latest' && matrix.env.TARGET_TRIPLE == 'aarch64-unknown-linux-gnu'

23
.gitignore vendored
View File

@ -1,18 +1,21 @@
/target
/build_system/target
**/*.rs.bk
*.rlib
*.o
perf.data
perf.data.old
*.events
*.string*
# Build artifacts during normal use
/y.bin
/y.bin.dSYM
/y.exe
/y.pdb
/download
/build
/dist
/target
/build_system/target
# Downloaded by certain scripts
/rust
/download
/git-fixed-subtree.sh
# Various things that can be created during development
*.rlib
*.o
perf.data
perf.data.old
*.mm_profdata

56
Cargo.lock generated
View File

@ -46,18 +46,18 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "cranelift-bforest"
version = "0.104.0"
version = "0.105.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d819feeda4c420a18f1e28236ca0ce1177b22bf7c8a44ddee92dfe40de15bcf0"
checksum = "9515fcc42b6cb5137f76b84c1a6f819782d0cf12473d145d3bc5cd67eedc8bc2"
dependencies = [
"cranelift-entity",
]
[[package]]
name = "cranelift-codegen"
version = "0.104.0"
version = "0.105.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e9b8d03d5bdbca7e5f72b0e0a0f69933ed1f09e24be6c075aa6fe3f802b0cc0c"
checksum = "1ad827c6071bfe6d22de1bc331296a29f9ddc506ff926d8415b435ec6a6efce0"
dependencies = [
"bumpalo",
"cranelift-bforest",
@ -76,39 +76,39 @@ dependencies = [
[[package]]
name = "cranelift-codegen-meta"
version = "0.104.0"
version = "0.105.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a3fd3664e38e51649b17dc30cfdd561273fe2f590dcd013fb75d9eabc6272dfb"
checksum = "10e6b36237a9ca2ce2fb4cc7741d418a080afa1327402138412ef85d5367bef1"
dependencies = [
"cranelift-codegen-shared",
]
[[package]]
name = "cranelift-codegen-shared"
version = "0.104.0"
version = "0.105.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4b031ec5e605828975952622b5a77d49126f20ffe88d33719a0af66b23a0fc36"
checksum = "c36bf4bfb86898a94ccfa773a1f86e8a5346b1983ff72059bdd2db4600325251"
[[package]]
name = "cranelift-control"
version = "0.104.0"
version = "0.105.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fada054d017cf2ed8f7ed2336e0517fc1b19e6825be1790de9eb00c94788362b"
checksum = "7cbf36560e7a6bd1409ca91e7b43b2cc7ed8429f343d7605eadf9046e8fac0d0"
dependencies = [
"arbitrary",
]
[[package]]
name = "cranelift-entity"
version = "0.104.0"
version = "0.105.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "177b6f94ae8de6348eb45bf977c79ab9e3c40fc3ac8cb7ed8109560ea39bee7d"
checksum = "a71e11061a75b1184c09bea97c026a88f08b59ade96a7bb1f259d4ea0df2e942"
[[package]]
name = "cranelift-frontend"
version = "0.104.0"
version = "0.105.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ebebd23a69a23e3ddea78e98ff3a2de222e88c8e045d81ef4a72f042e0d79dbd"
checksum = "af5d4da63143ee3485c7bcedde0a818727d737d1083484a0ceedb8950c89e495"
dependencies = [
"cranelift-codegen",
"log",
@ -118,15 +118,15 @@ dependencies = [
[[package]]
name = "cranelift-isle"
version = "0.104.0"
version = "0.105.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1571bfc14df8966d12c6121b5325026591a4b4009e22fea0fe3765ab7cd33b96"
checksum = "457a9832b089e26f5eea70dcf49bed8ec6edafed630ce7c83161f24d46ab8085"
[[package]]
name = "cranelift-jit"
version = "0.104.0"
version = "0.105.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2f61e236d7622c3c43016e8b0f3ba27136e21ac7de328c7fda902e61db1de851"
checksum = "0af95fe68d5a10919012c8db82b1d59820405b8001c8c6d05f94b08031334fa9"
dependencies = [
"anyhow",
"cranelift-codegen",
@ -144,9 +144,9 @@ dependencies = [
[[package]]
name = "cranelift-module"
version = "0.104.0"
version = "0.105.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f30c6820342015c5009070e3e48d1da7b13521399de904663f1c84f5ee839657"
checksum = "11b0b201fa10a4014062d4c56c307c8d18fdf9a84cb5279efe6080241f42c7a7"
dependencies = [
"anyhow",
"cranelift-codegen",
@ -155,9 +155,9 @@ dependencies = [
[[package]]
name = "cranelift-native"
version = "0.104.0"
version = "0.105.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "35a69c37e0c10b46fe5527f2397ac821046efbf5f7ec112c8b84df25712f465b"
checksum = "9b490d579df1ce365e1ea359e24ed86d82289fa785153327c2f6a69a59a731e4"
dependencies = [
"cranelift-codegen",
"libc",
@ -166,9 +166,9 @@ dependencies = [
[[package]]
name = "cranelift-object"
version = "0.104.0"
version = "0.105.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24425a329b4343177d5f1852243841dcec17f929d72c0e7f41262140155e55e7"
checksum = "fb7e821ac6db471bcdbd004e5a4fa0d374f1046bd3a2ce278c332e0b0c01ca63"
dependencies = [
"anyhow",
"cranelift-codegen",
@ -241,9 +241,9 @@ dependencies = [
[[package]]
name = "libc"
version = "0.2.148"
version = "0.2.153"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9cdc71e17332e86d2e1d38c1f99edcb6288ee11b815fb1a4b049eaa2114d369b"
checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd"
[[package]]
name = "libloading"
@ -410,9 +410,9 @@ checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "wasmtime-jit-icache-coherence"
version = "17.0.0"
version = "18.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bdc26415bb89e9ccd3bdc498fef63aabf665c4c0dd710c107691deb9694955da"
checksum = "33f4121cb29dda08139b2824a734dd095d83ce843f2d613a84eb580b9cfc17ac"
dependencies = [
"cfg-if",
"libc",

View File

@ -8,12 +8,12 @@ crate-type = ["dylib"]
[dependencies]
# These have to be in sync with each other
cranelift-codegen = { version = "0.104", default-features = false, features = ["std", "unwind", "all-arch"] }
cranelift-frontend = { version = "0.104" }
cranelift-module = { version = "0.104" }
cranelift-native = { version = "0.104" }
cranelift-jit = { version = "0.104", optional = true }
cranelift-object = { version = "0.104" }
cranelift-codegen = { version = "0.105.2", default-features = false, features = ["std", "unwind", "all-arch"] }
cranelift-frontend = { version = "0.105.2" }
cranelift-module = { version = "0.105.2" }
cranelift-native = { version = "0.105.2" }
cranelift-jit = { version = "0.105.2", optional = true }
cranelift-object = { version = "0.105.2" }
target-lexicon = "0.12.0"
gimli = { version = "0.28", default-features = false, features = ["write"]}
object = { version = "0.32", default-features = false, features = ["std", "read_core", "write", "archive", "coff", "elf", "macho", "pe"] }

View File

@ -123,11 +123,6 @@ You need to do this steps to successfully compile and use the cranelift backend
You can also set `rust-analyzer.rustc.source` to your rust workspace to get rust-analyzer to understand your changes.
## Configuration
See the documentation on the `BackendConfig` struct in [config.rs](src/config.rs) for all
configuration options.
## Not yet supported
* SIMD ([tracked here](https://github.com/rust-lang/rustc_codegen_cranelift/issues/171), `std::simd` fully works, `std::arch` is partially supported)

View File

@ -1,5 +1,6 @@
use std::ffi::OsStr;
use std::fs;
use std::hash::{Hash, Hasher};
use std::path::{Path, PathBuf};
use std::process::Command;
@ -71,7 +72,11 @@ fn hash_file(file: &std::path::Path) -> u64 {
let contents = std::fs::read(file).unwrap();
#[allow(deprecated)]
let mut hasher = std::hash::SipHasher::new();
std::hash::Hash::hash(&contents, &mut hasher);
// The following is equivalent to
// std::hash::Hash::hash(&contents, &mut hasher);
// but gives the same result independent of host byte order.
hasher.write_usize(contents.len().to_le());
Hash::hash_slice(&contents, &mut hasher);
std::hash::Hasher::finish(&hasher)
}
@ -80,16 +85,26 @@ fn hash_dir(dir: &std::path::Path) -> u64 {
for entry in std::fs::read_dir(dir).unwrap() {
let entry = entry.unwrap();
if entry.file_type().unwrap().is_dir() {
sub_hashes
.insert(entry.file_name().to_str().unwrap().to_owned(), hash_dir(&entry.path()));
sub_hashes.insert(
entry.file_name().to_str().unwrap().to_owned(),
hash_dir(&entry.path()).to_le(),
);
} else {
sub_hashes
.insert(entry.file_name().to_str().unwrap().to_owned(), hash_file(&entry.path()));
sub_hashes.insert(
entry.file_name().to_str().unwrap().to_owned(),
hash_file(&entry.path()).to_le(),
);
}
}
#[allow(deprecated)]
let mut hasher = std::hash::SipHasher::new();
std::hash::Hash::hash(&sub_hashes, &mut hasher);
// The following is equivalent to
// std::hash::Hash::hash(&sub_hashes, &mut hasher);
// but gives the same result independent of host byte order.
hasher.write_usize(sub_hashes.len().to_le());
for elt in sub_hashes {
elt.hash(&mut hasher);
}
std::hash::Hasher::finish(&hasher)
}

View File

@ -133,8 +133,8 @@ pub(crate) static REGEX: CargoProject = CargoProject::new(&REGEX_REPO.source_dir
pub(crate) static PORTABLE_SIMD_REPO: GitRepo = GitRepo::github(
"rust-lang",
"portable-simd",
"97007cc2e70df8c97326ce896a79e2f0ce4dd98b",
"e54a16035cedf205",
"5794c837bc605c4cd9dbb884285976dfdb293cce",
"a64d8fdd0ed0d9c4",
"portable-simd",
);

View File

@ -39,6 +39,6 @@ index 42a26ae..5ac1042 100644
+#![cfg(test)]
#![feature(alloc_layout_extra)]
#![feature(array_chunks)]
#![feature(array_methods)]
#![feature(array_windows)]
--
2.21.0 (Apple Git-122)

View File

@ -1,26 +0,0 @@
From dd82e95c9de212524e14fc60155de1ae40156dfc Mon Sep 17 00:00:00 2001
From: bjorn3 <bjorn3@users.noreply.github.com>
Date: Sun, 24 Nov 2019 15:34:06 +0100
Subject: [PATCH] [core] Ignore failing tests
---
library/core/tests/iter.rs | 4 ++++
library/core/tests/num/bignum.rs | 10 ++++++++++
library/core/tests/num/mod.rs | 5 +++--
library/core/tests/time.rs | 1 +
4 files changed, 18 insertions(+), 2 deletions(-)
diff --git a/atomic.rs b/atomic.rs
index 13b12db..96fe4b9 100644
--- a/atomic.rs
+++ b/atomic.rs
@@ -185,6 +185,7 @@ fn ptr_bitops() {
}
#[test]
+#[cfg_attr(target_arch = "s390x", ignore)] // s390x backend doesn't support stack alignment >8 bytes
#[cfg(any(not(target_arch = "arm"), target_os = "linux"))] // Missing intrinsic in compiler-builtins
fn ptr_bitops_tagging() {
#[repr(align(16))]
--
2.21.0 (Apple Git-122)

View File

@ -150,9 +150,9 @@ dependencies = [
[[package]]
name = "hermit-abi"
version = "0.3.3"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7"
checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024"
dependencies = [
"compiler_builtins",
"rustc-std-workspace-alloc",
@ -161,9 +161,9 @@ dependencies = [
[[package]]
name = "libc"
version = "0.2.150"
version = "0.2.153"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c"
checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd"
dependencies = [
"rustc-std-workspace-core",
]
@ -398,6 +398,7 @@ version = "0.0.0"
dependencies = [
"core",
"getopts",
"libc",
"panic_abort",
"panic_unwind",
"std",

View File

@ -1,3 +1,3 @@
[toolchain]
channel = "nightly-2024-01-26"
channel = "nightly-2024-03-08"
components = ["rust-src", "rustc-dev", "llvm-tools"]

View File

@ -1,5 +1,4 @@
ignore = [
"y.rs",
"example/gen_block_iterate.rs", # uses edition 2024
]

View File

@ -10,12 +10,26 @@ pushd rust
command -v rg >/dev/null 2>&1 || cargo install ripgrep
# FIXME(rust-lang/rust#122196) fix stage0 rmake.rs run-make tests and remove
# this workaround
for test in $(ls tests/run-make); do
if [[ -e "tests/run-make/$test/rmake.rs" ]]; then
rm -r "tests/run-make/$test"
fi
done
# FIXME remove this workaround once ICE tests no longer emit an outdated nightly message
for test in $(rg -i --files-with-matches "//@(\[.*\])? failure-status: 101" tests/ui); do
echo "rm $test"
rm $test
done
rm -r tests/ui/{unsized-locals/,lto/,linkage*} || true
for test in $(rg --files-with-matches "lto" tests/{codegen-units,ui,incremental}); do
rm $test
done
for test in $(rg -i --files-with-matches "//(\[\w+\])?~[^\|]*\s*ERR|// error-pattern:|// build-fail|// run-fail|-Cllvm-args" tests/ui); do
for test in $(rg -i --files-with-matches "//(\[\w+\])?~[^\|]*\s*ERR|//@ error-pattern:|//@(\[.*\])? build-fail|//@(\[.*\])? run-fail|-Cllvm-args" tests/ui); do
rm $test
done
@ -43,8 +57,8 @@ rm tests/ui/proc-macro/allowed-signatures.rs
rm tests/ui/proc-macro/no-mangle-in-proc-macro-issue-111888.rs
# vendor intrinsics
rm tests/ui/sse2.rs # CodegenBackend::target_features not yet implemented
rm tests/ui/simd/array-type.rs # "Index argument for `simd_insert` is not a constant"
rm tests/ui/asm/x86_64/evex512-implicit-feature.rs # unimplemented AVX512 x86 vendor intrinsic
# exotic linkages
rm tests/ui/issues/issue-33992.rs # unsupported linkages
@ -62,14 +76,12 @@ rm -r tests/run-pass-valgrind/unsized-locals
# misc unimplemented things
rm tests/ui/intrinsics/intrinsic-nearby.rs # unimplemented nearbyintf32 and nearbyintf64 intrinsics
rm tests/ui/target-feature/missing-plusminus.rs # error not implemented
rm tests/ui/fn/dyn-fn-alignment.rs # wants a 256 byte alignment
rm -r tests/run-make/emit-named-files # requires full --emit support
rm -r tests/run-make/repr128-dwarf # debuginfo test
rm -r tests/run-make/split-debuginfo # same
rm -r tests/run-make/symbols-include-type-name # --emit=asm not supported
rm -r tests/run-make/target-specs # i686 not supported by Cranelift
rm -r tests/run-make/mismatching-target-triples # same
rm tests/ui/asm/x86_64/issue-82869.rs # vector regs in inline asm not yet supported
rm tests/ui/asm/x86_64/issue-96797.rs # const and sym inline asm operands don't work entirely correctly
# requires LTO
@ -109,8 +121,6 @@ rm -r tests/run-make/optimization-remarks-dir # remarks are LLVM specific
rm tests/ui/mir/mir_misc_casts.rs # depends on deduplication of constants
rm tests/ui/mir/mir_raw_fat_ptr.rs # same
rm tests/ui/consts/issue-33537.rs # same
rm tests/ui/layout/valid_range_oob.rs # different ICE message
rm tests/ui/const-generics/generic_const_exprs/issue-80742.rs # gives error instead of ICE with cg_clif
# rustdoc-clif passes extra args, suppressing the help message when no args are passed
rm -r tests/run-make/issue-88756-default-output
@ -119,15 +129,12 @@ rm -r tests/run-make/issue-88756-default-output
# should work when using ./x.py test the way it is intended
# ============================================================
rm -r tests/run-make/remap-path-prefix-dwarf # requires llvm-dwarfdump
rm -r tests/ui/consts/missing_span_in_backtrace.rs # expects sysroot source to be elsewhere
# genuine bugs
# ============
rm tests/incremental/spike-neg1.rs # errors out for some reason
rm tests/incremental/spike-neg2.rs # same
rm tests/ui/simd/simd-bitmask.rs # simd_bitmask doesn't implement [u*; N] return type
rm -r tests/run-make/issue-51671 # wrong filename given in case of --emit=obj
rm -r tests/run-make/issue-30063 # same
rm -r tests/run-make/multiple-emits # same
@ -145,6 +152,7 @@ rm tests/ui/codegen/subtyping-enforces-type-equality.rs # assert_assignable bug
# ======================
rm tests/ui/backtrace.rs # TODO warning
rm tests/ui/process/nofile-limit.rs # TODO some AArch64 linking issue
rm tests/ui/async-await/async-closures/once.rs # FIXME bug in the rustc FnAbi calculation code
rm tests/ui/stdio-is-blocking.rs # really slow with unoptimized libstd

View File

@ -392,18 +392,25 @@ impl<'tcx> FunctionCx<'_, '_, 'tcx> {
}
pub(crate) fn create_stack_slot(&mut self, size: u32, align: u32) -> Pointer {
if align <= 16 {
let abi_align = if self.tcx.sess.target.arch == "s390x" { 8 } else { 16 };
if align <= abi_align {
let stack_slot = self.bcx.create_sized_stack_slot(StackSlotData {
kind: StackSlotKind::ExplicitSlot,
// FIXME Don't force the size to a multiple of 16 bytes once Cranelift gets a way to
// specify stack slot alignment.
size: (size + 15) / 16 * 16,
// FIXME Don't force the size to a multiple of <abi_align> bytes once Cranelift gets
// a way to specify stack slot alignment.
size: (size + abi_align - 1) / abi_align * abi_align,
});
Pointer::stack_slot(stack_slot)
} else {
// Alignment is too big to handle using the above hack. Dynamically realign a stack slot
// instead. This wastes some space for the realignment.
let base_ptr = self.create_stack_slot(size + align, 16).get_addr(self);
let stack_slot = self.bcx.create_sized_stack_slot(StackSlotData {
kind: StackSlotKind::ExplicitSlot,
// FIXME Don't force the size to a multiple of <abi_align> bytes once Cranelift gets
// a way to specify stack slot alignment.
size: (size + align) / abi_align * abi_align,
});
let base_ptr = self.bcx.ins().stack_addr(self.pointer_type, stack_slot, 0);
let misalign_offset = self.bcx.ins().urem_imm(base_ptr, i64::from(align));
let realign_offset = self.bcx.ins().irsub_imm(misalign_offset, i64::from(align));
Pointer::new(self.bcx.ins().iadd(base_ptr, realign_offset))

View File

@ -372,7 +372,13 @@ fn define_all_allocs(tcx: TyCtxt<'_>, module: &mut dyn Module, cx: &mut Constant
}
let bytes = alloc.inspect_with_uninit_and_ptr_outside_interpreter(0..alloc.len()).to_vec();
data.define(bytes.into_boxed_slice());
if bytes.is_empty() {
// FIXME(bytecodealliance/wasmtime#7918) cranelift-jit has a bug where it causes UB on
// empty data objects
data.define(Box::new([0]));
} else {
data.define(bytes.into_boxed_slice());
}
for &(offset, prov) in alloc.provenance().ptrs().iter() {
let alloc_id = prov.alloc_id();

View File

@ -170,6 +170,65 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
}
}
"llvm.x86.sse.add.ss" => {
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_ss&ig_expand=171
intrinsic_args!(fx, args => (a, b); intrinsic);
assert_eq!(a.layout(), b.layout());
assert_eq!(a.layout(), ret.layout());
let layout = a.layout();
let (_, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
assert!(lane_ty.is_floating_point());
let ret_lane_layout = fx.layout_of(lane_ty);
ret.write_cvalue(fx, a);
let a_lane = a.value_lane(fx, 0).load_scalar(fx);
let b_lane = b.value_lane(fx, 0).load_scalar(fx);
let res = fx.bcx.ins().fadd(a_lane, b_lane);
let res_lane = CValue::by_val(res, ret_lane_layout);
ret.place_lane(fx, 0).write_cvalue(fx, res_lane);
}
"llvm.x86.sse.sqrt.ps" => {
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sqrt_ps&ig_expand=6245
intrinsic_args!(fx, args => (a); intrinsic);
// FIXME use vector instructions when possible
simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| {
fx.bcx.ins().sqrt(lane)
});
}
"llvm.x86.sse.max.ps" => {
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_ps&ig_expand=4357
intrinsic_args!(fx, args => (a, b); intrinsic);
simd_pair_for_each_lane(
fx,
a,
b,
ret,
&|fx, _lane_ty, _res_lane_ty, a_lane, b_lane| fx.bcx.ins().fmax(a_lane, b_lane),
);
}
"llvm.x86.sse.min.ps" => {
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_ps&ig_expand=4489
intrinsic_args!(fx, args => (a, b); intrinsic);
simd_pair_for_each_lane(
fx,
a,
b,
ret,
&|fx, _lane_ty, _res_lane_ty, a_lane, b_lane| fx.bcx.ins().fmin(a_lane, b_lane),
);
}
"llvm.x86.sse.cmp.ps" | "llvm.x86.sse2.cmp.pd" => {
let (x, y, kind) = match args {
[x, y, kind] => (x, y, kind),
@ -1067,6 +1126,122 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
);
}
"llvm.x86.sha1rnds4" => {
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha1rnds4_epu32&ig_expand=5877
intrinsic_args!(fx, args => (a, b, _func); intrinsic);
let a = a.load_scalar(fx);
let b = b.load_scalar(fx);
let func = if let Some(func) =
crate::constant::mir_operand_get_const_val(fx, &args[2].node)
{
func
} else {
fx.tcx
.dcx()
.span_fatal(span, "Func argument for `_mm_sha1rnds4_epu32` is not a constant");
};
let func = func.try_to_u8().unwrap_or_else(|_| panic!("kind not scalar: {:?}", func));
codegen_inline_asm_inner(
fx,
&[InlineAsmTemplatePiece::String(format!("sha1rnds4 xmm1, xmm2, {func}"))],
&[
CInlineAsmOperand::InOut {
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
_late: true,
in_value: a,
out_place: Some(ret),
},
CInlineAsmOperand::In {
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm2)),
value: b,
},
],
InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
);
}
"llvm.x86.sha1msg1" => {
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha1msg1_epu32&ig_expand=5874
intrinsic_args!(fx, args => (a, b); intrinsic);
let a = a.load_scalar(fx);
let b = b.load_scalar(fx);
codegen_inline_asm_inner(
fx,
&[InlineAsmTemplatePiece::String("sha1msg1 xmm1, xmm2".to_string())],
&[
CInlineAsmOperand::InOut {
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
_late: true,
in_value: a,
out_place: Some(ret),
},
CInlineAsmOperand::In {
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm2)),
value: b,
},
],
InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
);
}
"llvm.x86.sha1msg2" => {
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha1msg2_epu32&ig_expand=5875
intrinsic_args!(fx, args => (a, b); intrinsic);
let a = a.load_scalar(fx);
let b = b.load_scalar(fx);
codegen_inline_asm_inner(
fx,
&[InlineAsmTemplatePiece::String("sha1msg2 xmm1, xmm2".to_string())],
&[
CInlineAsmOperand::InOut {
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
_late: true,
in_value: a,
out_place: Some(ret),
},
CInlineAsmOperand::In {
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm2)),
value: b,
},
],
InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
);
}
"llvm.x86.sha1nexte" => {
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha1nexte_epu32&ig_expand=5876
intrinsic_args!(fx, args => (a, b); intrinsic);
let a = a.load_scalar(fx);
let b = b.load_scalar(fx);
codegen_inline_asm_inner(
fx,
&[InlineAsmTemplatePiece::String("sha1nexte xmm1, xmm2".to_string())],
&[
CInlineAsmOperand::InOut {
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
_late: true,
in_value: a,
out_place: Some(ret),
},
CInlineAsmOperand::In {
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm2)),
value: b,
},
],
InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
);
}
"llvm.x86.sha256rnds2" => {
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha256rnds2_epu32&ig_expand=5977
intrinsic_args!(fx, args => (a, b, k); intrinsic);

View File

@ -391,12 +391,15 @@ fn codegen_float_intrinsic_call<'tcx>(
| sym::ceilf32
| sym::ceilf64
| sym::truncf32
| sym::truncf64 => {
| sym::truncf64
| sym::sqrtf32
| sym::sqrtf64 => {
let val = match intrinsic {
sym::fabsf32 | sym::fabsf64 => fx.bcx.ins().fabs(args[0]),
sym::floorf32 | sym::floorf64 => fx.bcx.ins().floor(args[0]),
sym::ceilf32 | sym::ceilf64 => fx.bcx.ins().ceil(args[0]),
sym::truncf32 | sym::truncf64 => fx.bcx.ins().trunc(args[0]),
sym::sqrtf32 | sym::sqrtf64 => fx.bcx.ins().sqrt(args[0]),
_ => unreachable!(),
};

View File

@ -853,7 +853,13 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
};
for lane in 0..lane_count {
let m_lane = fx.bcx.ins().ushr_imm(m, u64::from(lane) as i64);
// The bit order of the mask depends on the byte endianness, LSB-first for
// little endian and MSB-first for big endian.
let mask_lane = match fx.tcx.sess.target.endian {
Endian::Big => lane_count - 1 - lane,
Endian::Little => lane,
};
let m_lane = fx.bcx.ins().ushr_imm(m, u64::from(mask_lane) as i64);
let m_lane = fx.bcx.ins().band_imm(m_lane, 1);
let a_lane = a.value_lane(fx, lane).load_scalar(fx);
let b_lane = b.value_lane(fx, lane).load_scalar(fx);

6
y.rs
View File

@ -1,6 +0,0 @@
#!/usr/bin/env bash
#![deny(unsafe_code)] /*This line is ignored by bash
# This block is ignored by rustc
echo "Warning: y.rs is a deprecated alias for y.sh" 1>&2
exec ./y.sh "$@"
*/