Skip to content

Commit 3f1e30a

Browse files
committed
Auto merge of #118077 - calebzulawski:sync-portable-simd-2023-11-19, r=workingjubilee
Portable SIMD subtree update. Syncs nightly to the latest changes from rust-lang/portable-simd. r? `@rust-lang/libs`
2 parents 0908f17 + bcb1c41 commit 3f1e30a

File tree

112 files changed

+2289
-1190
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

112 files changed

+2289
-1190
lines changed

library/core/src/lib.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -446,9 +446,10 @@ pub mod arch;
446446
#[unstable(feature = "portable_simd", issue = "86656")]
447447
mod core_simd;
448448

449-
#[doc = include_str!("../../portable-simd/crates/core_simd/src/core_simd_docs.md")]
450449
#[unstable(feature = "portable_simd", issue = "86656")]
451450
pub mod simd {
451+
#![doc = include_str!("../../portable-simd/crates/core_simd/src/core_simd_docs.md")]
452+
452453
#[unstable(feature = "portable_simd", issue = "86656")]
453454
pub use crate::core_simd::simd::*;
454455
}

library/core/src/slice/mod.rs

+1-2
Original file line numberDiff line numberDiff line change
@@ -3979,7 +3979,7 @@ impl<T> [T] {
39793979
///
39803980
/// ```
39813981
/// #![feature(portable_simd)]
3982-
/// use core::simd::SimdFloat;
3982+
/// use core::simd::prelude::*;
39833983
///
39843984
/// let short = &[1, 2, 3];
39853985
/// let (prefix, middle, suffix) = short.as_simd::<4>();
@@ -3991,7 +3991,6 @@ impl<T> [T] {
39913991
///
39923992
/// fn basic_simd_sum(x: &[f32]) -> f32 {
39933993
/// use std::ops::Add;
3994-
/// use std::simd::f32x4;
39953994
/// let (prefix, middle, suffix) = x.as_simd();
39963995
/// let sums = f32x4::from_array([
39973996
/// prefix.iter().copied().sum(),

library/core/src/str/pattern.rs

+10-10
Original file line numberDiff line numberDiff line change
@@ -1740,9 +1740,9 @@ fn simd_contains(needle: &str, haystack: &str) -> Option<bool> {
17401740
debug_assert!(needle.len() > 1);
17411741

17421742
use crate::ops::BitAnd;
1743+
use crate::simd::cmp::SimdPartialEq;
17431744
use crate::simd::mask8x16 as Mask;
17441745
use crate::simd::u8x16 as Block;
1745-
use crate::simd::{SimdPartialEq, ToBitMask};
17461746

17471747
let first_probe = needle[0];
17481748
let last_byte_offset = needle.len() - 1;
@@ -1765,7 +1765,7 @@ fn simd_contains(needle: &str, haystack: &str) -> Option<bool> {
17651765
};
17661766

17671767
// do a naive search if the haystack is too small to fit
1768-
if haystack.len() < Block::LANES + last_byte_offset {
1768+
if haystack.len() < Block::LEN + last_byte_offset {
17691769
return Some(haystack.windows(needle.len()).any(|c| c == needle));
17701770
}
17711771

@@ -1812,7 +1812,7 @@ fn simd_contains(needle: &str, haystack: &str) -> Option<bool> {
18121812
let eq_first: Mask = a.simd_eq(first_probe);
18131813
let eq_last: Mask = b.simd_eq(second_probe);
18141814
let both = eq_first.bitand(eq_last);
1815-
let mask = both.to_bitmask();
1815+
let mask = both.to_bitmask() as u16;
18161816

18171817
return mask;
18181818
};
@@ -1822,32 +1822,32 @@ fn simd_contains(needle: &str, haystack: &str) -> Option<bool> {
18221822
// The loop condition must ensure that there's enough headroom to read LANE bytes,
18231823
// and not only at the current index but also at the index shifted by block_offset
18241824
const UNROLL: usize = 4;
1825-
while i + last_byte_offset + UNROLL * Block::LANES < haystack.len() && !result {
1825+
while i + last_byte_offset + UNROLL * Block::LEN < haystack.len() && !result {
18261826
let mut masks = [0u16; UNROLL];
18271827
for j in 0..UNROLL {
1828-
masks[j] = test_chunk(i + j * Block::LANES);
1828+
masks[j] = test_chunk(i + j * Block::LEN);
18291829
}
18301830
for j in 0..UNROLL {
18311831
let mask = masks[j];
18321832
if mask != 0 {
1833-
result |= check_mask(i + j * Block::LANES, mask, result);
1833+
result |= check_mask(i + j * Block::LEN, mask, result);
18341834
}
18351835
}
1836-
i += UNROLL * Block::LANES;
1836+
i += UNROLL * Block::LEN;
18371837
}
1838-
while i + last_byte_offset + Block::LANES < haystack.len() && !result {
1838+
while i + last_byte_offset + Block::LEN < haystack.len() && !result {
18391839
let mask = test_chunk(i);
18401840
if mask != 0 {
18411841
result |= check_mask(i, mask, result);
18421842
}
1843-
i += Block::LANES;
1843+
i += Block::LEN;
18441844
}
18451845

18461846
// Process the tail that didn't fit into LANES-sized steps.
18471847
// This simply repeats the same procedure but as right-aligned chunk instead
18481848
// of a left-aligned one. The last byte must be exactly flush with the string end so
18491849
// we don't miss a single byte or read out of bounds.
1850-
let i = haystack.len() - last_byte_offset - Block::LANES;
1850+
let i = haystack.len() - last_byte_offset - Block::LEN;
18511851
let mask = test_chunk(i);
18521852
if mask != 0 {
18531853
result |= check_mask(i, mask, result);

library/core/tests/simd.rs

+1-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
use core::simd::f32x4;
2-
use core::simd::SimdFloat;
1+
use core::simd::prelude::*;
32

43
#[test]
54
fn testing() {

library/portable-simd/.github/workflows/ci.yml

+38-33
Original file line numberDiff line numberDiff line change
@@ -167,40 +167,33 @@ jobs:
167167
RUSTFLAGS: ${{ matrix.rustflags }}
168168

169169
cross-tests:
170-
name: "${{ matrix.target }} (via cross)"
170+
name: "${{ matrix.target_feature }} on ${{ matrix.target }} (via cross)"
171171
runs-on: ubuntu-latest
172172
strategy:
173173
fail-fast: false
174-
# TODO: Sadly, we cant configure target-feature in a meaningful way
175-
# because `cross` doesn't tell qemu to enable any non-default cpu
176-
# features, nor does it give us a way to do so.
177-
#
178-
# Ultimately, we'd like to do something like [rust-lang/stdarch][stdarch].
179-
# This is a lot more complex... but in practice it's likely that we can just
180-
# snarf the docker config from around [here][1000-dockerfiles].
181-
#
182-
# [stdarch]: https://github.com/rust-lang/stdarch/blob/a5db4eaf/.github/workflows/main.yml#L67
183-
# [1000-dockerfiles]: https://github.com/rust-lang/stdarch/tree/a5db4eaf/ci/docker
184174

185175
matrix:
186176
target:
187-
- i586-unknown-linux-gnu
188-
# 32-bit arm has a few idiosyncracies like having subnormal flushing
189-
# to zero on by default. Ideally we'd set
190177
- armv7-unknown-linux-gnueabihf
191-
- aarch64-unknown-linux-gnu
192-
# Note: The issue above means neither of these mips targets will use
193-
# MSA (mips simd) but MIPS uses a nonstandard binary representation
194-
# for NaNs which makes it worth testing on despite that.
178+
- thumbv7neon-unknown-linux-gnueabihf # includes neon by default
179+
- aarch64-unknown-linux-gnu # includes neon by default
180+
- powerpc-unknown-linux-gnu
181+
- powerpc64le-unknown-linux-gnu # includes altivec by default
182+
- riscv64gc-unknown-linux-gnu
183+
# MIPS uses a nonstandard binary representation for NaNs which makes it worth testing
184+
# non-nightly since https://github.com/rust-lang/rust/pull/113274
195185
# - mips-unknown-linux-gnu
196186
# - mips64-unknown-linux-gnuabi64
197-
- riscv64gc-unknown-linux-gnu
198-
# TODO this test works, but it appears to time out
199-
# - powerpc-unknown-linux-gnu
200-
# TODO this test is broken, but it appears to be a problem with QEMU, not us.
201-
# - powerpc64le-unknown-linux-gnu
202-
# TODO enable this once a new version of cross is released
187+
# Lots of errors in QEMU and no real hardware to test on. Not clear if it's QEMU or bad codegen.
203188
# - powerpc64-unknown-linux-gnu
189+
target_feature: [default]
190+
include:
191+
- { target: powerpc64le-unknown-linux-gnu, target_feature: "+vsx" }
192+
# Fails due to QEMU floating point errors, probably handling subnormals incorrectly.
193+
# This target is somewhat redundant, since ppc64le has altivec as well.
194+
# - { target: powerpc-unknown-linux-gnu, target_feature: "+altivec" }
195+
# We should test this, but cross currently can't run it
196+
# - { target: riscv64gc-unknown-linux-gnu, target_feature: "+v,+zvl128b" }
204197

205198
steps:
206199
- uses: actions/checkout@v2
@@ -217,19 +210,35 @@ jobs:
217210
# being part of the tarball means we can't just use the download/latest
218211
# URL :(
219212
run: |
220-
CROSS_URL=https://github.com/rust-embedded/cross/releases/download/v0.2.1/cross-v0.2.1-x86_64-unknown-linux-gnu.tar.gz
213+
CROSS_URL=https://github.com/cross-rs/cross/releases/download/v0.2.5/cross-x86_64-unknown-linux-gnu.tar.gz
221214
mkdir -p "$HOME/.bin"
222215
curl -sfSL --retry-delay 10 --retry 5 "${CROSS_URL}" | tar zxf - -C "$HOME/.bin"
223216
echo "$HOME/.bin" >> $GITHUB_PATH
224217
218+
- name: Configure Emulated CPUs
219+
run: |
220+
echo "CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_RUNNER=qemu-ppc -cpu e600" >> $GITHUB_ENV
221+
# echo "CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_RUNNER=qemu-riscv64 -cpu rv64,zba=true,zbb=true,v=true,vlen=256,vext_spec=v1.0" >> $GITHUB_ENV
222+
223+
- name: Configure RUSTFLAGS
224+
shell: bash
225+
run: |
226+
case "${{ matrix.target_feature }}" in
227+
default)
228+
echo "RUSTFLAGS=" >> $GITHUB_ENV;;
229+
*)
230+
echo "RUSTFLAGS=-Ctarget-feature=${{ matrix.target_feature }}" >> $GITHUB_ENV
231+
;;
232+
esac
233+
225234
- name: Test (debug)
226235
run: cross test --verbose --target=${{ matrix.target }}
227236

228237
- name: Test (release)
229238
run: cross test --verbose --target=${{ matrix.target }} --release
230239

231240
features:
232-
name: "Check cargo features (${{ matrix.simd }} × ${{ matrix.features }})"
241+
name: "Test cargo features (${{ matrix.simd }} × ${{ matrix.features }})"
233242
runs-on: ubuntu-latest
234243
strategy:
235244
fail-fast: false
@@ -240,12 +249,8 @@ jobs:
240249
features:
241250
- ""
242251
- "--features std"
243-
- "--features generic_const_exprs"
244-
- "--features std --features generic_const_exprs"
245252
- "--features all_lane_counts"
246-
- "--features all_lane_counts --features std"
247-
- "--features all_lane_counts --features generic_const_exprs"
248-
- "--features all_lane_counts --features std --features generic_const_exprs"
253+
- "--all-features"
249254

250255
steps:
251256
- uses: actions/checkout@v2
@@ -257,9 +262,9 @@ jobs:
257262
run: echo "CPU_FEATURE=$(lscpu | grep -o avx512[a-z]* | sed s/avx/+avx/ | tr '\n' ',' )" >> $GITHUB_ENV
258263
- name: Check build
259264
if: ${{ matrix.simd == '' }}
260-
run: RUSTFLAGS="-Dwarnings" cargo check --all-targets --no-default-features ${{ matrix.features }}
265+
run: RUSTFLAGS="-Dwarnings" cargo test --all-targets --no-default-features ${{ matrix.features }}
261266
- name: Check AVX
262267
if: ${{ matrix.simd == 'avx512' && contains(env.CPU_FEATURE, 'avx512') }}
263268
run: |
264269
echo "Found AVX features: $CPU_FEATURE"
265-
RUSTFLAGS="-Dwarnings -Ctarget-feature=$CPU_FEATURE" cargo check --all-targets --no-default-features ${{ matrix.features }}
270+
RUSTFLAGS="-Dwarnings -Ctarget-feature=$CPU_FEATURE" cargo test --all-targets --no-default-features ${{ matrix.features }}

library/portable-simd/.gitignore

-1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1 @@
11
/target
2-
Cargo.lock

0 commit comments

Comments
 (0)