// SPDX-License-Identifier: Apache-2.0 OR MIT
/*
128-bit atomic implementation on s390x.
This architecture provides the following 128-bit atomic instructions:
- LPQ/STPQ: load/store (arch1 or later, i.e., baseline)
- CDSG: CAS (arch1 or later, i.e., baseline)
See "Atomic operation overview by architecture" in atomic-maybe-uninit for a more comprehensive and
detailed description of the atomic and synchronize instructions in this architecture:
https://github.com/taiki-e/atomic-maybe-uninit/blob/HEAD/src/arch/README.md#s390x
LLVM's minimal supported architecture level is arch8 (z10):
https://github.com/llvm/llvm-project/blob/llvmorg-22.1.0-rc1/llvm/lib/Target/SystemZ/SystemZProcessors.td#L16-L17
This does not appear to have changed since the current s390x backend was added in LLVM 3.3:
https://github.com/llvm/llvm-project/commit/5f613dfd1f7edb0ae95d521b7107b582d9df5103#diff-cbaef692b3958312e80fd5507a7e2aff071f1acb086f10e8a96bc06a7bb289db
Note: On Miri and ThreadSanitizer, which do not support inline assembly, we don't use
this module and use intrinsics.rs instead.
Refs:
- z/Architecture Principles of Operation, Fifteenth Edition (SA22-7832-14)
https://www.ibm.com/docs/en/module_1678991624569/pdf/SA22-7832-14.pdf
- atomic-maybe-uninit
https://github.com/taiki-e/atomic-maybe-uninit
See tests/asm-test/asm/portable-atomic for generated assembly.
*/
include!("macros.rs");
use core::{arch::asm, sync::atomic::Ordering};
use crate::utils::{Pair, U128};
// bcr 14,0 requires fast-BCR-serialization facility added in arch9 (z196).
#[cfg(any(
target_feature = "fast-serialization",
portable_atomic_target_feature = "fast-serialization",
))]
macro_rules! serialization {
() => {
"bcr 14, 0"
};
}
#[cfg(not(any(
target_feature = "fast-serialization",
portable_atomic_target_feature = "fast-serialization",
)))]
macro_rules! serialization {
() => {
"bcr 15, 0"
};
}
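// Illustrative note (not in the original source): `serialization!()` expands to a single
// serializing branch, "bcr 14, 0" when the fast-BCR-serialization facility is available
// and "bcr 15, 0" otherwise. atomic_store below emits it after STPQ to make a SeqCst
// store, roughly:
//     asm!("stpq %r0, 0({dst})", serialization!(), /* ... */);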
// Use the distinct-operands form on z196 or later, otherwise split into lgr and $op.
#[cfg(any(target_feature = "distinct-ops", portable_atomic_target_feature = "distinct-ops"))]
macro_rules! distinct_op {
($op:tt, $a0:tt, $a1:tt, $a2:tt) => {
concat!($op, "k ", $a0, ", ", $a1, ", ", $a2)
};
}
#[cfg(not(any(target_feature = "distinct-ops", portable_atomic_target_feature = "distinct-ops")))]
macro_rules! distinct_op {
($op:tt, $a0:tt, $a1:tt, $a2:tt) => {
concat!("lgr ", $a0, ", ", $a1, "\n", $op, " ", $a0, ", ", $a2)
};
}
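// For example (a sketch derived from the definitions above), with distinct-ops
// `distinct_op!("algr", "%r13", "%r1", "{val_lo}")` expands to the single instruction
//     "algrk %r13, %r1, {val_lo}"
// and without it to the two-instruction sequence
//     "lgr %r13, %r1" / "algr %r13, {val_lo}"
// i.e. copy the first source operand, then apply the two-operand form in place.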
// Use selgr$cond on z15 or later, otherwise split into lgr and locgr$cond.
#[cfg(any(
target_feature = "miscellaneous-extensions-3",
portable_atomic_target_feature = "miscellaneous-extensions-3",
))]
#[cfg(any(
target_feature = "load-store-on-cond",
portable_atomic_target_feature = "load-store-on-cond",
))]
macro_rules! select_op {
($cond:tt, $a0:tt, $a1:tt, $a2:tt) => {
concat!("selgr", $cond, " ", $a0, ", ", $a1, ", ", $a2)
};
}
#[cfg(not(any(
target_feature = "miscellaneous-extensions-3",
portable_atomic_target_feature = "miscellaneous-extensions-3",
)))]
#[cfg(any(
target_feature = "load-store-on-cond",
portable_atomic_target_feature = "load-store-on-cond",
))]
macro_rules! select_op {
($cond:tt, $a0:tt, $a1:tt, $a2:tt) => {
concat!("lgr ", $a0, ", ", $a2, "\n", "locgr", $cond, " ", $a0, ", ", $a1)
};
}
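// For example (a sketch derived from the definitions above),
// `select_op!("h", "%r12", "%r1", "{val_lo}")` expands on z15 or later to
//     "selgrh %r12, %r1, {val_lo}"   // r12 = if cc == 2 { r1 } else { val_lo }
// and, with load-store-on-cond but without miscellaneous-extensions-3, to
//     "lgr %r12, {val_lo}" / "locgrh %r12, %r1"
// which loads the "else" value first and conditionally overwrites it.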
// Extracts and checks condition code.
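// Sketch of the bit trick below (this explanation is not in the original source): IPM
// places the condition code in bits 28-29 (counting from the least significant bit) of
// the low 32-bit word and zeroes bits 30-31, so adding -(1 << 28) (-268435456) borrows
// into bit 31 only when both cc bits are zero. The function therefore returns true
// exactly when cc == 0, i.e. when CDSG reported success.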
#[inline]
fn extract_cc(r: i64) -> bool {
r.wrapping_add(-268435456) & (1 << 31) != 0
}
#[inline]
unsafe fn atomic_load(src: *mut u128, _order: Ordering) -> u128 {
debug_assert!(src as usize % 16 == 0);
let (out_hi, out_lo);
// SAFETY: the caller must uphold the safety contract.
unsafe {
// atomic load is always SeqCst.
asm!(
"lpq %r0, 0({src})", // atomic { r0:r1 = *src }
src = in(reg) ptr_reg!(src),
// Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
out("r0") out_hi,
out("r1") out_lo,
options(nostack, preserves_flags),
);
U128 { pair: Pair { hi: out_hi, lo: out_lo } }.whole
}
}
#[inline]
unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) {
debug_assert!(dst as usize % 16 == 0);
let val = U128 { whole: val };
// SAFETY: the caller must uphold the safety contract.
unsafe {
macro_rules! atomic_store {
($acquire:expr) => {
asm!(
"stpq %r0, 0({dst})", // atomic { *dst = r0:r1 }
$acquire, // fence
dst = in(reg) ptr_reg!(dst),
// Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
in("r0") val.pair.hi,
in("r1") val.pair.lo,
options(nostack, preserves_flags),
)
};
}
match order {
// Relaxed and Release stores are equivalent.
Ordering::Relaxed | Ordering::Release => atomic_store!(""),
Ordering::SeqCst => atomic_store!(serialization!()),
_ => unreachable!(),
}
}
}
#[inline]
unsafe fn atomic_compare_exchange(
dst: *mut u128,
old: u128,
new: u128,
_success: Ordering,
_failure: Ordering,
) -> Result<u128, u128> {
debug_assert!(dst as usize % 16 == 0);
let old = U128 { whole: old };
let new = U128 { whole: new };
let (prev_hi, prev_lo);
let r;
// SAFETY: the caller must uphold the safety contract.
let prev = unsafe {
// atomic CAS is always SeqCst.
asm!(
"cdsg %r0, %r12, 0({dst})", // atomic { if *dst == r0:r1 { cc = 0; *dst = r12:13 } else { cc = 1; r0:r1 = *dst } }
"ipm {r}", // r[:] = cc
dst = in(reg) ptr_reg!(dst),
r = lateout(reg) r,
// Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
inout("r0") old.pair.hi => prev_hi,
inout("r1") old.pair.lo => prev_lo,
in("r12") new.pair.hi,
in("r13") new.pair.lo,
// Do not use `preserves_flags` because CDSG modifies the condition code.
options(nostack),
);
U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
};
if extract_cc(r) { Ok(prev) } else { Err(prev) }
}
// CDSG is always strong (it cannot fail spuriously), so the weak variant is identical.
use self::atomic_compare_exchange as atomic_compare_exchange_weak;
// 128-bit atomic load by two 64-bit atomic loads.
#[cfg(not(any(
target_feature = "load-store-on-cond",
portable_atomic_target_feature = "load-store-on-cond",
)))]
#[inline]
unsafe fn byte_wise_atomic_load(src: *const u128) -> u128 {
// SAFETY: the caller must uphold the safety contract.
unsafe {
let (out_hi, out_lo);
asm!(
"lg {out_hi}, 8({src})", // atomic { out_hi = *src.byte_add(8) }
"lg {out_lo}, 0({src})", // atomic { out_lo = *src }
src = in(reg) ptr_reg!(src),
out_hi = out(reg) out_hi,
out_lo = out(reg) out_lo,
options(pure, nostack, preserves_flags, readonly),
);
U128 { pair: Pair { hi: out_hi, lo: out_lo } }.whole
}
}
#[cfg(not(any(
target_feature = "load-store-on-cond",
portable_atomic_target_feature = "load-store-on-cond",
)))]
#[inline(always)]
unsafe fn atomic_update<F>(dst: *mut u128, order: Ordering, mut f: F) -> u128
where
F: FnMut(u128) -> u128,
{
// SAFETY: the caller must uphold the safety contract.
unsafe {
// This is not a single-copy atomic read, but that is okay because the subsequent
// CAS will check for consistency.
//
// Note that the C++20 memory model does not allow mixed-sized atomic access,
// so we must use inline assembly to implement byte_wise_atomic_load.
// (i.e., byte-wise atomic based on the standard library's atomic types
// cannot be used here).
let mut prev = byte_wise_atomic_load(dst);
loop {
let next = f(prev);
match atomic_compare_exchange_weak(dst, prev, next, order, Ordering::Relaxed) {
Ok(x) => return x,
Err(x) => prev = x,
}
}
}
}
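// Illustration (not part of this module's actual code): a generic RMW such as fetch_or
// could be written on top of atomic_update as
//     unsafe { atomic_update(dst, order, |prev| prev | val) }
// The pre-z196 128-bit min/max operations below are generated in this style via
// atomic_rmw_by_atomic_update!.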
#[inline]
unsafe fn atomic_swap(dst: *mut u128, val: u128, _order: Ordering) -> u128 {
debug_assert!(dst as usize % 16 == 0);
let val = U128 { whole: val };
let (mut prev_hi, mut prev_lo);
// SAFETY: the caller must uphold the safety contract.
//
// We could use atomic_update here, but using inline assembly allows omitting
// the comparison of results and the storing/comparing of condition flags.
//
// Do not use atomic_rmw_cas_3 because it needs an extra LGR to implement swap.
unsafe {
// atomic swap is always SeqCst.
asm!(
"lg %r0, 8({dst})", // atomic { r0 = *dst.byte_add(8) }
"lg %r1, 0({dst})", // atomic { r1 = *dst }
"2:", // 'retry:
"cdsg %r0, %r12, 0({dst})", // atomic { if *dst == r0:r1 { cc = 0; *dst = r12:r13 } else { cc = 1; r0:r1 = *dst } }
"jl 2b", // if cc == 1 { jump 'retry }
dst = in(reg) ptr_reg!(dst),
// Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
out("r0") prev_hi,
out("r1") prev_lo,
in("r12") val.pair.hi,
in("r13") val.pair.lo,
// Do not use `preserves_flags` because CDSG modifies the condition code.
options(nostack),
);
U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
}
}
/// Atomic RMW by CAS loop (3 arguments)
/// `unsafe fn(dst: *mut u128, val: u128, order: Ordering) -> u128;`
///
/// `$op` can use the following registers:
/// - val_hi/val_lo pair: val argument (read-only for `$op`)
/// - r0/r1 pair: previous value loaded (read-only for `$op`)
/// - r12/r13 pair: new value that will be stored
// We could use atomic_update here, but using inline assembly allows omitting
// the comparison of results and the storing/comparing of condition flags.
macro_rules! atomic_rmw_cas_3 {
($name:ident, [$($reg:tt)*], $($op:tt)*) => {
#[inline]
unsafe fn $name(dst: *mut u128, val: u128, _order: Ordering) -> u128 {
debug_assert!(dst as usize % 16 == 0);
let val = U128 { whole: val };
let (mut prev_hi, mut prev_lo);
// SAFETY: the caller must uphold the safety contract.
unsafe {
// atomic RMW is always SeqCst.
asm!(
"lg %r0, 8({dst})", // atomic { r0 = *dst.byte_add(8) }
"lg %r1, 0({dst})", // atomic { r1 = *dst }
"2:", // 'retry:
$($op)*
"cdsg %r0, %r12, 0({dst})", // atomic { if *dst == r0:r1 { cc = 0; *dst = r12:r13 } else { cc = 1; r0:r1 = *dst } }
"jl 2b", // if cc == 1 { jump 'retry }
dst = in(reg) ptr_reg!(dst),
val_hi = in(reg) val.pair.hi,
val_lo = in(reg) val.pair.lo,
$($reg)*
// Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
out("r0") prev_hi,
out("r1") prev_lo,
out("r12") _,
out("r13") _,
// Do not use `preserves_flags` because CDSG modifies the condition code.
options(nostack),
);
U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
}
}
};
}
/// Atomic RMW by CAS loop (2 arguments)
/// `unsafe fn(dst: *mut u128, order: Ordering) -> u128;`
///
/// `$op` can use the following registers:
/// - r0/r1 pair: previous value loaded (read-only for `$op`)
/// - r12/r13 pair: new value that will be stored
// We could use atomic_update here, but using inline assembly allows omitting
// the comparison of results and the storing/comparing of condition flags.
macro_rules! atomic_rmw_cas_2 {
($name:ident, [$($reg:tt)*], $($op:tt)*) => {
#[inline]
unsafe fn $name(dst: *mut u128, _order: Ordering) -> u128 {
debug_assert!(dst as usize % 16 == 0);
let (mut prev_hi, mut prev_lo);
// SAFETY: the caller must uphold the safety contract.
unsafe {
// atomic RMW is always SeqCst.
asm!(
"lg %r0, 8({dst})", // atomic { r0 = *dst.byte_add(8) }
"lg %r1, 0({dst})", // atomic { r1 = *dst }
"2:", // 'retry:
$($op)*
"cdsg %r0, %r12, 0({dst})", // atomic { if *dst == r0:r1 { cc = 0; *dst = r12:r13 } else { cc = 1; r0:r1 = *dst } }
"jl 2b", // if cc == 1 { jump 'retry }
dst = in(reg) ptr_reg!(dst),
$($reg)*
// Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
out("r0") prev_hi,
out("r1") prev_lo,
out("r12") _,
out("r13") _,
// Do not use `preserves_flags` because CDSG modifies the condition code.
options(nostack),
);
U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
}
}
};
}
atomic_rmw_cas_3! {
atomic_add, [],
distinct_op!("algr", "%r13", "%r1", "{val_lo}"), // r13 = r1 + val_lo; cc = zero | carry
"lgr %r12, %r0", // r12 = r0
"alcgr %r12, {val_hi}", // r12 += val_hi + carry
}
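// Worked example of the carry chain above (illustrative): adding 1 to a value whose low
// half is 0xFFFF_FFFF_FFFF_FFFF makes algr/algrk produce 0 and set the carry, and alcgr
// then adds val_hi plus that carry into the high half, so the 128-bit sum wraps
// correctly across the 64-bit boundary.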
atomic_rmw_cas_3! {
atomic_sub, [],
distinct_op!("slgr", "%r13", "%r1", "{val_lo}"), // r13 = r1 - val_lo; cc = zero | borrow
"lgr %r12, %r0", // r12 = r0
"slbgr %r12, {val_hi}", // r12 -= val_hi + borrow
}
atomic_rmw_cas_3! {
atomic_and, [],
distinct_op!("ngr", "%r13", "%r1", "{val_lo}"), // r13 = r1 & val_lo
distinct_op!("ngr", "%r12", "%r0", "{val_hi}"), // r12 = r0 & val_hi
}
// Use nngrk on z15 or later.
#[cfg(any(
target_feature = "miscellaneous-extensions-3",
portable_atomic_target_feature = "miscellaneous-extensions-3",
))]
atomic_rmw_cas_3! {
atomic_nand, [],
"nngrk %r13, %r1, {val_lo}", // r13 = !(r1 & val_lo)
"nngrk %r12, %r0, {val_hi}", // r12 = !(r0 & val_hi)
}
#[cfg(not(any(
target_feature = "miscellaneous-extensions-3",
portable_atomic_target_feature = "miscellaneous-extensions-3",
)))]
atomic_rmw_cas_3! {
atomic_nand, [],
distinct_op!("ngr", "%r13", "%r1", "{val_lo}"), // r13 = r1 & val_lo
distinct_op!("ngr", "%r12", "%r0", "{val_hi}"), // r12 = r0 & val_hi
"lcgr %r13, %r13", // r13 = !r13 + 1
"aghi %r13, -1", // r13 -= 1
"lcgr %r12, %r12", // r12 = !r12 + 1
"aghi %r12, -1", // r12 -= 1
}
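// Note (illustrative): the lcgr + "aghi -1" pair above synthesizes bitwise NOT from
// two's-complement negation, since !x == -x - 1; atomic_not further below uses the same
// idiom. On z15, the dedicated nngrk instruction above performs the AND and the NOT in
// a single step.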
atomic_rmw_cas_3! {
atomic_or, [],
distinct_op!("ogr", "%r13", "%r1", "{val_lo}"), // r13 = r1 | val_lo
distinct_op!("ogr", "%r12", "%r0", "{val_hi}"), // r12 = r0 | val_hi
}
atomic_rmw_cas_3! {
atomic_xor, [],
distinct_op!("xgr", "%r13", "%r1", "{val_lo}"), // r13 = r1 ^ val_lo
distinct_op!("xgr", "%r12", "%r0", "{val_hi}"), // r12 = r0 ^ val_hi
}
#[cfg(any(
target_feature = "load-store-on-cond",
portable_atomic_target_feature = "load-store-on-cond",
))]
atomic_rmw_cas_3! {
atomic_max, [],
"clgr %r1, {val_lo}", // if r1(u) < val_lo(u) { cc = 1 } else if r1(u) > val_lo(u) { cc = 2 } else { cc = 0 }
select_op!("h", "%r12", "%r1", "{val_lo}"), // if cc == 2 { r12 = r1 } else { r12 = val_lo }
"cgr %r0, {val_hi}", // if r0(i) < val_hi(i) { cc = 1 } else if r0(i) > val_hi(i) { cc = 2 } else { cc = 0 }
select_op!("h", "%r13", "%r1", "{val_lo}"), // if cc == 2 { r13 = r1 } else { r13 = val_lo }
"locgre %r13, %r12", // if cc == 0 { r13 = r12 }
select_op!("h", "%r12", "%r0", "{val_hi}"), // if cc == 2 { r12 = r0 } else { r12 = val_hi }
}
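// How the selection above works (explanatory note, not from the original source): a
// 128-bit signed maximum is decided by a signed comparison of the high halves, falling
// back to an unsigned comparison of the low halves when the high halves are equal. The
// clgr + select pair stashes a low-half candidate in r12, the cgr-based selects then
// choose both halves of the larger operand (low half into r13, high half into r12
// afterwards), and locgre overwrites r13 with the stashed candidate only in the
// cc == 0 (equal high halves) case.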
#[cfg(any(
target_feature = "load-store-on-cond",
portable_atomic_target_feature = "load-store-on-cond",
))]
atomic_rmw_cas_3! {
atomic_umax, [tmp = out(reg) _,],
"clgr %r1, {val_lo}", // if r1(u) < val_lo(u) { cc = 1 } else if r1(u) > val_lo(u) { cc = 2 } else { cc = 0 }
select_op!("h", "{tmp}", "%r1", "{val_lo}"), // if cc == 2 { tmp = r1 } else { tmp = val_lo }
"clgr %r0, {val_hi}", // if r0(u) < val_hi(u) { cc = 1 } else if r0(u) > val_hi(u) { cc = 2 } else { cc = 0 }
select_op!("h", "%r12", "%r0", "{val_hi}"), // if cc == 2 { r12 = r0 } else { r12 = val_hi }
select_op!("h", "%r13", "%r1", "{val_lo}"), // if cc == 2 { r13 = r1 } else { r13 = val_lo }
"cgr %r0, {val_hi}", // if r0(i) < val_hi(i) { cc = 1 } else if r0(i) > val_hi(i) { cc = 2 } else { cc = 0 }
"locgre %r13, {tmp}", // if cc == 0 { r13 = tmp }
}
#[cfg(any(
target_feature = "load-store-on-cond",
portable_atomic_target_feature = "load-store-on-cond",
))]
atomic_rmw_cas_3! {
atomic_min, [],
"clgr %r1, {val_lo}", // if r1(u) < val_lo(u) { cc = 1 } else if r1(u) > val_lo(u) { cc = 2 } else { cc = 0 }
select_op!("l", "%r12", "%r1", "{val_lo}"), // if cc == 1 { r12 = r1 } else { r12 = val_lo }
"cgr %r0, {val_hi}", // if r0(i) < val_hi(i) { cc = 1 } else if r0(i) > val_hi(i) { cc = 2 } else { cc = 0 }
select_op!("l", "%r13", "%r1", "{val_lo}"), // if cc == 1 { r13 = r1 } else { r13 = val_lo }
"locgre %r13, %r12", // if cc == 0 { r13 = r12 }
select_op!("l", "%r12", "%r0", "{val_hi}"), // if cc == 1 { r12 = r0 } else { r12 = val_hi }
}
#[cfg(any(
target_feature = "load-store-on-cond",
portable_atomic_target_feature = "load-store-on-cond",
))]
atomic_rmw_cas_3! {
atomic_umin, [tmp = out(reg) _,],
"clgr %r1, {val_lo}", // if r1(u) < val_lo(u) { cc = 1 } else if r1(u) > val_lo(u) { cc = 2 } else { cc = 0 }
select_op!("l", "{tmp}", "%r1", "{val_lo}"), // if cc == 1 { tmp = r1 } else { tmp = val_lo }
"clgr %r0, {val_hi}", // if r0(u) < val_hi(u) { cc = 1 } else if r0(u) > val_hi(u) { cc = 2 } else { cc = 0 }
select_op!("l", "%r12", "%r0", "{val_hi}"), // if cc == 1 { r12 = r0 } else { r12 = val_hi }
select_op!("l", "%r13", "%r1", "{val_lo}"), // if cc == 1 { r13 = r1 } else { r13 = val_lo }
"cgr %r0, {val_hi}", // if r0(i) < val_hi(i) { cc = 1 } else if r0(i) > val_hi(i) { cc = 2 } else { cc = 0 }
"locgre %r13, {tmp}", // if cc == 0 { r13 = tmp }
}
// We use atomic_update for atomic min/max on pre-z196 because
// z10 doesn't seem to have a good way to implement 128-bit min/max.
// loc{,g}r requires z196 or later.
// https://godbolt.org/z/EqoMEP8b3
#[cfg(not(any(
target_feature = "load-store-on-cond",
portable_atomic_target_feature = "load-store-on-cond",
)))]
atomic_rmw_by_atomic_update!(cmp);
atomic_rmw_cas_2! {
atomic_not, [],
"lcgr %r13, %r1", // r13 = !r1 + 1
"aghi %r13, -1", // r13 -= 1
"lcgr %r12, %r0", // r12 = !r0 + 1
"aghi %r12, -1", // r12 -= 1
}
#[cfg(any(target_feature = "distinct-ops", portable_atomic_target_feature = "distinct-ops"))]
atomic_rmw_cas_2! {
atomic_neg, [zero = in(reg) 0_u64,],
"slgrk %r13, {zero}, %r1", // r13 = 0 - r1; cc = zero | borrow
"lghi %r12, 0", // r12 = 0
"slbgr %r12, %r0", // r12 -= r0 + borrow
}
#[cfg(not(any(target_feature = "distinct-ops", portable_atomic_target_feature = "distinct-ops")))]
atomic_rmw_cas_2! {
atomic_neg, [],
"lghi %r13, 0", // r13 = 0
"slgr %r13, %r1", // r13 -= r1; cc = zero | borrow
"lghi %r12, 0", // r12 = 0
"slbgr %r12, %r0", // r12 -= r0 + borrow
}
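// Worked example (illustrative): negating 1 computes the low half as 0 - 1 =
// 0xFFFF_FFFF_FFFF_FFFF with a borrow, and slbgr then computes the high half as
// 0 - 0 - borrow = 0xFFFF_FFFF_FFFF_FFFF, i.e. the 128-bit two's complement of 1 is -1
// (all bits set).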
#[inline]
const fn is_lock_free() -> bool {
IS_ALWAYS_LOCK_FREE
}
const IS_ALWAYS_LOCK_FREE: bool = true;
atomic128!(AtomicI128, i128, atomic_max, atomic_min);
atomic128!(AtomicU128, u128, atomic_umax, atomic_umin);
#[cfg(test)]
mod tests {
use super::*;
test_atomic_int!(i128);
test_atomic_int!(u128);
// load/store/swap implementation is not affected by signedness, so it is
// enough to test only unsigned types.
stress_test!(u128);
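// An extra smoke test (a sketch; it assumes the AtomicU128 type generated by atomic128!
// above exposes the usual new/swap/compare_exchange/load methods, as already exercised
// by test_atomic_int!).
#[test]
fn cdsg_smoke() {
    let a = AtomicU128::new(1 << 100);
    // swap returns the previous value (LG + CDSG retry loop).
    assert_eq!(a.swap(3, Ordering::SeqCst), 1 << 100);
    // Successful CAS: the expected value matches, so CDSG stores the new value.
    assert_eq!(a.compare_exchange(3, 1 << 100, Ordering::SeqCst, Ordering::SeqCst), Ok(3));
    // Failed CAS: the expected value does not match, so the current value is returned.
    assert_eq!(a.compare_exchange(3, 5, Ordering::SeqCst, Ordering::SeqCst), Err(1 << 100));
    assert_eq!(a.load(Ordering::SeqCst), 1 << 100);
}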
}