chore: checkpoint before Python removal

This commit is contained in:
2026-03-26 22:33:59 +00:00
parent 683cec9307
commit e568ddf82a
29972 changed files with 11269302 additions and 2 deletions

View File

@@ -0,0 +1,9 @@
use zerocopy::*;
#[path = "formats/coco_dynamic_size.rs"]
mod format;
/// Codegen benchmark: views a dynamically sized `CocoPacket` as raw bytes.
///
/// Forwards to `as_bytes()` on `source` and returns the resulting slice,
/// which borrows from `source`. The `as_bytes` method is not defined in this
/// file; presumably it comes from zerocopy's `IntoBytes` via the glob import.
///
/// `no_mangle` keeps the symbol name exactly `bench_as_bytes_dynamic_size`,
/// so the function can be located by name in the emitted assembly.
#[unsafe(no_mangle)]
fn bench_as_bytes_dynamic_size(source: &format::CocoPacket) -> &[u8] {
source.as_bytes()
}

View File

@@ -0,0 +1,5 @@
bench_as_bytes_dynamic_size:
mov rax, rdi
lea rdx, [2*rsi + 5]
and rdx, -2
ret

View File

@@ -0,0 +1,47 @@
Iterations: 100
Instructions: 400
Total Cycles: 137
Total uOps: 400
Dispatch Width: 4
uOps Per Cycle: 2.92
IPC: 2.92
Block RThroughput: 1.0
Instruction Info:
[1]: #uOps
[2]: Latency
[3]: RThroughput
[4]: MayLoad
[5]: MayStore
[6]: HasSideEffects (U)
[1] [2] [3] [4] [5] [6] Instructions:
1 1 0.33 mov rax, rdi
1 1 0.50 lea rdx, [2*rsi + 5]
1 1 0.33 and rdx, -2
1 1 1.00 U ret
Resources:
[0] - SBDivider
[1] - SBFPDivider
[2] - SBPort0
[3] - SBPort1
[4] - SBPort4
[5] - SBPort5
[6.0] - SBPort23
[6.1] - SBPort23
Resource pressure per iteration:
[0] [1] [2] [3] [4] [5] [6.0] [6.1]
- - 1.33 1.33 - 1.34 - -
Resource pressure by instruction:
[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
- - - 0.66 - 0.34 - - mov rax, rdi
- - 0.33 0.67 - - - - lea rdx, [2*rsi + 5]
- - 1.00 - - - - - and rdx, -2
- - - - - 1.00 - - ret

View File

@@ -0,0 +1,9 @@
use zerocopy::*;
#[path = "formats/coco_static_size.rs"]
mod format;
/// Codegen benchmark: views a fixed-size `CocoPacket` as raw bytes.
///
/// Forwards to `as_bytes()` on `source` and returns the resulting slice,
/// which borrows from `source`. The `as_bytes` method is not defined in this
/// file; presumably it comes from zerocopy's `IntoBytes` via the glob import.
///
/// `no_mangle` keeps the symbol name exactly `bench_as_bytes_static_size`,
/// so the function can be located by name in the emitted assembly.
#[unsafe(no_mangle)]
fn bench_as_bytes_static_size(source: &format::CocoPacket) -> &[u8] {
source.as_bytes()
}

View File

@@ -0,0 +1,4 @@
bench_as_bytes_static_size:
mov rax, rdi
mov edx, 6
ret

View File

@@ -0,0 +1,45 @@
Iterations: 100
Instructions: 300
Total Cycles: 104
Total uOps: 300
Dispatch Width: 4
uOps Per Cycle: 2.88
IPC: 2.88
Block RThroughput: 1.0
Instruction Info:
[1]: #uOps
[2]: Latency
[3]: RThroughput
[4]: MayLoad
[5]: MayStore
[6]: HasSideEffects (U)
[1] [2] [3] [4] [5] [6] Instructions:
1 1 0.33 mov rax, rdi
1 1 0.33 mov edx, 6
1 1 1.00 U ret
Resources:
[0] - SBDivider
[1] - SBFPDivider
[2] - SBPort0
[3] - SBPort1
[4] - SBPort4
[5] - SBPort5
[6.0] - SBPort23
[6.1] - SBPort23
Resource pressure per iteration:
[0] [1] [2] [3] [4] [5] [6.0] [6.1]
- - 0.99 1.00 - 1.01 - -
Resource pressure by instruction:
[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
- - 0.99 - - 0.01 - - mov rax, rdi
- - - 1.00 - - - - mov edx, 6
- - - - - 1.00 - - ret

View File

@@ -0,0 +1,24 @@
use zerocopy_derive::*;
/// Magic-number marker whose only valid value is `0xC0C0`.
///
/// The enum is `#[repr(u16)]` and has a single variant, so no other 16-bit
/// pattern is a valid `C0C0`. Both bytes of the discriminant are `0xC0`, so
/// the in-memory byte sequence does not depend on the target's endianness.
#[derive(TryFromBytes, KnownLayout, Immutable)]
#[repr(u16)]
pub enum C0C0 {
_XC0C0 = 0xC0C0,
}
/// A packet with a fixed header followed by a variable-length tail, laid out
/// with `#[repr(C, align(4))]`.
///
/// With a two-byte `Magic`, the header fields occupy bytes 0..9 (2 + 1 + 1 + 5),
/// so the trailing `marshmallows` slice starts at offset 9 and each element
/// is 3 bytes. Because the struct is 4-byte aligned, the total size is
/// `9 + 3 * len` rounded up to a multiple of 4 — the amount of trailing
/// padding therefore varies with the number of elements.
///
/// All fields are private to this module.
#[derive(FromBytes, KnownLayout, Immutable, SplitAt)]
#[repr(C, align(4))]
pub struct Packet<Magic> {
// Leading magic number; its type decides which byte patterns are accepted.
magic_number: Magic,
milk: u8,
mug_size: u8,
temperature: [u8; 5],
// Unsized tail: must stay the last field.
marshmallows: [[u8; 3]],
}
/// A packet beginning with the magic number `0xC0C0`.
pub type CocoPacket = Packet<C0C0>;
/// A packet beginning with any two initialized bytes.
pub type LocoPacket = Packet<[u8; 2]>;

View File

@@ -0,0 +1,27 @@
use zerocopy_derive::*;
/// Magic-number marker whose only valid value is `0xC0C0`.
///
/// The enum is `#[repr(u16)]` and has a single variant, so no other 16-bit
/// pattern is a valid `C0C0`. Both bytes of the discriminant are `0xC0`, so
/// the in-memory byte sequence does not depend on the target's endianness.
#[derive(TryFromBytes, KnownLayout, Immutable, IntoBytes)]
#[repr(u16)]
pub enum C0C0 {
_XC0C0 = 0xC0C0,
}
macro_rules! define_packet {
($name: ident, $trait: ident, $leading_field: ty) => {
#[derive($trait, KnownLayout, Immutable, IntoBytes, SplitAt)]
#[repr(C, align(2))]
pub struct $name {
magic_number: $leading_field,
mug_size: u8,
temperature: u8,
marshmallows: [[u8; 2]],
}
};
}
/// Packet begins with bytes 0xC0C0.
define_packet!(CocoPacket, TryFromBytes, C0C0);
/// Packet begins with any two bytes.
define_packet!(LocoPacket, FromBytes, [u8; 2]);

View File

@@ -0,0 +1,27 @@
use zerocopy_derive::*;
/// Magic-number marker whose only valid value is `0xC0C0`.
///
/// The enum is `#[repr(u16)]` and has a single variant, so no other 16-bit
/// pattern is a valid `C0C0`. Both bytes of the discriminant are `0xC0`, so
/// the in-memory byte sequence does not depend on the target's endianness.
#[derive(TryFromBytes, KnownLayout, Immutable, IntoBytes)]
#[repr(u16)]
pub enum C0C0 {
_XC0C0 = 0xC0C0,
}
macro_rules! define_packet {
($name: ident, $trait: ident, $leading_field: ty) => {
#[derive($trait, KnownLayout, Immutable, IntoBytes)]
#[repr(C, align(2))]
pub struct $name {
magic_number: $leading_field,
mug_size: u8,
temperature: u8,
marshmallows: [u8; 2],
}
};
}
/// Packet begins with bytes 0xC0C0.
define_packet!(CocoPacket, TryFromBytes, C0C0);
/// Packet begins with any two bytes.
define_packet!(LocoPacket, FromBytes, [u8; 2]);

View File

@@ -0,0 +1,7 @@
#[path = "formats/coco_static_size.rs"]
mod format;
/// Codegen benchmark: reads a fixed-size `LocoPacket` by value out of `source`.
///
/// Calls `FromBytes::read_from_bytes` and converts its `Result` with `.ok()`,
/// so any failure is reported as `None` and the error detail is discarded.
/// NOTE(review): per zerocopy's API this is expected to succeed only when
/// `source` is exactly the size of `LocoPacket` — confirm against its docs.
///
/// `no_mangle` keeps the symbol name stable for inspecting the emitted code.
#[unsafe(no_mangle)]
fn bench_read_from_bytes_static_size(source: &[u8]) -> Option<format::LocoPacket> {
zerocopy::FromBytes::read_from_bytes(source).ok()
}

View File

@@ -0,0 +1,15 @@
bench_read_from_bytes_static_size:
mov rcx, rsi
cmp rsi, 6
jne .LBB5_2
mov eax, dword ptr [rdi]
movzx ecx, word ptr [rdi + 4]
shl rcx, 32
or rcx, rax
.LBB5_2:
shl rcx, 16
inc rcx
xor eax, eax
cmp rsi, 6
cmove rax, rcx
ret

View File

@@ -0,0 +1,65 @@
Iterations: 100
Instructions: 1300
Total Cycles: 377
Total uOps: 1400
Dispatch Width: 4
uOps Per Cycle: 3.71
IPC: 3.45
Block RThroughput: 3.5
Instruction Info:
[1]: #uOps
[2]: Latency
[3]: RThroughput
[4]: MayLoad
[5]: MayStore
[6]: HasSideEffects (U)
[1] [2] [3] [4] [5] [6] Instructions:
1 1 0.33 mov rcx, rsi
1 1 0.33 cmp rsi, 6
1 1 1.00 jne .LBB5_2
1 5 0.50 * mov eax, dword ptr [rdi]
1 5 0.50 * movzx ecx, word ptr [rdi + 4]
1 1 0.50 shl rcx, 32
1 1 0.33 or rcx, rax
1 1 0.50 shl rcx, 16
1 1 0.33 inc rcx
1 0 0.25 xor eax, eax
1 1 0.33 cmp rsi, 6
2 2 0.67 cmove rax, rcx
1 1 1.00 U ret
Resources:
[0] - SBDivider
[1] - SBFPDivider
[2] - SBPort0
[3] - SBPort1
[4] - SBPort4
[5] - SBPort5
[6.0] - SBPort23
[6.1] - SBPort23
Resource pressure per iteration:
[0] [1] [2] [3] [4] [5] [6.0] [6.1]
- - 3.66 3.67 - 3.67 1.00 1.00
Resource pressure by instruction:
[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
- - 0.63 0.36 - 0.01 - - mov rcx, rsi
- - 0.05 0.05 - 0.90 - - cmp rsi, 6
- - - - - 1.00 - - jne .LBB5_2
- - - - - - - 1.00 mov eax, dword ptr [rdi]
- - - - - - 1.00 - movzx ecx, word ptr [rdi + 4]
- - 0.97 - - 0.03 - - shl rcx, 32
- - 0.02 0.35 - 0.63 - - or rcx, rax
- - 0.98 - - 0.02 - - shl rcx, 16
- - - 0.98 - 0.02 - - inc rcx
- - - - - - - - xor eax, eax
- - 0.03 0.93 - 0.04 - - cmp rsi, 6
- - 0.98 1.00 - 0.02 - - cmove rax, rcx
- - - - - 1.00 - - ret

View File

@@ -0,0 +1,10 @@
#[path = "formats/coco_static_size.rs"]
mod format;
/// Codegen benchmark: reads a fixed-size `LocoPacket` by value from the
/// front of `source`.
///
/// On success `read_from_prefix` yields `(packet, rest)`; the remaining bytes
/// are ignored and only the packet is returned. Any error becomes `None`.
///
/// `no_mangle` keeps the symbol name stable for inspecting the emitted code.
#[unsafe(no_mangle)]
fn bench_read_from_prefix_static_size(source: &[u8]) -> Option<format::LocoPacket> {
match zerocopy::FromBytes::read_from_prefix(source) {
// Keep the packet, drop the unread tail of `source`.
Ok((packet, _rest)) => Some(packet),
_ => None,
}
}

View File

@@ -0,0 +1,14 @@
bench_read_from_prefix_static_size:
cmp rsi, 5
jbe .LBB5_2
mov eax, dword ptr [rdi]
movzx edi, word ptr [rdi + 4]
shl rdi, 32
or rdi, rax
.LBB5_2:
shl rdi, 16
inc rdi
xor eax, eax
cmp rsi, 6
cmovae rax, rdi
ret

View File

@@ -0,0 +1,63 @@
Iterations: 100
Instructions: 1200
Total Cycles: 905
Total uOps: 1300
Dispatch Width: 4
uOps Per Cycle: 1.44
IPC: 1.33
Block RThroughput: 3.3
Instruction Info:
[1]: #uOps
[2]: Latency
[3]: RThroughput
[4]: MayLoad
[5]: MayStore
[6]: HasSideEffects (U)
[1] [2] [3] [4] [5] [6] Instructions:
1 1 0.33 cmp rsi, 5
1 1 1.00 jbe .LBB5_2
1 5 0.50 * mov eax, dword ptr [rdi]
1 5 0.50 * movzx edi, word ptr [rdi + 4]
1 1 0.50 shl rdi, 32
1 1 0.33 or rdi, rax
1 1 0.50 shl rdi, 16
1 1 0.33 inc rdi
1 0 0.25 xor eax, eax
1 1 0.33 cmp rsi, 6
2 2 0.67 cmovae rax, rdi
1 1 1.00 U ret
Resources:
[0] - SBDivider
[1] - SBFPDivider
[2] - SBPort0
[3] - SBPort1
[4] - SBPort4
[5] - SBPort5
[6.0] - SBPort23
[6.1] - SBPort23
Resource pressure per iteration:
[0] [1] [2] [3] [4] [5] [6.0] [6.1]
- - 3.32 3.32 - 3.36 1.00 1.00
Resource pressure by instruction:
[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
- - 0.05 0.94 - 0.01 - - cmp rsi, 5
- - - - - 1.00 - - jbe .LBB5_2
- - - - - - - 1.00 mov eax, dword ptr [rdi]
- - - - - - 1.00 - movzx edi, word ptr [rdi + 4]
- - 0.71 - - 0.29 - - shl rdi, 32
- - - 0.64 - 0.36 - - or rdi, rax
- - 1.00 - - - - - shl rdi, 16
- - 0.31 0.40 - 0.29 - - inc rdi
- - - - - - - - xor eax, eax
- - 0.34 0.35 - 0.31 - - cmp rsi, 6
- - 0.91 0.99 - 0.10 - - cmovae rax, rdi
- - - - - 1.00 - - ret

View File

@@ -0,0 +1,10 @@
#[path = "formats/coco_static_size.rs"]
mod format;
/// Codegen benchmark: reads a fixed-size `LocoPacket` by value from the
/// end of `source`.
///
/// On success `read_from_suffix` yields `(rest, packet)` — note the order is
/// reversed relative to the prefix variant; the leading bytes are ignored and
/// only the packet is returned. Any error becomes `None`.
///
/// `no_mangle` keeps the symbol name stable for inspecting the emitted code.
#[unsafe(no_mangle)]
fn bench_read_from_suffix_static_size(source: &[u8]) -> Option<format::LocoPacket> {
match zerocopy::FromBytes::read_from_suffix(source) {
// Keep the packet, drop the bytes that precede it.
Ok((_rest, packet)) => Some(packet),
_ => None,
}
}

View File

@@ -0,0 +1,15 @@
bench_read_from_suffix_static_size:
mov rcx, rsi
cmp rsi, 6
jb .LBB5_2
mov eax, dword ptr [rdi + rsi - 6]
movzx ecx, word ptr [rdi + rsi - 2]
shl rcx, 32
or rcx, rax
.LBB5_2:
shl rcx, 16
inc rcx
xor eax, eax
cmp rsi, 6
cmovae rax, rcx
ret

View File

@@ -0,0 +1,65 @@
Iterations: 100
Instructions: 1300
Total Cycles: 377
Total uOps: 1400
Dispatch Width: 4
uOps Per Cycle: 3.71
IPC: 3.45
Block RThroughput: 3.5
Instruction Info:
[1]: #uOps
[2]: Latency
[3]: RThroughput
[4]: MayLoad
[5]: MayStore
[6]: HasSideEffects (U)
[1] [2] [3] [4] [5] [6] Instructions:
1 1 0.33 mov rcx, rsi
1 1 0.33 cmp rsi, 6
1 1 1.00 jb .LBB5_2
1 5 0.50 * mov eax, dword ptr [rdi + rsi - 6]
1 5 0.50 * movzx ecx, word ptr [rdi + rsi - 2]
1 1 0.50 shl rcx, 32
1 1 0.33 or rcx, rax
1 1 0.50 shl rcx, 16
1 1 0.33 inc rcx
1 0 0.25 xor eax, eax
1 1 0.33 cmp rsi, 6
2 2 0.67 cmovae rax, rcx
1 1 1.00 U ret
Resources:
[0] - SBDivider
[1] - SBFPDivider
[2] - SBPort0
[3] - SBPort1
[4] - SBPort4
[5] - SBPort5
[6.0] - SBPort23
[6.1] - SBPort23
Resource pressure per iteration:
[0] [1] [2] [3] [4] [5] [6.0] [6.1]
- - 3.66 3.67 - 3.67 1.00 1.00
Resource pressure by instruction:
[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
- - 0.63 0.36 - 0.01 - - mov rcx, rsi
- - 0.05 0.05 - 0.90 - - cmp rsi, 6
- - - - - 1.00 - - jb .LBB5_2
- - - - - - - 1.00 mov eax, dword ptr [rdi + rsi - 6]
- - - - - - 1.00 - movzx ecx, word ptr [rdi + rsi - 2]
- - 0.97 - - 0.03 - - shl rcx, 32
- - 0.02 0.35 - 0.63 - - or rcx, rax
- - 0.98 - - 0.02 - - shl rcx, 16
- - - 0.98 - 0.02 - - inc rcx
- - - - - - - - xor eax, eax
- - 0.03 0.93 - 0.04 - - cmp rsi, 6
- - 0.98 1.00 - 0.02 - - cmovae rax, rcx
- - - - - 1.00 - - ret

View File

@@ -0,0 +1,7 @@
#[path = "formats/coco_dynamic_padding.rs"]
mod format;
/// Codegen benchmark: reinterprets `source` in place as a `&LocoPacket`
/// using the dynamically padded packet format.
///
/// Calls `FromBytes::ref_from_bytes` and converts its `Result` with `.ok()`,
/// so any failure is reported as `None`. The returned reference borrows from
/// `source`; no bytes are copied.
/// NOTE(review): failures are presumably alignment or size mismatches — see
/// zerocopy's `ref_from_bytes` documentation for the exact rules.
///
/// `no_mangle` keeps the symbol name stable for inspecting the emitted code.
#[unsafe(no_mangle)]
fn bench_ref_from_bytes_dynamic_padding(source: &[u8]) -> Option<&format::LocoPacket> {
zerocopy::FromBytes::ref_from_bytes(source).ok()
}

View File

@@ -0,0 +1,28 @@
bench_ref_from_bytes_dynamic_padding:
test dil, 3
je .LBB5_2
xor eax, eax
mov rdx, rsi
ret
.LBB5_2:
movabs rax, 9223372036854775804
and rax, rsi
cmp rax, 9
jae .LBB5_4
xor eax, eax
mov rdx, rsi
ret
.LBB5_4:
add rax, -9
movabs rcx, -6148914691236517205
mul rcx
shr rdx
lea rcx, [rdx + 2*rdx]
or rcx, 3
add rcx, 9
xor eax, eax
cmp rsi, rcx
cmove rsi, rdx
cmove rax, rdi
mov rdx, rsi
ret

View File

@@ -0,0 +1,89 @@
Iterations: 100
Instructions: 2500
Total Cycles: 849
Total uOps: 2800
Dispatch Width: 4
uOps Per Cycle: 3.30
IPC: 2.94
Block RThroughput: 7.0
Instruction Info:
[1]: #uOps
[2]: Latency
[3]: RThroughput
[4]: MayLoad
[5]: MayStore
[6]: HasSideEffects (U)
[1] [2] [3] [4] [5] [6] Instructions:
1 1 0.33 test dil, 3
1 1 1.00 je .LBB5_2
1 0 0.25 xor eax, eax
1 1 0.33 mov rdx, rsi
1 1 1.00 U ret
1 1 0.33 movabs rax, 9223372036854775804
1 1 0.33 and rax, rsi
1 1 0.33 cmp rax, 9
1 1 1.00 jae .LBB5_4
1 0 0.25 xor eax, eax
1 1 0.33 mov rdx, rsi
1 1 1.00 U ret
1 1 0.33 add rax, -9
1 1 0.33 movabs rcx, -6148914691236517205
2 4 1.00 mul rcx
1 1 0.50 shr rdx
1 1 0.50 lea rcx, [rdx + 2*rdx]
1 1 0.33 or rcx, 3
1 1 0.33 add rcx, 9
1 0 0.25 xor eax, eax
1 1 0.33 cmp rsi, rcx
2 2 0.67 cmove rsi, rdx
2 2 0.67 cmove rax, rdi
1 1 0.33 mov rdx, rsi
1 1 1.00 U ret
Resources:
[0] - SBDivider
[1] - SBFPDivider
[2] - SBPort0
[3] - SBPort1
[4] - SBPort4
[5] - SBPort5
[6.0] - SBPort23
[6.1] - SBPort23
Resource pressure per iteration:
[0] [1] [2] [3] [4] [5] [6.0] [6.1]
- - 8.33 8.32 - 8.35 - -
Resource pressure by instruction:
[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
- - 0.35 0.33 - 0.32 - - test dil, 3
- - - - - 1.00 - - je .LBB5_2
- - - - - - - - xor eax, eax
- - 0.92 0.04 - 0.04 - - mov rdx, rsi
- - - - - 1.00 - - ret
- - 0.32 0.15 - 0.53 - - movabs rax, 9223372036854775804
- - 0.03 0.06 - 0.91 - - and rax, rsi
- - 0.05 0.93 - 0.02 - - cmp rax, 9
- - - - - 1.00 - - jae .LBB5_4
- - - - - - - - xor eax, eax
- - 0.93 0.04 - 0.03 - - mov rdx, rsi
- - - - - 1.00 - - ret
- - 0.37 0.33 - 0.30 - - add rax, -9
- - 0.61 0.09 - 0.30 - - movabs rcx, -6148914691236517205
- - 1.00 1.00 - - - - mul rcx
- - 0.67 - - 0.33 - - shr rdx
- - 0.33 0.67 - - - - lea rcx, [rdx + 2*rdx]
- - 0.34 0.61 - 0.05 - - or rcx, 3
- - 0.36 0.61 - 0.03 - - add rcx, 9
- - - - - - - - xor eax, eax
- - 0.04 0.63 - 0.33 - - cmp rsi, rcx
- - 0.98 0.97 - 0.05 - - cmove rsi, rdx
- - 0.98 0.94 - 0.08 - - cmove rax, rdi
- - 0.05 0.92 - 0.03 - - mov rdx, rsi
- - - - - 1.00 - - ret

View File

@@ -0,0 +1,7 @@
#[path = "formats/coco_dynamic_size.rs"]
mod format;
/// Codegen benchmark: reinterprets `source` in place as a `&LocoPacket`
/// using the dynamically sized packet format.
///
/// Calls `FromBytes::ref_from_bytes` and converts its `Result` with `.ok()`,
/// so any failure is reported as `None`. The returned reference borrows from
/// `source`; no bytes are copied.
/// NOTE(review): failures are presumably alignment or size mismatches — see
/// zerocopy's `ref_from_bytes` documentation for the exact rules.
///
/// `no_mangle` keeps the symbol name stable for inspecting the emitted code.
#[unsafe(no_mangle)]
fn bench_ref_from_bytes_dynamic_size(source: &[u8]) -> Option<&format::LocoPacket> {
zerocopy::FromBytes::ref_from_bytes(source).ok()
}

View File

@@ -0,0 +1,20 @@
bench_ref_from_bytes_dynamic_size:
mov rdx, rsi
cmp rsi, 4
setb al
or al, dil
test al, 1
je .LBB5_2
xor eax, eax
ret
.LBB5_2:
lea rcx, [rdx - 4]
mov rsi, rcx
shr rsi
and rcx, -2
add rcx, 4
xor eax, eax
cmp rdx, rcx
cmove rdx, rsi
cmove rax, rdi
ret

View File

@@ -0,0 +1,75 @@
Iterations: 100
Instructions: 1800
Total Cycles: 606
Total uOps: 2000
Dispatch Width: 4
uOps Per Cycle: 3.30
IPC: 2.97
Block RThroughput: 5.0
Instruction Info:
[1]: #uOps
[2]: Latency
[3]: RThroughput
[4]: MayLoad
[5]: MayStore
[6]: HasSideEffects (U)
[1] [2] [3] [4] [5] [6] Instructions:
1 1 0.33 mov rdx, rsi
1 1 0.33 cmp rsi, 4
1 1 0.50 setb al
1 1 0.33 or al, dil
1 1 0.33 test al, 1
1 1 1.00 je .LBB5_2
1 0 0.25 xor eax, eax
1 1 1.00 U ret
1 1 0.50 lea rcx, [rdx - 4]
1 1 0.33 mov rsi, rcx
1 1 0.50 shr rsi
1 1 0.33 and rcx, -2
1 1 0.33 add rcx, 4
1 0 0.25 xor eax, eax
1 1 0.33 cmp rdx, rcx
2 2 0.67 cmove rdx, rsi
2 2 0.67 cmove rax, rdi
1 1 1.00 U ret
Resources:
[0] - SBDivider
[1] - SBFPDivider
[2] - SBPort0
[3] - SBPort1
[4] - SBPort4
[5] - SBPort5
[6.0] - SBPort23
[6.1] - SBPort23
Resource pressure per iteration:
[0] [1] [2] [3] [4] [5] [6.0] [6.1]
- - 6.00 6.00 - 6.00 - -
Resource pressure by instruction:
[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
- - - 0.99 - 0.01 - - mov rdx, rsi
- - 0.99 0.01 - - - - cmp rsi, 4
- - 1.00 - - - - - setb al
- - 0.99 0.01 - - - - or al, dil
- - - 0.99 - 0.01 - - test al, 1
- - - - - 1.00 - - je .LBB5_2
- - - - - - - - xor eax, eax
- - - - - 1.00 - - ret
- - 1.00 - - - - - lea rcx, [rdx - 4]
- - - 1.00 - - - - mov rsi, rcx
- - 1.00 - - - - - shr rsi
- - 1.00 - - - - - and rcx, -2
- - - 1.00 - - - - add rcx, 4
- - - - - - - - xor eax, eax
- - - - - 1.00 - - cmp rdx, rcx
- - 0.01 1.00 - 0.99 - - cmove rdx, rsi
- - 0.01 1.00 - 0.99 - - cmove rax, rdi
- - - - - 1.00 - - ret

View File

@@ -0,0 +1,7 @@
#[path = "formats/coco_static_size.rs"]
mod format;
/// Codegen benchmark: reinterprets `source` in place as a `&LocoPacket`
/// using the fixed-size packet format.
///
/// Calls `FromBytes::ref_from_bytes` and converts its `Result` with `.ok()`,
/// so any failure is reported as `None`. The returned reference borrows from
/// `source`; no bytes are copied.
/// NOTE(review): failures are presumably alignment or size mismatches — see
/// zerocopy's `ref_from_bytes` documentation for the exact rules.
///
/// `no_mangle` keeps the symbol name stable for inspecting the emitted code.
#[unsafe(no_mangle)]
fn bench_ref_from_bytes_static_size(source: &[u8]) -> Option<&format::LocoPacket> {
zerocopy::FromBytes::ref_from_bytes(source).ok()
}

View File

@@ -0,0 +1,8 @@
bench_ref_from_bytes_static_size:
mov ecx, edi
and ecx, 1
xor rsi, 6
xor eax, eax
or rsi, rcx
cmove rax, rdi
ret

View File

@@ -0,0 +1,53 @@
Iterations: 100
Instructions: 700
Total Cycles: 240
Total uOps: 800
Dispatch Width: 4
uOps Per Cycle: 3.33
IPC: 2.92
Block RThroughput: 2.0
Instruction Info:
[1]: #uOps
[2]: Latency
[3]: RThroughput
[4]: MayLoad
[5]: MayStore
[6]: HasSideEffects (U)
[1] [2] [3] [4] [5] [6] Instructions:
1 1 0.33 mov ecx, edi
1 1 0.33 and ecx, 1
1 1 0.33 xor rsi, 6
1 0 0.25 xor eax, eax
1 1 0.33 or rsi, rcx
2 2 0.67 cmove rax, rdi
1 1 1.00 U ret
Resources:
[0] - SBDivider
[1] - SBFPDivider
[2] - SBPort0
[3] - SBPort1
[4] - SBPort4
[5] - SBPort5
[6.0] - SBPort23
[6.1] - SBPort23
Resource pressure per iteration:
[0] [1] [2] [3] [4] [5] [6.0] [6.1]
- - 2.33 2.33 - 2.34 - -
Resource pressure by instruction:
[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
- - 0.01 0.98 - 0.01 - - mov ecx, edi
- - 0.02 0.66 - 0.32 - - and ecx, 1
- - 0.33 0.66 - 0.01 - - xor rsi, 6
- - - - - - - - xor eax, eax
- - 0.98 0.02 - - - - or rsi, rcx
- - 0.99 0.01 - 1.00 - - cmove rax, rdi
- - - - - 1.00 - - ret

View File

@@ -0,0 +1,10 @@
#[path = "formats/coco_dynamic_padding.rs"]
mod format;
/// Codegen benchmark: reinterprets `source` in place as a `&LocoPacket`
/// (dynamically padded format) with a caller-supplied element count.
///
/// `count` is forwarded unchanged to `ref_from_bytes_with_elems`; presumably
/// it is the number of elements in the packet's trailing slice. The `Result`
/// is converted with `.ok()`, so any failure is reported as `None`. The
/// returned reference borrows from `source`.
///
/// `no_mangle` keeps the symbol name stable for inspecting the emitted code.
#[unsafe(no_mangle)]
fn bench_ref_from_bytes_with_elems_dynamic_padding(
source: &[u8],
count: usize,
) -> Option<&format::LocoPacket> {
zerocopy::FromBytes::ref_from_bytes_with_elems(source, count).ok()
}

View File

@@ -0,0 +1,30 @@
bench_ref_from_bytes_with_elems_dynamic_padding:
mov rcx, rdx
mov edx, 3
mov rax, rcx
mul rdx
jo .LBB5_5
cmp rax, -10
ja .LBB5_5
mov edx, eax
not edx
and edx, 3
add rdx, rax
add rdx, 9
cmp rsi, rdx
jne .LBB5_5
mov r8d, edi
and r8d, 3
jne .LBB5_5
add rax, 9
cmp rdx, rax
jb .LBB5_5
mov rax, rdi
mov rdx, rcx
ret
.LBB5_5:
xor edi, edi
mov rcx, rsi
mov rax, rdi
mov rdx, rcx
ret

View File

@@ -0,0 +1,95 @@
Iterations: 100
Instructions: 2800
Total Cycles: 944
Total uOps: 2900
Dispatch Width: 4
uOps Per Cycle: 3.07
IPC: 2.97
Block RThroughput: 7.3
Instruction Info:
[1]: #uOps
[2]: Latency
[3]: RThroughput
[4]: MayLoad
[5]: MayStore
[6]: HasSideEffects (U)
[1] [2] [3] [4] [5] [6] Instructions:
1 1 0.33 mov rcx, rdx
1 1 0.33 mov edx, 3
1 1 0.33 mov rax, rcx
2 4 1.00 mul rdx
1 1 1.00 jo .LBB5_5
1 1 0.33 cmp rax, -10
1 1 1.00 ja .LBB5_5
1 1 0.33 mov edx, eax
1 1 0.33 not edx
1 1 0.33 and edx, 3
1 1 0.33 add rdx, rax
1 1 0.33 add rdx, 9
1 1 0.33 cmp rsi, rdx
1 1 1.00 jne .LBB5_5
1 1 0.33 mov r8d, edi
1 1 0.33 and r8d, 3
1 1 1.00 jne .LBB5_5
1 1 0.33 add rax, 9
1 1 0.33 cmp rdx, rax
1 1 1.00 jb .LBB5_5
1 1 0.33 mov rax, rdi
1 1 0.33 mov rdx, rcx
1 1 1.00 U ret
1 0 0.25 xor edi, edi
1 1 0.33 mov rcx, rsi
1 1 0.33 mov rax, rdi
1 1 0.33 mov rdx, rcx
1 1 1.00 U ret
Resources:
[0] - SBDivider
[1] - SBFPDivider
[2] - SBPort0
[3] - SBPort1
[4] - SBPort4
[5] - SBPort5
[6.0] - SBPort23
[6.1] - SBPort23
Resource pressure per iteration:
[0] [1] [2] [3] [4] [5] [6.0] [6.1]
- - 9.32 9.32 - 9.36 - -
Resource pressure by instruction:
[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
- - 0.64 0.18 - 0.18 - - mov rcx, rdx
- - 0.17 0.83 - - - - mov edx, 3
- - 0.50 0.49 - 0.01 - - mov rax, rcx
- - 1.00 1.00 - - - - mul rdx
- - - - - 1.00 - - jo .LBB5_5
- - 0.82 0.18 - - - - cmp rax, -10
- - - - - 1.00 - - ja .LBB5_5
- - 0.02 0.98 - - - - mov edx, eax
- - 0.82 0.02 - 0.16 - - not edx
- - 0.82 0.17 - 0.01 - - and edx, 3
- - 0.99 - - 0.01 - - add rdx, rax
- - 0.98 0.01 - 0.01 - - add rdx, 9
- - 1.00 - - - - - cmp rsi, rdx
- - - - - 1.00 - - jne .LBB5_5
- - 0.16 0.83 - 0.01 - - mov r8d, edi
- - 0.17 0.17 - 0.66 - - and r8d, 3
- - - - - 1.00 - - jne .LBB5_5
- - 0.02 0.98 - - - - add rax, 9
- - - 0.17 - 0.83 - - cmp rdx, rax
- - - - - 1.00 - - jb .LBB5_5
- - 0.01 0.67 - 0.32 - - mov rax, rdi
- - 0.02 0.98 - - - - mov rdx, rcx
- - - - - 1.00 - - ret
- - - - - - - - xor edi, edi
- - 0.34 0.66 - - - - mov rcx, rsi
- - 0.34 0.50 - 0.16 - - mov rax, rdi
- - 0.50 0.50 - - - - mov rdx, rcx
- - - - - 1.00 - - ret

View File

@@ -0,0 +1,10 @@
#[path = "formats/coco_dynamic_size.rs"]
mod format;
/// Codegen benchmark: reinterprets `source` in place as a `&LocoPacket`
/// (dynamically sized format) with a caller-supplied element count.
///
/// `count` is forwarded unchanged to `ref_from_bytes_with_elems`; presumably
/// it is the number of elements in the packet's trailing slice. The `Result`
/// is converted with `.ok()`, so any failure is reported as `None`. The
/// returned reference borrows from `source`.
///
/// `no_mangle` keeps the symbol name stable for inspecting the emitted code.
#[unsafe(no_mangle)]
fn bench_ref_from_bytes_with_elems_dynamic_size(
source: &[u8],
count: usize,
) -> Option<&format::LocoPacket> {
zerocopy::FromBytes::ref_from_bytes_with_elems(source, count).ok()
}

View File

@@ -0,0 +1,16 @@
bench_ref_from_bytes_with_elems_dynamic_size:
movabs rax, 9223372036854775805
cmp rdx, rax
seta cl
mov rax, rdi
or dil, cl
test dil, 1
jne .LBB5_2
lea rcx, [2*rdx + 4]
cmp rsi, rcx
je .LBB5_3
.LBB5_2:
xor eax, eax
mov rdx, rsi
.LBB5_3:
ret

View File

@@ -0,0 +1,65 @@
Iterations: 100
Instructions: 1300
Total Cycles: 439
Total uOps: 1400
Dispatch Width: 4
uOps Per Cycle: 3.19
IPC: 2.96
Block RThroughput: 3.5
Instruction Info:
[1]: #uOps
[2]: Latency
[3]: RThroughput
[4]: MayLoad
[5]: MayStore
[6]: HasSideEffects (U)
[1] [2] [3] [4] [5] [6] Instructions:
1 1 0.33 movabs rax, 9223372036854775805
1 1 0.33 cmp rdx, rax
2 2 1.00 seta cl
1 1 0.33 mov rax, rdi
1 1 0.33 or dil, cl
1 1 0.33 test dil, 1
1 1 1.00 jne .LBB5_2
1 1 0.50 lea rcx, [2*rdx + 4]
1 1 0.33 cmp rsi, rcx
1 1 1.00 je .LBB5_3
1 0 0.25 xor eax, eax
1 1 0.33 mov rdx, rsi
1 1 1.00 U ret
Resources:
[0] - SBDivider
[1] - SBFPDivider
[2] - SBPort0
[3] - SBPort1
[4] - SBPort4
[5] - SBPort5
[6.0] - SBPort23
[6.1] - SBPort23
Resource pressure per iteration:
[0] [1] [2] [3] [4] [5] [6.0] [6.1]
- - 4.32 4.33 - 4.35 - -
Resource pressure by instruction:
[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
- - - 0.99 - 0.01 - - movabs rax, 9223372036854775805
- - 0.33 0.67 - - - - cmp rdx, rax
- - 1.98 - - 0.02 - - seta cl
- - 0.01 0.99 - - - - mov rax, rdi
- - 1.00 - - - - - or dil, cl
- - 0.99 0.01 - - - - test dil, 1
- - - - - 1.00 - - jne .LBB5_2
- - - 1.00 - - - - lea rcx, [2*rdx + 4]
- - 0.01 - - 0.99 - - cmp rsi, rcx
- - - - - 1.00 - - je .LBB5_3
- - - - - - - - xor eax, eax
- - - 0.67 - 0.33 - - mov rdx, rsi
- - - - - 1.00 - - ret

View File

@@ -0,0 +1,10 @@
#[path = "formats/coco_dynamic_padding.rs"]
mod format;
/// Codegen benchmark: reinterprets the front of `source` in place as a
/// `&LocoPacket` using the dynamically padded packet format.
///
/// On success `ref_from_prefix` yields `(packet, rest)`; the remaining bytes
/// are ignored and only the packet reference (borrowing from `source`) is
/// returned. Any error becomes `None`.
///
/// `no_mangle` keeps the symbol name stable for inspecting the emitted code.
#[unsafe(no_mangle)]
fn bench_ref_from_prefix_dynamic_padding(source: &[u8]) -> Option<&format::LocoPacket> {
match zerocopy::FromBytes::ref_from_prefix(source) {
// Keep the packet, drop the unused tail of `source`.
Ok((packet, _rest)) => Some(packet),
_ => None,
}
}

View File

@@ -0,0 +1,22 @@
bench_ref_from_prefix_dynamic_padding:
xor edx, edx
mov eax, 0
test dil, 3
je .LBB5_1
ret
.LBB5_1:
movabs rax, 9223372036854775804
and rsi, rax
cmp rsi, 9
jae .LBB5_3
mov edx, 1
xor eax, eax
ret
.LBB5_3:
add rsi, -9
movabs rcx, -6148914691236517205
mov rax, rsi
mul rcx
shr rdx
mov rax, rdi
ret

View File

@@ -0,0 +1,77 @@
Iterations: 100
Instructions: 1900
Total Cycles: 608
Total uOps: 2000
Dispatch Width: 4
uOps Per Cycle: 3.29
IPC: 3.13
Block RThroughput: 5.0
Instruction Info:
[1]: #uOps
[2]: Latency
[3]: RThroughput
[4]: MayLoad
[5]: MayStore
[6]: HasSideEffects (U)
[1] [2] [3] [4] [5] [6] Instructions:
1 0 0.25 xor edx, edx
1 1 0.33 mov eax, 0
1 1 0.33 test dil, 3
1 1 1.00 je .LBB5_1
1 1 1.00 U ret
1 1 0.33 movabs rax, 9223372036854775804
1 1 0.33 and rsi, rax
1 1 0.33 cmp rsi, 9
1 1 1.00 jae .LBB5_3
1 1 0.33 mov edx, 1
1 0 0.25 xor eax, eax
1 1 1.00 U ret
1 1 0.33 add rsi, -9
1 1 0.33 movabs rcx, -6148914691236517205
1 1 0.33 mov rax, rsi
2 4 1.00 mul rcx
1 1 0.50 shr rdx
1 1 0.33 mov rax, rdi
1 1 1.00 U ret
Resources:
[0] - SBDivider
[1] - SBFPDivider
[2] - SBPort0
[3] - SBPort1
[4] - SBPort4
[5] - SBPort5
[6.0] - SBPort23
[6.1] - SBPort23
Resource pressure per iteration:
[0] [1] [2] [3] [4] [5] [6.0] [6.1]
- - 6.00 6.00 - 6.00 - -
Resource pressure by instruction:
[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
- - - - - - - - xor edx, edx
- - 0.01 0.98 - 0.01 - - mov eax, 0
- - 0.98 0.01 - 0.01 - - test dil, 3
- - - - - 1.00 - - je .LBB5_1
- - - - - 1.00 - - ret
- - 0.01 0.99 - - - - movabs rax, 9223372036854775804
- - - 1.00 - - - - and rsi, rax
- - - 1.00 - - - - cmp rsi, 9
- - - - - 1.00 - - jae .LBB5_3
- - 1.00 - - - - - mov edx, 1
- - - - - - - - xor eax, eax
- - - - - 1.00 - - ret
- - 0.02 0.02 - 0.96 - - add rsi, -9
- - 0.99 0.01 - - - - movabs rcx, -6148914691236517205
- - 0.01 0.99 - - - - mov rax, rsi
- - 1.00 1.00 - - - - mul rcx
- - 1.00 - - - - - shr rdx
- - 0.98 - - 0.02 - - mov rax, rdi
- - - - - 1.00 - - ret

View File

@@ -0,0 +1,10 @@
#[path = "formats/coco_dynamic_size.rs"]
mod format;
/// Codegen benchmark: reinterprets the front of `source` in place as a
/// `&LocoPacket` using the dynamically sized packet format.
///
/// On success `ref_from_prefix` yields `(packet, rest)`; the remaining bytes
/// are ignored and only the packet reference (borrowing from `source`) is
/// returned. Any error becomes `None`.
///
/// `no_mangle` keeps the symbol name stable for inspecting the emitted code.
#[unsafe(no_mangle)]
fn bench_ref_from_prefix_dynamic_size(source: &[u8]) -> Option<&format::LocoPacket> {
match zerocopy::FromBytes::ref_from_prefix(source) {
// Keep the packet, drop the unused tail of `source`.
Ok((packet, _rest)) => Some(packet),
_ => None,
}
}

View File

@@ -0,0 +1,17 @@
bench_ref_from_prefix_dynamic_size:
xor edx, edx
mov eax, 0
test dil, 1
jne .LBB5_4
cmp rsi, 4
jae .LBB5_3
mov edx, 1
xor eax, eax
ret
.LBB5_3:
add rsi, -4
shr rsi
mov rdx, rsi
mov rax, rdi
.LBB5_4:
ret

View File

@@ -0,0 +1,67 @@
Iterations: 100
Instructions: 1400
Total Cycles: 405
Total uOps: 1400
Dispatch Width: 4
uOps Per Cycle: 3.46
IPC: 3.46
Block RThroughput: 4.0
Instruction Info:
[1]: #uOps
[2]: Latency
[3]: RThroughput
[4]: MayLoad
[5]: MayStore
[6]: HasSideEffects (U)
[1] [2] [3] [4] [5] [6] Instructions:
1 0 0.25 xor edx, edx
1 1 0.33 mov eax, 0
1 1 0.33 test dil, 1
1 1 1.00 jne .LBB5_4
1 1 0.33 cmp rsi, 4
1 1 1.00 jae .LBB5_3
1 1 0.33 mov edx, 1
1 0 0.25 xor eax, eax
1 1 1.00 U ret
1 1 0.33 add rsi, -4
1 1 0.50 shr rsi
1 1 0.33 mov rdx, rsi
1 1 0.33 mov rax, rdi
1 1 1.00 U ret
Resources:
[0] - SBDivider
[1] - SBFPDivider
[2] - SBPort0
[3] - SBPort1
[4] - SBPort4
[5] - SBPort5
[6.0] - SBPort23
[6.1] - SBPort23
Resource pressure per iteration:
[0] [1] [2] [3] [4] [5] [6.0] [6.1]
- - 3.99 3.99 - 4.02 - -
Resource pressure by instruction:
[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
- - - - - - - - xor edx, edx
- - 0.01 0.98 - 0.01 - - mov eax, 0
- - 0.98 0.02 - - - - test dil, 1
- - - - - 1.00 - - jne .LBB5_4
- - 0.02 0.98 - - - - cmp rsi, 4
- - - - - 1.00 - - jae .LBB5_3
- - 0.98 0.01 - 0.01 - - mov edx, 1
- - - - - - - - xor eax, eax
- - - - - 1.00 - - ret
- - 0.01 0.99 - - - - add rsi, -4
- - 1.00 - - - - - shr rsi
- - - 1.00 - - - - mov rdx, rsi
- - 0.99 0.01 - - - - mov rax, rdi
- - - - - 1.00 - - ret

View File

@@ -0,0 +1,10 @@
#[path = "formats/coco_static_size.rs"]
mod format;
/// Codegen benchmark: reinterprets the front of `source` in place as a
/// `&LocoPacket` using the fixed-size packet format.
///
/// On success `ref_from_prefix` yields `(packet, rest)`; the remaining bytes
/// are ignored and only the packet reference (borrowing from `source`) is
/// returned. Any error becomes `None`.
///
/// `no_mangle` keeps the symbol name stable for inspecting the emitted code.
#[unsafe(no_mangle)]
fn bench_ref_from_prefix_static_size(source: &[u8]) -> Option<&format::LocoPacket> {
match zerocopy::FromBytes::ref_from_prefix(source) {
// Keep the packet, drop the unused tail of `source`.
Ok((packet, _rest)) => Some(packet),
_ => None,
}
}

View File

@@ -0,0 +1,8 @@
bench_ref_from_prefix_static_size:
xor eax, eax
cmp rsi, 6
mov rcx, rdi
cmovb rcx, rax
test dil, 1
cmove rax, rcx
ret

View File

@@ -0,0 +1,53 @@
Iterations: 100
Instructions: 700
Total Cycles: 274
Total uOps: 900
Dispatch Width: 4
uOps Per Cycle: 3.28
IPC: 2.55
Block RThroughput: 2.3
Instruction Info:
[1]: #uOps
[2]: Latency
[3]: RThroughput
[4]: MayLoad
[5]: MayStore
[6]: HasSideEffects (U)
[1] [2] [3] [4] [5] [6] Instructions:
1 0 0.25 xor eax, eax
1 1 0.33 cmp rsi, 6
1 1 0.33 mov rcx, rdi
2 2 0.67 cmovb rcx, rax
1 1 0.33 test dil, 1
2 2 0.67 cmove rax, rcx
1 1 1.00 U ret
Resources:
[0] - SBDivider
[1] - SBFPDivider
[2] - SBPort0
[3] - SBPort1
[4] - SBPort4
[5] - SBPort5
[6.0] - SBPort23
[6.1] - SBPort23
Resource pressure per iteration:
[0] [1] [2] [3] [4] [5] [6.0] [6.1]
- - 2.66 2.67 - 2.67 - -
Resource pressure by instruction:
[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
- - - - - - - - xor eax, eax
- - - 0.01 - 0.99 - - cmp rsi, 6
- - 0.01 0.67 - 0.32 - - mov rcx, rdi
- - 1.00 0.99 - 0.01 - - cmovb rcx, rax
- - 0.66 0.01 - 0.33 - - test dil, 1
- - 0.99 0.99 - 0.02 - - cmove rax, rcx
- - - - - 1.00 - - ret

View File

@@ -0,0 +1,13 @@
#[path = "formats/coco_dynamic_padding.rs"]
mod format;
/// Codegen benchmark: reinterprets the front of `source` in place as a
/// `&LocoPacket` (dynamically padded format) with a caller-supplied element
/// count.
///
/// `count` is forwarded unchanged to `ref_from_prefix_with_elems`; presumably
/// it is the number of elements in the packet's trailing slice. On success
/// the call yields `(packet, rest)`; the remaining bytes are ignored and only
/// the packet reference (borrowing from `source`) is returned. Any error
/// becomes `None`.
///
/// `no_mangle` keeps the symbol name stable for inspecting the emitted code.
#[unsafe(no_mangle)]
fn bench_ref_from_prefix_with_elems_dynamic_padding(
source: &[u8],
count: usize,
) -> Option<&format::LocoPacket> {
match zerocopy::FromBytes::ref_from_prefix_with_elems(source, count) {
// Keep the packet, drop the unused tail of `source`.
Ok((packet, _rest)) => Some(packet),
_ => None,
}
}

View File

@@ -0,0 +1,35 @@
bench_ref_from_prefix_with_elems_dynamic_padding:
mov rcx, rdx
mov edx, 3
mov rax, rcx
mul rdx
mov edx, 1
jo .LBB5_5
cmp rax, -10
ja .LBB5_5
lea r8, [rax + 9]
not eax
and eax, 3
add rax, r8
jae .LBB5_3
.LBB5_5:
xor r8d, r8d
mov rax, r8
ret
.LBB5_3:
xor edx, edx
mov r8d, 0
test dil, 3
je .LBB5_4
mov rax, r8
ret
.LBB5_4:
xor edx, edx
cmp rax, rsi
mov eax, 1
cmova rcx, rax
cmova rdi, rdx
mov rdx, rcx
mov r8, rdi
mov rax, r8
ret

View File

@@ -0,0 +1,101 @@
Iterations: 100
Instructions: 3100
Total Cycles: 1110
Total uOps: 3600
Dispatch Width: 4
uOps Per Cycle: 3.24
IPC: 2.79
Block RThroughput: 9.0
Instruction Info:
[1]: #uOps
[2]: Latency
[3]: RThroughput
[4]: MayLoad
[5]: MayStore
[6]: HasSideEffects (U)
[1] [2] [3] [4] [5] [6] Instructions:
1 1 0.33 mov rcx, rdx
1 1 0.33 mov edx, 3
1 1 0.33 mov rax, rcx
2 4 1.00 mul rdx
1 1 0.33 mov edx, 1
1 1 1.00 jo .LBB5_5
1 1 0.33 cmp rax, -10
1 1 1.00 ja .LBB5_5
1 1 0.50 lea r8, [rax + 9]
1 1 0.33 not eax
1 1 0.33 and eax, 3
1 1 0.33 add rax, r8
1 1 1.00 jae .LBB5_3
1 0 0.25 xor r8d, r8d
1 1 0.33 mov rax, r8
1 1 1.00 U ret
1 0 0.25 xor edx, edx
1 1 0.33 mov r8d, 0
1 1 0.33 test dil, 3
1 1 1.00 je .LBB5_4
1 1 0.33 mov rax, r8
1 1 1.00 U ret
1 0 0.25 xor edx, edx
1 1 0.33 cmp rax, rsi
1 1 0.33 mov eax, 1
3 3 1.00 cmova rcx, rax
3 3 1.00 cmova rdi, rdx
1 1 0.33 mov rdx, rcx
1 1 0.33 mov r8, rdi
1 1 0.33 mov rax, r8
1 1 1.00 U ret
Resources:
[0] - SBDivider
[1] - SBFPDivider
[2] - SBPort0
[3] - SBPort1
[4] - SBPort4
[5] - SBPort5
[6.0] - SBPort23
[6.1] - SBPort23
Resource pressure per iteration:
[0] [1] [2] [3] [4] [5] [6.0] [6.1]
- - 11.01 10.98 - 11.01 - -
Resource pressure by instruction:
[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
- - 0.48 0.50 - 0.02 - - mov rcx, rdx
- - 0.02 0.98 - - - - mov edx, 3
- - 0.51 0.48 - 0.01 - - mov rax, rcx
- - 1.00 1.00 - - - - mul rdx
- - 0.49 0.50 - 0.01 - - mov edx, 1
- - - - - 1.00 - - jo .LBB5_5
- - 0.98 0.02 - - - - cmp rax, -10
- - - - - 1.00 - - ja .LBB5_5
- - 0.02 0.98 - - - - lea r8, [rax + 9]
- - 0.98 0.02 - - - - not eax
- - 0.99 0.01 - - - - and eax, 3
- - 0.98 0.01 - 0.01 - - add rax, r8
- - - - - 1.00 - - jae .LBB5_3
- - - - - - - - xor r8d, r8d
- - 0.01 0.98 - 0.01 - - mov rax, r8
- - - - - 1.00 - - ret
- - - - - - - - xor edx, edx
- - 0.48 0.52 - - - - mov r8d, 0
- - 0.02 0.97 - 0.01 - - test dil, 3
- - - - - 1.00 - - je .LBB5_4
- - 0.49 0.50 - 0.01 - - mov rax, r8
- - - - - 1.00 - - ret
- - - - - - - - xor edx, edx
- - 0.51 0.49 - - - - cmp rax, rsi
- - - 1.00 - - - - mov eax, 1
- - 1.04 0.97 - 0.99 - - cmova rcx, rax
- - 0.98 0.53 - 1.49 - - cmova rdi, rdx
- - 0.50 0.50 - - - - mov rdx, rcx
- - 0.51 0.01 - 0.48 - - mov r8, rdi
- - 0.02 0.01 - 0.97 - - mov rax, r8
- - - - - 1.00 - - ret

View File

@@ -0,0 +1,13 @@
#[path = "formats/coco_dynamic_size.rs"]
mod format;
/// Codegen benchmark: reinterprets the front of `source` in place as a
/// `&LocoPacket` (dynamically sized format) with a caller-supplied element
/// count.
///
/// `count` is forwarded unchanged to `ref_from_prefix_with_elems`; presumably
/// it is the number of elements in the packet's trailing slice. On success
/// the call yields `(packet, rest)`; the remaining bytes are ignored and only
/// the packet reference (borrowing from `source`) is returned. Any error
/// becomes `None`.
///
/// `no_mangle` keeps the symbol name stable for inspecting the emitted code.
#[unsafe(no_mangle)]
fn bench_ref_from_prefix_with_elems_dynamic_size(
source: &[u8],
count: usize,
) -> Option<&format::LocoPacket> {
match zerocopy::FromBytes::ref_from_prefix_with_elems(source, count) {
// Keep the packet, drop the unused tail of `source`.
Ok((packet, _rest)) => Some(packet),
_ => None,
}
}

View File

@@ -0,0 +1,22 @@
bench_ref_from_prefix_with_elems_dynamic_size:
movabs rax, 9223372036854775805
cmp rdx, rax
ja .LBB5_1
mov rcx, rdx
xor edx, edx
mov eax, 0
test dil, 1
jne .LBB5_4
lea rax, [2*rcx + 4]
xor r8d, r8d
cmp rax, rsi
mov edx, 1
cmovbe rdx, rcx
cmova rdi, r8
mov rax, rdi
.LBB5_4:
ret
.LBB5_1:
mov edx, 1
xor eax, eax
ret

View File

@@ -0,0 +1,77 @@
Iterations: 100
Instructions: 1900
Total Cycles: 672
Total uOps: 2300
Dispatch Width: 4
uOps Per Cycle: 3.42
IPC: 2.83
Block RThroughput: 5.8
Instruction Info:
[1]: #uOps
[2]: Latency
[3]: RThroughput
[4]: MayLoad
[5]: MayStore
[6]: HasSideEffects (U)
[1] [2] [3] [4] [5] [6] Instructions:
1 1 0.33 movabs rax, 9223372036854775805
1 1 0.33 cmp rdx, rax
1 1 1.00 ja .LBB5_1
1 1 0.33 mov rcx, rdx
1 0 0.25 xor edx, edx
1 1 0.33 mov eax, 0
1 1 0.33 test dil, 1
1 1 1.00 jne .LBB5_4
1 1 0.50 lea rax, [2*rcx + 4]
1 0 0.25 xor r8d, r8d
1 1 0.33 cmp rax, rsi
1 1 0.33 mov edx, 1
3 3 1.00 cmovbe rdx, rcx
3 3 1.00 cmova rdi, r8
1 1 0.33 mov rax, rdi
1 1 1.00 U ret
1 1 0.33 mov edx, 1
1 0 0.25 xor eax, eax
1 1 1.00 U ret
Resources:
[0] - SBDivider
[1] - SBFPDivider
[2] - SBPort0
[3] - SBPort1
[4] - SBPort4
[5] - SBPort5
[6.0] - SBPort23
[6.1] - SBPort23
Resource pressure per iteration:
[0] [1] [2] [3] [4] [5] [6.0] [6.1]
- - 6.66 6.66 - 6.68 - -
Resource pressure by instruction:
[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
- - - 0.99 - 0.01 - - movabs rax, 9223372036854775805
- - 0.37 0.63 - - - - cmp rdx, rax
- - - - - 1.00 - - ja .LBB5_1
- - 0.63 0.37 - - - - mov rcx, rdx
- - - - - - - - xor edx, edx
- - 0.01 0.98 - 0.01 - - mov eax, 0
- - 0.98 0.02 - - - - test dil, 1
- - - - - 1.00 - - jne .LBB5_4
- - 0.01 0.99 - - - - lea rax, [2*rcx + 4]
- - - - - - - - xor r8d, r8d
- - 1.00 - - - - - cmp rax, rsi
- - - 0.67 - 0.33 - - mov edx, 1
- - 0.73 0.98 - 1.29 - - cmovbe rdx, rcx
- - 1.60 0.36 - 1.04 - - cmova rdi, r8
- - 0.99 0.01 - - - - mov rax, rdi
- - - - - 1.00 - - ret
- - 0.34 0.66 - - - - mov edx, 1
- - - - - - - - xor eax, eax
- - - - - 1.00 - - ret

View File

@@ -0,0 +1,10 @@
#[path = "formats/coco_dynamic_padding.rs"]
mod format;
/// Codegen benchmark for `zerocopy::FromBytes::ref_from_suffix`.
///
/// Tries to view the tail of `source` as a `format::LocoPacket` (the layout
/// pulled in from the `format` module of this file). Returns the packet on
/// success and `None` on any failure; the bytes in front of the packet and the
/// error value are both dropped.
///
/// Exported under its unmangled name so the emitted assembly can be found by
/// symbol.
#[unsafe(no_mangle)]
fn bench_ref_from_suffix_dynamic_padding(source: &[u8]) -> Option<&format::LocoPacket> {
    // Only the parsed packet matters here; bail out with `None` on any error.
    let Ok((_prefix, packet)) = zerocopy::FromBytes::ref_from_suffix(source) else {
        return None;
    };
    Some(packet)
}

View File

@@ -0,0 +1,23 @@
bench_ref_from_suffix_dynamic_padding:
lea eax, [rsi + rdi]
test al, 3
jne .LBB5_1
movabs rax, 9223372036854775804
and rax, rsi
cmp rax, 9
jae .LBB5_3
.LBB5_1:
xor eax, eax
ret
.LBB5_3:
add rax, -9
movabs rcx, -6148914691236517205
mul rcx
shr rdx
lea rax, [rdx + 2*rdx]
sub rsi, rax
or rax, -4
add rsi, rdi
add rax, rsi
add rax, -8
ret

View File

@@ -0,0 +1,79 @@
Iterations: 100
Instructions: 2000
Total Cycles: 682
Total uOps: 2100
Dispatch Width: 4
uOps Per Cycle: 3.08
IPC: 2.93
Block RThroughput: 5.3
Instruction Info:
[1]: #uOps
[2]: Latency
[3]: RThroughput
[4]: MayLoad
[5]: MayStore
[6]: HasSideEffects (U)
[1] [2] [3] [4] [5] [6] Instructions:
1 1 0.50 lea eax, [rsi + rdi]
1 1 0.33 test al, 3
1 1 1.00 jne .LBB5_1
1 1 0.33 movabs rax, 9223372036854775804
1 1 0.33 and rax, rsi
1 1 0.33 cmp rax, 9
1 1 1.00 jae .LBB5_3
1 0 0.25 xor eax, eax
1 1 1.00 U ret
1 1 0.33 add rax, -9
1 1 0.33 movabs rcx, -6148914691236517205
2 4 1.00 mul rcx
1 1 0.50 shr rdx
1 1 0.50 lea rax, [rdx + 2*rdx]
1 1 0.33 sub rsi, rax
1 1 0.33 or rax, -4
1 1 0.33 add rsi, rdi
1 1 0.33 add rax, rsi
1 1 0.33 add rax, -8
1 1 1.00 U ret
Resources:
[0] - SBDivider
[1] - SBFPDivider
[2] - SBPort0
[3] - SBPort1
[4] - SBPort4
[5] - SBPort5
[6.0] - SBPort23
[6.1] - SBPort23
Resource pressure per iteration:
[0] [1] [2] [3] [4] [5] [6.0] [6.1]
- - 6.65 6.67 - 6.68 - -
Resource pressure by instruction:
[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
- - 0.90 0.10 - - - - lea eax, [rsi + rdi]
- - 0.93 - - 0.07 - - test al, 3
- - - - - 1.00 - - jne .LBB5_1
- - 0.51 0.47 - 0.02 - - movabs rax, 9223372036854775804
- - - - - 1.00 - - and rax, rsi
- - - 0.09 - 0.91 - - cmp rax, 9
- - - - - 1.00 - - jae .LBB5_3
- - - - - - - - xor eax, eax
- - - - - 1.00 - - ret
- - 0.43 0.47 - 0.10 - - add rax, -9
- - 0.42 0.39 - 0.19 - - movabs rcx, -6148914691236517205
- - 1.00 1.00 - - - - mul rcx
- - 0.69 - - 0.31 - - shr rdx
- - 0.54 0.46 - - - - lea rax, [rdx + 2*rdx]
- - 0.07 0.91 - 0.02 - - sub rsi, rax
- - 0.91 0.05 - 0.04 - - or rax, -4
- - 0.08 0.90 - 0.02 - - add rsi, rdi
- - 0.09 0.91 - - - - add rax, rsi
- - 0.08 0.92 - - - - add rax, -8
- - - - - 1.00 - - ret

View File

@@ -0,0 +1,10 @@
#[path = "formats/coco_dynamic_size.rs"]
mod format;
/// Codegen benchmark for `zerocopy::FromBytes::ref_from_suffix`.
///
/// Tries to view the tail of `source` as a `format::LocoPacket` (the layout
/// pulled in from the `format` module of this file). Returns the packet on
/// success and `None` on any failure; the bytes in front of the packet and the
/// error value are both dropped.
///
/// Exported under its unmangled name so the emitted assembly can be found by
/// symbol.
#[unsafe(no_mangle)]
fn bench_ref_from_suffix_dynamic_size(source: &[u8]) -> Option<&format::LocoPacket> {
    // Only the parsed packet matters here; bail out with `None` on any error.
    let Ok((_prefix, packet)) = zerocopy::FromBytes::ref_from_suffix(source) else {
        return None;
    };
    Some(packet)
}

View File

@@ -0,0 +1,13 @@
bench_ref_from_suffix_dynamic_size:
mov rdx, rsi
lea ecx, [rsi + rdi]
mov eax, edx
and eax, 1
add rax, rdi
xor esi, esi
sub rdx, 4
cmovb rax, rsi
shr rdx
test cl, 1
cmovne rax, rsi
ret

View File

@@ -0,0 +1,63 @@
Iterations: 100
Instructions: 1200
Total Cycles: 439
Total uOps: 1400
Dispatch Width: 4
uOps Per Cycle: 3.19
IPC: 2.73
Block RThroughput: 3.5
Instruction Info:
[1]: #uOps
[2]: Latency
[3]: RThroughput
[4]: MayLoad
[5]: MayStore
[6]: HasSideEffects (U)
[1] [2] [3] [4] [5] [6] Instructions:
1 1 0.33 mov rdx, rsi
1 1 0.50 lea ecx, [rsi + rdi]
1 1 0.33 mov eax, edx
1 1 0.33 and eax, 1
1 1 0.33 add rax, rdi
1 0 0.25 xor esi, esi
1 1 0.33 sub rdx, 4
2 2 0.67 cmovb rax, rsi
1 1 0.50 shr rdx
1 1 0.33 test cl, 1
2 2 0.67 cmovne rax, rsi
1 1 1.00 U ret
Resources:
[0] - SBDivider
[1] - SBFPDivider
[2] - SBPort0
[3] - SBPort1
[4] - SBPort4
[5] - SBPort5
[6.0] - SBPort23
[6.1] - SBPort23
Resource pressure per iteration:
[0] [1] [2] [3] [4] [5] [6.0] [6.1]
- - 4.33 4.33 - 4.34 - -
Resource pressure by instruction:
[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
- - 0.02 0.32 - 0.66 - - mov rdx, rsi
- - 0.32 0.68 - - - - lea ecx, [rsi + rdi]
- - 0.66 - - 0.34 - - mov eax, edx
- - 0.02 0.33 - 0.65 - - and eax, 1
- - - 0.99 - 0.01 - - add rax, rdi
- - - - - - - - xor esi, esi
- - 0.65 - - 0.35 - - sub rdx, 4
- - 1.00 1.00 - - - - cmovb rax, rsi
- - 0.66 - - 0.34 - - shr rdx
- - - 0.01 - 0.99 - - test cl, 1
- - 1.00 1.00 - - - - cmovne rax, rsi
- - - - - 1.00 - - ret

View File

@@ -0,0 +1,10 @@
#[path = "formats/coco_static_size.rs"]
mod format;
/// Codegen benchmark for `zerocopy::FromBytes::ref_from_suffix`.
///
/// Tries to view the tail of `source` as a `format::LocoPacket` (the layout
/// pulled in from the `format` module of this file). Returns the packet on
/// success and `None` on any failure; the bytes in front of the packet and the
/// error value are both dropped.
///
/// Exported under its unmangled name so the emitted assembly can be found by
/// symbol.
#[unsafe(no_mangle)]
fn bench_ref_from_suffix_static_size(source: &[u8]) -> Option<&format::LocoPacket> {
    // Only the parsed packet matters here; bail out with `None` on any error.
    let Ok((_prefix, packet)) = zerocopy::FromBytes::ref_from_suffix(source) else {
        return None;
    };
    Some(packet)
}

View File

@@ -0,0 +1,13 @@
bench_ref_from_suffix_static_size:
lea eax, [rsi + rdi]
cmp rsi, 6
setb cl
or cl, al
test cl, 1
je .LBB5_2
xor eax, eax
ret
.LBB5_2:
lea rax, [rdi + rsi]
add rax, -6
ret

View File

@@ -0,0 +1,61 @@
Iterations: 100
Instructions: 1100
Total Cycles: 338
Total uOps: 1100
Dispatch Width: 4
uOps Per Cycle: 3.25
IPC: 3.25
Block RThroughput: 3.0
Instruction Info:
[1]: #uOps
[2]: Latency
[3]: RThroughput
[4]: MayLoad
[5]: MayStore
[6]: HasSideEffects (U)
[1] [2] [3] [4] [5] [6] Instructions:
1 1 0.50 lea eax, [rsi + rdi]
1 1 0.33 cmp rsi, 6
1 1 0.50 setb cl
1 1 0.33 or cl, al
1 1 0.33 test cl, 1
1 1 1.00 je .LBB5_2
1 0 0.25 xor eax, eax
1 1 1.00 U ret
1 1 0.50 lea rax, [rdi + rsi]
1 1 0.33 add rax, -6
1 1 1.00 U ret
Resources:
[0] - SBDivider
[1] - SBFPDivider
[2] - SBPort0
[3] - SBPort1
[4] - SBPort4
[5] - SBPort5
[6.0] - SBPort23
[6.1] - SBPort23
Resource pressure per iteration:
[0] [1] [2] [3] [4] [5] [6.0] [6.1]
- - 3.32 3.33 - 3.35 - -
Resource pressure by instruction:
[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
- - 0.97 0.03 - - - - lea eax, [rsi + rdi]
- - 0.33 0.32 - 0.35 - - cmp rsi, 6
- - 1.00 - - - - - setb cl
- - - 1.00 - - - - or cl, al
- - - 1.00 - - - - test cl, 1
- - - - - 1.00 - - je .LBB5_2
- - - - - - - - xor eax, eax
- - - - - 1.00 - - ret
- - 0.34 0.66 - - - - lea rax, [rdi + rsi]
- - 0.68 0.32 - - - - add rax, -6
- - - - - 1.00 - - ret

View File

@@ -0,0 +1,13 @@
#[path = "formats/coco_dynamic_padding.rs"]
mod format;
/// Codegen benchmark for `zerocopy::FromBytes::ref_from_suffix_with_elems`.
///
/// Tries to view the tail of `source` as a `format::LocoPacket` with `count`
/// passed through as the element count. Returns the packet on success and
/// `None` on any failure; the bytes in front of the packet and the error
/// value are both dropped.
///
/// Exported under its unmangled name so the emitted assembly can be found by
/// symbol.
#[unsafe(no_mangle)]
fn bench_ref_from_suffix_with_elems_dynamic_padding(
    source: &[u8],
    count: usize,
) -> Option<&format::LocoPacket> {
    // Only the parsed packet matters here; bail out with `None` on any error.
    let Ok((_prefix, packet)) = zerocopy::FromBytes::ref_from_suffix_with_elems(source, count)
    else {
        return None;
    };
    Some(packet)
}

View File

@@ -0,0 +1,34 @@
bench_ref_from_suffix_with_elems_dynamic_padding:
mov rcx, rdx
mov edx, 3
mov rax, rcx
mul rdx
jo .LBB5_1
cmp rax, -10
ja .LBB5_1
lea rdx, [rax + 9]
not eax
and eax, 3
add rax, rdx
jae .LBB5_4
.LBB5_1:
xor r8d, r8d
mov edx, 1
mov rax, r8
ret
.LBB5_4:
lea r9d, [rsi + rdi]
xor edx, edx
mov r8d, 0
test r9b, 3
je .LBB5_5
mov rax, r8
ret
.LBB5_5:
sub rsi, rax
jb .LBB5_1
add rdi, rsi
mov rdx, rcx
mov r8, rdi
mov rax, r8
ret

View File

@@ -0,0 +1,99 @@
Iterations: 100
Instructions: 3000
Total Cycles: 973
Total uOps: 3100
Dispatch Width: 4
uOps Per Cycle: 3.19
IPC: 3.08
Block RThroughput: 8.0
Instruction Info:
[1]: #uOps
[2]: Latency
[3]: RThroughput
[4]: MayLoad
[5]: MayStore
[6]: HasSideEffects (U)
[1] [2] [3] [4] [5] [6] Instructions:
1 1 0.33 mov rcx, rdx
1 1 0.33 mov edx, 3
1 1 0.33 mov rax, rcx
2 4 1.00 mul rdx
1 1 1.00 jo .LBB5_1
1 1 0.33 cmp rax, -10
1 1 1.00 ja .LBB5_1
1 1 0.50 lea rdx, [rax + 9]
1 1 0.33 not eax
1 1 0.33 and eax, 3
1 1 0.33 add rax, rdx
1 1 1.00 jae .LBB5_4
1 0 0.25 xor r8d, r8d
1 1 0.33 mov edx, 1
1 1 0.33 mov rax, r8
1 1 1.00 U ret
1 1 0.50 lea r9d, [rsi + rdi]
1 0 0.25 xor edx, edx
1 1 0.33 mov r8d, 0
1 1 0.33 test r9b, 3
1 1 1.00 je .LBB5_5
1 1 0.33 mov rax, r8
1 1 1.00 U ret
1 1 0.33 sub rsi, rax
1 1 1.00 jb .LBB5_1
1 1 0.33 add rdi, rsi
1 1 0.33 mov rdx, rcx
1 1 0.33 mov r8, rdi
1 1 0.33 mov rax, r8
1 1 1.00 U ret
Resources:
[0] - SBDivider
[1] - SBFPDivider
[2] - SBPort0
[3] - SBPort1
[4] - SBPort4
[5] - SBPort5
[6.0] - SBPort23
[6.1] - SBPort23
Resource pressure per iteration:
[0] [1] [2] [3] [4] [5] [6.0] [6.1]
- - 9.66 9.66 - 9.68 - -
Resource pressure by instruction:
[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
- - - 0.99 - 0.01 - - mov rcx, rdx
- - 0.66 0.34 - - - - mov edx, 3
- - 0.34 0.66 - - - - mov rax, rcx
- - 1.00 1.00 - - - - mul rdx
- - - - - 1.00 - - jo .LBB5_1
- - 1.00 - - - - - cmp rax, -10
- - - - - 1.00 - - ja .LBB5_1
- - - 1.00 - - - - lea rdx, [rax + 9]
- - 1.00 - - - - - not eax
- - 1.00 - - - - - and eax, 3
- - 1.00 - - - - - add rax, rdx
- - - - - 1.00 - - jae .LBB5_4
- - - - - - - - xor r8d, r8d
- - 0.33 0.33 - 0.34 - - mov edx, 1
- - 0.33 - - 0.67 - - mov rax, r8
- - - - - 1.00 - - ret
- - 0.33 0.67 - - - - lea r9d, [rsi + rdi]
- - - - - - - - xor edx, edx
- - 0.67 0.33 - - - - mov r8d, 0
- - 0.33 0.34 - 0.33 - - test r9b, 3
- - - - - 1.00 - - je .LBB5_5
- - 0.66 0.01 - 0.33 - - mov rax, r8
- - - - - 1.00 - - ret
- - 0.33 0.67 - - - - sub rsi, rax
- - - - - 1.00 - - jb .LBB5_1
- - - 1.00 - - - - add rdi, rsi
- - 0.01 0.99 - - - - mov rdx, rcx
- - - 1.00 - - - - mov r8, rdi
- - 0.67 0.33 - - - - mov rax, r8
- - - - - 1.00 - - ret

View File

@@ -0,0 +1,13 @@
#[path = "formats/coco_dynamic_size.rs"]
mod format;
/// Codegen benchmark for `zerocopy::FromBytes::ref_from_suffix_with_elems`.
///
/// Tries to view the tail of `source` as a `format::LocoPacket` with `count`
/// passed through as the element count. Returns the packet on success and
/// `None` on any failure; the bytes in front of the packet and the error
/// value are both dropped.
///
/// Exported under its unmangled name so the emitted assembly can be found by
/// symbol.
#[unsafe(no_mangle)]
fn bench_ref_from_suffix_with_elems_dynamic_size(
    source: &[u8],
    count: usize,
) -> Option<&format::LocoPacket> {
    // Only the parsed packet matters here; bail out with `None` on any error.
    let Ok((_prefix, packet)) = zerocopy::FromBytes::ref_from_suffix_with_elems(source, count)
    else {
        return None;
    };
    Some(packet)
}

View File

@@ -0,0 +1,23 @@
bench_ref_from_suffix_with_elems_dynamic_size:
movabs rax, 9223372036854775805
cmp rdx, rax
ja .LBB5_1
lea r8d, [rsi + rdi]
xor ecx, ecx
mov eax, 0
test r8b, 1
jne .LBB5_5
lea rax, [2*rdx + 4]
sub rsi, rax
jae .LBB5_4
.LBB5_1:
xor eax, eax
mov edx, 1
ret
.LBB5_4:
add rdi, rsi
mov rcx, rdx
mov rax, rdi
.LBB5_5:
mov rdx, rcx
ret

View File

@@ -0,0 +1,77 @@
Iterations: 100
Instructions: 1900
Total Cycles: 571
Total uOps: 1900
Dispatch Width: 4
uOps Per Cycle: 3.33
IPC: 3.33
Block RThroughput: 5.0
Instruction Info:
[1]: #uOps
[2]: Latency
[3]: RThroughput
[4]: MayLoad
[5]: MayStore
[6]: HasSideEffects (U)
[1] [2] [3] [4] [5] [6] Instructions:
1 1 0.33 movabs rax, 9223372036854775805
1 1 0.33 cmp rdx, rax
1 1 1.00 ja .LBB5_1
1 1 0.50 lea r8d, [rsi + rdi]
1 0 0.25 xor ecx, ecx
1 1 0.33 mov eax, 0
1 1 0.33 test r8b, 1
1 1 1.00 jne .LBB5_5
1 1 0.50 lea rax, [2*rdx + 4]
1 1 0.33 sub rsi, rax
1 1 1.00 jae .LBB5_4
1 0 0.25 xor eax, eax
1 1 0.33 mov edx, 1
1 1 1.00 U ret
1 1 0.33 add rdi, rsi
1 1 0.33 mov rcx, rdx
1 1 0.33 mov rax, rdi
1 1 0.33 mov rdx, rcx
1 1 1.00 U ret
Resources:
[0] - SBDivider
[1] - SBFPDivider
[2] - SBPort0
[3] - SBPort1
[4] - SBPort4
[5] - SBPort5
[6.0] - SBPort23
[6.1] - SBPort23
Resource pressure per iteration:
[0] [1] [2] [3] [4] [5] [6.0] [6.1]
- - 5.66 5.66 - 5.68 - -
Resource pressure by instruction:
[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
- - 0.66 0.33 - 0.01 - - movabs rax, 9223372036854775805
- - 0.01 0.99 - - - - cmp rdx, rax
- - - - - 1.00 - - ja .LBB5_1
- - 0.99 0.01 - - - - lea r8d, [rsi + rdi]
- - - - - - - - xor ecx, ecx
- - 0.33 0.33 - 0.34 - - mov eax, 0
- - 0.33 0.34 - 0.33 - - test r8b, 1
- - - - - 1.00 - - jne .LBB5_5
- - 0.34 0.66 - - - - lea rax, [2*rdx + 4]
- - - 1.00 - - - - sub rsi, rax
- - - - - 1.00 - - jae .LBB5_4
- - - - - - - - xor eax, eax
- - 1.00 - - - - - mov edx, 1
- - - - - 1.00 - - ret
- - - 1.00 - - - - add rdi, rsi
- - 1.00 - - - - - mov rcx, rdx
- - 0.32 0.68 - - - - mov rax, rdi
- - 0.68 0.32 - - - - mov rdx, rcx
- - - - - 1.00 - - ret

View File

@@ -0,0 +1,12 @@
use zerocopy::*;
#[path = "formats/coco_dynamic_padding.rs"]
mod format;
/// Codegen benchmark for the checked `split_at` on a `format::CocoPacket`.
///
/// Forwards `source` and `len` straight to `split_at` and returns its result
/// unchanged: `Some(Split)` when the split is accepted, `None` otherwise
/// (presumably when `len` exceeds the number of trailing elements — confirm
/// against the zerocopy docs).
///
/// Exported under its unmangled name so the emitted assembly can be found by
/// symbol.
#[unsafe(no_mangle)]
fn bench_split_at_dynamic_padding(
source: &format::CocoPacket,
len: usize,
) -> Option<Split<&format::CocoPacket>> {
source.split_at(len)
}

View File

@@ -0,0 +1,12 @@
bench_split_at_dynamic_padding:
mov rax, rdi
cmp rcx, rdx
jbe .LBB5_2
xor esi, esi
mov qword ptr [rax], rsi
ret
.LBB5_2:
mov qword ptr [rax + 8], rdx
mov qword ptr [rax + 16], rcx
mov qword ptr [rax], rsi
ret

View File

@@ -0,0 +1,59 @@
Iterations: 100
Instructions: 1000
Total Cycles: 404
Total uOps: 1000
Dispatch Width: 4
uOps Per Cycle: 2.48
IPC: 2.48
Block RThroughput: 4.0
Instruction Info:
[1]: #uOps
[2]: Latency
[3]: RThroughput
[4]: MayLoad
[5]: MayStore
[6]: HasSideEffects (U)
[1] [2] [3] [4] [5] [6] Instructions:
1 1 0.33 mov rax, rdi
1 1 0.33 cmp rcx, rdx
1 1 1.00 jbe .LBB5_2
1 0 0.25 xor esi, esi
1 1 1.00 * mov qword ptr [rax], rsi
1 1 1.00 U ret
1 1 1.00 * mov qword ptr [rax + 8], rdx
1 1 1.00 * mov qword ptr [rax + 16], rcx
1 1 1.00 * mov qword ptr [rax], rsi
1 1 1.00 U ret
Resources:
[0] - SBDivider
[1] - SBFPDivider
[2] - SBPort0
[3] - SBPort1
[4] - SBPort4
[5] - SBPort5
[6.0] - SBPort23
[6.1] - SBPort23
Resource pressure per iteration:
[0] [1] [2] [3] [4] [5] [6.0] [6.1]
- - 0.99 1.00 4.00 3.01 2.00 2.00
Resource pressure by instruction:
[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
- - 0.99 - - 0.01 - - mov rax, rdi
- - - 1.00 - - - - cmp rcx, rdx
- - - - - 1.00 - - jbe .LBB5_2
- - - - - - - - xor esi, esi
- - - - 1.00 - - 1.00 mov qword ptr [rax], rsi
- - - - - 1.00 - - ret
- - - - 1.00 - 1.00 - mov qword ptr [rax + 8], rdx
- - - - 1.00 - - 1.00 mov qword ptr [rax + 16], rcx
- - - - 1.00 - 1.00 - mov qword ptr [rax], rsi
- - - - - 1.00 - - ret

View File

@@ -0,0 +1,12 @@
use zerocopy::*;
#[path = "formats/coco_dynamic_size.rs"]
mod format;
/// Codegen benchmark for the checked `split_at` on a `format::CocoPacket`.
///
/// Forwards `source` and `len` straight to `split_at` and returns its result
/// unchanged: `Some(Split)` when the split is accepted, `None` otherwise
/// (presumably when `len` exceeds the number of trailing elements — confirm
/// against the zerocopy docs).
///
/// Exported under its unmangled name so the emitted assembly can be found by
/// symbol.
#[unsafe(no_mangle)]
fn bench_split_at_dynamic_size(
source: &format::CocoPacket,
len: usize,
) -> Option<Split<&format::CocoPacket>> {
source.split_at(len)
}

View File

@@ -0,0 +1,12 @@
bench_split_at_dynamic_size:
mov rax, rdi
cmp rcx, rdx
jbe .LBB5_2
xor esi, esi
mov qword ptr [rax], rsi
ret
.LBB5_2:
mov qword ptr [rax + 8], rdx
mov qword ptr [rax + 16], rcx
mov qword ptr [rax], rsi
ret

View File

@@ -0,0 +1,59 @@
Iterations: 100
Instructions: 1000
Total Cycles: 404
Total uOps: 1000
Dispatch Width: 4
uOps Per Cycle: 2.48
IPC: 2.48
Block RThroughput: 4.0
Instruction Info:
[1]: #uOps
[2]: Latency
[3]: RThroughput
[4]: MayLoad
[5]: MayStore
[6]: HasSideEffects (U)
[1] [2] [3] [4] [5] [6] Instructions:
1 1 0.33 mov rax, rdi
1 1 0.33 cmp rcx, rdx
1 1 1.00 jbe .LBB5_2
1 0 0.25 xor esi, esi
1 1 1.00 * mov qword ptr [rax], rsi
1 1 1.00 U ret
1 1 1.00 * mov qword ptr [rax + 8], rdx
1 1 1.00 * mov qword ptr [rax + 16], rcx
1 1 1.00 * mov qword ptr [rax], rsi
1 1 1.00 U ret
Resources:
[0] - SBDivider
[1] - SBFPDivider
[2] - SBPort0
[3] - SBPort1
[4] - SBPort4
[5] - SBPort5
[6.0] - SBPort23
[6.1] - SBPort23
Resource pressure per iteration:
[0] [1] [2] [3] [4] [5] [6.0] [6.1]
- - 0.99 1.00 4.00 3.01 2.00 2.00
Resource pressure by instruction:
[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
- - 0.99 - - 0.01 - - mov rax, rdi
- - - 1.00 - - - - cmp rcx, rdx
- - - - - 1.00 - - jbe .LBB5_2
- - - - - - - - xor esi, esi
- - - - 1.00 - - 1.00 mov qword ptr [rax], rsi
- - - - - 1.00 - - ret
- - - - 1.00 - 1.00 - mov qword ptr [rax + 8], rdx
- - - - 1.00 - - 1.00 mov qword ptr [rax + 16], rcx
- - - - 1.00 - 1.00 - mov qword ptr [rax], rsi
- - - - - 1.00 - - ret

View File

@@ -0,0 +1,12 @@
use zerocopy::*;
#[path = "formats/coco_dynamic_padding.rs"]
mod format;
/// Codegen benchmark for `split_at_unchecked` on a `format::CocoPacket`.
///
/// Forwards `source` and `len` to `split_at_unchecked` and returns the
/// resulting `Split` as-is; no validation is performed in this wrapper.
///
/// Exported under its unmangled name so the emitted assembly can be found by
/// symbol.
///
/// # Safety
///
/// The call is forwarded unchanged, so the caller must uphold whatever
/// `split_at_unchecked` requires of `source` and `len` (presumably that `len`
/// does not exceed the number of trailing elements — confirm against the
/// zerocopy docs).
#[unsafe(no_mangle)]
unsafe fn bench_split_at_unchecked_dynamic_padding(
source: &format::CocoPacket,
len: usize,
) -> Split<&format::CocoPacket> {
// SAFETY: this function is itself `unsafe`; its caller takes on the
// callee's preconditions for `source` and `len`.
unsafe { source.split_at_unchecked(len) }
}

View File

@@ -0,0 +1,6 @@
bench_split_at_unchecked_dynamic_padding:
mov rax, rdi
mov qword ptr [rdi], rsi
mov qword ptr [rdi + 8], rdx
mov qword ptr [rdi + 16], rcx
ret

View File

@@ -0,0 +1,49 @@
Iterations: 100
Instructions: 500
Total Cycles: 303
Total uOps: 500
Dispatch Width: 4
uOps Per Cycle: 1.65
IPC: 1.65
Block RThroughput: 3.0
Instruction Info:
[1]: #uOps
[2]: Latency
[3]: RThroughput
[4]: MayLoad
[5]: MayStore
[6]: HasSideEffects (U)
[1] [2] [3] [4] [5] [6] Instructions:
1 1 0.33 mov rax, rdi
1 1 1.00 * mov qword ptr [rdi], rsi
1 1 1.00 * mov qword ptr [rdi + 8], rdx
1 1 1.00 * mov qword ptr [rdi + 16], rcx
1 1 1.00 U ret
Resources:
[0] - SBDivider
[1] - SBFPDivider
[2] - SBPort0
[3] - SBPort1
[4] - SBPort4
[5] - SBPort5
[6.0] - SBPort23
[6.1] - SBPort23
Resource pressure per iteration:
[0] [1] [2] [3] [4] [5] [6.0] [6.1]
- - 0.49 0.50 3.00 1.01 1.50 1.50
Resource pressure by instruction:
[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
- - 0.49 0.50 - 0.01 - - mov rax, rdi
- - - - 1.00 - 0.50 0.50 mov qword ptr [rdi], rsi
- - - - 1.00 - 0.50 0.50 mov qword ptr [rdi + 8], rdx
- - - - 1.00 - 0.50 0.50 mov qword ptr [rdi + 16], rcx
- - - - - 1.00 - - ret

View File

@@ -0,0 +1,12 @@
use zerocopy::*;
#[path = "formats/coco_dynamic_size.rs"]
mod format;
/// Codegen benchmark for `split_at_unchecked` on a `format::CocoPacket`.
///
/// Forwards `source` and `len` to `split_at_unchecked` and returns the
/// resulting `Split` as-is; no validation is performed in this wrapper.
///
/// Exported under its unmangled name so the emitted assembly can be found by
/// symbol.
///
/// # Safety
///
/// The call is forwarded unchanged, so the caller must uphold whatever
/// `split_at_unchecked` requires of `source` and `len` (presumably that `len`
/// does not exceed the number of trailing elements — confirm against the
/// zerocopy docs).
#[unsafe(no_mangle)]
unsafe fn bench_split_at_unchecked_dynamic_size(
source: &format::CocoPacket,
len: usize,
) -> Split<&format::CocoPacket> {
// SAFETY: this function is itself `unsafe`; its caller takes on the
// callee's preconditions for `source` and `len`.
unsafe { source.split_at_unchecked(len) }
}

View File

@@ -0,0 +1,6 @@
bench_split_at_unchecked_dynamic_size:
mov rax, rdi
mov qword ptr [rdi], rsi
mov qword ptr [rdi + 8], rdx
mov qword ptr [rdi + 16], rcx
ret

View File

@@ -0,0 +1,49 @@
Iterations: 100
Instructions: 500
Total Cycles: 303
Total uOps: 500
Dispatch Width: 4
uOps Per Cycle: 1.65
IPC: 1.65
Block RThroughput: 3.0
Instruction Info:
[1]: #uOps
[2]: Latency
[3]: RThroughput
[4]: MayLoad
[5]: MayStore
[6]: HasSideEffects (U)
[1] [2] [3] [4] [5] [6] Instructions:
1 1 0.33 mov rax, rdi
1 1 1.00 * mov qword ptr [rdi], rsi
1 1 1.00 * mov qword ptr [rdi + 8], rdx
1 1 1.00 * mov qword ptr [rdi + 16], rcx
1 1 1.00 U ret
Resources:
[0] - SBDivider
[1] - SBFPDivider
[2] - SBPort0
[3] - SBPort1
[4] - SBPort4
[5] - SBPort5
[6.0] - SBPort23
[6.1] - SBPort23
Resource pressure per iteration:
[0] [1] [2] [3] [4] [5] [6.0] [6.1]
- - 0.49 0.50 3.00 1.01 1.50 1.50
Resource pressure by instruction:
[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
- - 0.49 0.50 - 0.01 - - mov rax, rdi
- - - - 1.00 - 0.50 0.50 mov qword ptr [rdi], rsi
- - - - 1.00 - 0.50 0.50 mov qword ptr [rdi + 8], rdx
- - - - 1.00 - 0.50 0.50 mov qword ptr [rdi + 16], rcx
- - - - - 1.00 - - ret

View File

@@ -0,0 +1,11 @@
use zerocopy::*;
#[path = "formats/coco_dynamic_padding.rs"]
mod format;
/// Codegen benchmark for `Split::via_immutable`.
///
/// Consumes a pending `Split` of a `format::CocoPacket` and returns the two
/// halves produced by `via_immutable`: the left-hand packet and the right-hand
/// slice of 3-byte trailing elements.
///
/// Exported under its unmangled name so the emitted assembly can be found by
/// symbol.
#[unsafe(no_mangle)]
fn bench_split_via_immutable_dynamic_padding(
split: Split<&format::CocoPacket>,
) -> (&format::CocoPacket, &[[u8; 3]]) {
split.via_immutable()
}

View File

@@ -0,0 +1,14 @@
bench_split_via_immutable_dynamic_padding:
mov rax, rdi
mov rcx, qword ptr [rsi]
mov rdx, qword ptr [rsi + 8]
mov rsi, qword ptr [rsi + 16]
lea rdi, [rsi + 2*rsi]
add rdi, rcx
add rdi, 9
sub rdx, rsi
mov qword ptr [rax], rcx
mov qword ptr [rax + 8], rsi
mov qword ptr [rax + 16], rdi
mov qword ptr [rax + 24], rdx
ret

View File

@@ -0,0 +1,65 @@
Iterations: 100
Instructions: 1300
Total Cycles: 510
Total uOps: 1300
Dispatch Width: 4
uOps Per Cycle: 2.55
IPC: 2.55
Block RThroughput: 4.0
Instruction Info:
[1]: #uOps
[2]: Latency
[3]: RThroughput
[4]: MayLoad
[5]: MayStore
[6]: HasSideEffects (U)
[1] [2] [3] [4] [5] [6] Instructions:
1 1 0.33 mov rax, rdi
1 5 0.50 * mov rcx, qword ptr [rsi]
1 5 0.50 * mov rdx, qword ptr [rsi + 8]
1 5 0.50 * mov rsi, qword ptr [rsi + 16]
1 1 0.50 lea rdi, [rsi + 2*rsi]
1 1 0.33 add rdi, rcx
1 1 0.33 add rdi, 9
1 1 0.33 sub rdx, rsi
1 1 1.00 * mov qword ptr [rax], rcx
1 1 1.00 * mov qword ptr [rax + 8], rsi
1 1 1.00 * mov qword ptr [rax + 16], rdi
1 1 1.00 * mov qword ptr [rax + 24], rdx
1 1 1.00 U ret
Resources:
[0] - SBDivider
[1] - SBFPDivider
[2] - SBPort0
[3] - SBPort1
[4] - SBPort4
[5] - SBPort5
[6.0] - SBPort23
[6.1] - SBPort23
Resource pressure per iteration:
[0] [1] [2] [3] [4] [5] [6.0] [6.1]
- - 2.00 2.00 4.00 2.00 3.50 3.50
Resource pressure by instruction:
[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
- - 0.03 0.93 - 0.04 - - mov rax, rdi
- - - - - - 0.49 0.51 mov rcx, qword ptr [rsi]
- - - - - - 1.00 - mov rdx, qword ptr [rsi + 8]
- - - - - - 0.01 0.99 mov rsi, qword ptr [rsi + 16]
- - 0.93 0.07 - - - - lea rdi, [rsi + 2*rsi]
- - 0.05 0.02 - 0.93 - - add rdi, rcx
- - 0.49 0.49 - 0.02 - - add rdi, 9
- - 0.50 0.49 - 0.01 - - sub rdx, rsi
- - - - 1.00 - 0.50 0.50 mov qword ptr [rax], rcx
- - - - 1.00 - 0.50 0.50 mov qword ptr [rax + 8], rsi
- - - - 1.00 - 0.49 0.51 mov qword ptr [rax + 16], rdi
- - - - 1.00 - 0.51 0.49 mov qword ptr [rax + 24], rdx
- - - - - 1.00 - - ret

View File

@@ -0,0 +1,11 @@
use zerocopy::*;
#[path = "formats/coco_dynamic_size.rs"]
mod format;
/// Codegen benchmark for `Split::via_immutable`.
///
/// Consumes a pending `Split` of a `format::CocoPacket` and returns the two
/// halves produced by `via_immutable`: the left-hand packet and the right-hand
/// slice of 2-byte trailing elements.
///
/// Exported under its unmangled name so the emitted assembly can be found by
/// symbol.
#[unsafe(no_mangle)]
fn bench_split_via_immutable_dynamic_size(
split: Split<&format::CocoPacket>,
) -> (&format::CocoPacket, &[[u8; 2]]) {
split.via_immutable()
}

View File

@@ -0,0 +1,13 @@
bench_split_via_immutable_dynamic_size:
mov rax, rdi
mov rcx, qword ptr [rsi]
mov rdx, qword ptr [rsi + 8]
mov rsi, qword ptr [rsi + 16]
lea rdi, [rcx + 2*rsi]
add rdi, 4
sub rdx, rsi
mov qword ptr [rax], rcx
mov qword ptr [rax + 8], rsi
mov qword ptr [rax + 16], rdi
mov qword ptr [rax + 24], rdx
ret

View File

@@ -0,0 +1,63 @@
Iterations: 100
Instructions: 1200
Total Cycles: 509
Total uOps: 1200
Dispatch Width: 4
uOps Per Cycle: 2.36
IPC: 2.36
Block RThroughput: 4.0
Instruction Info:
[1]: #uOps
[2]: Latency
[3]: RThroughput
[4]: MayLoad
[5]: MayStore
[6]: HasSideEffects (U)
[1] [2] [3] [4] [5] [6] Instructions:
1 1 0.33 mov rax, rdi
1 5 0.50 * mov rcx, qword ptr [rsi]
1 5 0.50 * mov rdx, qword ptr [rsi + 8]
1 5 0.50 * mov rsi, qword ptr [rsi + 16]
1 1 0.50 lea rdi, [rcx + 2*rsi]
1 1 0.33 add rdi, 4
1 1 0.33 sub rdx, rsi
1 1 1.00 * mov qword ptr [rax], rcx
1 1 1.00 * mov qword ptr [rax + 8], rsi
1 1 1.00 * mov qword ptr [rax + 16], rdi
1 1 1.00 * mov qword ptr [rax + 24], rdx
1 1 1.00 U ret
Resources:
[0] - SBDivider
[1] - SBFPDivider
[2] - SBPort0
[3] - SBPort1
[4] - SBPort4
[5] - SBPort5
[6.0] - SBPort23
[6.1] - SBPort23
Resource pressure per iteration:
[0] [1] [2] [3] [4] [5] [6.0] [6.1]
- - 1.66 1.66 4.00 1.68 3.50 3.50
Resource pressure by instruction:
[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
- - 0.34 0.33 - 0.33 - - mov rax, rdi
- - - - - - 0.49 0.51 mov rcx, qword ptr [rsi]
- - - - - - 0.51 0.49 mov rdx, qword ptr [rsi + 8]
- - - - - - 0.01 0.99 mov rsi, qword ptr [rsi + 16]
- - 0.33 0.67 - - - - lea rdi, [rcx + 2*rsi]
- - 0.63 0.34 - 0.03 - - add rdi, 4
- - 0.36 0.32 - 0.32 - - sub rdx, rsi
- - - - 1.00 - 0.50 0.50 mov qword ptr [rax], rcx
- - - - 1.00 - 0.50 0.50 mov qword ptr [rax + 8], rsi
- - - - 1.00 - 0.98 0.02 mov qword ptr [rax + 16], rdi
- - - - 1.00 - 0.51 0.49 mov qword ptr [rax + 24], rdx
- - - - - 1.00 - - ret

View File

@@ -0,0 +1,11 @@
use zerocopy::*;
#[path = "formats/coco_dynamic_padding.rs"]
mod format;
/// Codegen benchmark for `Split::via_runtime_check`.
///
/// Consumes a pending `Split` of a `format::CocoPacket` and asks
/// `via_runtime_check` for the two halves (left-hand packet, right-hand slice
/// of 3-byte trailing elements). The `Result` is collapsed with `.ok()`, so a
/// failed check surfaces as `None` and the error value is discarded.
///
/// Exported under its unmangled name so the emitted assembly can be found by
/// symbol.
#[unsafe(no_mangle)]
fn bench_split_via_runtime_check_dynamic_padding(
split: Split<&format::CocoPacket>,
) -> Option<(&format::CocoPacket, &[[u8; 3]])> {
split.via_runtime_check().ok()
}

View File

@@ -0,0 +1,22 @@
bench_split_via_runtime_check_dynamic_padding:
mov rax, rdi
mov rdx, qword ptr [rsi + 16]
mov ecx, edx
and ecx, 3
cmp ecx, 1
jne .LBB5_1
mov rcx, qword ptr [rsi]
mov rsi, qword ptr [rsi + 8]
lea rdi, [rdx + 2*rdx]
add rdi, rcx
add rdi, 9
sub rsi, rdx
mov qword ptr [rax + 8], rdx
mov qword ptr [rax + 16], rdi
mov qword ptr [rax + 24], rsi
mov qword ptr [rax], rcx
ret
.LBB5_1:
xor ecx, ecx
mov qword ptr [rax], rcx
ret

View File

@@ -0,0 +1,79 @@
Iterations: 100
Instructions: 2000
Total Cycles: 708
Total uOps: 2000
Dispatch Width: 4
uOps Per Cycle: 2.82
IPC: 2.82
Block RThroughput: 5.0
Instruction Info:
[1]: #uOps
[2]: Latency
[3]: RThroughput
[4]: MayLoad
[5]: MayStore
[6]: HasSideEffects (U)
[1] [2] [3] [4] [5] [6] Instructions:
1 1 0.33 mov rax, rdi
1 5 0.50 * mov rdx, qword ptr [rsi + 16]
1 1 0.33 mov ecx, edx
1 1 0.33 and ecx, 3
1 1 0.33 cmp ecx, 1
1 1 1.00 jne .LBB5_1
1 5 0.50 * mov rcx, qword ptr [rsi]
1 5 0.50 * mov rsi, qword ptr [rsi + 8]
1 1 0.50 lea rdi, [rdx + 2*rdx]
1 1 0.33 add rdi, rcx
1 1 0.33 add rdi, 9
1 1 0.33 sub rsi, rdx
1 1 1.00 * mov qword ptr [rax + 8], rdx
1 1 1.00 * mov qword ptr [rax + 16], rdi
1 1 1.00 * mov qword ptr [rax + 24], rsi
1 1 1.00 * mov qword ptr [rax], rcx
1 1 1.00 U ret
1 0 0.25 xor ecx, ecx
1 1 1.00 * mov qword ptr [rax], rcx
1 1 1.00 U ret
Resources:
[0] - SBDivider
[1] - SBFPDivider
[2] - SBPort0
[3] - SBPort1
[4] - SBPort4
[5] - SBPort5
[6.0] - SBPort23
[6.1] - SBPort23
Resource pressure per iteration:
[0] [1] [2] [3] [4] [5] [6.0] [6.1]
- - 3.00 3.02 5.00 4.98 4.00 4.00
Resource pressure by instruction:
[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
- - - 0.99 - 0.01 - - mov rax, rdi
- - - - - - - 1.00 mov rdx, qword ptr [rsi + 16]
- - 0.99 0.01 - - - - mov ecx, edx
- - 0.99 0.01 - - - - and ecx, 3
- - 0.97 0.03 - - - - cmp ecx, 1
- - - - - 1.00 - - jne .LBB5_1
- - - - - - 1.00 - mov rcx, qword ptr [rsi]
- - - - - - 0.99 0.01 mov rsi, qword ptr [rsi + 8]
- - 0.01 0.99 - - - - lea rdi, [rdx + 2*rdx]
- - - 0.96 - 0.04 - - add rdi, rcx
- - 0.03 - - 0.97 - - add rdi, 9
- - 0.01 0.03 - 0.96 - - sub rsi, rdx
- - - - 1.00 - 1.00 - mov qword ptr [rax + 8], rdx
- - - - 1.00 - - 1.00 mov qword ptr [rax + 16], rdi
- - - - 1.00 - 0.01 0.99 mov qword ptr [rax + 24], rsi
- - - - 1.00 - 0.99 0.01 mov qword ptr [rax], rcx
- - - - - 1.00 - - ret
- - - - - - - - xor ecx, ecx
- - - - 1.00 - 0.01 0.99 mov qword ptr [rax], rcx
- - - - - 1.00 - - ret

View File

@@ -0,0 +1,11 @@
use zerocopy::*;
#[path = "formats/coco_dynamic_size.rs"]
mod format;
/// Codegen benchmark for `Split::via_runtime_check`.
///
/// Consumes a pending `Split` of a `format::CocoPacket` and asks
/// `via_runtime_check` for the two halves (left-hand packet, right-hand slice
/// of 2-byte trailing elements). The `Result` is collapsed with `.ok()`, so a
/// failed check surfaces as `None` and the error value is discarded.
///
/// Exported under its unmangled name so the emitted assembly can be found by
/// symbol.
#[unsafe(no_mangle)]
fn bench_split_via_runtime_check_dynamic_size(
split: Split<&format::CocoPacket>,
) -> Option<(&format::CocoPacket, &[[u8; 2]])> {
split.via_runtime_check().ok()
}

View File

@@ -0,0 +1,13 @@
bench_split_via_runtime_check_dynamic_size:
mov rax, rdi
mov rcx, qword ptr [rsi]
mov rdx, qword ptr [rsi + 8]
mov rsi, qword ptr [rsi + 16]
lea rdi, [rcx + 2*rsi]
add rdi, 4
sub rdx, rsi
mov qword ptr [rax], rcx
mov qword ptr [rax + 8], rsi
mov qword ptr [rax + 16], rdi
mov qword ptr [rax + 24], rdx
ret

View File

@@ -0,0 +1,63 @@
Iterations: 100
Instructions: 1200
Total Cycles: 509
Total uOps: 1200
Dispatch Width: 4
uOps Per Cycle: 2.36
IPC: 2.36
Block RThroughput: 4.0
Instruction Info:
[1]: #uOps
[2]: Latency
[3]: RThroughput
[4]: MayLoad
[5]: MayStore
[6]: HasSideEffects (U)
[1] [2] [3] [4] [5] [6] Instructions:
1 1 0.33 mov rax, rdi
1 5 0.50 * mov rcx, qword ptr [rsi]
1 5 0.50 * mov rdx, qword ptr [rsi + 8]
1 5 0.50 * mov rsi, qword ptr [rsi + 16]
1 1 0.50 lea rdi, [rcx + 2*rsi]
1 1 0.33 add rdi, 4
1 1 0.33 sub rdx, rsi
1 1 1.00 * mov qword ptr [rax], rcx
1 1 1.00 * mov qword ptr [rax + 8], rsi
1 1 1.00 * mov qword ptr [rax + 16], rdi
1 1 1.00 * mov qword ptr [rax + 24], rdx
1 1 1.00 U ret
Resources:
[0] - SBDivider
[1] - SBFPDivider
[2] - SBPort0
[3] - SBPort1
[4] - SBPort4
[5] - SBPort5
[6.0] - SBPort23
[6.1] - SBPort23
Resource pressure per iteration:
[0] [1] [2] [3] [4] [5] [6.0] [6.1]
- - 1.66 1.66 4.00 1.68 3.50 3.50
Resource pressure by instruction:
[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
- - 0.34 0.33 - 0.33 - - mov rax, rdi
- - - - - - 0.49 0.51 mov rcx, qword ptr [rsi]
- - - - - - 0.51 0.49 mov rdx, qword ptr [rsi + 8]
- - - - - - 0.01 0.99 mov rsi, qword ptr [rsi + 16]
- - 0.33 0.67 - - - - lea rdi, [rcx + 2*rsi]
- - 0.63 0.34 - 0.03 - - add rdi, 4
- - 0.36 0.32 - 0.32 - - sub rdx, rsi
- - - - 1.00 - 0.50 0.50 mov qword ptr [rax], rcx
- - - - 1.00 - 0.50 0.50 mov qword ptr [rax + 8], rsi
- - - - 1.00 - 0.98 0.02 mov qword ptr [rax + 16], rdi
- - - - 1.00 - 0.51 0.49 mov qword ptr [rax + 24], rdx
- - - - - 1.00 - - ret

View File

@@ -0,0 +1,11 @@
use zerocopy::*;
#[path = "formats/coco_dynamic_padding.rs"]
mod format;
/// Codegen benchmark for `Split::via_unchecked`.
///
/// Consumes a pending `Split` of a `format::CocoPacket` and returns the two
/// halves produced by `via_unchecked`: the left-hand packet and the right-hand
/// slice of 3-byte trailing elements. No validation is performed in this
/// wrapper.
///
/// Exported under its unmangled name so the emitted assembly can be found by
/// symbol.
///
/// # Safety
///
/// The call is forwarded unchanged, so the caller must uphold whatever
/// `via_unchecked` requires of `split` (NOTE(review): presumably that the two
/// halves do not overlap once the left half's trailing padding is accounted
/// for — confirm against the zerocopy docs).
#[unsafe(no_mangle)]
unsafe fn bench_split_via_unchecked_dynamic_padding(
split: Split<&format::CocoPacket>,
) -> (&format::CocoPacket, &[[u8; 3]]) {
// SAFETY: this function is itself `unsafe`; its caller takes on the
// callee's preconditions for `split`.
unsafe { split.via_unchecked() }
}

View File

@@ -0,0 +1,14 @@
bench_split_via_unchecked_dynamic_padding:
mov rax, rdi
mov rcx, qword ptr [rsi]
mov rdx, qword ptr [rsi + 8]
mov rsi, qword ptr [rsi + 16]
lea rdi, [rsi + 2*rsi]
add rdi, rcx
add rdi, 9
sub rdx, rsi
mov qword ptr [rax], rcx
mov qword ptr [rax + 8], rsi
mov qword ptr [rax + 16], rdi
mov qword ptr [rax + 24], rdx
ret

View File

@@ -0,0 +1,65 @@
Iterations: 100
Instructions: 1300
Total Cycles: 510
Total uOps: 1300
Dispatch Width: 4
uOps Per Cycle: 2.55
IPC: 2.55
Block RThroughput: 4.0
Instruction Info:
[1]: #uOps
[2]: Latency
[3]: RThroughput
[4]: MayLoad
[5]: MayStore
[6]: HasSideEffects (U)
[1] [2] [3] [4] [5] [6] Instructions:
1 1 0.33 mov rax, rdi
1 5 0.50 * mov rcx, qword ptr [rsi]
1 5 0.50 * mov rdx, qword ptr [rsi + 8]
1 5 0.50 * mov rsi, qword ptr [rsi + 16]
1 1 0.50 lea rdi, [rsi + 2*rsi]
1 1 0.33 add rdi, rcx
1 1 0.33 add rdi, 9
1 1 0.33 sub rdx, rsi
1 1 1.00 * mov qword ptr [rax], rcx
1 1 1.00 * mov qword ptr [rax + 8], rsi
1 1 1.00 * mov qword ptr [rax + 16], rdi
1 1 1.00 * mov qword ptr [rax + 24], rdx
1 1 1.00 U ret
Resources:
[0] - SBDivider
[1] - SBFPDivider
[2] - SBPort0
[3] - SBPort1
[4] - SBPort4
[5] - SBPort5
[6.0] - SBPort23
[6.1] - SBPort23
Resource pressure per iteration:
[0] [1] [2] [3] [4] [5] [6.0] [6.1]
- - 2.00 2.00 4.00 2.00 3.50 3.50
Resource pressure by instruction:
[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
- - 0.03 0.93 - 0.04 - - mov rax, rdi
- - - - - - 0.49 0.51 mov rcx, qword ptr [rsi]
- - - - - - 1.00 - mov rdx, qword ptr [rsi + 8]
- - - - - - 0.01 0.99 mov rsi, qword ptr [rsi + 16]
- - 0.93 0.07 - - - - lea rdi, [rsi + 2*rsi]
- - 0.05 0.02 - 0.93 - - add rdi, rcx
- - 0.49 0.49 - 0.02 - - add rdi, 9
- - 0.50 0.49 - 0.01 - - sub rdx, rsi
- - - - 1.00 - 0.50 0.50 mov qword ptr [rax], rcx
- - - - 1.00 - 0.50 0.50 mov qword ptr [rax + 8], rsi
- - - - 1.00 - 0.49 0.51 mov qword ptr [rax + 16], rdi
- - - - 1.00 - 0.51 0.49 mov qword ptr [rax + 24], rdx
- - - - - 1.00 - - ret

View File

@@ -0,0 +1,11 @@
use zerocopy::*;
#[path = "formats/coco_dynamic_size.rs"]
mod format;
/// Codegen benchmark for `Split::via_unchecked`.
///
/// Consumes a pending `Split` of a `format::CocoPacket` and returns the two
/// halves produced by `via_unchecked`: the left-hand packet and the right-hand
/// slice of 2-byte trailing elements. No validation is performed in this
/// wrapper.
///
/// Exported under its unmangled name so the emitted assembly can be found by
/// symbol.
///
/// # Safety
///
/// The call is forwarded unchanged, so the caller must uphold whatever
/// `via_unchecked` requires of `split` (NOTE(review): presumably that the two
/// halves do not overlap once the left half's trailing padding is accounted
/// for — confirm against the zerocopy docs).
#[unsafe(no_mangle)]
unsafe fn bench_split_via_unchecked_dynamic_size(
split: Split<&format::CocoPacket>,
) -> (&format::CocoPacket, &[[u8; 2]]) {
// SAFETY: this function is itself `unsafe`; its caller takes on the
// callee's preconditions for `split`.
unsafe { split.via_unchecked() }
}

View File

@@ -0,0 +1,13 @@
bench_split_via_unchecked_dynamic_size:
mov rax, rdi
mov rcx, qword ptr [rsi]
mov rdx, qword ptr [rsi + 8]
mov rsi, qword ptr [rsi + 16]
lea rdi, [rcx + 2*rsi]
add rdi, 4
sub rdx, rsi
mov qword ptr [rax], rcx
mov qword ptr [rax + 8], rsi
mov qword ptr [rax + 16], rdi
mov qword ptr [rax + 24], rdx
ret

View File

@@ -0,0 +1,63 @@
Iterations: 100
Instructions: 1200
Total Cycles: 509
Total uOps: 1200
Dispatch Width: 4
uOps Per Cycle: 2.36
IPC: 2.36
Block RThroughput: 4.0
Instruction Info:
[1]: #uOps
[2]: Latency
[3]: RThroughput
[4]: MayLoad
[5]: MayStore
[6]: HasSideEffects (U)
[1] [2] [3] [4] [5] [6] Instructions:
1 1 0.33 mov rax, rdi
1 5 0.50 * mov rcx, qword ptr [rsi]
1 5 0.50 * mov rdx, qword ptr [rsi + 8]
1 5 0.50 * mov rsi, qword ptr [rsi + 16]
1 1 0.50 lea rdi, [rcx + 2*rsi]
1 1 0.33 add rdi, 4
1 1 0.33 sub rdx, rsi
1 1 1.00 * mov qword ptr [rax], rcx
1 1 1.00 * mov qword ptr [rax + 8], rsi
1 1 1.00 * mov qword ptr [rax + 16], rdi
1 1 1.00 * mov qword ptr [rax + 24], rdx
1 1 1.00 U ret
Resources:
[0] - SBDivider
[1] - SBFPDivider
[2] - SBPort0
[3] - SBPort1
[4] - SBPort4
[5] - SBPort5
[6.0] - SBPort23
[6.1] - SBPort23
Resource pressure per iteration:
[0] [1] [2] [3] [4] [5] [6.0] [6.1]
- - 1.66 1.66 4.00 1.68 3.50 3.50
Resource pressure by instruction:
[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
- - 0.34 0.33 - 0.33 - - mov rax, rdi
- - - - - - 0.49 0.51 mov rcx, qword ptr [rsi]
- - - - - - 0.51 0.49 mov rdx, qword ptr [rsi + 8]
- - - - - - 0.01 0.99 mov rsi, qword ptr [rsi + 16]
- - 0.33 0.67 - - - - lea rdi, [rcx + 2*rsi]
- - 0.63 0.34 - 0.03 - - add rdi, 4
- - 0.36 0.32 - 0.32 - - sub rdx, rsi
- - - - 1.00 - 0.50 0.50 mov qword ptr [rax], rcx
- - - - 1.00 - 0.50 0.50 mov qword ptr [rax + 8], rsi
- - - - 1.00 - 0.98 0.02 mov qword ptr [rax + 16], rdi
- - - - 1.00 - 0.51 0.49 mov qword ptr [rax + 24], rdx
- - - - - 1.00 - - ret

16
vendor/zerocopy/benches/transmute.rs vendored Normal file
View File

@@ -0,0 +1,16 @@
use zerocopy::Unalign;
use zerocopy_derive::*;
#[path = "formats/coco_static_size.rs"]
mod format;
/// Input type for the by-value transmute benchmark in this file.
///
/// A `repr(C)` wrapper around six plain bytes, deriving only the zerocopy
/// traits listed below (presumably the minimum `transmute!` needs from its
/// source — confirm against the zerocopy docs).
#[derive(IntoBytes, KnownLayout, Immutable)]
#[repr(C)]
struct MinimalViableSource {
// Raw payload; six bytes, presumably matching the size of the target packet.
bytes: [u8; 6],
}
/// Codegen benchmark for `zerocopy::transmute!`.
///
/// Takes a `MinimalViableSource` by value and reinterprets it as a
/// `format::LocoPacket` wrapped in `Unalign`, with the target type inferred
/// from the return type.
///
/// Exported under its unmangled name so the emitted assembly can be found by
/// symbol.
#[unsafe(no_mangle)]
fn bench_transmute(source: MinimalViableSource) -> Unalign<format::LocoPacket> {
zerocopy::transmute!(source)
}

View File

@@ -0,0 +1,3 @@
bench_transmute:
mov rax, rdi
ret

View File

@@ -0,0 +1,43 @@
Iterations: 100
Instructions: 200
Total Cycles: 104
Total uOps: 200
Dispatch Width: 4
uOps Per Cycle: 1.92
IPC: 1.92
Block RThroughput: 1.0
Instruction Info:
[1]: #uOps
[2]: Latency
[3]: RThroughput
[4]: MayLoad
[5]: MayStore
[6]: HasSideEffects (U)
[1] [2] [3] [4] [5] [6] Instructions:
1 1 0.33 mov rax, rdi
1 1 1.00 U ret
Resources:
[0] - SBDivider
[1] - SBFPDivider
[2] - SBPort0
[3] - SBPort1
[4] - SBPort4
[5] - SBPort5
[6.0] - SBPort23
[6.1] - SBPort23
Resource pressure per iteration:
[0] [1] [2] [3] [4] [5] [6.0] [6.1]
- - 0.49 0.50 - 1.01 - -
Resource pressure by instruction:
[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
- - 0.49 0.50 - 0.01 - - mov rax, rdi
- - - - - 1.00 - - ret

View File

@@ -0,0 +1,16 @@
use zerocopy_derive::*;
#[path = "formats/coco_dynamic_size.rs"]
mod format;
/// Dynamically sized source type for the `transmute_ref!` benchmark.
///
/// Laid out `repr(C)` with 2-byte alignment: a fixed six-byte header followed
/// by a slice of two-byte elements, which makes the struct unsized.
#[derive(IntoBytes, KnownLayout, Immutable)]
#[repr(C, align(2))]
struct MinimalViableSource {
// Fixed-size leading portion (six bytes).
header: [u8; 6],
// Unsized tail; each element is two bytes.
trailer: [[u8; 2]],
}
/// Codegen benchmark for `zerocopy::transmute_ref!` on a dynamically sized
/// source.
///
/// Borrows a `MinimalViableSource` (six-byte header plus a `[[u8; 2]]` tail)
/// and returns the same memory viewed as a `format::LocoPacket`, where
/// `format` is the module loaded from `formats/coco_dynamic_size.rs`. The
/// destination type is taken from this function's return type.
// NOTE(review): the source and destination may split their bytes differently
// between fixed prefix and trailing slice, in which case the reference's
// length metadata has to be recomputed — confirm against the `LocoPacket`
// definition.
// Exported under its unmangled name — presumably so the generated assembly
// for this benchmark can be found under a stable symbol; confirm against the
// benchmark tooling.
#[unsafe(no_mangle)]
fn bench_transmute_ref_dynamic_size(source: &MinimalViableSource) -> &format::LocoPacket {
zerocopy::transmute_ref!(source)
}

View File

@@ -0,0 +1,4 @@
bench_transmute_ref_dynamic_size:
mov rax, rdi
lea rdx, [rsi + 1]
ret

View File

@@ -0,0 +1,45 @@
Iterations: 100
Instructions: 300
Total Cycles: 104
Total uOps: 300
Dispatch Width: 4
uOps Per Cycle: 2.88
IPC: 2.88
Block RThroughput: 1.0
Instruction Info:
[1]: #uOps
[2]: Latency
[3]: RThroughput
[4]: MayLoad
[5]: MayStore
[6]: HasSideEffects (U)
[1] [2] [3] [4] [5] [6] Instructions:
1 1 0.33 mov rax, rdi
1 1 0.50 lea rdx, [rsi + 1]
1 1 1.00 U ret
Resources:
[0] - SBDivider
[1] - SBFPDivider
[2] - SBPort0
[3] - SBPort1
[4] - SBPort4
[5] - SBPort5
[6.0] - SBPort23
[6.1] - SBPort23
Resource pressure per iteration:
[0] [1] [2] [3] [4] [5] [6.0] [6.1]
- - 0.99 1.00 - 1.01 - -
Resource pressure by instruction:
[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
- - 0.99 - - 0.01 - - mov rax, rdi
- - - 1.00 - - - - lea rdx, [rsi + 1]
- - - - - 1.00 - - ret

View File

@@ -0,0 +1,15 @@
use zerocopy_derive::*;
#[path = "formats/coco_static_size.rs"]
mod format;
/// Fixed-size source type for the `transmute_ref!` benchmark.
///
/// A `repr(C)` wrapper around exactly six bytes, with its alignment raised to
/// two bytes by `align(2)`.
#[derive(IntoBytes, KnownLayout, Immutable)]
#[repr(C, align(2))]
struct MinimalViableSource {
// The six raw bytes that `bench_transmute_ref_static_size` reinterprets.
bytes: [u8; 6],
}
/// Codegen benchmark for `zerocopy::transmute_ref!` on a fixed-size source.
///
/// Borrows a six-byte, 2-byte-aligned `MinimalViableSource` and returns the
/// same memory viewed as a `format::LocoPacket`, where `format` is the module
/// loaded from `formats/coco_static_size.rs`. The destination type is taken
/// from this function's return type.
// Exported under its unmangled name — presumably so the generated assembly
// for this benchmark can be found under a stable symbol; confirm against the
// benchmark tooling.
#[unsafe(no_mangle)]
fn bench_transmute_ref_static_size(source: &MinimalViableSource) -> &format::LocoPacket {
zerocopy::transmute_ref!(source)
}

Some files were not shown because too many files have changed in this diff Show More