diff --git a/cranelift/codegen/src/isa/s390x/inst.isle b/cranelift/codegen/src/isa/s390x/inst.isle index 41830d57bd38..4a033e5b3b3b 100644 --- a/cranelift/codegen/src/isa/s390x/inst.isle +++ b/cranelift/codegen/src/isa/s390x/inst.isle @@ -1008,6 +1008,22 @@ (rd WritableReg) (mem MemArg)) + ;; Load the indexed address `base + (sext32(index + offset) << size)` into `rd`. + (LoadIndexedAddr + (rd WritableReg) + (base Reg) + (index Reg) + (offset i16) + (size u8)) + + ;; Load the logical indexed address `base + (zext32(index + offset) << size)` into `rd`. + (LoadLogicalIndexedAddr + (rd WritableReg) + (base Reg) + (index Reg) + (offset u16) + (size u8)) + ;; Meta-instruction to emit a loop around a sequence of instructions. ;; This control flow is not visible to the compiler core, in particular ;; the register allocator. Therefore, instructions in the loop may not @@ -1741,6 +1757,9 @@ (decl u32_from_value (u32) Value) (extern extractor u32_from_value u32_from_value) +(decl u16_from_value (u16) Value) +(extern extractor u16_from_value u16_from_value) + (decl u8_from_value (u8) Value) (extern extractor u8_from_value u8_from_value) @@ -1860,6 +1879,9 @@ (decl pure partial memarg_imm_from_offset_plus_bias (Offset32 u8) SImm20) (extern constructor memarg_imm_from_offset_plus_bias memarg_imm_from_offset_plus_bias) +(decl pure partial memarg_imm_from_shifted_offset (Offset32 u8) SImm20) +(extern constructor memarg_imm_from_shifted_offset memarg_imm_from_shifted_offset) + ;; Accessors for `MemFlags`. 
(decl littleendian () MemFlags) @@ -1927,6 +1949,21 @@ (if-let final_offset (memarg_symbol_offset_sum offset sym_offset)) (memarg_symbol name final_offset flags)) +(rule 2 (lower_address flags (has_type (and (ty_addr64 _) (mie4_enabled)) + (iadd (ishl (uextend (iadd x (and (value_type $I32) (u16_from_value z)))) (u8_from_value shift)) y)) (offset32 0)) + (memarg_reg_plus_off (load_logical_indexed_addr y x z shift) 0 0 flags)) + +(rule 3 (lower_address flags (has_type (and (ty_addr64 _) (mie4_enabled)) + (iadd y (ishl (uextend (iadd x (and (value_type $I32) (u16_from_value z)))) (u8_from_value shift)))) (offset32 0)) + (memarg_reg_plus_off (load_logical_indexed_addr y x z shift) 0 0 flags)) + +(rule 4 (lower_address flags (has_type (and (ty_addr64 _) (mie4_enabled)) + (iadd (ishl (sextend (iadd x (and (value_type $I32) (i16_from_value z)))) (u8_from_value shift)) y)) (offset32 0)) + (memarg_reg_plus_off (load_indexed_addr y x z shift) 0 0 flags)) + +(rule 5 (lower_address flags (has_type (and (ty_addr64 _) (mie4_enabled)) + (iadd y (ishl (sextend (iadd x (and (value_type $I32) (i16_from_value z)))) (u8_from_value shift)))) (offset32 0)) + (memarg_reg_plus_off (load_indexed_addr y x z shift) 0 0 flags)) ;; Lower an address plus a small bias into a `MemArg`. @@ -2817,6 +2854,20 @@ (_ Unit (emit (MInst.LoadAddr dst mem)))) dst)) +;; Helper for emitting `MInst.LoadIndexedAddr`: `base + (sext32(index + offset) << size)`. +(decl load_indexed_addr (Reg Reg i16 u8) Reg) +(rule (load_indexed_addr base index offset size) + (let ((dst WritableReg (temp_writable_reg $I64)) + (_ Unit (emit (MInst.LoadIndexedAddr dst base index offset size)))) + dst)) + +;; Helper for emitting `MInst.LoadLogicalIndexedAddr`: `base + (zext32(index + offset) << size)`. +(decl load_logical_indexed_addr (Reg Reg u16 u8) Reg) +(rule (load_logical_indexed_addr base index offset size) + (let ((dst WritableReg (temp_writable_reg $I64)) + (_ Unit (emit (MInst.LoadLogicalIndexedAddr dst base index offset size)))) + dst)) + ;; Helper for emitting `MInst.Call` instructions. 
(decl call_impl (WritableReg BoxCallInfo) SideEffectNoResult) (rule (call_impl reg info) diff --git a/cranelift/codegen/src/isa/s390x/inst/emit.rs b/cranelift/codegen/src/isa/s390x/inst/emit.rs index 2222faeff1b5..0f542c681c23 100644 --- a/cranelift/codegen/src/isa/s390x/inst/emit.rs +++ b/cranelift/codegen/src/isa/s390x/inst/emit.rs @@ -2321,6 +2321,30 @@ impl Inst { rd, &mem, opcode_rx, opcode_rxy, opcode_ril, false, sink, emit_info, state, ); } + &Inst::LoadIndexedAddr { + rd, + base, + index, + offset, + size, + } => { + let opcode: u16 = 0xe360 | (size as u16 & 0xf) << 1; + let offset = offset as i32 as u32; + put(sink, &enc_rxy(opcode, rd.to_reg(), base, index, offset)); + } + &Inst::LoadLogicalIndexedAddr { + rd, + base, + index, + offset, + size, + } => { + let opcode: u16 = 0xe361 | (size as u16 & 0xf) << 1; + put( + sink, + &enc_rxy(opcode, rd.to_reg(), base, index, offset.into()), + ); + } &Inst::Mov64 { rd, rm } => { let opcode = 0xb904; // LGR diff --git a/cranelift/codegen/src/isa/s390x/inst/mod.rs b/cranelift/codegen/src/isa/s390x/inst/mod.rs index 421cfde58fef..6363bacb76ca 100644 --- a/cranelift/codegen/src/isa/s390x/inst/mod.rs +++ b/cranelift/codegen/src/isa/s390x/inst/mod.rs @@ -1,7 +1,7 @@ //! This module defines s390x-specific machine instruction types. use crate::binemit::{Addend, CodeOffset, Reloc}; -use crate::ir::{ExternalName, Type, types}; +use crate::ir::{ExternalName, MemFlags, Type, types}; use crate::isa::s390x::abi::S390xMachineDeps; use crate::isa::{CallConv, FunctionAlignment}; use crate::machinst::*; @@ -240,6 +240,10 @@ impl Inst { | Inst::Unwind { .. } | Inst::ElfTlsGetOffset { .. } => InstructionSet::Base, + Inst::LoadIndexedAddr { .. } | Inst::LoadLogicalIndexedAddr { .. } => { + InstructionSet::MIE4 + } + // These depend on the opcode Inst::AluRRR { alu_op, .. 
} => match alu_op { ALUOp::NotAnd32 | ALUOp::NotAnd64 => InstructionSet::MIE3, @@ -1030,6 +1034,20 @@ fn s390x_get_operands(inst: &mut Inst, collector: &mut DenyReuseVisitor { + collector.reg_def(rd); + collector.reg_use(base); + collector.reg_use(index); + } + Inst::LoadLogicalIndexedAddr { + rd, base, index, .. + } => { + collector.reg_def(rd); + collector.reg_use(base); + collector.reg_use(index); + } Inst::StackProbeLoop { probe_count, .. } => { collector.reg_early_def(probe_count); } @@ -3507,6 +3525,58 @@ impl Inst { format!("{mem_str}{op} {rd}, {mem}") } + &Inst::LoadIndexedAddr { + rd, + base, + index, + offset, + size, + } => { + let rd = pretty_print_reg(rd.to_reg()); + let op = match size { + 1 => "lxah", + 2 => "lxaf", + 3 => "lxag", + 4 => "lxaq", + _ => unreachable!(), + }; + let disp = SImm20::maybe_from_i64(offset as i64).unwrap(); + let flags = MemFlags::trusted(); + let mem = MemArg::BXD20 { + base, + index, + disp, + flags, + }; + let mem = mem.pretty_print_default(); + format!("{op} {rd}, {mem}") + } + &Inst::LoadLogicalIndexedAddr { + rd, + base, + index, + offset, + size, + } => { + let rd = pretty_print_reg(rd.to_reg()); + let op = match size { + 1 => "llxah", + 2 => "llxaf", + 3 => "llxag", + 4 => "llxaq", + _ => unreachable!(), + }; + let disp = SImm20::maybe_from_i64(offset as i64).unwrap(); + let flags = MemFlags::trusted(); + let mem = MemArg::BXD20 { + base, + index, + disp, + flags, + }; + let mem = mem.pretty_print_default(); + format!("{op} {rd}, {mem}") + } &Inst::StackProbeLoop { probe_count, guard_size, diff --git a/cranelift/codegen/src/isa/s390x/lower.isle b/cranelift/codegen/src/isa/s390x/lower.isle index 670ca6b2ccc0..71bd02dae8ae 100644 --- a/cranelift/codegen/src/isa/s390x/lower.isle +++ b/cranelift/codegen/src/isa/s390x/lower.isle @@ -115,6 +115,21 @@ (rule 1 (lower (has_type (vr128_ty ty) (iadd x y))) (vec_add ty x y)) +(rule 16 (lower (has_type (and (ty_addr64 _) (mie4_enabled)) + (iadd (ishl (uextend (iadd x 
(and (value_type $I32) (u16_from_value z)))) (u8_from_value shift)) y))) + (load_logical_indexed_addr y x z shift)) + +(rule 17 (lower (has_type (and (ty_addr64 _) (mie4_enabled)) + (iadd y (ishl (uextend (iadd x (and (value_type $I32) (u16_from_value z)))) (u8_from_value shift))))) + (load_logical_indexed_addr y x z shift)) + +(rule 18 (lower (has_type (and (ty_addr64 _) (mie4_enabled)) + (iadd (ishl (sextend (iadd x (and (value_type $I32) (i16_from_value z)))) (u8_from_value shift)) y))) + (load_indexed_addr y x z shift)) + +(rule 19 (lower (has_type (and (ty_addr64 _) (mie4_enabled)) + (iadd y (ishl (sextend (iadd x (and (value_type $I32) (i16_from_value z)))) (u8_from_value shift))))) + (load_indexed_addr y x z shift)) ;;;; Rules for `uadd_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/cranelift/codegen/src/isa/s390x/lower/isle.rs b/cranelift/codegen/src/isa/s390x/lower/isle.rs index ea3de8b413e1..2eef4635924a 100644 --- a/cranelift/codegen/src/isa/s390x/lower/isle.rs +++ b/cranelift/codegen/src/isa/s390x/lower/isle.rs @@ -478,6 +478,13 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, S390xBackend> { Some(imm) } + #[inline] + fn u16_from_value(&mut self, val: Value) -> Option<u16> { + let constant = self.u64_from_value(val)?; + let imm = u16::try_from(constant).ok()?; + Some(imm) + } + #[inline] fn u8_from_value(&mut self, val: Value) -> Option<u8> { let constant = self.u64_from_value(val)?; @@ -722,6 +729,15 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, S390xBackend> { SImm20::maybe_from_i64(i64::from(imm)) } + #[inline] + fn memarg_imm_from_shifted_offset(&mut self, imm: Offset32, shift: u8) -> Option<SImm20> { + if (1..=4).contains(&shift) && i64::from(imm) & ((1 << shift) - 1) == 0 { + SImm20::maybe_from_i64(i64::from(imm) >> shift) + } else { + None + } + } + #[inline] fn memarg_imm_from_offset_plus_bias(&mut self, imm: Offset32, bias: u8) -> Option<SImm20> { let final_offset = i64::from(imm) + bias as i64; diff --git a/cranelift/filetests/filetests/isa/s390x/arithmetic-arch15.clif 
b/cranelift/filetests/filetests/isa/s390x/arithmetic-arch15.clif index e2f9046560e4..19e8270205b2 100644 --- a/cranelift/filetests/filetests/isa/s390x/arithmetic-arch15.clif +++ b/cranelift/filetests/filetests/isa/s390x/arithmetic-arch15.clif @@ -327,3 +327,48 @@ block0(v0: i128): ; vst %v4, 0(%r2) ; br %r14 +function %i64_i32_offset_mul_unsigned(i64, i32) -> i64 { +block0(v0: i64, v1: i32): + v2 = iconst.i8 4 + v3 = iconst.i32 8000 + v4 = iadd v1, v3 + v5 = uextend.i64 v4 + v6 = ishl v5, v2 + v7 = iadd v0, v6 + return v7 +} + +; VCode: +; block0: +; llxaq %r2, 8000(%r3,%r2) +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0xe3, 0x23 +; swr %f4, %f0 +; .byte 0x01, 0x69 +; br %r14 + +function %i64_i32_offset_mul_signed(i64, i32) -> i64 { +block0(v0: i64, v1: i32): + v2 = iconst.i8 4 + v3 = iconst.i32 8000 + v4 = iadd v1, v3 + v5 = sextend.i64 v4 + v6 = ishl v5, v2 + v7 = iadd v0, v6 + return v7 +} + +; VCode: +; block0: +; lxaq %r2, 8000(%r3,%r2) +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0xe3, 0x23 +; swr %f4, %f0 +; .byte 0x01, 0x68 +; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/load-arch15.clif b/cranelift/filetests/filetests/isa/s390x/load-arch15.clif new file mode 100644 index 000000000000..d674f8182220 --- /dev/null +++ b/cranelift/filetests/filetests/isa/s390x/load-arch15.clif @@ -0,0 +1,80 @@ +test compile precise-output +set enable_multi_ret_implicit_sret +target s390x arch15 + +function %uload8_i64_i32_offset_mul_unsigned(i64, i32) -> i64 { +block0(v0: i64, v1: i32): + v2 = iconst.i8 4 + v3 = iconst.i32 8000 + v4 = iadd v1, v3 + v5 = uextend.i64 v4 + v6 = ishl v5, v2 + v7 = iadd v0, v6 + v8 = uload8.i64 v7 + return v8 +} + +; VCode: +; block0: +; llxaq %r3, 8000(%r3,%r2) +; llgc %r2, 0(%r3) +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0xe3, 0x33 +; swr %f4, %f0 +; .byte 0x01, 0x69 +; llgc %r2, 0(%r3) ; trap: heap_oob +; br %r14 + +function %uload8_i64_i64_offset_mul_signed(i64, 
i32) -> i64 { +block0(v0: i64, v1: i32): + v2 = iconst.i8 4 + v3 = iconst.i32 8000 + v4 = iadd v1, v3 + v5 = sextend.i64 v4 + v6 = ishl v5, v2 + v7 = iadd v0, v6 + v8 = uload8.i64 v7 + return v8 +} + +; VCode: +; block0: +; lxaq %r3, 8000(%r3,%r2) +; llgc %r2, 0(%r3) +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0xe3, 0x33 +; swr %f4, %f0 +; .byte 0x01, 0x68 +; llgc %r2, 0(%r3) ; trap: heap_oob +; br %r14 + +function %uload8_i64_i64_offset_shifted0(i64, i32) -> i64 { +block0(v0: i64, v1: i32): + v2 = iconst.i8 0 + v3 = uextend.i64 v1 + v4 = ishl v3, v2 + v5 = iadd v0, v4 + v6 = uload8.i64 v5+1000 + return v6 +} + +; VCode: +; block0: +; llgfr %r5, %r3 +; sllg %r5, %r5, 0 +; llgc %r2, 1000(%r5,%r2) +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; llgfr %r5, %r3 +; sllg %r5, %r5, 0 +; llgc %r2, 0x3e8(%r5, %r2) ; trap: heap_oob +; br %r14 + diff --git a/cranelift/filetests/filetests/runtests/s390x-lxa.clif b/cranelift/filetests/filetests/runtests/s390x-lxa.clif new file mode 100644 index 000000000000..be5cf126e71c --- /dev/null +++ b/cranelift/filetests/filetests/runtests/s390x-lxa.clif @@ -0,0 +1,40 @@ +test interpret +test run +target pulley64 +target s390x arch15 + +function %i64_i32_offset_mul_unsigned(i64, i32) -> i64 { +block0(v0: i64, v1: i32): + v2 = iconst.i8 4 + v3 = iconst.i32 0x7fff + v4 = iadd v1, v3 + v5 = uextend.i64 v4 + v6 = ishl v5, v2 + v7 = iadd v0, v6 + return v7 +} + +; run: %i64_i32_offset_mul_unsigned(0, 0) == 0x7fff0 +; run: %i64_i32_offset_mul_unsigned(0, -1) == 0x7ffe0 +; run: %i64_i32_offset_mul_unsigned(-1, -1) == 0x7ffdf +; run: %i64_i32_offset_mul_unsigned(0, 0x7fff_ffff) == 0x8_0007ffe0 +; run: %i64_i32_offset_mul_unsigned(0x7fffffff_ffffffff, 0x7fff_ffff) == 0x80000008_0007ffdf +; run: %i64_i32_offset_mul_unsigned(0x7fffffff_ffffffff, 0x8000_0000) == 0x80000008_0007ffef + +function %i64_i32_offset_mul_signed(i64, i32) -> i64 { +block0(v0: i64, v1: i32): + v2 = iconst.i8 4 + v3 = iconst.i32 0x7fff + v4 = iadd 
v1, v3 + v5 = sextend.i64 v4 + v6 = ishl v5, v2 + v7 = iadd v0, v6 + return v7 +} + +; run: %i64_i32_offset_mul_signed(0, 0) == 0x7fff0 +; run: %i64_i32_offset_mul_signed(0, -1) == 0x7ffe0 +; run: %i64_i32_offset_mul_signed(-1, -1) == 0x7ffdf +; run: %i64_i32_offset_mul_signed(0, 0x7fff_ffff) == 0xfffffff8_0007ffe0 +; run: %i64_i32_offset_mul_signed(0x7fffffff_ffffffff, 0x7fff_ffff) == 0x7ffffff8_0007ffdf +; run: %i64_i32_offset_mul_signed(0x7fffffff_ffffffff, 0x8000_0000) == 0x7ffffff8_0007ffef