Skip to content

Commit d9038ed

Browse files
authored
Cranelift/Wasmtime/Pulley/Debugging: use little-endian mode to spill/reload vectors in guest-debugging slot and ABI clobbers. (#12585)
* Cranelift/Wasmtime/Pulley/Debugging: use little-endian mode to spill/reload vectors in guest-debugging slot and ABI clobbers.

  When running Pulley on an s390x (or other big-endian) host, and enabling guest-debugging instrumentation, a very strange confluence of events occurs:

  - Pulley uses "native endian" of the host by default for loads and stores.
  - Patchable calls to debug hooks use the `preserve_all` ABI, which spills all registers in the trampoline adapter (callee in this ABI), including vector registers.
  - Saving vector-typed locals/operand stack values to the debugger state slot also uses vector stores.
  - All of these stores were thus big-endian on big-endian hosts.
  - Pulley's bytecode only supports little-endian vector loads/stores.

  We were thus hitting an assert in Pulley codegen (the Cranelift backend) when encountering a `VStore` VCode instruction with a big-endian mode.

  This PR makes two changes that avoid this issue:

  - The ABI code for Pulley is careful to specify little-endian mode explicitly for any vector load/store.
  - The debug instrumentation code is refactored to use little-endian explicitly for vector types *only*.
    - (Why not for all types? Because we GC-root GC ref values, and these need to be provided to the collector as mutable storage cells, so need to be in native endianness.)

  Test will come as part of #12575 incorporating a Pulley-with-guest-debugging test and running on s390x amongst our platforms.

  prtest:full

* Review feedback.

* Re-bless Cranelift tests (explicit `little` flags on `preserve_all` tests).
1 parent 83cf59f commit d9038ed

5 files changed

Lines changed: 197 additions & 137 deletions

File tree

cranelift/codegen/src/isa/pulley_shared/abi.rs

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -345,9 +345,11 @@ where
345345
}
346346

347347
for (offset, ty, reg) in frame_layout.manually_managed_clobbers(&style) {
348-
insts.push(
349-
Inst::gen_store(Amode::SpOffset { offset }, reg, ty, MemFlags::trusted()).into(),
350-
);
348+
let mut flags = MemFlags::trusted();
349+
if ty.is_vector() {
350+
flags.set_endianness(ir::Endianness::Little);
351+
}
352+
insts.push(Inst::gen_store(Amode::SpOffset { offset }, reg, ty, flags).into());
351353
}
352354

353355
insts
@@ -366,12 +368,16 @@ where
366368

367369
// Restore clobbered registers that are manually managed in Cranelift.
368370
for (offset, ty, reg) in frame_layout.manually_managed_clobbers(&style) {
371+
let mut flags = MemFlags::trusted();
372+
if ty.is_vector() {
373+
flags.set_endianness(ir::Endianness::Little);
374+
}
369375
insts.push(
370376
Inst::gen_load(
371377
Writable::from_reg(reg),
372378
Amode::SpOffset { offset },
373379
ty,
374-
MemFlags::trusted(),
380+
flags,
375381
)
376382
.into(),
377383
);

cranelift/filetests/filetests/isa/pulley32/preserve-all.clif

Lines changed: 64 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -87,38 +87,38 @@ block0(v0: i64):
8787
; fstore64 sp+528, f29 // flags = notrap aligned
8888
; fstore64 sp+520, f30 // flags = notrap aligned
8989
; fstore64 sp+512, f31 // flags = notrap aligned
90-
; vstore128 sp+504, v0 // flags = notrap aligned
91-
; vstore128 sp+496, v1 // flags = notrap aligned
92-
; vstore128 sp+488, v2 // flags = notrap aligned
93-
; vstore128 sp+480, v3 // flags = notrap aligned
94-
; vstore128 sp+472, v4 // flags = notrap aligned
95-
; vstore128 sp+464, v5 // flags = notrap aligned
96-
; vstore128 sp+456, v6 // flags = notrap aligned
97-
; vstore128 sp+448, v7 // flags = notrap aligned
98-
; vstore128 sp+440, v8 // flags = notrap aligned
99-
; vstore128 sp+432, v9 // flags = notrap aligned
100-
; vstore128 sp+424, v10 // flags = notrap aligned
101-
; vstore128 sp+416, v11 // flags = notrap aligned
102-
; vstore128 sp+408, v12 // flags = notrap aligned
103-
; vstore128 sp+400, v13 // flags = notrap aligned
104-
; vstore128 sp+392, v14 // flags = notrap aligned
105-
; vstore128 sp+384, v15 // flags = notrap aligned
106-
; vstore128 sp+376, v16 // flags = notrap aligned
107-
; vstore128 sp+368, v17 // flags = notrap aligned
108-
; vstore128 sp+360, v18 // flags = notrap aligned
109-
; vstore128 sp+352, v19 // flags = notrap aligned
110-
; vstore128 sp+344, v20 // flags = notrap aligned
111-
; vstore128 sp+336, v21 // flags = notrap aligned
112-
; vstore128 sp+328, v22 // flags = notrap aligned
113-
; vstore128 sp+320, v23 // flags = notrap aligned
114-
; vstore128 sp+312, v24 // flags = notrap aligned
115-
; vstore128 sp+304, v25 // flags = notrap aligned
116-
; vstore128 sp+296, v26 // flags = notrap aligned
117-
; vstore128 sp+288, v27 // flags = notrap aligned
118-
; vstore128 sp+280, v28 // flags = notrap aligned
119-
; vstore128 sp+272, v29 // flags = notrap aligned
120-
; vstore128 sp+264, v30 // flags = notrap aligned
121-
; vstore128 sp+256, v31 // flags = notrap aligned
90+
; vstore128 sp+504, v0 // flags = notrap aligned little
91+
; vstore128 sp+496, v1 // flags = notrap aligned little
92+
; vstore128 sp+488, v2 // flags = notrap aligned little
93+
; vstore128 sp+480, v3 // flags = notrap aligned little
94+
; vstore128 sp+472, v4 // flags = notrap aligned little
95+
; vstore128 sp+464, v5 // flags = notrap aligned little
96+
; vstore128 sp+456, v6 // flags = notrap aligned little
97+
; vstore128 sp+448, v7 // flags = notrap aligned little
98+
; vstore128 sp+440, v8 // flags = notrap aligned little
99+
; vstore128 sp+432, v9 // flags = notrap aligned little
100+
; vstore128 sp+424, v10 // flags = notrap aligned little
101+
; vstore128 sp+416, v11 // flags = notrap aligned little
102+
; vstore128 sp+408, v12 // flags = notrap aligned little
103+
; vstore128 sp+400, v13 // flags = notrap aligned little
104+
; vstore128 sp+392, v14 // flags = notrap aligned little
105+
; vstore128 sp+384, v15 // flags = notrap aligned little
106+
; vstore128 sp+376, v16 // flags = notrap aligned little
107+
; vstore128 sp+368, v17 // flags = notrap aligned little
108+
; vstore128 sp+360, v18 // flags = notrap aligned little
109+
; vstore128 sp+352, v19 // flags = notrap aligned little
110+
; vstore128 sp+344, v20 // flags = notrap aligned little
111+
; vstore128 sp+336, v21 // flags = notrap aligned little
112+
; vstore128 sp+328, v22 // flags = notrap aligned little
113+
; vstore128 sp+320, v23 // flags = notrap aligned little
114+
; vstore128 sp+312, v24 // flags = notrap aligned little
115+
; vstore128 sp+304, v25 // flags = notrap aligned little
116+
; vstore128 sp+296, v26 // flags = notrap aligned little
117+
; vstore128 sp+288, v27 // flags = notrap aligned little
118+
; vstore128 sp+280, v28 // flags = notrap aligned little
119+
; vstore128 sp+272, v29 // flags = notrap aligned little
120+
; vstore128 sp+264, v30 // flags = notrap aligned little
121+
; vstore128 sp+256, v31 // flags = notrap aligned little
122122
; block0:
123123
; indirect_call_host CallInfo { dest: TestCase(%libcall), uses: [CallArgPair { vreg: p0i, preg: p0i }], defs: [], clobbers: PRegSet { bits: [65535, 4294967295, 4294967295, 0] }, callee_conv: SystemV, caller_conv: PreserveAll, callee_pop_size: 0, try_call_info: None, patchable: false }
124124
; x0 = xload64 sp+888 // flags = notrap aligned
@@ -169,38 +169,38 @@ block0(v0: i64):
169169
; f29 = fload64 sp+528 // flags = notrap aligned
170170
; f30 = fload64 sp+520 // flags = notrap aligned
171171
; f31 = fload64 sp+512 // flags = notrap aligned
172-
; v0 = vload128 sp+504 // flags = notrap aligned
173-
; v1 = vload128 sp+496 // flags = notrap aligned
174-
; v2 = vload128 sp+488 // flags = notrap aligned
175-
; v3 = vload128 sp+480 // flags = notrap aligned
176-
; v4 = vload128 sp+472 // flags = notrap aligned
177-
; v5 = vload128 sp+464 // flags = notrap aligned
178-
; v6 = vload128 sp+456 // flags = notrap aligned
179-
; v7 = vload128 sp+448 // flags = notrap aligned
180-
; v8 = vload128 sp+440 // flags = notrap aligned
181-
; v9 = vload128 sp+432 // flags = notrap aligned
182-
; v10 = vload128 sp+424 // flags = notrap aligned
183-
; v11 = vload128 sp+416 // flags = notrap aligned
184-
; v12 = vload128 sp+408 // flags = notrap aligned
185-
; v13 = vload128 sp+400 // flags = notrap aligned
186-
; v14 = vload128 sp+392 // flags = notrap aligned
187-
; v15 = vload128 sp+384 // flags = notrap aligned
188-
; v16 = vload128 sp+376 // flags = notrap aligned
189-
; v17 = vload128 sp+368 // flags = notrap aligned
190-
; v18 = vload128 sp+360 // flags = notrap aligned
191-
; v19 = vload128 sp+352 // flags = notrap aligned
192-
; v20 = vload128 sp+344 // flags = notrap aligned
193-
; v21 = vload128 sp+336 // flags = notrap aligned
194-
; v22 = vload128 sp+328 // flags = notrap aligned
195-
; v23 = vload128 sp+320 // flags = notrap aligned
196-
; v24 = vload128 sp+312 // flags = notrap aligned
197-
; v25 = vload128 sp+304 // flags = notrap aligned
198-
; v26 = vload128 sp+296 // flags = notrap aligned
199-
; v27 = vload128 sp+288 // flags = notrap aligned
200-
; v28 = vload128 sp+280 // flags = notrap aligned
201-
; v29 = vload128 sp+272 // flags = notrap aligned
202-
; v30 = vload128 sp+264 // flags = notrap aligned
203-
; v31 = vload128 sp+256 // flags = notrap aligned
172+
; v0 = vload128 sp+504 // flags = notrap aligned little
173+
; v1 = vload128 sp+496 // flags = notrap aligned little
174+
; v2 = vload128 sp+488 // flags = notrap aligned little
175+
; v3 = vload128 sp+480 // flags = notrap aligned little
176+
; v4 = vload128 sp+472 // flags = notrap aligned little
177+
; v5 = vload128 sp+464 // flags = notrap aligned little
178+
; v6 = vload128 sp+456 // flags = notrap aligned little
179+
; v7 = vload128 sp+448 // flags = notrap aligned little
180+
; v8 = vload128 sp+440 // flags = notrap aligned little
181+
; v9 = vload128 sp+432 // flags = notrap aligned little
182+
; v10 = vload128 sp+424 // flags = notrap aligned little
183+
; v11 = vload128 sp+416 // flags = notrap aligned little
184+
; v12 = vload128 sp+408 // flags = notrap aligned little
185+
; v13 = vload128 sp+400 // flags = notrap aligned little
186+
; v14 = vload128 sp+392 // flags = notrap aligned little
187+
; v15 = vload128 sp+384 // flags = notrap aligned little
188+
; v16 = vload128 sp+376 // flags = notrap aligned little
189+
; v17 = vload128 sp+368 // flags = notrap aligned little
190+
; v18 = vload128 sp+360 // flags = notrap aligned little
191+
; v19 = vload128 sp+352 // flags = notrap aligned little
192+
; v20 = vload128 sp+344 // flags = notrap aligned little
193+
; v21 = vload128 sp+336 // flags = notrap aligned little
194+
; v22 = vload128 sp+328 // flags = notrap aligned little
195+
; v23 = vload128 sp+320 // flags = notrap aligned little
196+
; v24 = vload128 sp+312 // flags = notrap aligned little
197+
; v25 = vload128 sp+304 // flags = notrap aligned little
198+
; v26 = vload128 sp+296 // flags = notrap aligned little
199+
; v27 = vload128 sp+288 // flags = notrap aligned little
200+
; v28 = vload128 sp+280 // flags = notrap aligned little
201+
; v29 = vload128 sp+272 // flags = notrap aligned little
202+
; v30 = vload128 sp+264 // flags = notrap aligned little
203+
; v31 = vload128 sp+256 // flags = notrap aligned little
204204
; pop_frame_restore 896, {}
205205
; ret
206206
;

cranelift/filetests/filetests/isa/pulley64/preserve-all.clif

Lines changed: 64 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -87,38 +87,38 @@ block0(v0: i64):
8787
; fstore64 sp+528, f29 // flags = notrap aligned
8888
; fstore64 sp+520, f30 // flags = notrap aligned
8989
; fstore64 sp+512, f31 // flags = notrap aligned
90-
; vstore128 sp+504, v0 // flags = notrap aligned
91-
; vstore128 sp+496, v1 // flags = notrap aligned
92-
; vstore128 sp+488, v2 // flags = notrap aligned
93-
; vstore128 sp+480, v3 // flags = notrap aligned
94-
; vstore128 sp+472, v4 // flags = notrap aligned
95-
; vstore128 sp+464, v5 // flags = notrap aligned
96-
; vstore128 sp+456, v6 // flags = notrap aligned
97-
; vstore128 sp+448, v7 // flags = notrap aligned
98-
; vstore128 sp+440, v8 // flags = notrap aligned
99-
; vstore128 sp+432, v9 // flags = notrap aligned
100-
; vstore128 sp+424, v10 // flags = notrap aligned
101-
; vstore128 sp+416, v11 // flags = notrap aligned
102-
; vstore128 sp+408, v12 // flags = notrap aligned
103-
; vstore128 sp+400, v13 // flags = notrap aligned
104-
; vstore128 sp+392, v14 // flags = notrap aligned
105-
; vstore128 sp+384, v15 // flags = notrap aligned
106-
; vstore128 sp+376, v16 // flags = notrap aligned
107-
; vstore128 sp+368, v17 // flags = notrap aligned
108-
; vstore128 sp+360, v18 // flags = notrap aligned
109-
; vstore128 sp+352, v19 // flags = notrap aligned
110-
; vstore128 sp+344, v20 // flags = notrap aligned
111-
; vstore128 sp+336, v21 // flags = notrap aligned
112-
; vstore128 sp+328, v22 // flags = notrap aligned
113-
; vstore128 sp+320, v23 // flags = notrap aligned
114-
; vstore128 sp+312, v24 // flags = notrap aligned
115-
; vstore128 sp+304, v25 // flags = notrap aligned
116-
; vstore128 sp+296, v26 // flags = notrap aligned
117-
; vstore128 sp+288, v27 // flags = notrap aligned
118-
; vstore128 sp+280, v28 // flags = notrap aligned
119-
; vstore128 sp+272, v29 // flags = notrap aligned
120-
; vstore128 sp+264, v30 // flags = notrap aligned
121-
; vstore128 sp+256, v31 // flags = notrap aligned
90+
; vstore128 sp+504, v0 // flags = notrap aligned little
91+
; vstore128 sp+496, v1 // flags = notrap aligned little
92+
; vstore128 sp+488, v2 // flags = notrap aligned little
93+
; vstore128 sp+480, v3 // flags = notrap aligned little
94+
; vstore128 sp+472, v4 // flags = notrap aligned little
95+
; vstore128 sp+464, v5 // flags = notrap aligned little
96+
; vstore128 sp+456, v6 // flags = notrap aligned little
97+
; vstore128 sp+448, v7 // flags = notrap aligned little
98+
; vstore128 sp+440, v8 // flags = notrap aligned little
99+
; vstore128 sp+432, v9 // flags = notrap aligned little
100+
; vstore128 sp+424, v10 // flags = notrap aligned little
101+
; vstore128 sp+416, v11 // flags = notrap aligned little
102+
; vstore128 sp+408, v12 // flags = notrap aligned little
103+
; vstore128 sp+400, v13 // flags = notrap aligned little
104+
; vstore128 sp+392, v14 // flags = notrap aligned little
105+
; vstore128 sp+384, v15 // flags = notrap aligned little
106+
; vstore128 sp+376, v16 // flags = notrap aligned little
107+
; vstore128 sp+368, v17 // flags = notrap aligned little
108+
; vstore128 sp+360, v18 // flags = notrap aligned little
109+
; vstore128 sp+352, v19 // flags = notrap aligned little
110+
; vstore128 sp+344, v20 // flags = notrap aligned little
111+
; vstore128 sp+336, v21 // flags = notrap aligned little
112+
; vstore128 sp+328, v22 // flags = notrap aligned little
113+
; vstore128 sp+320, v23 // flags = notrap aligned little
114+
; vstore128 sp+312, v24 // flags = notrap aligned little
115+
; vstore128 sp+304, v25 // flags = notrap aligned little
116+
; vstore128 sp+296, v26 // flags = notrap aligned little
117+
; vstore128 sp+288, v27 // flags = notrap aligned little
118+
; vstore128 sp+280, v28 // flags = notrap aligned little
119+
; vstore128 sp+272, v29 // flags = notrap aligned little
120+
; vstore128 sp+264, v30 // flags = notrap aligned little
121+
; vstore128 sp+256, v31 // flags = notrap aligned little
122122
; block0:
123123
; indirect_call_host CallInfo { dest: TestCase(%libcall), uses: [CallArgPair { vreg: p0i, preg: p0i }], defs: [], clobbers: PRegSet { bits: [65535, 4294967295, 4294967295, 0] }, callee_conv: SystemV, caller_conv: PreserveAll, callee_pop_size: 0, try_call_info: None, patchable: false }
124124
; x0 = xload64 sp+888 // flags = notrap aligned
@@ -169,38 +169,38 @@ block0(v0: i64):
169169
; f29 = fload64 sp+528 // flags = notrap aligned
170170
; f30 = fload64 sp+520 // flags = notrap aligned
171171
; f31 = fload64 sp+512 // flags = notrap aligned
172-
; v0 = vload128 sp+504 // flags = notrap aligned
173-
; v1 = vload128 sp+496 // flags = notrap aligned
174-
; v2 = vload128 sp+488 // flags = notrap aligned
175-
; v3 = vload128 sp+480 // flags = notrap aligned
176-
; v4 = vload128 sp+472 // flags = notrap aligned
177-
; v5 = vload128 sp+464 // flags = notrap aligned
178-
; v6 = vload128 sp+456 // flags = notrap aligned
179-
; v7 = vload128 sp+448 // flags = notrap aligned
180-
; v8 = vload128 sp+440 // flags = notrap aligned
181-
; v9 = vload128 sp+432 // flags = notrap aligned
182-
; v10 = vload128 sp+424 // flags = notrap aligned
183-
; v11 = vload128 sp+416 // flags = notrap aligned
184-
; v12 = vload128 sp+408 // flags = notrap aligned
185-
; v13 = vload128 sp+400 // flags = notrap aligned
186-
; v14 = vload128 sp+392 // flags = notrap aligned
187-
; v15 = vload128 sp+384 // flags = notrap aligned
188-
; v16 = vload128 sp+376 // flags = notrap aligned
189-
; v17 = vload128 sp+368 // flags = notrap aligned
190-
; v18 = vload128 sp+360 // flags = notrap aligned
191-
; v19 = vload128 sp+352 // flags = notrap aligned
192-
; v20 = vload128 sp+344 // flags = notrap aligned
193-
; v21 = vload128 sp+336 // flags = notrap aligned
194-
; v22 = vload128 sp+328 // flags = notrap aligned
195-
; v23 = vload128 sp+320 // flags = notrap aligned
196-
; v24 = vload128 sp+312 // flags = notrap aligned
197-
; v25 = vload128 sp+304 // flags = notrap aligned
198-
; v26 = vload128 sp+296 // flags = notrap aligned
199-
; v27 = vload128 sp+288 // flags = notrap aligned
200-
; v28 = vload128 sp+280 // flags = notrap aligned
201-
; v29 = vload128 sp+272 // flags = notrap aligned
202-
; v30 = vload128 sp+264 // flags = notrap aligned
203-
; v31 = vload128 sp+256 // flags = notrap aligned
172+
; v0 = vload128 sp+504 // flags = notrap aligned little
173+
; v1 = vload128 sp+496 // flags = notrap aligned little
174+
; v2 = vload128 sp+488 // flags = notrap aligned little
175+
; v3 = vload128 sp+480 // flags = notrap aligned little
176+
; v4 = vload128 sp+472 // flags = notrap aligned little
177+
; v5 = vload128 sp+464 // flags = notrap aligned little
178+
; v6 = vload128 sp+456 // flags = notrap aligned little
179+
; v7 = vload128 sp+448 // flags = notrap aligned little
180+
; v8 = vload128 sp+440 // flags = notrap aligned little
181+
; v9 = vload128 sp+432 // flags = notrap aligned little
182+
; v10 = vload128 sp+424 // flags = notrap aligned little
183+
; v11 = vload128 sp+416 // flags = notrap aligned little
184+
; v12 = vload128 sp+408 // flags = notrap aligned little
185+
; v13 = vload128 sp+400 // flags = notrap aligned little
186+
; v14 = vload128 sp+392 // flags = notrap aligned little
187+
; v15 = vload128 sp+384 // flags = notrap aligned little
188+
; v16 = vload128 sp+376 // flags = notrap aligned little
189+
; v17 = vload128 sp+368 // flags = notrap aligned little
190+
; v18 = vload128 sp+360 // flags = notrap aligned little
191+
; v19 = vload128 sp+352 // flags = notrap aligned little
192+
; v20 = vload128 sp+344 // flags = notrap aligned little
193+
; v21 = vload128 sp+336 // flags = notrap aligned little
194+
; v22 = vload128 sp+328 // flags = notrap aligned little
195+
; v23 = vload128 sp+320 // flags = notrap aligned little
196+
; v24 = vload128 sp+312 // flags = notrap aligned little
197+
; v25 = vload128 sp+304 // flags = notrap aligned little
198+
; v26 = vload128 sp+296 // flags = notrap aligned little
199+
; v27 = vload128 sp+288 // flags = notrap aligned little
200+
; v28 = vload128 sp+280 // flags = notrap aligned little
201+
; v29 = vload128 sp+272 // flags = notrap aligned little
202+
; v30 = vload128 sp+264 // flags = notrap aligned little
203+
; v31 = vload128 sp+256 // flags = notrap aligned little
204204
; pop_frame_restore 896, {}
205205
; ret
206206
;

0 commit comments

Comments
 (0)