Skip to content

Commit ad6d088

Browse files
authored
DRC: Use an additional stack for large arrays (#13192)
This commit adds an additional explicit stack for tracing and deallocating arrays with many elements. Instead of pushing all of a large array's elements onto the main stack at once, we push the array itself onto the new stack and then move its elements onto the main stack in fixed-size chunks, which bounds the main stack's growth. It is probably easiest to review this code while ignoring whitespace changes, as some indentation shifted around a bit.

Fixes #12958
1 parent 2cf5e4a commit ad6d088

1 file changed

Lines changed: 138 additions & 90 deletions

File tree

  • crates/wasmtime/src/runtime/vm/gc/enabled

crates/wasmtime/src/runtime/vm/gc/enabled/drc.rs

Lines changed: 138 additions & 90 deletions
Original file line number · Diff line number · Diff line change
@@ -44,8 +44,8 @@
4444
//! Examination of Deferred Reference Counting and Cycle Detection* by Quinane:
4545
//! <https://openresearch-repository.anu.edu.au/bitstream/1885/42030/2/hon-thesis.pdf>
4646
47-
use super::VMArrayRef;
4847
use super::free_list::FreeList;
48+
use super::{VMArrayRef, VMGcObjectData};
4949
use crate::hash_map::HashMap;
5050
use crate::hash_set::HashSet;
5151
use crate::runtime::vm::{
@@ -60,7 +60,7 @@ use core::{
6060
alloc::Layout,
6161
any::Any,
6262
mem,
63-
ops::{Deref, DerefMut},
63+
ops::{Deref, DerefMut, Range},
6464
ptr::NonNull,
6565
};
6666
use wasmtime_environ::drc::{ARRAY_LENGTH_OFFSET, DrcTypeLayouts};
@@ -72,6 +72,8 @@ use wasmtime_environ::{
7272
#[expect(clippy::cast_possible_truncation, reason = "known to not overflow")]
7373
const GC_REF_ARRAY_ELEMS_OFFSET: u32 = ARRAY_LENGTH_OFFSET + (mem::size_of::<u32>() as u32);
7474

75+
const MAX_ARRAY_STACK_DEPTH: usize = 1024;
76+
7577
/// The deferred reference-counting (DRC) collector.
7678
///
7779
/// This reference-counting collector does not have a cycle collector, and so it
@@ -208,6 +210,14 @@ struct DrcHeap {
208210
/// help us catch unexpected re-entry, similar to how a `RefCell` would.
209211
dec_ref_stack: Option<Vec<VMGcRef>>,
210212

213+
/// An explicit stack for arrays that are too large to push all their
214+
/// elements onto `dec_ref_stack` at once. Each entry is an array GC
215+
/// reference and the range of element indices remaining to process.
216+
large_array_dec_ref_stack: Option<Vec<(VMGcRef, Range<u32>)>>,
217+
218+
/// A batched set of GC refs to deallocate all at once.
219+
to_dealloc: Option<Vec<VMGcRef>>,
220+
211221
/// Running total of bytes currently allocated (live objects) in this heap.
212222
allocated_bytes: usize,
213223
}
@@ -227,6 +237,8 @@ impl DrcHeap {
227237
vmmemory: None,
228238
free_list: None,
229239
dec_ref_stack: Some(Vec::with_capacity(1)),
240+
large_array_dec_ref_stack: Some(Vec::with_capacity(1)),
241+
to_dealloc: Some(Vec::with_capacity(1)),
230242
allocated_bytes: 0,
231243
})
232244
}
@@ -282,115 +294,143 @@ impl DrcHeap {
282294
) {
283295
let mut stack = self.dec_ref_stack.take().unwrap();
284296
debug_assert!(stack.is_empty());
297+
298+
let mut large_array_stack = self.large_array_dec_ref_stack.take().unwrap();
299+
debug_assert!(large_array_stack.is_empty());
300+
301+
let mut to_dealloc = self.to_dealloc.take().unwrap();
302+
debug_assert!(to_dealloc.is_empty());
303+
285304
stack.push(gc_ref.unchecked_copy());
286305

287-
while let Some(gc_ref) = stack.pop() {
288-
debug_assert!(!gc_ref.is_i31());
306+
while !stack.is_empty() || !large_array_stack.is_empty() {
307+
while let Some(gc_ref) = stack.pop() {
308+
debug_assert!(!gc_ref.is_i31());
289309

290-
// Read the DRC header once to get ref_count, type, and object_size.
291-
let drc_header = self.index_mut(drc_ref(&gc_ref));
292-
log::trace!(
293-
"decrement {:#p} ref count -> {}",
294-
gc_ref,
295-
drc_header.ref_count - 1
296-
);
297-
if !drc_header.dec_ref() {
298-
continue;
299-
}
300-
// Ref count reached zero.
301-
302-
// Extract type and size from the header we already read (avoiding
303-
// re-reading from heap).
304-
let ty = drc_header.header.ty();
305-
let object_size = drc_header.object_size;
306-
307-
// Trace: enqueue child GC refs for dec-ref'ing.
308-
if let Some(ty) = ty {
309-
match &self.trace_infos[&ty] {
310-
TraceInfo::Struct { gc_ref_offsets } => {
311-
stack.reserve(gc_ref_offsets.len());
312-
313-
let object_start =
314-
usize::try_from(gc_ref.as_heap_index().unwrap().get()).unwrap();
315-
let heap = self.heap_slice();
316-
for offset in gc_ref_offsets {
317-
let offset = usize::try_from(*offset).unwrap();
318-
319-
// Read gc ref fields directly from the heap slice,
320-
// avoiding the overhead of repeatedly indexing into
321-
// the heap via the object header.
322-
let field_start = object_start + offset;
323-
let field_end = field_start + mem::size_of::<VMGcRef>();
324-
debug_assert!(
325-
field_end <= object_start + usize::try_from(object_size).unwrap()
326-
);
327-
let raw = *heap[field_start..field_end].as_array().unwrap();
328-
let raw = u32::from_le_bytes(raw);
329-
330-
if let Some(child) = VMGcRef::from_raw_u32(raw)
331-
&& !child.is_i31()
332-
{
333-
debug_assert!({
334-
let header = self.header(&child);
335-
let kind = header.kind().as_u32();
336-
VMGcKind::try_from_u32(kind).is_some()
337-
});
338-
stack.push(child);
310+
// Read the DRC header once to get ref_count, type, and object_size.
311+
let drc_header = self.index_mut(drc_ref(&gc_ref));
312+
log::trace!(
313+
"decrement {:#p} ref count -> {}",
314+
gc_ref,
315+
drc_header.ref_count - 1
316+
);
317+
if !drc_header.dec_ref() {
318+
continue;
319+
}
320+
321+
// Extract type and size from the header we already read (avoiding
322+
// re-reading from heap).
323+
let ty = drc_header.header.ty();
324+
325+
// Trace: enqueue child GC refs for dec-ref'ing.
326+
if let Some(ty) = ty {
327+
match &self.trace_infos[&ty] {
328+
TraceInfo::Struct { gc_ref_offsets } => {
329+
stack.reserve(gc_ref_offsets.len());
330+
let data = self.gc_object_data(&gc_ref);
331+
for offset in gc_ref_offsets {
332+
Self::trace_offset(&mut stack, data, *offset);
339333
}
340334
}
341-
}
342-
TraceInfo::Array { gc_ref_elems } => {
343-
if *gc_ref_elems {
344-
let data = self.gc_object_data(&gc_ref);
335+
TraceInfo::Array { gc_ref_elems: true } => {
345336
let len = self.array_len(gc_ref.as_arrayref_unchecked());
346-
stack.reserve(usize::try_from(len).unwrap());
347-
for i in 0..len {
348-
let elem_offset = GC_REF_ARRAY_ELEMS_OFFSET
349-
+ i * u32::try_from(mem::size_of::<u32>()).unwrap();
350-
let raw = data.read_u32(elem_offset);
351-
if let Some(child) = VMGcRef::from_raw_u32(raw)
352-
&& !child.is_i31()
353-
{
354-
debug_assert!({
355-
let header = self.header(&child);
356-
let kind = header.kind().as_u32();
357-
VMGcKind::try_from_u32(kind).is_some()
358-
});
359-
stack.push(child);
337+
let len_usize = usize::try_from(len).unwrap();
338+
339+
if stack.len() + len_usize <= MAX_ARRAY_STACK_DEPTH {
340+
let data = self.gc_object_data(&gc_ref);
341+
stack.reserve(len_usize);
342+
for i in 0..len {
343+
Self::trace_array_elem(&mut stack, data, i);
344+
}
345+
} else {
346+
// Only push the first `n` elements onto the
347+
// stack; process the rest via the
348+
// `large_array_stack`.
349+
let n = MAX_ARRAY_STACK_DEPTH.saturating_sub(stack.len());
350+
let n = u32::try_from(n).unwrap();
351+
let data = self.gc_object_data(&gc_ref);
352+
for i in 0..n {
353+
Self::trace_array_elem(&mut stack, data, i);
360354
}
355+
large_array_stack.push((gc_ref.unchecked_copy(), n..len));
356+
357+
// Don't fallthrough and push onto `to_dealloc`
358+
// yet; only do that after we've processed all
359+
// elements. This ensures we don't push it
360+
// multiple times.
361+
continue;
361362
}
362363
}
364+
TraceInfo::Array {
365+
gc_ref_elems: false,
366+
} => {}
363367
}
368+
} else {
369+
// Handle `externref` host data. Only `externref`s have host
370+
// data, and `ty` is `None` only for `externref`s, so we skip
371+
// this for `struct` and `array` objects entirely.
372+
debug_assert!(drc_header.header.kind().matches(VMGcKind::ExternRef));
373+
let externref = gc_ref.as_typed::<VMDrcExternRef>(self).unwrap();
374+
let host_data_id = self.index(externref).host_data;
375+
host_data_table.dealloc(host_data_id);
364376
}
365-
} else {
366-
// Handle `externref` host data. Only `externref`s have host
367-
// data, and `ty` is `None` only for `externref`s, so we skip
368-
// this for `struct` and `array` objects entirely.
369-
debug_assert!(drc_header.header.kind().matches(VMGcKind::ExternRef));
370-
let externref = gc_ref.as_typed::<VMDrcExternRef>(self).unwrap();
371-
let host_data_id = self.index(externref).host_data;
372-
host_data_table.dealloc(host_data_id);
377+
378+
to_dealloc.push(gc_ref);
373379
}
374380

375-
// Deallocate using the object_size we already read.
376-
let alloc_size = FreeList::aligned_size(object_size).unwrap();
377-
let index = gc_ref.as_heap_index().unwrap();
381+
if let Some((gc_ref, mut elems)) = large_array_stack.pop() {
382+
// Add the next chunk of array elements onto the stack.
383+
let data = self.gc_object_data(&gc_ref);
384+
for i in elems.by_ref().take(MAX_ARRAY_STACK_DEPTH) {
385+
Self::trace_array_elem(&mut stack, data, i);
386+
}
378387

379-
if cfg!(gc_zeal) {
380-
let idx = usize::try_from(index.get()).unwrap();
381-
self.heap_slice_mut()[idx..][..usize::try_from(alloc_size).unwrap()].fill(POISON);
388+
// If we are done processing this array, then enqueue it for
389+
// deallocation. Otherwise, push it back onto the
390+
// `large_array_stack` for continued processing once the regular
391+
// stack is exhausted again.
392+
if elems.is_empty() {
393+
to_dealloc.push(gc_ref);
394+
} else {
395+
large_array_stack.push((gc_ref, elems));
396+
}
382397
}
398+
}
383399

384-
self.free_list
385-
.as_mut()
386-
.unwrap()
387-
.dealloc_fast(index, alloc_size);
388-
self.allocated_bytes -= usize::try_from(alloc_size).unwrap();
400+
// Deallocate the dead objects and return their memory blocks to the
401+
// free list.
402+
for gc_ref in to_dealloc.drain(..) {
403+
self.dealloc(gc_ref);
389404
}
390405

391406
debug_assert!(stack.is_empty());
392407
debug_assert!(self.dec_ref_stack.is_none());
393408
self.dec_ref_stack = Some(stack);
409+
410+
debug_assert!(large_array_stack.is_empty());
411+
debug_assert!(self.large_array_dec_ref_stack.is_none());
412+
self.large_array_dec_ref_stack = Some(large_array_stack);
413+
414+
debug_assert!(to_dealloc.is_empty());
415+
debug_assert!(self.to_dealloc.is_none());
416+
self.to_dealloc = Some(to_dealloc);
417+
}
418+
419+
#[inline]
420+
fn trace_array_elem(stack: &mut Vec<VMGcRef>, data: &VMGcObjectData, i: u32) {
421+
let elem_offset =
422+
GC_REF_ARRAY_ELEMS_OFFSET + i * u32::try_from(mem::size_of::<u32>()).unwrap();
423+
Self::trace_offset(stack, data, elem_offset)
424+
}
425+
426+
#[inline]
427+
fn trace_offset(stack: &mut Vec<VMGcRef>, data: &VMGcObjectData, offset: u32) {
428+
let raw = data.read_u32(offset);
429+
if let Some(gc_ref) = VMGcRef::from_raw_u32(raw)
430+
&& !gc_ref.is_i31()
431+
{
432+
stack.push(gc_ref);
433+
}
394434
}
395435

396436
/// Ensure that we have tracing information for the given type.
@@ -863,6 +903,8 @@ unsafe impl GcHeap for DrcHeap {
863903
over_approximated_stack_roots,
864904
free_list,
865905
dec_ref_stack,
906+
large_array_dec_ref_stack,
907+
to_dealloc,
866908
memory,
867909
vmmemory,
868910
allocated_bytes,
@@ -879,6 +921,12 @@ unsafe impl GcHeap for DrcHeap {
879921
*vmmemory = None;
880922
*allocated_bytes = 0;
881923
debug_assert!(dec_ref_stack.as_ref().is_some_and(|s| s.is_empty()));
924+
debug_assert!(
925+
large_array_dec_ref_stack
926+
.as_ref()
927+
.is_some_and(|s| s.is_empty())
928+
);
929+
debug_assert!(to_dealloc.as_ref().is_some_and(|d| d.is_empty()));
882930

883931
memory.take().unwrap()
884932
}

0 commit comments

Comments (0)