Skip to content

Commit 7b94e1d

Browse files
authored
feat: loop unrolling support (#2178)
1 parent b43f6d8 commit 7b94e1d

38 files changed

Lines changed: 2756 additions & 237 deletions

File tree

apps/typegpu-docs/src/content/docs/fundamentals/utils.mdx

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,3 +267,59 @@ Otherwise, for example when using `tgpu.resolve` on a WGSL template, logs are ig
267267
- `console.log` only works in fragment and compute shaders.
268268
This is due to a [WebGPU limitation](https://www.w3.org/TR/WGSL/#address-space) that does not allow modifying buffers during the vertex shader stage.
269269
- `console.log` currently does not support template literals (but you can use [string substitutions](https://developer.mozilla.org/en-US/docs/Web/API/console#using_string_substitutions), or just pass multiple arguments instead).
270+
271+
## *for...of...* loops
272+
273+
TypeGPU supports `for...of...` loops in shader functions. The only constraints are that the loop variable must be declared with `const` and the iterable must be stored in a variable.
274+
275+
```ts twoslash
276+
import tgpu, { d } from 'typegpu';
277+
278+
const processNeighbor = (cell: d.v2i) => {};
279+
280+
// ---cut---
281+
const processNeighbors = (cell: d.v2i) => {
282+
'use gpu';
283+
284+
const offsets = [
285+
d.vec2i(0, 1),
286+
d.vec2i(0, -1),
287+
d.vec2i(1, 0),
288+
d.vec2i(-1, 0),
289+
];
290+
291+
for (const offset of offsets) {
292+
processNeighbor(cell.add(offset));
293+
}
294+
};
295+
```
296+
297+
## *tgpu.unroll*
298+
299+
For loops with a small, fixed number of iterations, you can use `tgpu.unroll` to unroll them at compile time. This removes the per-iteration loop overhead (updating the counter, checking the condition, and branching) and can significantly improve performance.
300+
301+
### Usage
302+
303+
Wrap your iterable with `tgpu.unroll()`:
304+
305+
```ts twoslash
306+
import tgpu, { d } from 'typegpu';
307+
308+
const processNeighbor = (cell: d.v2i) => {};
309+
310+
// ---cut---
311+
const processNeighbors = (cell: d.v2i) => {
312+
'use gpu';
313+
314+
for (const dy of tgpu.unroll([-1, 0, 1])) {
315+
for (const dx of tgpu.unroll([-1, 0, 1])) {
316+
processNeighbor(cell.add(d.vec2i(dx, dy)));
317+
}
318+
}
319+
};
320+
```
321+
322+
:::note
323+
- There is no limit on how large a loop can be for unrolling. TypeGPU will always try to unroll it, and if it can't, you'll receive an error.
324+
- You cannot use `continue` or `break` inside a loop that you intend to unroll.
325+
:::

apps/typegpu-docs/src/examples/algorithms/jump-flood-distance/index.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -186,8 +186,8 @@ const jumpFlood = root.createGuardedComputePipeline((x, y) => {
186186
let bestInsideDist = 1e20;
187187
let bestOutsideDist = 1e20;
188188

189-
for (let dy = -1; dy <= 1; dy++) {
190-
for (let dx = -1; dx <= 1; dx++) {
189+
for (const dx of tgpu.unroll([-1, 0, 1])) {
190+
for (const dy of tgpu.unroll([-1, 0, 1])) {
191191
const sample = sampleWithOffset(
192192
pingPongLayout.$.readView,
193193
d.vec2i(x, y),

apps/typegpu-docs/src/examples/algorithms/jump-flood-voronoi/index.ts

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -165,22 +165,23 @@ const jumpFlood = root.createGuardedComputePipeline((x, y) => {
165165
let minDist = 1e20;
166166
let bestSample = SampleResult({ color: d.vec4f(), coord: d.vec2f(-1) });
167167

168-
for (let dy = -1; dy <= 1; dy++) {
169-
for (let dx = -1; dx <= 1; dx++) {
168+
for (const dy of tgpu.unroll([-1, 0, 1])) {
169+
for (const dx of tgpu.unroll([-1, 0, 1])) {
170170
const sample = sampleWithOffset(
171171
pingPongLayout.$.readView,
172172
d.vec2i(x, y),
173173
d.vec2i(dx * offset, dy * offset),
174174
);
175175

176-
if (sample.coord.x < 0) {
177-
continue;
178-
}
179-
180-
const dist = std.distance(d.vec2f(x, y), sample.coord.mul(d.vec2f(size)));
181-
if (dist < minDist) {
182-
minDist = dist;
183-
bestSample = SampleResult(sample);
176+
if (sample.coord.x >= 0) {
177+
const dist = std.distance(
178+
d.vec2f(x, y),
179+
sample.coord.mul(d.vec2f(size)),
180+
);
181+
if (dist < minDist) {
182+
minDist = dist;
183+
bestSample = SampleResult(sample);
184+
}
184185
}
185186
}
186187
}

apps/typegpu-docs/src/examples/image-processing/background-segmentation/shaders.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,8 @@ export const computeFn = tgpu.computeFn({
5656
).sub(d.vec2i(filterOffset, 0));
5757

5858
// Load a tile of pixels into shared memory
59-
for (let r = 0; r < 4; r++) {
60-
for (let c = 0; c < 4; c++) {
59+
for (const r of tgpu.unroll([0, 1, 2, 3])) {
60+
for (const c of tgpu.unroll([0, 1, 2, 3])) {
6161
let loadIndex = baseIndex.add(d.vec2i(c, r));
6262
if (flipAccess.$) {
6363
loadIndex = loadIndex.yx;
@@ -75,8 +75,8 @@ export const computeFn = tgpu.computeFn({
7575
std.workgroupBarrier();
7676

7777
// Apply the horizontal blur filter and write to the output texture
78-
for (let r = 0; r < 4; r++) {
79-
for (let c = 0; c < 4; c++) {
78+
for (const r of tgpu.unroll([0, 1, 2, 3])) {
79+
for (const c of tgpu.unroll([0, 1, 2, 3])) {
8080
let writeIndex = baseIndex.add(d.vec2i(c, r));
8181
if (flipAccess.$) {
8282
writeIndex = writeIndex.yx;

apps/typegpu-docs/src/examples/image-processing/blur/index.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -73,8 +73,8 @@ const computeFn = tgpu.computeFn({
7373
).sub(d.vec2i(filterOffset, 0));
7474

7575
// Load a tile of pixels into shared memory
76-
for (let r = 0; r < 4; r++) {
77-
for (let c = 0; c < 4; c++) {
76+
for (const r of tgpu.unroll([0, 1, 2, 3])) {
77+
for (const c of tgpu.unroll([0, 1, 2, 3])) {
7878
let loadIndex = baseIndex.add(d.vec2i(c, r));
7979
if (ioLayout.$.flip !== 0) {
8080
loadIndex = loadIndex.yx;
@@ -92,8 +92,8 @@ const computeFn = tgpu.computeFn({
9292
std.workgroupBarrier();
9393

9494
// Apply the horizontal blur filter and write to the output texture
95-
for (let r = 0; r < 4; r++) {
96-
for (let c = 0; c < 4; c++) {
95+
for (const r of tgpu.unroll([0, 1, 2, 3])) {
96+
for (const c of tgpu.unroll([0, 1, 2, 3])) {
9797
let writeIndex = baseIndex.add(d.vec2i(c, r));
9898
if (ioLayout.$.flip !== 0) {
9999
writeIndex = writeIndex.yx;

apps/typegpu-docs/src/examples/rendering/3d-fish/compute.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { d, std } from 'typegpu';
1+
import tgpu, { d, std } from 'typegpu';
22
import * as p from './params.ts';
33
import { computeBindGroupLayout as layout } from './schemas.ts';
44
import { projectPointOnLine } from './tgsl-helpers.ts';
@@ -39,7 +39,7 @@ export const simulate = (fishIndex: number) => {
3939
if (cohesionCount > 0) {
4040
cohesion = cohesion / cohesionCount - fishData.position;
4141
}
42-
for (let i = 0; i < 3; i += 1) {
42+
for (const i of tgpu.unroll([0, 1, 2])) {
4343
const repulsion = d.vec3f();
4444
repulsion[i] = 1;
4545

apps/typegpu-docs/src/examples/rendering/clouds/utils.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,12 +72,13 @@ export const raymarch = tgpu.fn([d.vec3f, d.vec3f, d.vec3f], d.vec4f)(
7272
},
7373
);
7474

75+
const iterations = Array.from({ length: FBM_OCTAVES }, (_, i) => i);
7576
const fbm = tgpu.fn([d.vec3f], d.f32)((pos) => {
7677
let sum = d.f32();
7778
let amp = d.f32(CLOUD_AMPLITUDE);
7879
let freq = d.f32(CLOUD_FREQUENCY);
7980

80-
for (let i = 0; i < FBM_OCTAVES; i++) {
81+
for (const _i of tgpu.unroll(iterations)) {
8182
sum += noise3d(std.mul(pos, freq)) * amp;
8283
amp *= FBM_PERSISTENCE;
8384
freq *= FBM_LACUNARITY;

apps/typegpu-docs/src/examples/rendering/cubemap-reflection/icosphere.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ export class IcosphereGenerator {
171171
]);
172172

173173
const baseIndexNext = triangleIndex * 12;
174-
for (let i = d.u32(0); i < 12; i++) {
174+
for (const i of tgpu.unroll([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])) {
175175
const reprojectedVertex = newVertices[i];
176176

177177
const triBase = i - (i % 3);

apps/typegpu-docs/src/examples/rendering/jelly-slider/taa.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@ export const taaResolveFn = tgpu.computeFn({
2525

2626
const dimensions = std.textureDimensions(taaResolveLayout.$.currentTexture);
2727

28-
for (let x = -1; x <= 1; x++) {
29-
for (let y = -1; y <= 1; y++) {
28+
for (const x of tgpu.unroll([-1, 0, 1])) {
29+
for (const y of tgpu.unroll([-1, 0, 1])) {
3030
const sampleCoord = d.vec2i(gid.xy).add(d.vec2i(x, y));
3131
const clampedCoord = std.clamp(
3232
sampleCoord,

apps/typegpu-docs/src/examples/rendering/jelly-switch/taa.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@ export const taaResolveFn = tgpu.computeFn({
2525

2626
const dimensions = std.textureDimensions(taaResolveLayout.$.currentTexture);
2727

28-
for (let x = -1; x <= 1; x++) {
29-
for (let y = -1; y <= 1; y++) {
28+
for (const x of tgpu.unroll([-1, 0, 1])) {
29+
for (const y of tgpu.unroll([-1, 0, 1])) {
3030
const sampleCoord = d.vec2i(gid.xy).add(d.vec2i(x, y));
3131
const clampedCoord = std.clamp(
3232
sampleCoord,

0 commit comments

Comments
 (0)