Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file removed 5_Matrix_scalar_multiplication.o
Binary file not shown.
19 changes: 10 additions & 9 deletions examples/KernelBench/level1.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,10 @@
pipeline: matmul

- kernel: level1/7_Matmul_with_small_K_dimension_.py
input_shapes: [4096x64, 64x4096]
input_shapes: [1024x64, 64x1024]
initializations: [rnd, rnd]
output_shape: 4096x4096
gflops: (4096 * 64 * 4096 * 2) / 1e9
output_shape: 1024x1024
gflops: (1024 * 64 * 1024 * 2) / 1e9
pipeline: matmul

- kernel: level1/8_Matmul_with_irregular_shapes_.py
Expand Down Expand Up @@ -123,16 +123,17 @@
pipeline: matmul

- kernel: level1/19_ReLU.py
input_shapes: [4096x8192]
input_shapes: [1024x1024]
initializations: [rnd]
output_shape: 4096x8192
gflops: (4096 * 8192) / 1e9
output_shape: 1024x1024
gflops: (1024 * 1024) / 1e9

- kernel: level1/20_LeakyReLU.py
input_shapes: [4096x8192]
input_shapes: [1024x1024]
initializations: [rnd]
output_shape: 4096x8192
gflops: (4096 * 8192) / 1e9
output_shape: 1024x1024
gflops: (1024 * 1024) / 1e9
pipeline: element_wise

- kernel: level1/21_Sigmoid.py
input_shapes: [4096x8192]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# This is an optimizing pipeline for kernel_bench element-wise kernels.
# Assumption: M, N and K are each divisible by 32 (M % 32 == N % 32 == K % 32 == 0).
Pipeline:
# Fuse as many element-wise ops as possible into a single large linalg.generic
- pass: "linalg-fuse-elementwise-ops"
# Register tiling and unroll to fill the pipeline
- schedule: "tiling.py[gen=tile_ops]{target_op=linalg.generic
tile_sizes=[1,$register_tile]
Expand Down
1 change: 1 addition & 0 deletions examples/KernelBench/schedules/x86_64/pack_and_tile.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# This is a good default for most CPU-based pipelines.
Pipeline:
## Packing & Cache tiling (CPU generic)
- pass: "linalg-fuse-elementwise-ops"
- schedule: "packing.py[gen=block_pack_matmuls]{block_factors=[32,32,32] rhs_transpose_outer_block=True rhs_transpose_inner_block=False}"
- schedule: "x86/pack_lowering.py[gen=lower_packs_unpacks]{tile_size=32}"
- pass: "linalg-morph-ops{named-to-category generic-to-category}"
Expand Down
Loading