diff --git a/CMakeLists.txt b/CMakeLists.txt index b65c8a1e2..8ee92a604 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -69,10 +69,9 @@ add_definitions(-DFAASM_PROFILE_ON=0) # Custom LLVM build (also for profiling) set(FAASM_CUSTOM_LLVM 0) if (${FAASM_CUSTOM_LLVM}) - message(STATUS "Using custom LLVM") - - # Force using custom build of LLVM for codegen - set(LLVM_DIR $ENV{HOME}/faasm/llvm/build/lib/cmake/llvm) + # In accordance with bin/build_llvm_perf.sh and LLVM version for WAVM + set(LLVM_DIR /usr/local/code/llvm-perf/build/lib/cmake/llvm) + message(STATUS "Using custom LLVM at ${LLVM_DIR} for profiling") find_package(LLVM REQUIRED CONFIG NO_DEFAULT_PATH) set(WAVM_PERF_LIBS 1) diff --git a/ansible/musl_native.yml b/ansible/musl_native.yml new file mode 100644 index 000000000..2897a9238 --- /dev/null +++ b/ansible/musl_native.yml @@ -0,0 +1,6 @@ +--- + +- hosts: localhost + gather_facts: yes + roles: + - musl_native diff --git a/ansible/roles/musl_native/tasks/main.yml b/ansible/roles/musl_native/tasks/main.yml new file mode 100644 index 000000000..01087c52a --- /dev/null +++ b/ansible/roles/musl_native/tasks/main.yml @@ -0,0 +1,26 @@ +--- + +- name: "Clone musl" + git: + repo: 'git://git.musl-libc.org/musl' + dest: /usr/local/code/musl_native + version: v1.1.24 + +- name: "Build" + shell: "{{ item }}" + args: + chdir: "/usr/local/code/musl_native" + with_items: + - "CC=/usr/bin/clang ./configure --prefix=/usr/local/faasm/musl_native" + - "make -j 10" + +- name: "Install" + become: yes + shell: "{{ item }}" + args: + chdir: "/usr/local/code/musl_native" + with_items: + - "mkdir -p /usr/local/faasm/musl_native" + - "sudo make install" + + diff --git a/bin/build_llvm_perf.sh b/bin/build_llvm_perf.sh index fda8d0e9e..aa82317d4 100755 --- a/bin/build_llvm_perf.sh +++ b/bin/build_llvm_perf.sh @@ -1,12 +1,17 @@ #!/bin/bash set -e +set -x -FAASM_DIR=${HOME}/faasm -LLVM_DIR=${FAASM_DIR}/llvm +CODE_DIR=/usr/local/code/ +if [[ ! -d ${CODE_DIR} ]]; then + echo "CODE_DIR (${CODE_DIR}) does not exists" + exit 1 +fi + +LLVM_DIR=${CODE_DIR}/llvm-perf BUILD_DIR=${LLVM_DIR}/build -# Clone LLVM repo if not done already -mkdir -p ${FAASM_DIR} +# Clone LLVM-9 repo if not done already. Version must be compatible with WAVM if [[ ! -d "$LLVM_DIR" ]]; then git clone https://github.com/llvm/llvm-project ${LLVM_DIR} pushd ${LLVM_DIR} @@ -14,17 +19,20 @@ if [[ ! -d "$LLVM_DIR" ]]; then popd fi -# Clear build dir -# rm -rf ${BUILD_DIR} - -# Make the build dir if it exists +# Make the build dir mkdir -p ${BUILD_DIR} - pushd ${BUILD_DIR} +# Clear build dir +if [[ -f "${BUILD_DIR}/build.ninja" ]]; then + ninja clean +fi + # Enable JIT profiling features of LLVM cmake \ -G Ninja \ + -DCMAKE_C_COMPILER=/usr/bin/clang \ + -DCMAKE_CXX_COMPILER=/usr/bin/clang++ \ -DCMAKE_BUILD_TYPE=RelWithDebInfo \ -DLLVM_USE_PERF=1 \ -DLLVM_USE_INTEL_JITEVENTS=1 \ diff --git a/docker/omp-musl.dockerfile b/docker/omp-musl.dockerfile new file mode 100644 index 000000000..f0f6c3478 --- /dev/null +++ b/docker/omp-musl.dockerfile @@ -0,0 +1,13 @@ +FROM alpine + +RUN apk update && \ + apk add g++ libgomp make + +# Compile omp programs +COPY func/omp /build +WORKDIR /build +RUN make + +# clean up +RUN rm -rf /build +WORKDIR / diff --git a/docs/perf-diff/explaination.md b/docs/perf-diff/explaination.md new file mode 100644 index 000000000..9c6d6238d --- /dev/null +++ b/docs/perf-diff/explaination.md @@ -0,0 +1,142 @@ +# Perf shows that the loops are doing the same + +## Log function + +Experiment code [here](../../func/demo/log_difference.cpp). + +### Perf stat + +#### Native + +``` + Performance counter stats for 'speed_difference': + + 14530.607857 task-clock (msec) # 1.000 CPUs utilized + 7 context-switches # 0.000 K/sec + 0 cpu-migrations # 0.000 K/sec + 589 page-faults # 0.041 K/sec + 46,636,539,230 cycles # 3.210 GHz + 114,771,451,699 instructions # 2.46 insn per cycle + 13,208,253,517 branches # 908.995 M/sec + 196,535 branch-misses # 0.00% of all branches + + 14.531197506 seconds time elapsed +``` + +#### Wasm + +``` + Performance counter stats for 'simple_runner demo speed_difference': + + 4852.352536 task-clock (msec) # 1.000 CPUs utilized + 3 context-switches # 0.001 K/sec + 0 cpu-migrations # 0.000 K/sec + 5,611 page-faults # 0.001 M/sec + 14,531,719,357 cycles # 2.995 GHz + 35,368,260,394 instructions # 2.43 insn per cycle + 4,120,953,595 branches # 849.269 M/sec + 5,078,676 branch-misses # 0.12% of all branches + + 4.852843140 seconds time elapsed +``` + +### Perf report + +Symbol table: +``` +functionDef35 log +functionDef2 __original_main +``` + +The global time spent in each section is very similar: +![Time division](time-division.png) + +The loop bodies (pictures below) look quite similar -- unless the unknown/unknown:XX are something to worry about. The Wasm code +fiddles a lot with the MM registers before the loop. Manually compiling the native code with `clang++ -O3 -mavx2 -S` for comparison lead +to the same code as shown below. + +![main native](main-native.png) + +![main Wasm](main-wasm.png) + +I think this is not a good experiment for general-purpose speed comparison between Wasm and Native because it seems that +here the only difference is that MUSL's log is faster than GLibC's log. When looking at the `log` function ran in Wasm, +it is a lot smaller than the GLibC one, which is the only explanation I have here for the speed difference. + +This experiment is however useful for understanding our second experiment because there is a log function taking 25% of +the run time of the native Mersenne Twister engine experiment but there is no sign of `log` seems absent from the Wasm +which however leads to the same result. I can only think the log was somehow optimised away? + +### Confirmation + +Compiling similar code at `-O3` natively but changing libc implementations gives the following: +* glibc: 14.2 +* musl: 6.3 + +Confirming our hypothesis that the musl `log` function is better. + +## Mersenne Twister Engine + +Experiment code [here](../../func/demo/mt_difference.cpp). + +### Perf stats + +#### Native + + +``` + 9170.419967 task-clock (msec) # 1.000 CPUs utilized + 5 context-switches # 0.001 K/sec + 0 cpu-migrations # 0.000 K/sec + 588 page-faults # 0.064 K/sec + 27,822,779,162 cycles # 3.034 GHz + 17,508,333,503 instructions # 0.63 insn per cycle + 2,560,064,376 branches # 279.165 M/sec + 1,122,999 branch-misses # 0.04% of all branches + + 9.171005174 seconds time elapsed + +``` + +#### Wasm + +``` + + + 1317.060034 task-clock (msec) # 1.000 CPUs utilized + 1 context-switches # 0.001 K/sec + 0 cpu-migrations # 0.000 K/sec + 5,610 page-faults # 0.004 M/sec + 2,992,325,962 cycles # 2.272 GHz + 6,657,716,630 instructions # 2.22 insn per cycle + 218,836,121 branches # 166.155 M/sec + 5,013,145 branch-misses # 2.29% of all branches + + 1.317535341 seconds time elapsed +``` + + +### Perf Report + +On the left, Wasm on the right, native. The symbol table is as follows. The +fact we only have the `main` function in Wasm seem to indicate there is some +inlining going on. +``` +functionDef2 __original_main +``` + +![Perf report MT](mt-time.png) + +And indeed we can compare the loop bodies in the main function we the native simply +calls the MT engine. And the Wasm code seem to be that same code but inlined (omitted +because it was too long). + +The Native main loop uses `callq`, I'm not sure about perf's measures about the cost +of instructions (`callq` < `mov` < `add`), and it seems clear that it doesn't include +the time spent in `callq` because perf inverts the call graph. + +### Confirmation + +TODO confirm that: +* Wasm code generation is more prone to inlining which helped in this loopy context +* Wasm optimised the `logl` away, or else what happened to it? diff --git a/docs/perf-diff/main-native.png b/docs/perf-diff/main-native.png new file mode 100644 index 000000000..15135dfda Binary files /dev/null and b/docs/perf-diff/main-native.png differ diff --git a/docs/perf-diff/main-wasm.png b/docs/perf-diff/main-wasm.png new file mode 100644 index 000000000..dd9e30d14 Binary files /dev/null and b/docs/perf-diff/main-wasm.png differ diff --git a/docs/perf-diff/mt-time.png b/docs/perf-diff/mt-time.png new file mode 100644 index 000000000..e84ecbd97 Binary files /dev/null and b/docs/perf-diff/mt-time.png differ diff --git a/docs/perf-diff/time-division.png b/docs/perf-diff/time-division.png new file mode 100644 index 000000000..3ebaac899 Binary files /dev/null and b/docs/perf-diff/time-division.png differ diff --git a/func/demo/CMakeLists.txt b/func/demo/CMakeLists.txt index fa3aee853..429fe1c61 100644 --- a/func/demo/CMakeLists.txt +++ b/func/demo/CMakeLists.txt @@ -43,6 +43,7 @@ demo_func(increment increment.cpp) demo_func(isatty isatty.cpp) demo_func(listdir listdir.cpp) demo_func(lock lock.c) +demo_func(log_difference log_difference.cpp) demo_func(malloc malloc.cpp) demo_func(matrix matrix.cpp) demo_func(memcpy memcpy.cpp) @@ -50,6 +51,7 @@ demo_func(memmove memmove.cpp) demo_func(mmap mmap.cpp) demo_func(mmap_big mmap_big.cpp) demo_func(mmap_file mmap_file.cpp) +demo_func(mt_difference mt_difference.cpp) demo_func(new_obj new_obj.cpp) demo_func(noop noop.c) demo_func(optarg optarg.cpp) diff --git a/func/demo/log_difference.cpp b/func/demo/log_difference.cpp new file mode 100644 index 000000000..06b638054 --- /dev/null +++ b/func/demo/log_difference.cpp @@ -0,0 +1,16 @@ +#include +#include + +const int32_t EXPECTED = -2147483529; + +int main() { + int32_t result = 100; + for (int32_t i = -200000000; i < 200000000; i++) { + result ^= ((int32_t) log(abs(i))) & ((int32_t) log(abs(i))) ; + } + if (result != EXPECTED) { + printf("Custom reduction failed. Expected %d but got %d\n", EXPECTED, result); + return EXIT_FAILURE; + } + return EXIT_SUCCESS; +} diff --git a/func/demo/mt_difference.cpp b/func/demo/mt_difference.cpp new file mode 100644 index 000000000..3b40b8367 --- /dev/null +++ b/func/demo/mt_difference.cpp @@ -0,0 +1,20 @@ +#include +#include +#include + +const int32_t EXPECTED = 49999327; + +int main() { + std::uniform_real_distribution unif(0, 1); + std::mt19937_64 generator(193734715); + int32_t result = 0; + + for (int32_t i = 0; i < 100000000; i++) { + result += unif(generator) < 0.5 ? 1 : 0; + } + if (result != EXPECTED) { + printf("Custom reduction failed. Expected %d but got %d\n", EXPECTED, result); + return EXIT_FAILURE; + } + return EXIT_SUCCESS; +} diff --git a/func/omp/Makefile b/func/omp/Makefile new file mode 100644 index 000000000..183ecd1bd --- /dev/null +++ b/func/omp/Makefile @@ -0,0 +1,16 @@ +# Makefile for building native_musl targets in the omp-musl container + +CXX=g++ +CXX_FLAGS=-mavx2 -O3 +OMP_FLAGS=-fopenmp +OUT_DIR=/usr/bin + +.PHONY : all + +all : mt_pi docker_benchmarker + +mt_pi: mt_pi.cpp + $(CXX) $(CXX_FLAGS) $(OMP_FLAGS) $< -o $(OUT_DIR)/$@ + +docker_benchmarker: docker_benchmarker.cpp + $(CXX) $(CXX_FLAGS) $< -o $(OUT_DIR)/$@ diff --git a/func/omp/docker_benchmarker.cpp b/func/omp/docker_benchmarker.cpp new file mode 100644 index 000000000..b57e23fd5 --- /dev/null +++ b/func/omp/docker_benchmarker.cpp @@ -0,0 +1,69 @@ +#include +#include +#include +#include +#include + +constexpr auto BINARY_DIR = "/usr/bin/"; + +void +nativeRun(std::ofstream &profOut, const std::string &execPath, long num_iterations, const std::string &iteration_name, + int num_threads); + +int main(int argc, char *argv[]) { + + if (argc != 3) { + printf("Usage:\ndocker_benchmarker \n"); + return 1; + } + + std::string funcName = argv[1]; + int nativeIterations = std::stoi(argv[2]); + + std::string outfile = std::string("/wasm/omp/") + funcName + "/bench.csv"; + + std::ofstream profOut; + profOut.open(outfile); + profOut << "iterations,numThreads,type,microseconds" << std::endl; + + std::string execPath = std::string(BINARY_DIR) + funcName; + + std::vector iterations = {200000l, 20000000l, 200000000l}; + std::vector iter_names = {"Tiny,", "Small,", "Big,"}; + + for (int run = 1; run <= nativeIterations; run++) { + printf("NATIVE - %s (%d/%d)\n", funcName.c_str(), run, nativeIterations); + for (size_t i = 0; i < iterations.size(); i++) { + nativeRun(profOut, execPath, iterations[i], iter_names[i], 1); + for (int num_threads = 2; num_threads < 25; num_threads += 2) { + nativeRun(profOut, execPath, iterations[i], iter_names[i], num_threads); + } + } + } + + profOut.flush(); + profOut.close(); + + chmod(outfile.c_str(),S_IRWXU | S_IRWXG | S_IRWXO); + + return 0; +} + + +void +nativeRun(std::ofstream &profOut, const std::string &execPath, long num_iterations, const std::string &iteration_name, + int num_threads) { + + char buffer [128]; + sprintf(buffer, "%s %d %ld", execPath.c_str(), num_threads, num_iterations); + + std::chrono::steady_clock::time_point t1 = std::chrono::steady_clock::now(); + int error = system(buffer); + const int nativeTime = std::chrono::duration_cast(std::chrono::steady_clock::now() - t1).count(); + + if (error != 0) { + printf("Failed to execute %s", buffer); + } + + profOut << iteration_name << num_threads << ",native," << nativeTime << std::endl; +} diff --git a/func/omp/mt_pi.cpp b/func/omp/mt_pi.cpp index 92f5afc26..4731c0e52 100644 --- a/func/omp/mt_pi.cpp +++ b/func/omp/mt_pi.cpp @@ -2,8 +2,6 @@ #include #include -//#define num_devices num_threads - unsigned long thread_seed() { int threadNum = omp_get_thread_num(); return threadNum * threadNum * 77 - 22 * threadNum + 1927; @@ -14,13 +12,11 @@ int main(int argc, char **argv) { int num_threads = 1; if (argc == 3) { num_threads = std::stoi(argv[1]); - iterations = std::stol(argv[2]); + iterations = std::stoll(argv[2]); } else if (argc != 1) { printf("Usage: mt_pi [num_threads num_iterations]"); } -// omp_set_default_device(-2); - long long result = 0; #pragma omp parallel num_threads(num_threads) default(none) firstprivate(iterations) reduction(+:result) { @@ -42,5 +38,4 @@ int main(int argc, char **argv) { if (pi - 3.14 > 0.01) { printf("Low accuracy. Expected pi got %f\n", pi); } - return EXIT_SUCCESS; } \ No newline at end of file diff --git a/include/wasm/openmp/ThreadState.h b/include/wasm/openmp/ThreadState.h index 148a1c221..820867bbf 100644 --- a/include/wasm/openmp/ThreadState.h +++ b/include/wasm/openmp/ThreadState.h @@ -1,5 +1,4 @@ -#ifndef FAASM_THREADSTATE_H -#define FAASM_THREADSTATE_H +#pragma once #include @@ -14,5 +13,3 @@ namespace wasm { void setThreadLevel(std::shared_ptr); } } - -#endif //FAASM_THREADSTATE_H diff --git a/src/runner/bench_omp.cpp b/src/runner/bench_omp.cpp index e34267a51..fcdaa9b7a 100644 --- a/src/runner/bench_omp.cpp +++ b/src/runner/bench_omp.cpp @@ -6,107 +6,106 @@ #include #include -constexpr auto BINARY_DIR = "/usr/local/code/faasm/ninja-build/bin/"; +constexpr auto DOCKER_CMD = "/usr/bin/docker run --cpus=20 -v /usr/local/code/faasm/wasm:/wasm --rm faasm/omp-musl:0.1.3 docker_benchmarker "; -void -nativeRun(std::ofstream &profOut, const std::string &execPath, long num_iterations, const std::string &iteration_name, - int num_threads); +void nativeRun(const std::string &funcName, int numRuns); -void wasmRun(std::ofstream &profOut, message::Message &call, long num_iterations, const std::string &iteration_name, - int num_threads, wasm::WAVMWasmModule module); +void wasmRun(const std::string &user, const std::string &funcName, int wasmIterations, const std::string &outfile); +void wasmRunIteration(std::ofstream &profOut, message::Message &call, long num_iterations, const std::string &iteration_name, + int num_threads, wasm::WAVMWasmModule module); int main(int argc, char *argv[]) { util::initLogging(); const std::shared_ptr &logger = util::getLogger(); - if (argc < 4) { - logger->error("Usage:\nbench_func "); + if (argc != 4) { + logger->error("Usage:\nbench_omp "); return 1; } - std::string user = argv[1]; - std::string funcName = argv[2]; - int nativeIterations = std::stoi(argv[3]); - int wasmIterations = std::stoi(argv[4]); + std::string user = "omp"; + std::string funcName = argv[1]; + int nativeIterations = std::stoi(argv[2]); + int wasmIterations = std::stoi(argv[3]); + + std::string outfile = std::string("/usr/local/code/faasm/wasm/omp/") + funcName + "/bench.csv"; + // Clean up for container (and ensuring we will be writing to the same file + boost::filesystem::remove(outfile); logger->info("Benchmarking {} ({}x native and {}x wasm)", funcName, nativeIterations, wasmIterations); - std::string outfile = std::string("/usr/local/code/faasm/wasm/") + user + "/" + funcName + "/bench.csv"; + nativeRun(funcName, nativeIterations); + wasmRun(user, funcName, wasmIterations, outfile); - std::ofstream profOut; - profOut.open(outfile); - profOut << "iterations,numThreads,type,microseconds" << std::endl; + logger->info("Finished benchmark - {}", funcName); + return 0; +} + +void nativeRun(const std::string &funcName, int numRuns) { + auto logger = util::getLogger(); + const std::string execCmd = fmt::format("{} {} {}", DOCKER_CMD, funcName, numRuns); - std::string execPath = std::string(BINARY_DIR) + funcName; + logger->info("Exectuting native"); - logger->info("Running benchmark natively"); - if (!boost::filesystem::exists(execPath)) { - throw std::runtime_error("Could not find binary at " + execPath); + const util::TimePoint nativeTp = util::startTimer(); + const int error = system(execCmd.c_str()); + const long nativeTime = util::getTimeDiffMillis(nativeTp); + + logger->info("Done executing native in {} ms", nativeTime); + + if (error != 0) { + logger->error("Failed to execute %s", execCmd.c_str()); } +} - std::vector iterations = {200000l, 20000000l, 200000000l}; - std::vector iter_names = {"Tiny,", "Small,", "Big,"}; +void wasmRun(const std::string &user, const std::string &funcName, int wasmIterations, const std::string &outfile) { - for (int run = 1; run <= nativeIterations; run++) { - logger->info("NATIVE - {} ({}/{})", funcName, run, nativeIterations); - for (size_t i = 0; i < iterations.size(); i++) { - nativeRun(profOut, execPath, iterations[i], iter_names[i], 1); - for (int num_threads = 2; num_threads < 25; num_threads += 2) { - nativeRun(profOut, execPath, iterations[i], iter_names[i], num_threads); - } - } + const std::shared_ptr &logger = util::getLogger(); + + std::ofstream profOut; + profOut.open(outfile, std::fstream::app); + + logger->info("Running Wasm benchmark"); + if (!boost::filesystem::exists(outfile)) { + throw std::runtime_error("Could not find native benchmark output at " + outfile); } + std::vector iterations = {200000l, 20000000l, 200000000l}; + std::vector iter_names = {"Tiny,", "Small,", "Big,"}; + message::Message call = util::messageFactory(user, funcName); module_cache::WasmModuleCache &moduleCache = module_cache::getWasmModuleCache(); wasm::WAVMWasmModule &cachedModule = moduleCache.getCachedModule(call); + const util::TimePoint wasmTp = util::startTimer(); + for (int run = 1; run <= wasmIterations; run++) { - logger->info("WASM - {} ({}/{})", funcName, run, nativeIterations); + logger->info("WASM - {} ({}/{})", funcName, run, wasmIterations); for (size_t i = 0; i < iterations.size(); i++) { - wasmRun(profOut, call, iterations[i], iter_names[i], 1, cachedModule); + wasmRunIteration(profOut, call, iterations[i], iter_names[i], 1, cachedModule); for (int num_threads = 2; num_threads < 25; num_threads += 2) { - wasmRun(profOut, call, iterations[i], iter_names[i], num_threads, cachedModule); + wasmRunIteration(profOut, call, iterations[i], iter_names[i], num_threads, cachedModule); } } } + const long wasmTime = util::getTimeDiffMillis(wasmTp); + logger->info("Done executing native in {} ms", wasmTime); - logger->info("Finished benchmark - {}", funcName); profOut.flush(); profOut.close(); - - return 0; } -void wasmRun(std::ofstream &profOut, message::Message &call, long num_iterations, const std::string &iteration_name, +void wasmRunIteration(std::ofstream &profOut, message::Message &call, long num_iterations, const std::string &iteration_name, int num_threads, wasm::WAVMWasmModule cachedModule) { auto args = fmt::format("{} {}", num_threads, num_iterations); call.set_cmdline(args.c_str()); - const util::TimePoint wasmTp = util::startTimer(); + const util::TimePoint iterationTp = util::startTimer(); wasm::WAVMWasmModule module{cachedModule}; - module.execute(call); + const long wasmIterationTime = util::getTimeDiffMicros(iterationTp); - const long nativeTime = util::getTimeDiffMicros(wasmTp); - - profOut << iteration_name << num_threads << ",wasm," << nativeTime << std::endl; -} - -void -nativeRun(std::ofstream &profOut, const std::string &execPath, long num_iterations, const std::string &iteration_name, - int num_threads) { - const std::string execCmd = fmt::format("{} {} {}", execPath, num_threads, num_iterations); - - const util::TimePoint wasmTp = util::startTimer(); - const int error = system(execCmd.c_str()); - const long nativeTime = util::getTimeDiffMicros(wasmTp); - - if (error != 0) { - printf("Failed to execute %s", execCmd.c_str()); - } - - profOut << iteration_name << num_threads << ",native," << nativeTime << std::endl; + profOut << iteration_name << num_threads << ",wasm," << wasmIterationTime << std::endl; } diff --git a/tasks/compile.py b/tasks/compile.py index be048b12e..bb3063279 100644 --- a/tasks/compile.py +++ b/tasks/compile.py @@ -1,4 +1,4 @@ -from os import mkdir, listdir +from os import makedirs, listdir from os.path import exists, join, splitext from shutil import copy from subprocess import call @@ -15,8 +15,7 @@ def _copy_built_function(user, func): dest_folder = join(WASM_DIR, user, func) - if not exists(dest_folder): - mkdir(dest_folder) + makedirs(dest_folder, exist_ok=True) src_file = join(FUNC_BUILD_DIR, user, ".".join([func, "wasm"])) dest_file = join(dest_folder, "function.wasm")