Merge pull request #51702 from thaJeztah/bump_wazero

vendor: github.com/tetratelabs/wazero v1.10.1
This commit is contained in:
Rob Murray
2025-12-12 12:18:09 +00:00
committed by GitHub
52 changed files with 1511 additions and 811 deletions

2
go.mod
View File

@@ -217,7 +217,7 @@ require (
github.com/shibumi/go-pathspec v1.3.0 // indirect
github.com/spdx/tools-golang v0.5.5 // indirect
github.com/stretchr/testify v1.11.1 // indirect
github.com/tetratelabs/wazero v1.9.0 // indirect
github.com/tetratelabs/wazero v1.10.1 // indirect
github.com/tinylib/msgp v1.3.0 // indirect
github.com/tonistiigi/dchapes-mode v0.0.0-20250318174251-73d941a28323 // indirect
github.com/tonistiigi/fsutil v0.0.0-20250605211040-586307ad452f // indirect

4
go.sum
View File

@@ -602,8 +602,8 @@ github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
github.com/tedsuo/ifrit v0.0.0-20230516164442-7862c310ad26 h1:mWCRvpoEMVlslxEvvptKgIUb35va9yj9Oq5wGw/er5I=
github.com/tedsuo/ifrit v0.0.0-20230516164442-7862c310ad26/go.mod h1:0uD3VMXkZ7Bw0ojGCwDzebBBzPBXtzEZeXai+56BLX4=
github.com/tetratelabs/wazero v1.9.0 h1:IcZ56OuxrtaEz8UYNRHBrUa9bYeX9oVY93KspZZBf/I=
github.com/tetratelabs/wazero v1.9.0/go.mod h1:TSbcXCfFP0L2FGkRPxHphadXPjo1T6W+CseNNY7EkjM=
github.com/tetratelabs/wazero v1.10.1 h1:2DugeJf6VVk58KTPszlNfeeN8AhhpwcZqkJj2wwFuH8=
github.com/tetratelabs/wazero v1.10.1/go.mod h1:DRm5twOQ5Gr1AoEdSi0CLjDQF1J9ZAuyqFIjl1KKfQU=
github.com/tinylib/msgp v1.3.0 h1:ULuf7GPooDaIlbyvgAxBV/FI7ynli6LZ1/nVUNu+0ww=
github.com/tinylib/msgp v1.3.0/go.mod h1:ykjzy2wzgrlvpDCRc4LA8UXy6D8bzMSuAF3WD57Gok0=
github.com/tonistiigi/dchapes-mode v0.0.0-20250318174251-73d941a28323 h1:r0p7fK56l8WPequOaR3i9LBqfPtEdXIQbUTzT55iqT4=

View File

@@ -113,6 +113,7 @@ spectest_v1_testdata_dir := $(spectest_v1_dir)/testdata
spec_version_v1 := wg-1.0
spectest_v2_dir := $(spectest_base_dir)/v2
spectest_v2_testdata_dir := $(spectest_v2_dir)/testdata
# Latest draft state as of March 12, 2024.
spec_version_v2 := 1c5e5d178bd75c79b7a12881c529098beaee2a05
spectest_threads_dir := $(spectest_base_dir)/threads
@@ -121,6 +122,10 @@ spectest_threads_testdata_dir := $(spectest_threads_dir)/testdata
# It will likely be renamed to main in the future - https://github.com/WebAssembly/threads/issues/216.
spec_version_threads := 3635ca51a17e57e106988846c5b0e0cc48ac04fc
spectest_tail_call_dir := $(spectest_base_dir)/tail-call
spectest_tail_call_testdata_dir := $(spectest_tail_call_dir)/testdata
spec_version_tail_call := 4fd2339b5e9709e74b326797f69a88b13eac4d47
.PHONY: build.spectest
build.spectest:
@$(MAKE) build.spectest.v1
@@ -175,6 +180,15 @@ build.spectest.threads:
wast2json --enable-threads --debug-names $$f; \
done
.PHONY: build.spectest.tail_call
build.spectest.tail_call:
mkdir -p $(spectest_tail_call_testdata_dir)
cd $(spectest_tail_call_testdata_dir) \
&& curl -sSL 'https://api.github.com/repos/WebAssembly/testsuite/contents/proposals/tail-call?ref=$(spec_version_tail_call)' | jq -r '.[]| .download_url' | grep -E ".wast" | xargs -Iurl curl -sJL url -O
cd $(spectest_tail_call_testdata_dir) && for f in `find . -name '*.wast'`; do \
wast2json --enable-tail-call --debug-names $$f; \
done
.PHONY: test
test:
@go test $(go_test_options) ./...
@@ -220,13 +234,10 @@ check:
@GOARCH=wasm GOOS=wasip1 go build ./...
# Ensure we build on aix. See #1723
@GOARCH=ppc64 GOOS=aix go build ./...
# Ensure we build on windows:
@GOARCH=amd64 GOOS=windows go build ./...
# Ensure we build on an arbitrary operating system:
@GOARCH=amd64 GOOS=dragonfly go build ./...
# Ensure we build on solaris/illumos:
@GOARCH=amd64 GOOS=illumos go build ./...
@GOARCH=amd64 GOOS=solaris go build ./...
# Ensure we build on linux s390x. See #2412
@GOARCH=s390x GOOS=linux go build ./...
# Ensure we build on linux ppc64le. See #2412
@GOARCH=ppc64le GOOS=linux go build ./...
# Ensure we build on linux arm for Dapr:
# gh release view -R dapr/dapr --json assets --jq 'first(.assets[] | select(.name = "daprd_linux_arm.tar.gz") | {url, downloadCount})'
@GOARCH=arm GOOS=linux go build ./...
@@ -274,22 +285,15 @@ libsodium:
#### CLI release related ####
VERSION ?= dev
# Default to a dummy version 0.0.1.1, which is always lower than a real release.
# Legal version values should look like 'x.x.x.x' where x is an integer from 0 to 65534.
# https://learn.microsoft.com/en-us/windows/win32/msi/productversion?redirectedfrom=MSDN
# https://stackoverflow.com/questions/9312221/msi-version-numbers
MSI_VERSION ?= 0.0.1.1
non_windows_platforms := darwin_amd64 darwin_arm64 linux_amd64 linux_arm64
non_windows_archives := $(non_windows_platforms:%=dist/wazero_$(VERSION)_%.tar.gz)
windows_platforms := windows_amd64 # TODO: add arm64 windows once we start testing on it.
windows_archives := $(windows_platforms:%=dist/wazero_$(VERSION)_%.zip) $(windows_platforms:%=dist/wazero_$(VERSION)_%.msi)
windows_archives := $(windows_platforms:%=dist/wazero_$(VERSION)_%.zip)
checksum_txt := dist/wazero_$(VERSION)_checksums.txt
# define macros for multi-platform builds. these parse the filename being built
go-arch = $(if $(findstring amd64,$1),amd64,arm64)
go-os = $(if $(findstring .exe,$1),windows,$(if $(findstring linux,$1),linux,darwin))
# msi-arch is a macro so we can detect it based on the file naming convention
msi-arch = $(if $(findstring amd64,$1),x64,arm64)
build/wazero_%/wazero:
$(call go-build,$@,$<)
@@ -314,51 +318,15 @@ define go-build
@echo build "ok"
endef
# this makes a marker file ending in .signed to avoid repeatedly calling codesign
%.signed: %
$(call codesign,$<)
@touch $@
# This requires osslsigncode package (apt or brew) or latest windows release from mtrojnar/osslsigncode
#
# Default is self-signed while production should be a Digicert signing key
#
# Ex.
# ```bash
# keytool -genkey -alias wazero -storetype PKCS12 -keyalg RSA -keysize 2048 -storepass wazero-bunch \
# -keystore wazero.p12 -dname "O=wazero,CN=wazero.io" -validity 3650
# ```
WINDOWS_CODESIGN_P12 ?= packaging/msi/wazero.p12
WINDOWS_CODESIGN_PASSWORD ?= wazero-bunch
define codesign
@printf "$(ansi_format_dark)" codesign "signing $1"
@osslsigncode sign -h sha256 -pkcs12 ${WINDOWS_CODESIGN_P12} -pass "${WINDOWS_CODESIGN_PASSWORD}" \
-n "wazero is the zero dependency WebAssembly runtime for Go developers" -i https://wazero.io -t http://timestamp.digicert.com \
$(if $(findstring msi,$(1)),-add-msi-dse) -in $1 -out $1-signed
@mv $1-signed $1
@printf "$(ansi_format_bright)" codesign "ok"
endef
# This task is only supported on Windows, where we use candle.exe (compile wxs to wixobj) and light.exe (link to msi)
dist/wazero_$(VERSION)_%.msi: build/wazero_%/wazero.exe.signed
ifeq ($(OS),Windows_NT)
@echo msi "building $@"
@mkdir -p $(@D)
@candle -nologo -arch $(call msi-arch,$@) -dVersion=$(MSI_VERSION) -dBin=$(<:.signed=) -o build/wazero.wixobj packaging/msi/wazero.wxs
@light -nologo -o $@ build/wazero.wixobj -spdb
$(call codesign,$@)
@echo msi "ok"
endif
dist/wazero_$(VERSION)_%.zip: build/wazero_%/wazero.exe.signed
dist/wazero_$(VERSION)_%.zip: build/wazero_%/wazero.exe
@echo zip "zipping $@"
@mkdir -p $(@D)
@zip -qj $@ $(<:.signed=)
@zip -qj $@ $<
@echo zip "ok"
# Darwin doesn't have sha256sum. See https://github.com/actions/virtual-environments/issues/90
sha256sum := $(if $(findstring darwin,$(shell go env GOOS)),shasum -a 256,sha256sum)
$(checksum_txt):
@cd $(@D); touch $(@F); $(sha256sum) * >> $(@F)
$(checksum_txt): $(non_windows_archives) $(windows_archives)
@cd $(@D); touch $(@F); $(sha256sum) * > $(@F)
dist: $(non_windows_archives) $(if $(findstring Windows_NT,$(OS)),$(windows_archives),) $(checksum_txt)
dist: $(non_windows_archives) $(windows_archives) $(checksum_txt)

View File

@@ -507,7 +507,7 @@ inserted after exit: https://github.com/emscripten-core/emscripten/issues/12322
## WASI
Unfortunately, (WASI Snapshot Preview 1)[https://github.com/WebAssembly/WASI/blob/snapshot-01/phases/snapshot/docs.md] is not formally defined enough, and has APIs with ambiguous semantics.
Unfortunately, [WASI Snapshot Preview 1](https://github.com/WebAssembly/WASI/blob/snapshot-01/phases/snapshot/docs.md) is not formally defined enough, and has APIs with ambiguous semantics.
This section describes how Wazero interprets and implements the semantics of several WASI APIs that may be interpreted differently by different wasm runtimes.
Those APIs may affect the portability of a WASI application.

View File

@@ -43,7 +43,7 @@ magnitude (10x) or more. This is done without host-specific dependencies.
### Conformance
Both runtimes pass WebAssembly Core [1.0][7] and [2.0][14] specification tests
Both runtimes pass WebAssembly Core [1.0][3] and [2.0][4] specification tests
on supported platforms:
| Runtime | Usage | amd64 | arm64 | others |
@@ -58,7 +58,7 @@ wazero into their Go applications.
### wazero
wazero's [1.0 release][15] happened in March 2023, and is [in use][16] by many
wazero's [1.0 release][8] happened in March 2023, and is [in use][9] by many
projects and production sites.
We offer an API stability promise with semantic versioning. In other words, we
@@ -72,14 +72,14 @@ You can get the latest version of wazero like this.
go get github.com/tetratelabs/wazero@latest
```
Please give us a [star][17] if you end up using wazero!
Please give us a [star][10] if you end up using wazero!
### Go
wazero has no dependencies except Go, so the only source of conflict in your
project's use of wazero is the Go version.
wazero follows the same version policy as Go's [Release Policy][10]: two
wazero follows the same version policy as Go's [Release Policy][5]: two
versions. wazero will ensure these versions work and bugs are valid if there's
an issue with a current Go version.
@@ -96,18 +96,18 @@ systems are ones we test, but that doesn't necessarily mean other operating
system versions won't work.
We currently test Linux (Ubuntu and scratch), MacOS and Windows as packaged by
[GitHub Actions][11], as well as nested VMs running on Linux for FreeBSD, NetBSD,
[GitHub Actions][6], as well as nested VMs running on Linux for FreeBSD, NetBSD,
OpenBSD, DragonFly BSD, illumos and Solaris.
We also test cross compilation for many `GOOS` and `GOARCH` combinations.
* Interpreter
* Linux is tested on amd64 (native) as well arm64 and riscv64 via emulation.
* Linux is tested on amd64 and arm64 (native) as well as riscv64 via emulation.
* Windows, FreeBSD, NetBSD, OpenBSD, DragonFly BSD, illumos and Solaris are
tested only on amd64.
* macOS is tested only on arm64.
* Compiler
* Linux is tested on amd64 (native) as well arm64 via emulation.
* Linux is tested on amd64 and arm64.
* Windows, FreeBSD, NetBSD, DragonFly BSD, illumos and Solaris are
tested only on amd64.
* macOS is tested only on arm64.
@@ -116,24 +116,25 @@ wazero has no dependencies and doesn't require CGO. This means it can also be
embedded in an application that doesn't use an operating system. This is a main
differentiator between wazero and alternatives.
We verify zero dependencies by running tests in Docker's [scratch image][12].
We verify zero dependencies by running tests in Docker's [scratch image][7].
This approach ensures compatibility with any parent image.
### macOS code-signing entitlements
If you're developing for macOS and need to code-sign your application,
please read issue [#2393][11].
-----
wazero is a registered trademark of Tetrate.io, Inc. in the United States and/or other countries
[1]: https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/
[2]: https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/
[4]: https://github.com/WebAssembly/meetings/blob/main/process/subgroups.md
[5]: https://github.com/WebAssembly/WASI
[6]: https://pkg.go.dev/golang.org/x/sys/unix
[7]: https://github.com/WebAssembly/spec/tree/wg-1.0/test/core
[9]: https://github.com/tetratelabs/wazero/issues/506
[10]: https://go.dev/doc/devel/release
[11]: https://github.com/actions/virtual-environments
[12]: https://docs.docker.com/develop/develop-images/baseimages/#create-a-simple-parent-image-using-scratch
[13]: https://github.com/WebAssembly/WASI/blob/snapshot-01/phases/snapshot/docs.md
[14]: https://github.com/WebAssembly/spec/tree/d39195773112a22b245ffbe864bab6d1182ccb06/test/core
[15]: https://tetrate.io/blog/introducing-wazero-from-tetrate/
[16]: https://wazero.io/community/users/
[17]: https://github.com/tetratelabs/wazero/stargazers
[3]: https://github.com/WebAssembly/spec/tree/wg-1.0/test/core
[4]: https://github.com/WebAssembly/spec/tree/d39195773112a22b245ffbe864bab6d1182ccb06/test/core
[5]: https://go.dev/doc/devel/release
[6]: https://github.com/actions/virtual-environments
[7]: https://docs.docker.com/develop/develop-images/baseimages/#create-a-simple-parent-image-using-scratch
[8]: https://tetrate.io/blog/introducing-wazero-from-tetrate/
[9]: https://wazero.io/community/users/
[10]: https://github.com/wazero/wazero/stargazers
[11]: https://github.com/wazero/wazero/issues/2393

View File

@@ -0,0 +1,19 @@
package experimental
import (
"context"
"github.com/tetratelabs/wazero/internal/expctxkeys"
)
// WithCompilationWorkers sets the desired number of compilation workers.
func WithCompilationWorkers(ctx context.Context, workers int) context.Context {
return context.WithValue(ctx, expctxkeys.CompilationWorkers{}, workers)
}
// GetCompilationWorkers returns the desired number of compilation workers.
// The minimum value returned is 1.
func GetCompilationWorkers(ctx context.Context) int {
workers, _ := ctx.Value(expctxkeys.CompilationWorkers{}).(int)
return max(workers, 1)
}
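The new experimental knob above is consumed by the compiler engine later in this diff (see the `GetCompilationWorkers` call in the wazevo engine). A hedged usage sketch from the caller's side (not part of this diff; the runtime calls are the standard wazero API, the module path is hypothetical):

```go
package main

import (
	"context"
	"log"
	"os"
	"runtime"

	"github.com/tetratelabs/wazero"
	"github.com/tetratelabs/wazero/experimental"
)

func main() {
	// Attach the desired worker count to the context used for compilation.
	ctx := experimental.WithCompilationWorkers(context.Background(), runtime.NumCPU())

	r := wazero.NewRuntime(ctx)
	defer r.Close(ctx)

	wasmBin, err := os.ReadFile("module.wasm") // hypothetical module path
	if err != nil {
		log.Fatal(err)
	}
	// With more than one worker requested, CompileModule may fan function
	// compilation out across goroutines; with workers <= 1 it compiles serially.
	if _, err := r.CompileModule(ctx, wasmBin); err != nil {
		log.Fatal(err)
	}
}
```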

View File

@@ -13,3 +13,6 @@ import "github.com/tetratelabs/wazero/api"
binaries will use a theoretical maximum like 4GB, so if using such a binary on a system
// without mmap, consider editing the binary to reduce the max size setting of memory.
const CoreFeaturesThreads = api.CoreFeatureSIMD << 1
// CoreFeaturesTailCall enables tail call instructions ("tail-call").
const CoreFeaturesTailCall = api.CoreFeatureSIMD << 2
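Like `CoreFeaturesThreads` above, the new constant is meant to be OR'ed into the runtime's core-feature set. A minimal sketch of enabling it (not part of this diff; assumes wazero's existing `RuntimeConfig.WithCoreFeatures` and `api.CoreFeaturesV2`):

```go
package main

import (
	"context"

	"github.com/tetratelabs/wazero"
	"github.com/tetratelabs/wazero/api"
	"github.com/tetratelabs/wazero/experimental"
)

func main() {
	ctx := context.Background()

	// Enable the default 2.0 feature set plus the experimental tail-call proposal,
	// so modules using return_call / return_call_indirect can be compiled.
	cfg := wazero.NewRuntimeConfig().
		WithCoreFeatures(api.CoreFeaturesV2 | experimental.CoreFeaturesTailCall)

	r := wazero.NewRuntimeWithConfig(ctx, cfg)
	defer r.Close(ctx)
}
```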

View File

@@ -814,6 +814,7 @@ operatorSwitch:
c.emit(
newOperationCallIndirect(typeIndex, tableIndex),
)
case wasm.OpcodeDrop:
r := inclusiveRange{Start: 0, End: 0}
if peekValueType == unsignedTypeV128 {
@@ -3423,6 +3424,45 @@ operatorSwitch:
default:
return fmt.Errorf("unsupported atomic instruction in interpreterir: %s", wasm.AtomicInstructionName(atomicOp))
}
case wasm.OpcodeTailCallReturnCall:
fdef := c.module.FunctionDefinition(index)
functionFrame := c.controlFrames.functionFrame()
// We currently do not support tail calls to imported functions; we treat them as regular calls.
// For details, see internal/engine/RATIONALE.md
if _, _, isImport := fdef.Import(); isImport {
c.emit(newOperationCall(index))
dropOp := newOperationDrop(c.getFrameDropRange(functionFrame, false))
// Clean up the stack and then jump to the function frame's continuation (i.e., return).
c.emit(dropOp)
c.emit(newOperationBr(functionFrame.asLabel()))
} else {
c.emit(newOperationTailCallReturnCall(index))
}
// The return operation is stack-polymorphic, so mark the state as unreachable.
// That means subsequent instructions in the current control frame are "unreachable"
// and can be safely removed.
c.markUnreachable()
case wasm.OpcodeTailCallReturnCallIndirect:
typeIndex := index
tableIndex, n, err := leb128.LoadUint32(c.body[c.pc+1:])
if err != nil {
return fmt.Errorf("read target for br_table: %w", err)
}
c.pc += n
functionFrame := c.controlFrames.functionFrame()
dropRange := c.getFrameDropRange(functionFrame, false)
c.emit(newOperationTailCallReturnCallIndirect(typeIndex, tableIndex, dropRange, functionFrame.asLabel()))
// The return operation is stack-polymorphic, so mark the state as unreachable.
// That means subsequent instructions in the current control frame are "unreachable"
// and can be safely removed.
c.markUnreachable()
default:
return fmt.Errorf("unsupported instruction in interpreterir: 0x%x", op)
}
@@ -3449,7 +3489,10 @@ func (c *compiler) applyToStack(opcode wasm.Opcode) (index uint32, err error) {
wasm.OpcodeLocalSet,
wasm.OpcodeLocalTee,
wasm.OpcodeGlobalGet,
wasm.OpcodeGlobalSet:
wasm.OpcodeGlobalSet,
// tail-call proposal
wasm.OpcodeTailCallReturnCall,
wasm.OpcodeTailCallReturnCallIndirect:
// Assumes that we are at the opcode now so skip it before read immediates.
v, num, err := leb128.LoadUint32(c.body[c.pc+1:])
if err != nil {

View File

@@ -7,6 +7,7 @@ import (
"fmt"
"math"
"math/bits"
"slices"
"sync"
"unsafe"
@@ -27,27 +28,37 @@ import (
// The default value should suffice for most use cases. Those wishing to change this can do so via `go build -ldflags`.
var callStackCeiling = 2000
type compiledFunctionWithCount struct {
funcs []compiledFunction
refCount int
}
// engine is an interpreter implementation of wasm.Engine
type engine struct {
enabledFeatures api.CoreFeatures
compiledFunctions map[wasm.ModuleID][]compiledFunction // guarded by mutex.
mux sync.RWMutex
compiledFunctions map[wasm.ModuleID]*compiledFunctionWithCount // guarded by mutex.
mux sync.Mutex
}
func NewEngine(_ context.Context, enabledFeatures api.CoreFeatures, _ filecache.Cache) wasm.Engine {
return &engine{
enabledFeatures: enabledFeatures,
compiledFunctions: map[wasm.ModuleID][]compiledFunction{},
compiledFunctions: map[wasm.ModuleID]*compiledFunctionWithCount{},
}
}
// Close implements the same method as documented on wasm.Engine.
func (e *engine) Close() (err error) {
e.mux.Lock()
defer e.mux.Unlock()
clear(e.compiledFunctions)
return
}
// CompiledModuleCount implements the same method as documented on wasm.Engine.
func (e *engine) CompiledModuleCount() uint32 {
e.mux.Lock()
defer e.mux.Unlock()
return uint32(len(e.compiledFunctions))
}
@@ -59,19 +70,33 @@ func (e *engine) DeleteCompiledModule(m *wasm.Module) {
func (e *engine) deleteCompiledFunctions(module *wasm.Module) {
e.mux.Lock()
defer e.mux.Unlock()
cf, ok := e.compiledFunctions[module.ID]
if !ok {
return
}
cf.refCount--
if cf.refCount > 0 {
return
}
delete(e.compiledFunctions, module.ID)
}
func (e *engine) addCompiledFunctions(module *wasm.Module, fs []compiledFunction) {
e.mux.Lock()
defer e.mux.Unlock()
e.compiledFunctions[module.ID] = fs
e.compiledFunctions[module.ID] = &compiledFunctionWithCount{funcs: fs, refCount: 1}
}
func (e *engine) getCompiledFunctions(module *wasm.Module) (fs []compiledFunction, ok bool) {
e.mux.RLock()
defer e.mux.RUnlock()
fs, ok = e.compiledFunctions[module.ID]
func (e *engine) getCompiledFunctions(module *wasm.Module, increaseRefCount bool) (fs []compiledFunction, ok bool) {
e.mux.Lock()
defer e.mux.Unlock()
cf, ok := e.compiledFunctions[module.ID]
if ok {
fs = cf.funcs
if increaseRefCount {
cf.refCount++
}
}
return
}
@@ -242,15 +267,9 @@ type snapshot struct {
// Snapshot implements the same method as documented on experimental.Snapshotter.
func (ce *callEngine) Snapshot() experimental.Snapshot {
stack := make([]uint64, len(ce.stack))
copy(stack, ce.stack)
frames := make([]*callFrame, len(ce.frames))
copy(frames, ce.frames)
return &snapshot{
stack: stack,
frames: frames,
stack: slices.Clone(ce.stack),
frames: slices.Clone(ce.frames),
ce: ce,
}
}
@@ -356,7 +375,7 @@ const callFrameStackSize = 0
// CompileModule implements the same method as documented on wasm.Engine.
func (e *engine) CompileModule(_ context.Context, module *wasm.Module, listeners []experimental.FunctionListener, ensureTermination bool) error {
if _, ok := e.getCompiledFunctions(module); ok { // cache hit!
if _, ok := e.getCompiledFunctions(module, true); ok { // cache hit!
return nil
}
@@ -405,7 +424,7 @@ func (e *engine) NewModuleEngine(module *wasm.Module, instance *wasm.ModuleInsta
functions: make([]function, len(module.FunctionSection)+int(module.ImportFunctionCount)),
}
codes, ok := e.getCompiledFunctions(module)
codes, ok := e.getCompiledFunctions(module, false)
if !ok {
return nil, errors.New("source module must be compiled before instantiation")
}
@@ -427,12 +446,10 @@ func (e *engine) NewModuleEngine(module *wasm.Module, instance *wasm.ModuleInsta
// lowerIR lowers the interpreterir operations to engine friendly struct.
func (e *engine) lowerIR(ir *compilationResult, ret *compiledFunction) error {
// Copy the body from the result.
ret.body = make([]unionOperation, len(ir.Operations))
copy(ret.body, ir.Operations)
ret.body = slices.Clone(ir.Operations)
// Also copy the offsets if necessary.
if offsets := ir.IROperationSourceOffsetsInWasmBinary; len(offsets) > 0 {
ret.offsetsInWasmBinary = make([]uint64, len(offsets))
copy(ret.offsetsInWasmBinary, offsets)
ret.offsetsInWasmBinary = slices.Clone(offsets)
}
labelAddressResolutions := [labelKindNum][]uint64{}
@@ -449,9 +466,7 @@ func (e *engine) lowerIR(ir *compilationResult, ret *compiledFunction) error {
frameToAddresses := labelAddressResolutions[label.Kind()]
// Expand the slice if necessary.
if diff := fid - len(frameToAddresses) + 1; diff > 0 {
for j := 0; j < diff; j++ {
frameToAddresses = append(frameToAddresses, 0)
}
frameToAddresses = append(frameToAddresses, make([]uint64, diff)...)
}
frameToAddresses[fid] = address
labelAddressResolutions[kind] = frameToAddresses
@@ -472,6 +487,8 @@ func (e *engine) lowerIR(ir *compilationResult, ret *compiledFunction) error {
target := op.Us[j]
e.setLabelAddress(&op.Us[j], label(target), labelAddressResolutions)
}
case operationKindTailCallReturnCallIndirect:
e.setLabelAddress(&op.Us[1], label(op.Us[1]), labelAddressResolutions)
}
}
return nil
@@ -761,18 +778,7 @@ func (ce *callEngine) callNativeFunc(ctx context.Context, m *wasm.ModuleInstance
case operationKindCallIndirect:
offset := ce.popValue()
table := tables[op.U2]
if offset >= uint64(len(table.References)) {
panic(wasmruntime.ErrRuntimeInvalidTableAccess)
}
rawPtr := table.References[offset]
if rawPtr == 0 {
panic(wasmruntime.ErrRuntimeInvalidTableAccess)
}
tf := functionFromUintptr(rawPtr)
if tf.typeID != typeIDs[op.U1] {
panic(wasmruntime.ErrRuntimeIndirectCallTypeMismatch)
}
tf := ce.functionForOffset(table, offset, typeIDs[op.U1])
ce.callFunction(ctx, f.moduleInstance, tf)
frame.pc++
@@ -1725,12 +1731,17 @@ func (ce *callEngine) callNativeFunc(ctx context.Context, m *wasm.ModuleInstance
if fillSize+offset > uint64(len(memoryInst.Buffer)) {
panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
} else if fillSize != 0 {
// Uses the copy trick for faster filling buffer.
// https://gist.github.com/taylorza/df2f89d5f9ab3ffd06865062a4cf015d
// Uses the copy trick to fill the buffer with the value faster.
// https://github.com/golang/go/blob/go1.24.0/src/bytes/bytes.go#L664-L673
buf := memoryInst.Buffer[offset : offset+fillSize]
buf[0] = value
for i := 1; i < len(buf); i *= 2 {
copy(buf[i:], buf[:i])
if value == 0 {
clear(buf)
} else {
buf[0] = value
for i := 1; i < len(buf); {
chunk := min(i, 8192)
i += copy(buf[i:], buf[:chunk])
}
}
}
frame.pc++
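The hunk above replaces the unbounded doubling copy with a `clear()` fast path for zero fills and a chunked doubling copy otherwise. As a standalone illustration of the same loop (a minimal sketch, not part of the diff):

```go
// fillBytes fills buf with value by doubling the already-initialized prefix,
// capping each copy at 8 KiB so the copy source stays cache-resident.
func fillBytes(buf []byte, value byte) {
	if len(buf) == 0 {
		return
	}
	if value == 0 {
		clear(buf) // zero fills map to the runtime's memclr, which is faster
		return
	}
	buf[0] = value
	for i := 1; i < len(buf); {
		chunk := min(i, 8192)
		i += copy(buf[i:], buf[:chunk])
	}
}
```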
@@ -1804,7 +1815,7 @@ func (ce *callEngine) callNativeFunc(ctx context.Context, m *wasm.ModuleInstance
panic(wasmruntime.ErrRuntimeInvalidTableAccess)
} else if num > 0 {
// Uses the copy trick for faster filling the region with the value.
// https://gist.github.com/taylorza/df2f89d5f9ab3ffd06865062a4cf015d
// https://github.com/golang/go/blob/go1.24.0/src/slices/slices.go#L514-L517
targetRegion := table.References[offset : offset+num]
targetRegion[0] = ref
for i := 1; i < len(targetRegion); i *= 2 {
@@ -4331,6 +4342,32 @@ func (ce *callEngine) callNativeFunc(ctx context.Context, m *wasm.ModuleInstance
memoryInst.Mux.Unlock()
ce.pushValue(uint64(old))
frame.pc++
case operationKindTailCallReturnCall:
f := &functions[op.U1]
ce.dropForTailCall(frame, f)
body, bodyLen = ce.resetPc(frame, f)
case operationKindTailCallReturnCallIndirect:
offset := ce.popValue()
table := tables[op.U2]
tf := ce.functionForOffset(table, offset, typeIDs[op.U1])
// We allow proper tail calls only across functions that belong to the same
// module; for indirect calls, this has to be enforced at run time.
// For details, see internal/engine/RATIONALE.md
if tf.moduleInstance != f.moduleInstance {
// Revert to a normal call.
ce.callFunction(ctx, f.moduleInstance, tf)
// Return
ce.drop(op.Us[0])
// Jump to the function frame (return)
frame.pc = op.Us[1]
continue
}
ce.dropForTailCall(frame, tf)
body, bodyLen = ce.resetPc(frame, tf)
default:
frame.pc++
}
@@ -4338,6 +4375,40 @@ func (ce *callEngine) callNativeFunc(ctx context.Context, m *wasm.ModuleInstance
ce.popFrame()
}
func (ce *callEngine) dropForTailCall(frame *callFrame, f *function) {
base := frame.base - frame.f.funcType.ParamNumInUint64
paramCount := f.funcType.ParamNumInUint64
ce.stack = append(ce.stack[:base], ce.stack[len(ce.stack)-paramCount:]...)
}
func (ce *callEngine) resetPc(frame *callFrame, f *function) (body []unionOperation, bodyLen uint64) {
// The compiler currently allows proper tail calls only across functions
// that belong to the same module; thus, we can overwrite the frame in place.
// For details, see internal/engine/RATIONALE.md
frame.f = f
frame.base = len(ce.stack)
frame.pc = 0
body = frame.f.parent.body
bodyLen = uint64(len(body))
return body, bodyLen
}
func (ce *callEngine) functionForOffset(table *wasm.TableInstance, offset uint64, expectedTypeID wasm.FunctionTypeID) *function {
if offset >= uint64(len(table.References)) {
panic(wasmruntime.ErrRuntimeInvalidTableAccess)
}
rawPtr := table.References[offset]
if rawPtr == 0 {
panic(wasmruntime.ErrRuntimeInvalidTableAccess)
}
tf := functionFromUintptr(rawPtr)
if tf.typeID != expectedTypeID {
panic(wasmruntime.ErrRuntimeIndirectCallTypeMismatch)
}
return tf
}
func wasmCompatMax32bits(v1, v2 uint32) uint64 {
return uint64(math.Float32bits(moremath.WasmCompatMax32(
math.Float32frombits(v1),
@@ -4564,9 +4635,7 @@ func (ce *callEngine) callGoFuncWithStack(ctx context.Context, m *wasm.ModuleIns
// In the interpreter engine, ce.stack may only have capacity to store
// parameters. Grow when there are more results than parameters.
if growLen := resultLen - paramLen; growLen > 0 {
for i := 0; i < growLen; i++ {
ce.stack = append(ce.stack, 0)
}
ce.stack = append(ce.stack, make([]uint64, growLen)...)
stackLen += growLen
}

View File

@@ -445,6 +445,10 @@ func (o operationKind) String() (ret string) {
ret = "operationKindAtomicRMW8Cmpxchg"
case operationKindAtomicRMW16Cmpxchg:
ret = "operationKindAtomicRMW16Cmpxchg"
case operationKindTailCallReturnCall:
ret = "operationKindTailCallReturnCall"
case operationKindTailCallReturnCallIndirect:
ret = "operationKindTailCallReturnCallIndirect"
default:
panic(fmt.Errorf("unknown operation %d", o))
}
@@ -768,6 +772,11 @@ const (
// operationKindAtomicRMW16Cmpxchg is the kind for NewOperationAtomicRMW16Cmpxchg.
operationKindAtomicRMW16Cmpxchg
// operationKindTailCallReturnCall is the kind for newOperationTailCallReturnCall.
operationKindTailCallReturnCall
// operationKindTailCallReturnCallIndirect is the kind for newOperationTailCallReturnCallIndirect.
operationKindTailCallReturnCallIndirect
// operationKindEnd is always placed at the bottom of this iota definition to be used in the test.
operationKindEnd
)
@@ -1097,6 +1106,12 @@ func (o unionOperation) String() string {
operationKindAtomicRMW16Cmpxchg:
return o.Kind.String()
case operationKindTailCallReturnCall:
return fmt.Sprintf("%s %d %s", o.Kind, o.U1, label(o.U2).String())
case operationKindTailCallReturnCallIndirect:
return fmt.Sprintf("%s %d %d", o.Kind, o.U1, o.U2)
default:
panic(fmt.Sprintf("TODO: %v", o.Kind))
}
@@ -2810,3 +2825,21 @@ func newOperationAtomicRMW8Cmpxchg(unsignedType unsignedType, arg memoryArg) uni
func newOperationAtomicRMW16Cmpxchg(unsignedType unsignedType, arg memoryArg) unionOperation {
return unionOperation{Kind: operationKindAtomicRMW16Cmpxchg, B1: byte(unsignedType), U1: uint64(arg.Alignment), U2: uint64(arg.Offset)}
}
// newOperationTailCallReturnCall is a constructor for unionOperation with operationKindTailCallReturnCall.
//
// This corresponds to
//
// wasm.OpcodeTailCallReturnCall.
func newOperationTailCallReturnCall(functionIndex uint32) unionOperation {
return unionOperation{Kind: operationKindTailCallReturnCall, U1: uint64(functionIndex)}
}
// newOperationTailCallReturnCallIndirect is a constructor for unionOperation with operationKindTailCallReturnCallIndirect.
//
// This corresponds to
//
// wasm.OpcodeTailCallReturnCallIndirect.
func newOperationTailCallReturnCallIndirect(typeIndex, tableIndex uint32, dropDepth inclusiveRange, l label) unionOperation {
return unionOperation{Kind: operationKindTailCallReturnCallIndirect, U1: uint64(typeIndex), U2: uint64(tableIndex), Us: []uint64{dropDepth.AsU64(), uint64(l)}}
}

View File

@@ -272,9 +272,9 @@ func (c *compiler) wasmOpcodeSignature(op wasm.Opcode, index uint32) (*signature
return signature_I32_None, nil
case wasm.OpcodeReturn:
return signature_None_None, nil
case wasm.OpcodeCall:
case wasm.OpcodeCall, wasm.OpcodeTailCallReturnCall:
return c.funcTypeToSigs.get(c.funcs[index], false /* direct */), nil
case wasm.OpcodeCallIndirect:
case wasm.OpcodeCallIndirect, wasm.OpcodeTailCallReturnCallIndirect:
return c.funcTypeToSigs.get(index, true /* call_indirect */), nil
case wasm.OpcodeDrop:
return signature_Unknown_None, nil

View File

@@ -88,7 +88,7 @@ type Compiler interface {
MatchInstrOneOf(def SSAValueDefinition, opcodes []ssa.Opcode) ssa.Opcode
// AddRelocationInfo appends the relocation information for the function reference at the current buffer offset.
AddRelocationInfo(funcRef ssa.FuncRef)
AddRelocationInfo(funcRef ssa.FuncRef, isTailCall bool)
// AddSourceOffsetInfo appends the source offset information for the given offset.
AddSourceOffsetInfo(executableOffset int64, sourceOffset ssa.SourceOffset)
@@ -115,6 +115,8 @@ type RelocationInfo struct {
Offset int64
// Target is the target function of the call instruction.
FuncRef ssa.FuncRef
// IsTailCall indicates whether the call instruction is a tail call.
IsTailCall bool
}
// compiler implements Compiler.
@@ -352,10 +354,11 @@ func (c *compiler) SourceOffsetInfo() []SourceOffsetInfo {
}
// AddRelocationInfo implements Compiler.AddRelocationInfo.
func (c *compiler) AddRelocationInfo(funcRef ssa.FuncRef) {
func (c *compiler) AddRelocationInfo(funcRef ssa.FuncRef, isTailCall bool) {
c.relocations = append(c.relocations, RelocationInfo{
Offset: int64(len(c.buf)),
FuncRef: funcRef,
Offset: int64(len(c.buf)),
FuncRef: funcRef,
IsTailCall: isTailCall,
})
}

View File

@@ -21,7 +21,9 @@ type instruction struct {
func (i *instruction) IsCall() bool { return i.kind == call }
// IsIndirectCall implements regalloc.Instr.
func (i *instruction) IsIndirectCall() bool { return i.kind == callIndirect }
func (i *instruction) IsIndirectCall() bool {
return i.kind == callIndirect
}
// IsReturn implements regalloc.Instr.
func (i *instruction) IsReturn() bool { return i.kind == ret }
@@ -288,6 +290,11 @@ func (i *instruction) String() string {
case nopUseReg:
return fmt.Sprintf("nop_use_reg %s", i.op1.format(true))
case tailCall:
return fmt.Sprintf("tailCall %s", ssa.FuncRef(i.u1))
case tailCallIndirect:
return fmt.Sprintf("tailCallIndirect %s", i.op1.format(true))
default:
panic(fmt.Sprintf("BUG: %d", int(i.kind)))
}
@@ -357,7 +364,7 @@ func (i *instruction) Uses(regs *[]regalloc.VReg) []regalloc.VReg {
default:
panic(fmt.Sprintf("BUG: invalid operand: %s", i))
}
case useKindCallInd:
case useKindCallInd, useKindTailCallInd:
op := i.op1
switch op.kind {
case operandKindReg:
@@ -428,13 +435,16 @@ func (i *instruction) Uses(regs *[]regalloc.VReg) []regalloc.VReg {
func (i *instruction) AssignUse(index int, v regalloc.VReg) {
switch uk := useKinds[i.kind]; uk {
case useKindNone:
case useKindCallInd:
case useKindCallInd, useKindTailCallInd:
if index != 0 {
panic("BUG")
}
op := &i.op1
switch op.kind {
case operandKindReg:
if uk == useKindTailCallInd && v != r11VReg {
panic("BUG")
}
op.setReg(v)
case operandKindMem:
op.addressMode().assignUses(index, v)
@@ -838,6 +848,12 @@ const (
// nopUseReg is a meta instruction that uses one register and does nothing.
nopUseReg
// tailCall represents a direct tail call, emitted as a jump to the target function.
tailCall
// tailCallIndirect represents an indirect tail call whose target is held in a register.
tailCallIndirect
instrMax
)
@@ -1079,6 +1095,10 @@ func (k instructionKind) String() string {
return "lockcmpxchg"
case lockxadd:
return "lockxadd"
case tailCall:
return "tailCall"
case tailCallIndirect:
return "tailCallIndirect"
default:
panic("BUG")
}
@@ -1173,6 +1193,27 @@ func (i *instruction) asCallIndirect(ptr operand, abi *backend.FunctionABI) *ins
return i
}
func (i *instruction) asTailCallReturnCall(ref ssa.FuncRef, abi *backend.FunctionABI) *instruction {
i.kind = tailCall
i.u1 = uint64(ref)
if abi != nil {
i.u2 = abi.ABIInfoAsUint64()
}
return i
}
func (i *instruction) asTailCallReturnCallIndirect(ptr operand, abi *backend.FunctionABI) *instruction {
if ptr.kind != operandKindReg && ptr.kind != operandKindMem {
panic("BUG")
}
i.kind = tailCallIndirect
i.op1 = ptr
if abi != nil {
i.u2 = abi.ABIInfoAsUint64()
}
return i
}
func (i *instruction) asRet() *instruction {
i.kind = ret
return i
@@ -2342,6 +2383,8 @@ var defKinds = [instrMax]defKind{
lockxadd: defKindNone,
neg: defKindNone,
nopUseReg: defKindNone,
tailCall: defKindCall,
tailCallIndirect: defKindCall,
}
// String implements fmt.Stringer.
@@ -2375,6 +2418,7 @@ const (
useKindBlendvpd
useKindCall
useKindCallInd
useKindTailCallInd
useKindFcvtToSintSequence
useKindFcvtToUintSequence
)
@@ -2425,6 +2469,8 @@ var useKinds = [instrMax]useKind{
lockxadd: useKindOp1RegOp2,
neg: useKindOp1,
nopUseReg: useKindOp1,
tailCall: useKindCall,
tailCallIndirect: useKindTailCallInd,
}
func (u useKind) String() string {
@@ -2441,6 +2487,8 @@ func (u useKind) String() string {
return "call"
case useKindCallInd:
return "callInd"
case useKindTailCallInd:
return "tailCallInd"
default:
return "invalid"
}

View File

@@ -1211,7 +1211,7 @@ func (i *instruction) encode(c backend.Compiler) (needsLabelResolution bool) {
case call:
c.EmitByte(0xe8)
// Meaning that the call target is a function value, and requires relocation.
c.AddRelocationInfo(ssa.FuncRef(i.u1))
c.AddRelocationInfo(ssa.FuncRef(i.u1), false)
// Note that this is zero as a placeholder for the call target if it's a function value.
c.Emit4Bytes(uint32(i.u2))
@@ -1244,6 +1244,37 @@ func (i *instruction) encode(c backend.Compiler) (needsLabelResolution bool) {
panic("BUG: invalid operand kind")
}
case tailCall:
// Encode as jmp.
c.EmitByte(0xe9)
// Meaning that the call target is a function value, and requires relocation.
c.AddRelocationInfo(ssa.FuncRef(i.u1), true)
// Note that this is zero as a placeholder for the call target if it's a function value.
c.Emit4Bytes(uint32(i.u2))
case tailCallIndirect:
op := i.op1
const opcodeNum = 1
const opcode = 0xff
const regMemSubOpcode = 4
rex := rexInfo(0).clearW()
switch op.kind {
// Indirect tail calls always take a register as the target.
// Note: the register must be a caller-saved (scratch) register that the
// epilogue does not restore (usually r11).
case operandKindReg:
dst := regEncodings[op.reg().RealReg()]
encodeRegReg(c,
legacyPrefixesNone,
opcode, opcodeNum,
regMemSubOpcode,
dst,
rex,
)
default:
panic("BUG: invalid operand kind")
}
case xchg:
src, dst := regEncodings[i.op1.reg().RealReg()], i.op2
size := i.u1

View File

@@ -17,7 +17,7 @@ import (
// NewBackend returns a new backend for amd64.
func NewBackend() backend.Machine {
m := &machine{
cpuFeatures: platform.CpuFeatures,
cpuFeatures: platform.CpuFeatures(),
regAlloc: regalloc.NewAllocator[*instruction, *labelPosition, *regAllocFn](regInfo),
spillSlots: map[regalloc.VRegID]int64{},
amodePool: wazevoapi.NewPool[amode](nil),
@@ -1109,6 +1109,9 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
atomicOp, size := instr.AtomicRmwData()
m.lowerAtomicRmw(atomicOp, addr, val, size, instr.Return())
case ssa.OpcodeTailCallReturnCall, ssa.OpcodeTailCallReturnCallIndirect:
m.lowerTailCall(instr)
default:
panic("TODO: lowering " + op.String())
}
@@ -1885,31 +1888,7 @@ func (m *machine) lowerStore(si *ssa.Instruction) {
func (m *machine) lowerCall(si *ssa.Instruction) {
isDirectCall := si.Opcode() == ssa.OpcodeCall
var indirectCalleePtr ssa.Value
var directCallee ssa.FuncRef
var sigID ssa.SignatureID
var args []ssa.Value
var isMemmove bool
if isDirectCall {
directCallee, sigID, args = si.CallData()
} else {
indirectCalleePtr, sigID, args, isMemmove = si.CallIndirectData()
}
calleeABI := m.c.GetFunctionABI(m.c.SSABuilder().ResolveSignature(sigID))
stackSlotSize := int64(calleeABI.AlignedArgResultStackSlotSize())
if m.maxRequiredStackSizeForCalls < stackSlotSize+16 {
m.maxRequiredStackSizeForCalls = stackSlotSize + 16 // 16 == return address + RBP.
}
// Note: See machine.SetupPrologue for the stack layout.
// The stack pointer decrease/increase will be inserted later in the compilation.
for i, arg := range args {
reg := m.c.VRegOf(arg)
def := m.c.ValueDefinition(arg)
m.callerGenVRegToFunctionArg(calleeABI, i, reg, def, stackSlotSize)
}
indirectCalleePtr, directCallee, isMemmove, calleeABI, stackSlotSize := m.prepareCall(si, isDirectCall)
if isMemmove {
// Go's memmove *might* use all xmm0-xmm15, so we need to release them.
@@ -1939,6 +1918,39 @@ func (m *machine) lowerCall(si *ssa.Instruction) {
m.insert(m.allocateInstr().asNopUseReg(regInfo.RealRegToVReg[rdx]))
}
m.insertReturns(si, calleeABI, stackSlotSize)
}
func (m *machine) prepareCall(si *ssa.Instruction, isDirectCall bool) (ssa.Value, ssa.FuncRef, bool, *backend.FunctionABI, int64) {
var indirectCalleePtr ssa.Value
var directCallee ssa.FuncRef
var sigID ssa.SignatureID
var args []ssa.Value
var isMemmove bool
if isDirectCall {
directCallee, sigID, args = si.CallData()
} else {
indirectCalleePtr, sigID, args, isMemmove = si.CallIndirectData()
}
calleeABI := m.c.GetFunctionABI(m.c.SSABuilder().ResolveSignature(sigID))
stackSlotSize := int64(calleeABI.AlignedArgResultStackSlotSize())
if m.maxRequiredStackSizeForCalls < stackSlotSize+16 {
m.maxRequiredStackSizeForCalls = stackSlotSize + 16 // 16 == return address + RBP.
}
// Note: See machine.SetupPrologue for the stack layout.
// The stack pointer decrease/increase will be inserted later in the compilation.
for i, arg := range args {
reg := m.c.VRegOf(arg)
def := m.c.ValueDefinition(arg)
m.callerGenVRegToFunctionArg(calleeABI, i, reg, def, stackSlotSize)
}
return indirectCalleePtr, directCallee, isMemmove, calleeABI, stackSlotSize
}
func (m *machine) insertReturns(si *ssa.Instruction, calleeABI *backend.FunctionABI, stackSlotSize int64) {
var index int
r1, rs := si.Returns()
if r1.Valid() {
@@ -1952,6 +1964,43 @@ func (m *machine) lowerCall(si *ssa.Instruction) {
}
}
func (m *machine) lowerTailCall(si *ssa.Instruction) {
isDirectCall := si.Opcode() == ssa.OpcodeTailCallReturnCall
indirectCalleePtr, directCallee, isMemmove, calleeABI, stackSlotSize := m.prepareCall(si, isDirectCall)
if isMemmove {
panic("memmove not supported in tail calls")
}
isAllRegs := stackSlotSize == 0
switch {
case isDirectCall && isAllRegs:
call := m.allocateInstr().asTailCallReturnCall(directCallee, calleeABI)
m.insert(call)
case !isDirectCall && isAllRegs:
// In a tail call the epilogue is inserted before the jump instruction,
// so an arbitrary register might be overwritten while the stack is restored.
// Therefore, unlike a regular indirect call, we ensure the pointer is stored
// in a caller-saved register (r11) that the epilogue leaves untouched.
// For details, see internal/engine/RATIONALE.md
ptrOp := m.getOperand_Reg(m.c.ValueDefinition(indirectCalleePtr))
tmpJmp := r11VReg
m.InsertMove(tmpJmp, ptrOp.reg(), ssa.TypeI64)
callInd := m.allocateInstr().asTailCallReturnCallIndirect(newOperandReg(tmpJmp), calleeABI)
m.insert(callInd)
case isDirectCall && !isAllRegs:
call := m.allocateInstr().asCall(directCallee, calleeABI)
m.insert(call)
case !isDirectCall && !isAllRegs:
ptrOp := m.getOperand_Mem_Reg(m.c.ValueDefinition(indirectCalleePtr))
callInd := m.allocateInstr().asCallIndirect(ptrOp, calleeABI)
m.insert(callInd)
}
// If this is a proper tail call, returns will be cleared in the postRegAlloc phase.
m.insertReturns(si, calleeABI, stackSlotSize)
}
// callerGenVRegToFunctionArg is the opposite of GenFunctionArgToVReg, which is used to generate the
// caller side of the function call.
func (m *machine) callerGenVRegToFunctionArg(a *backend.FunctionABI, argIndex int, reg regalloc.VReg, def backend.SSAValueDefinition, stackSlotSize int64) {

View File

@@ -188,6 +188,23 @@ func (m *machine) postRegAlloc() {
linkInstr(inc, next)
}
continue
case tailCall, tailCallIndirect:
// At this point, reg alloc is done, so we can safely insert the RSP-decrement instruction
// right before the tail-call (jump) instruction. If this were done before reg alloc, the stack slot
// could point to the wrong location and therefore yield a wrong value.
tailCall := cur
_, _, _, _, size := backend.ABIInfoFromUint64(tailCall.u2)
if size > 0 {
dec := m.allocateInstr().asAluRmiR(aluRmiROpcodeSub, newOperandImm32(size), rspVReg, true)
linkInstr(tailCall.prev, dec)
linkInstr(dec, tailCall)
}
// In a tail call, we insert the epilogue before the jump instruction.
m.setupEpilogueAfter(tailCall.prev)
// If this has been encoded as a proper tail call, we can remove the trailing instructions.
// For details, see internal/engine/RATIONALE.md
m.removeUntilRet(cur.next)
continue
}
// Removes the redundant copy instruction.
@@ -278,6 +295,20 @@ func (m *machine) setupEpilogueAfter(cur *instruction) {
linkInstr(cur, prevNext)
}
// removeUntilRet removes the instructions starting from `cur` until the first `ret` instruction.
func (m *machine) removeUntilRet(cur *instruction) {
for ; cur != nil; cur = cur.next {
prev, next := cur.prev, cur.next
prev.next = next
if next != nil {
next.prev = prev
}
if cur.kind == ret {
return
}
}
}
func (m *machine) addRSP(offset int32, cur *instruction) *instruction {
if offset == 0 {
return cur

View File

@@ -261,6 +261,23 @@ func (m *machine) resolveAddressModeForOffset(offset int64, dstBits byte, rn reg
func (m *machine) lowerCall(si *ssa.Instruction) {
isDirectCall := si.Opcode() == ssa.OpcodeCall
indirectCalleePtr, directCallee, calleeABI, stackSlotSize := m.prepareCall(si, isDirectCall)
if isDirectCall {
call := m.allocateInstr()
call.asCall(directCallee, calleeABI)
m.insert(call)
} else {
ptr := m.compiler.VRegOf(indirectCalleePtr)
callInd := m.allocateInstr()
callInd.asCallIndirect(ptr, calleeABI)
m.insert(callInd)
}
m.insertReturns(si, calleeABI, stackSlotSize)
}
func (m *machine) prepareCall(si *ssa.Instruction, isDirectCall bool) (ssa.Value, ssa.FuncRef, *backend.FunctionABI, int64) {
var indirectCalleePtr ssa.Value
var directCallee ssa.FuncRef
var sigID ssa.SignatureID
@@ -282,18 +299,10 @@ func (m *machine) lowerCall(si *ssa.Instruction) {
def := m.compiler.ValueDefinition(arg)
m.callerGenVRegToFunctionArg(calleeABI, i, reg, def, stackSlotSize)
}
return indirectCalleePtr, directCallee, calleeABI, stackSlotSize
}
if isDirectCall {
call := m.allocateInstr()
call.asCall(directCallee, calleeABI)
m.insert(call)
} else {
ptr := m.compiler.VRegOf(indirectCalleePtr)
callInd := m.allocateInstr()
callInd.asCallIndirect(ptr, calleeABI)
m.insert(callInd)
}
func (m *machine) insertReturns(si *ssa.Instruction, calleeABI *backend.FunctionABI, stackSlotSize int64) {
var index int
r1, rs := si.Returns()
if r1.Valid() {
@@ -307,6 +316,40 @@ func (m *machine) lowerCall(si *ssa.Instruction) {
}
}
func (m *machine) lowerTailCall(si *ssa.Instruction) {
isDirectCall := si.Opcode() == ssa.OpcodeTailCallReturnCall
indirectCalleePtr, directCallee, calleeABI, stackSlotSize := m.prepareCall(si, isDirectCall)
// We currently support tail calls only when the args are passed via registers;
// otherwise we fall back to a plain call.
// For details, see internal/engine/RATIONALE.md
isAllRegs := stackSlotSize == 0
switch {
case isDirectCall && isAllRegs:
tailJump := m.allocateInstr()
tailJump.asTailCall(directCallee, calleeABI)
m.insert(tailJump)
case !isDirectCall && isAllRegs:
ptr := m.compiler.VRegOf(indirectCalleePtr)
callInd := m.allocateInstr()
callInd.asTailCallIndirect(ptr, calleeABI)
m.insert(callInd)
case isDirectCall && !isAllRegs:
tailJump := m.allocateInstr()
tailJump.asCall(directCallee, calleeABI)
m.insert(tailJump)
case !isDirectCall && !isAllRegs:
ptr := m.compiler.VRegOf(indirectCalleePtr)
callInd := m.allocateInstr()
callInd.asCallIndirect(ptr, calleeABI)
m.insert(callInd)
}
// If this is a proper tail call, returns will be cleared in the postRegAlloc phase.
m.insertReturns(si, calleeABI, stackSlotSize)
}
func (m *machine) insertAddOrSubStackPointer(rd regalloc.VReg, diff int64, add bool) {
if imm12Operand, ok := asImm12Operand(uint64(diff)); ok {
alu := m.allocateInstr()

View File

@@ -140,6 +140,8 @@ var defKinds = [numInstructionKinds]defKind{
atomicStore: defKindNone,
dmb: defKindNone,
loadConstBlockArg: defKindRD,
tailCall: defKindCall,
tailCallInd: defKindCall,
}
// Defs returns the list of regalloc.VReg that are defined by the instruction.
@@ -278,6 +280,8 @@ var useKinds = [numInstructionKinds]useKind{
atomicStore: useKindRNRM,
loadConstBlockArg: useKindNone,
dmb: useKindNone,
tailCall: useKindCall,
tailCallInd: useKindCallInd,
}
// Uses returns the list of regalloc.VReg that are used by the instruction.
@@ -1501,6 +1505,10 @@ func (i *instruction) String() (str string) {
str = fmt.Sprintf("%s %s, %s", m, formatVRegSized(i.rm.nr(), size), formatVRegSized(i.rn.nr(), 64))
case dmb:
str = "dmb"
case tailCall:
str = fmt.Sprintf("b %s", ssa.FuncRef(i.u1))
case tailCallInd:
str = fmt.Sprintf("b %s", formatVRegSized(i.rn.nr(), 64))
case udf:
str = "udf"
case emitSourceOffsetInfo:
@@ -1550,6 +1558,22 @@ func (i *instruction) asDMB() {
i.kind = dmb
}
func (i *instruction) asTailCall(ref ssa.FuncRef, abi *backend.FunctionABI) {
i.kind = tailCall
i.u1 = uint64(ref)
if abi != nil {
i.u2 = abi.ABIInfoAsUint64()
}
}
func (i *instruction) asTailCallIndirect(ptr regalloc.VReg, abi *backend.FunctionABI) {
i.kind = tailCallInd
i.rn = operandNR(ptr)
if abi != nil {
i.u2 = abi.ABIInfoAsUint64()
}
}
// TODO: delete unnecessary things.
const (
// nop0 represents a no-op of zero size.
@@ -1727,6 +1751,10 @@ const (
atomicStore
// dmb represents the data memory barrier instruction in inner-shareable (ish) mode.
dmb
// tailCall represents a tail call instruction.
tailCall
// tailCallInd represents an indirect tail call instruction.
tailCallInd
// UDF is the undefined instruction. For debugging only.
udf
// loadConstBlockArg represents a load of a constant block argument.

View File

@@ -39,7 +39,7 @@ func (i *instruction) encode(m *machine) {
c.Emit4Bytes(encodeUnconditionalBranch(false, imm))
case call:
// We still don't know the exact address of the function to call, so we emit a placeholder.
c.AddRelocationInfo(i.callFuncRef())
c.AddRelocationInfo(i.callFuncRef(), false)
c.Emit4Bytes(encodeUnconditionalBranch(true, 0)) // 0 = placeholder
case callInd:
c.Emit4Bytes(encodeUnconditionalBranchReg(regNumberInEncoding[i.rn.realReg()], true))
@@ -417,6 +417,12 @@ func (i *instruction) encode(m *machine) {
))
case dmb:
c.Emit4Bytes(encodeDMB())
case tailCall:
// We still don't know the exact address of the function to call, so we emit a placeholder.
c.AddRelocationInfo(i.callFuncRef(), true) // true = IsTailCall
c.Emit4Bytes(encodeUnconditionalBranch(false, 0)) // 0 = placeholder
case tailCallInd:
c.Emit4Bytes(encodeUnconditionalBranchReg(regNumberInEncoding[i.rn.realReg()], false))
default:
panic(i.String())
}

View File

@@ -788,6 +788,9 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
instr.asDMB()
m.insert(instr)
case ssa.OpcodeTailCallReturnCall, ssa.OpcodeTailCallReturnCallIndirect:
m.lowerTailCall(instr)
default:
panic("TODO: lowering " + op.String())
}

View File

@@ -198,6 +198,11 @@ func (m *machine) postRegAlloc() {
switch cur.kind {
case ret:
m.setupEpilogueAfter(cur.prev)
case tailCall, tailCallInd:
m.setupEpilogueAfter(cur.prev)
// If this has been encoded as a proper tail call, we can remove the trailing instructions.
// For details, see internal/engine/RATIONALE.md
m.removeUntilRet(cur.next)
case loadConstBlockArg:
lc := cur
next := lc.next
@@ -325,6 +330,20 @@ func (m *machine) setupEpilogueAfter(cur *instruction) {
linkInstr(cur, prevNext)
}
// removeUntilRet removes the instructions starting from `cur` until the first `ret` instruction.
func (m *machine) removeUntilRet(cur *instruction) {
for ; cur != nil; cur = cur.next {
prev, next := cur.prev, cur.next
prev.next = next
if next != nil {
next.prev = prev
}
if cur.kind == ret {
return
}
}
}
// saveRequiredRegs is the set of registers that must be saved/restored while growing the stack when there's insufficient
// stack space left. Basically this is the combination of CalleeSavedRegisters plus argument registers except for x0,
// which always points to the execution context whenever the native code is entered from Go.

View File

@@ -59,13 +59,19 @@ func (m *machine) ResolveRelocations(
if diff < minUnconditionalBranchOffset || diff > maxUnconditionalBranchOffset {
// Find the near trampoline island from callTrampolineIslandOffsets.
islandOffset := searchTrampolineIsland(callTrampolineIslandOffsets, int(instrOffset))
islandTargetOffset := islandOffset + trampolineCallSize*int(r.FuncRef)
// Imported functions don't need trampolines, so we ignore them when we compute the offset
// (see also encodeCallTrampolineIsland)
funcOffset := int(r.FuncRef) - importedFns
islandTargetOffset := islandOffset + trampolineCallSize*funcOffset
diff = int64(islandTargetOffset) - (instrOffset)
if diff < minUnconditionalBranchOffset || diff > maxUnconditionalBranchOffset {
panic("BUG in trampoline placement")
}
}
binary.LittleEndian.PutUint32(executable[instrOffset:instrOffset+4], encodeUnconditionalBranch(true, diff))
// The unconditional branch instruction is usually encoded as a branch-and-link (BL),
// because it is a function call. However, if the instruction is a tail call,
// we encode it as a plain unconditional branch (B), so we won't overwrite the link register.
binary.LittleEndian.PutUint32(executable[instrOffset:instrOffset+4], encodeUnconditionalBranch(!r.IsTailCall, diff))
}
}

View File

@@ -6,8 +6,10 @@ import (
"errors"
"fmt"
"runtime"
"slices"
"sort"
"sync"
"sync/atomic"
"unsafe"
"github.com/tetratelabs/wazero/api"
@@ -23,11 +25,15 @@ import (
)
type (
compiledModuleWithCount struct {
*compiledModule
refCount int
}
// engine implements wasm.Engine.
engine struct {
wazeroVersion string
fileCache filecache.Cache
compiledModules map[wasm.ModuleID]*compiledModule
compiledModules map[wasm.ModuleID]*compiledModuleWithCount
// sortedCompiledModules is a list of compiled modules sorted by the initial address of the executable.
sortedCompiledModules []*compiledModule
mux sync.RWMutex
@@ -42,25 +48,32 @@ type (
}
sharedFunctions struct {
// memoryGrowExecutable is a compiled trampoline executable for memory.grow builtin function.
memoryGrowExecutable []byte
// checkModuleExitCode is a compiled trampoline executable for checking module instance exit code. This
// is used when ensureTermination is true.
checkModuleExitCode []byte
// stackGrowExecutable is a compiled executable for growing stack builtin function.
stackGrowExecutable []byte
// tableGrowExecutable is a compiled trampoline executable for table.grow builtin function.
tableGrowExecutable []byte
// refFuncExecutable is a compiled trampoline executable for ref.func builtin function.
refFuncExecutable []byte
// memoryWait32Executable is a compiled trampoline executable for memory.wait32 builtin function
memoryWait32Executable []byte
// memoryWait64Executable is a compiled trampoline executable for memory.wait64 builtin function
memoryWait64Executable []byte
// memoryNotifyExecutable is a compiled trampoline executable for memory.notify builtin function
memoryNotifyExecutable []byte
listenerBeforeTrampolines map[*wasm.FunctionType][]byte
listenerAfterTrampolines map[*wasm.FunctionType][]byte
// The compiled trampolines executable.
executable []byte
// memoryGrowAddress is the address of memory.grow builtin function.
memoryGrowAddress *byte
// checkModuleExitCodeAddress is the address of checking module instance exit code.
// This is used when ensureTermination is true.
checkModuleExitCodeAddress *byte
// stackGrowAddress is the address of growing stack builtin function.
stackGrowAddress *byte
// tableGrowAddress is the address of table.grow builtin function.
tableGrowAddress *byte
// refFuncAddress is the address of ref.func builtin function.
refFuncAddress *byte
// memoryWait32Address is the address of memory.wait32 builtin function
memoryWait32Address *byte
// memoryWait64Address is the address of memory.wait64 builtin function
memoryWait64Address *byte
// memoryNotifyAddress is the address of memory.notify builtin function
memoryNotifyAddress *byte
listenerTrampolines listenerTrampolines
}
listenerTrampolines = map[*wasm.FunctionType]struct {
executable []byte
before *byte
after *byte
}
// compiledModule is a compiled variant of a wasm.Module and ready to be used for instantiation.
@@ -83,8 +96,9 @@ type (
}
executables struct {
executable []byte
entryPreambles [][]byte
executable []byte
entryPreambles []byte
entryPreamblesPtrs []*byte
}
)
@@ -105,7 +119,7 @@ func NewEngine(ctx context.Context, _ api.CoreFeatures, fc filecache.Cache) wasm
machine := newMachine()
be := backend.NewCompiler(ctx, machine, ssa.NewBuilder())
e := &engine{
compiledModules: make(map[wasm.ModuleID]*compiledModule),
compiledModules: make(map[wasm.ModuleID]*compiledModuleWithCount),
setFinalizer: runtime.SetFinalizer,
machine: machine,
be: be,
@@ -164,23 +178,46 @@ func (e *engine) CompileModule(ctx context.Context, module *wasm.Module, listene
}
func (exec *executables) compileEntryPreambles(m *wasm.Module, machine backend.Machine, be backend.Compiler) {
exec.entryPreambles = make([][]byte, len(m.TypeSection))
for i := range m.TypeSection {
if len(m.TypeSection) == 0 {
return
}
var preambles []byte
sizes := make([]int, len(m.TypeSection))
for i := range sizes {
typ := &m.TypeSection[i]
sig := frontend.SignatureForWasmFunctionType(typ)
be.Init()
buf := machine.CompileEntryPreamble(&sig)
executable := mmapExecutable(buf)
exec.entryPreambles[i] = executable
preambles = append(preambles, buf...)
align := 15 & -len(preambles) // Align 16-bytes boundary.
preambles = append(preambles, make([]byte, align)...)
sizes[i] = len(buf) + align
}
exec.entryPreambles = mmapExecutable(preambles)
exec.entryPreamblesPtrs = make([]*byte, len(sizes))
offset := 0
for i, size := range sizes {
ptr := &exec.entryPreambles[offset]
exec.entryPreamblesPtrs[i] = ptr
offset += size
if wazevoapi.PerfMapEnabled {
wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&executable[0])),
uint64(len(executable)), fmt.Sprintf("entry_preamble::type=%s", typ.String()))
typ := &m.TypeSection[i]
wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(ptr)),
uint64(size), fmt.Sprintf("entry_preamble::type=%s", typ.String()))
}
}
}
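The preamble packing above rounds the accumulated preamble buffer up to a 16-byte boundary after each preamble using `15 & -len(preambles)`. A quick illustration of that two's-complement trick (not part of the diff):

```go
// pad16 returns how many bytes must be appended to n bytes to reach the next
// multiple of 16; in two's complement, 15 & -n equals (16 - n%16) % 16.
func pad16(n int) int { return 15 & -n }

// pad16(35) == 13, and 35+13 == 48; pad16(48) == 0.
```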
func (e *engine) compileModule(ctx context.Context, module *wasm.Module, listeners []experimental.FunctionListener, ensureTermination bool) (*compiledModule, error) {
if module.IsHostModule {
return e.compileHostModule(ctx, module, listeners)
}
withListener := len(listeners) > 0
cm := &compiledModule{
offsets: wazevoapi.NewModuleContextOffsetData(module, withListener), parent: e, module: module,
@@ -188,116 +225,137 @@ func (e *engine) compileModule(ctx context.Context, module *wasm.Module, listene
executables: &executables{},
}
if module.IsHostModule {
return e.compileHostModule(ctx, module, listeners)
}
importedFns, localFns := int(module.ImportFunctionCount), len(module.FunctionSection)
if localFns == 0 {
return cm, nil
}
rels := make([]backend.RelocationInfo, 0)
refToBinaryOffset := make([]int, importedFns+localFns)
if wazevoapi.DeterministicCompilationVerifierEnabled {
// The compilation must be deterministic regardless of the order of functions being compiled.
wazevoapi.DeterministicCompilationVerifierRandomizeIndexes(ctx)
machine := newMachine()
relocator, err := newEngineRelocator(machine, importedFns, localFns)
if err != nil {
return nil, err
}
needSourceInfo := module.DWARFLines != nil
// Creates new compiler instances which are reused for each function.
ssaBuilder := ssa.NewBuilder()
fe := frontend.NewFrontendCompiler(module, ssaBuilder, &cm.offsets, ensureTermination, withListener, needSourceInfo)
machine := newMachine()
be := backend.NewCompiler(ctx, machine, ssaBuilder)
cm.executables.compileEntryPreambles(module, machine, be)
totalSize := 0 // Total binary size of the executable.
cm.functionOffsets = make([]int, localFns)
bodies := make([][]byte, localFns)
// Trampoline relocation related variables.
trampolineInterval, callTrampolineIslandSize, err := machine.CallTrampolineIslandInfo(localFns)
if err != nil {
return nil, err
var indexes []int
if wazevoapi.DeterministicCompilationVerifierEnabled {
// The compilation must be deterministic regardless of the order of functions being compiled.
indexes = wazevoapi.DeterministicCompilationVerifierRandomizeIndexes(ctx)
}
needCallTrampoline := callTrampolineIslandSize > 0
var callTrampolineIslandOffsets []int // Holds the offsets of trampoline islands.
for i := range module.CodeSection {
if wazevoapi.DeterministicCompilationVerifierEnabled {
i = wazevoapi.DeterministicCompilationVerifierGetRandomizedLocalFunctionIndex(ctx, i)
}
if workers := experimental.GetCompilationWorkers(ctx); workers <= 1 {
// Compile with a single goroutine.
fe := frontend.NewFrontendCompiler(module, ssaBuilder, &cm.offsets, ensureTermination, withListener, needSourceInfo)
fidx := wasm.Index(i + importedFns)
if wazevoapi.NeedFunctionNameInContext {
def := module.FunctionDefinition(fidx)
name := def.DebugName()
if len(def.ExportNames()) > 0 {
name = def.ExportNames()[0]
for i := range module.CodeSection {
if wazevoapi.DeterministicCompilationVerifierEnabled {
i = indexes[i]
}
ctx = wazevoapi.SetCurrentFunctionName(ctx, i, fmt.Sprintf("[%d/%d]%s", i, len(module.CodeSection)-1, name))
}
needListener := len(listeners) > 0 && listeners[i] != nil
body, relsPerFunc, err := e.compileLocalWasmFunction(ctx, module, wasm.Index(i), fe, ssaBuilder, be, needListener)
if err != nil {
return nil, fmt.Errorf("compile function %d/%d: %v", i, len(module.CodeSection)-1, err)
}
fidx := wasm.Index(i + importedFns)
fctx := functionContext(ctx, module, i, fidx)
// Align 16-bytes boundary.
totalSize = (totalSize + 15) &^ 15
cm.functionOffsets[i] = totalSize
if needSourceInfo {
// At the beginning of the function, we add the offset of the function body so that
// we can resolve the source location of the call site of the before-listener call.
cm.sourceMap.executableOffsets = append(cm.sourceMap.executableOffsets, uintptr(totalSize))
cm.sourceMap.wasmBinaryOffsets = append(cm.sourceMap.wasmBinaryOffsets, module.CodeSection[i].BodyOffsetInCodeSection)
for _, info := range be.SourceOffsetInfo() {
cm.sourceMap.executableOffsets = append(cm.sourceMap.executableOffsets, uintptr(totalSize)+uintptr(info.ExecutableOffset))
cm.sourceMap.wasmBinaryOffsets = append(cm.sourceMap.wasmBinaryOffsets, uint64(info.SourceOffset))
needListener := len(listeners) > i && listeners[i] != nil
body, relsPerFunc, err := e.compileLocalWasmFunction(fctx, module, wasm.Index(i), fe, ssaBuilder, be, needListener)
if err != nil {
return nil, fmt.Errorf("compile function %d/%d: %v", i, len(module.CodeSection)-1, err)
}
relocator.appendFunction(fctx, module, cm, i, fidx, body, relsPerFunc, be.SourceOffsetInfo())
}
} else {
// Compile with N worker goroutines.
// Collect compiled functions across workers in a slice,
// to be added to the relocator in-order and resolved serially at the end.
// This uses more memory and CPU (across cores), but can be significantly faster.
type compiledFunc struct {
fctx context.Context
fnum int
fidx wasm.Index
body []byte
relsPerFunc []backend.RelocationInfo
offsPerFunc []backend.SourceOffsetInfo
}
fref := frontend.FunctionIndexToFuncRef(fidx)
refToBinaryOffset[fref] = totalSize
compiledFuncs := make([]compiledFunc, len(module.CodeSection))
ctx, cancel := context.WithCancelCause(ctx)
defer cancel(nil)
// At this point, relocation offsets are relative to the start of the function body,
// so we adjust them to be relative to the start of the executable.
for _, r := range relsPerFunc {
r.Offset += int64(totalSize)
rels = append(rels, r)
var count atomic.Uint32
var wg sync.WaitGroup
wg.Add(workers)
for range workers {
go func() {
defer wg.Done()
// Creates new compiler instances which are reused for each function.
machine := newMachine()
ssaBuilder := ssa.NewBuilder()
be := backend.NewCompiler(ctx, machine, ssaBuilder)
fe := frontend.NewFrontendCompiler(module, ssaBuilder, &cm.offsets, ensureTermination, withListener, needSourceInfo)
for {
if err := ctx.Err(); err != nil {
// Compilation canceled!
return
}
i := int(count.Add(1)) - 1
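// Each worker atomically claims the next function index from the shared counter,
// so every function is compiled exactly once across the worker goroutines.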
if i >= len(module.CodeSection) {
return
}
if wazevoapi.DeterministicCompilationVerifierEnabled {
i = indexes[i]
}
fidx := wasm.Index(i + importedFns)
fctx := functionContext(ctx, module, i, fidx)
needListener := len(listeners) > i && listeners[i] != nil
body, relsPerFunc, err := e.compileLocalWasmFunction(fctx, module, wasm.Index(i), fe, ssaBuilder, be, needListener)
if err != nil {
cancel(fmt.Errorf("compile function %d/%d: %v", i, len(module.CodeSection)-1, err))
return
}
compiledFuncs[i] = compiledFunc{
fctx, i, fidx, body,
// These slices are internal to the backend compiler; since we buffer them instead
// of processing them immediately, we need to copy the memory.
slices.Clone(relsPerFunc),
slices.Clone(be.SourceOffsetInfo()),
}
}
}()
}
bodies[i] = body
totalSize += len(body)
if wazevoapi.PrintMachineCodeHexPerFunction {
fmt.Printf("[[[machine code for %s]]]\n%s\n\n", wazevoapi.GetCurrentFunctionName(ctx), hex.EncodeToString(body))
wg.Wait()
if err := context.Cause(ctx); err != nil {
return nil, err
}
if needCallTrampoline {
// If the total size exceeds the trampoline interval, we need to add a trampoline island.
if totalSize/trampolineInterval > len(callTrampolineIslandOffsets) {
callTrampolineIslandOffsets = append(callTrampolineIslandOffsets, totalSize)
totalSize += callTrampolineIslandSize
}
for i := range compiledFuncs {
fn := &compiledFuncs[i]
relocator.appendFunction(fn.fctx, module, cm, fn.fnum, fn.fidx, fn.body, fn.relsPerFunc, fn.offsPerFunc)
}
}
// Allocate executable memory and then copy the generated machine code.
executable, err := platform.MmapCodeSegment(totalSize)
executable, err := platform.MmapCodeSegment(relocator.totalSize)
if err != nil {
panic(err)
}
cm.executable = executable
for i, b := range bodies {
for i, b := range relocator.bodies {
offset := cm.functionOffsets[i]
copy(executable[offset:], b)
}
@@ -312,22 +370,108 @@ func (e *engine) compileModule(ctx context.Context, module *wasm.Module, listene
}
}
// Resolve relocations for local function calls.
if len(rels) > 0 {
machine.ResolveRelocations(refToBinaryOffset, importedFns, executable, rels, callTrampolineIslandOffsets)
}
relocator.resolveRelocations(machine, executable, importedFns)
if runtime.GOARCH == "arm64" {
// On arm64, we cannot give all of rwx at the same time, so we change it to exec.
if err = platform.MprotectRX(executable); err != nil {
return nil, err
}
if err = platform.MprotectRX(executable); err != nil {
return nil, err
}
cm.sharedFunctions = e.sharedFunctions
e.setFinalizer(cm.executables, executablesFinalizer)
return cm, nil
}
func functionContext(ctx context.Context, module *wasm.Module, fnum int, fidx wasm.Index) context.Context {
if wazevoapi.NeedFunctionNameInContext {
def := module.FunctionDefinition(fidx)
name := def.DebugName()
if len(def.ExportNames()) > 0 {
name = def.ExportNames()[0]
}
ctx = wazevoapi.SetCurrentFunctionName(ctx, fnum, fmt.Sprintf("[%d/%d]%s", fnum, len(module.CodeSection)-1, name))
}
return ctx
}
type engineRelocator struct {
bodies [][]byte
refToBinaryOffset []int
rels []backend.RelocationInfo
totalSize int // Total binary size of the executable.
trampolineInterval int
callTrampolineIslandSize int
callTrampolineIslandOffsets []int // Holds the offsets of trampoline islands.
}
func newEngineRelocator(
machine backend.Machine,
importedFns, localFns int,
) (r engineRelocator, err error) {
// Trampoline relocation related variables.
r.trampolineInterval, r.callTrampolineIslandSize, err = machine.CallTrampolineIslandInfo(localFns)
r.refToBinaryOffset = make([]int, importedFns+localFns)
r.bodies = make([][]byte, 0, localFns)
return
}
func (r *engineRelocator) resolveRelocations(machine backend.Machine, executable []byte, importedFns int) {
// Resolve relocations for local function calls.
if len(r.rels) > 0 {
machine.ResolveRelocations(r.refToBinaryOffset, importedFns, executable, r.rels, r.callTrampolineIslandOffsets)
}
}
func (r *engineRelocator) appendFunction(
ctx context.Context,
module *wasm.Module,
cm *compiledModule,
fnum int, fidx wasm.Index,
body []byte,
relsPerFunc []backend.RelocationInfo,
offsPerFunc []backend.SourceOffsetInfo,
) {
// Align 16-bytes boundary.
r.totalSize = (r.totalSize + 15) &^ 15
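// ((x + 15) &^ 15 rounds x up to the next multiple of 16, e.g. 17 becomes 32.)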
cm.functionOffsets[fnum] = r.totalSize
needSourceInfo := module.DWARFLines != nil
if needSourceInfo {
// At the beginning of the function, we add the offset of the function body so that
// we can resolve the source location of the call site of the before-listener call.
cm.sourceMap.executableOffsets = append(cm.sourceMap.executableOffsets, uintptr(r.totalSize))
cm.sourceMap.wasmBinaryOffsets = append(cm.sourceMap.wasmBinaryOffsets, module.CodeSection[fnum].BodyOffsetInCodeSection)
for _, info := range offsPerFunc {
cm.sourceMap.executableOffsets = append(cm.sourceMap.executableOffsets, uintptr(r.totalSize)+uintptr(info.ExecutableOffset))
cm.sourceMap.wasmBinaryOffsets = append(cm.sourceMap.wasmBinaryOffsets, uint64(info.SourceOffset))
}
}
fref := frontend.FunctionIndexToFuncRef(fidx)
r.refToBinaryOffset[fref] = r.totalSize
// At this point, relocation offsets are relative to the start of the function body,
// so we adjust them to be relative to the start of the executable.
r.rels = slices.Grow(r.rels, len(relsPerFunc))
for _, rel := range relsPerFunc {
rel.Offset += int64(r.totalSize)
r.rels = append(r.rels, rel)
}
r.totalSize += len(body)
r.bodies = append(r.bodies, body)
if wazevoapi.PrintMachineCodeHexPerFunction {
fmt.Printf("[[[machine code for %s]]]\n%s\n\n", wazevoapi.GetCurrentFunctionName(ctx), hex.EncodeToString(body))
}
if r.callTrampolineIslandSize > 0 {
// If the total size exceeds the trampoline interval, we need to add a trampoline island.
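// (Islands provide nearby call targets once the executable grows beyond the
// ISA's direct-branch reach, e.g. the +/-128MiB range of arm64 BL.)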
if r.totalSize/r.trampolineInterval > len(r.callTrampolineIslandOffsets) {
r.callTrampolineIslandOffsets = append(r.callTrampolineIslandOffsets, r.totalSize)
r.totalSize += r.callTrampolineIslandSize
}
}
}
func (e *engine) compileLocalWasmFunction(
ctx context.Context,
module *wasm.Module,
@@ -374,9 +518,7 @@ func (e *engine) compileLocalWasmFunction(
}
// TODO: optimize as zero copy.
copied := make([]byte, len(original))
copy(copied, original)
return copied, rels, nil
return slices.Clone(original), rels, nil
}
func (e *engine) compileHostModule(ctx context.Context, module *wasm.Module, listeners []experimental.FunctionListener) (*compiledModule, error) {
@@ -448,9 +590,7 @@ func (e *engine) compileHostModule(ctx context.Context, module *wasm.Module, lis
}
// TODO: optimize as zero copy.
copied := make([]byte, len(body))
copy(copied, body)
bodies[i] = copied
bodies[i] = slices.Clone(body)
totalSize += len(body)
}
@@ -475,11 +615,8 @@ func (e *engine) compileHostModule(ctx context.Context, module *wasm.Module, lis
wazevoapi.PerfMap.Flush(uintptr(unsafe.Pointer(&executable[0])), cm.functionOffsets)
}
if runtime.GOARCH == "arm64" {
// On arm64, we cannot give all of rwx at the same time, so we change it to exec.
if err = platform.MprotectRX(executable); err != nil {
return nil, err
}
if err = platform.MprotectRX(executable); err != nil {
return nil, err
}
e.setFinalizer(cm.executables, executablesFinalizer)
return cm, nil
@@ -507,12 +644,17 @@ func (e *engine) DeleteCompiledModule(m *wasm.Module) {
e.mux.Lock()
defer e.mux.Unlock()
cm, ok := e.compiledModules[m.ID]
if ok {
if len(cm.executable) > 0 {
e.deleteCompiledModuleFromSortedList(cm)
}
delete(e.compiledModules, m.ID)
if !ok {
return
}
cm.refCount--
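// The count starts at 1 when the module is added and is incremented on each
// later lookup, so the executable is only released by the last deleter.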
if cm.refCount > 0 {
return
}
if len(cm.executable) > 0 {
e.deleteCompiledModuleFromSortedList(cm.compiledModule)
}
delete(e.compiledModules, m.ID)
}
func (e *engine) addCompiledModuleToSortedList(cm *compiledModule) {
@@ -569,7 +711,7 @@ func (e *engine) NewModuleEngine(m *wasm.Module, mi *wasm.ModuleInstance) (wasm.
// Note: imported functions are resolved in moduleEngine.ResolveImportedFunction.
me.importedFunctions = make([]importedFunction, m.ImportFunctionCount)
compiled, ok := e.getCompiledModuleFromMemory(m)
compiled, ok := e.getCompiledModuleFromMemory(m, false)
if !ok {
return nil, errors.New("source module must be compiled before instantiation")
}
@@ -591,167 +733,123 @@ func (e *engine) NewModuleEngine(m *wasm.Module, mi *wasm.ModuleInstance) (wasm.
}
func (e *engine) compileSharedFunctions() {
e.sharedFunctions = &sharedFunctions{
listenerBeforeTrampolines: make(map[*wasm.FunctionType][]byte),
listenerAfterTrampolines: make(map[*wasm.FunctionType][]byte),
var sizes [8]int
var trampolines []byte
addTrampoline := func(i int, buf []byte) {
trampolines = append(trampolines, buf...)
align := 15 & -len(trampolines) // Align 16-bytes boundary.
trampolines = append(trampolines, make([]byte, align)...)
sizes[i] = len(buf) + align
}
e.be.Init()
{
src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeGrowMemory, &ssa.Signature{
addTrampoline(0,
e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeGrowMemory, &ssa.Signature{
Params: []ssa.Type{ssa.TypeI64 /* exec context */, ssa.TypeI32},
Results: []ssa.Type{ssa.TypeI32},
}, false)
e.sharedFunctions.memoryGrowExecutable = mmapExecutable(src)
if wazevoapi.PerfMapEnabled {
exe := e.sharedFunctions.memoryGrowExecutable
wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "memory_grow_trampoline")
}
}
}, false))
e.be.Init()
{
src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeTableGrow, &ssa.Signature{
addTrampoline(1,
e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeTableGrow, &ssa.Signature{
Params: []ssa.Type{ssa.TypeI64 /* exec context */, ssa.TypeI32 /* table index */, ssa.TypeI32 /* num */, ssa.TypeI64 /* ref */},
Results: []ssa.Type{ssa.TypeI32},
}, false)
e.sharedFunctions.tableGrowExecutable = mmapExecutable(src)
if wazevoapi.PerfMapEnabled {
exe := e.sharedFunctions.tableGrowExecutable
wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "table_grow_trampoline")
}
}
}, false))
e.be.Init()
{
src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeCheckModuleExitCode, &ssa.Signature{
addTrampoline(2,
e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeCheckModuleExitCode, &ssa.Signature{
Params: []ssa.Type{ssa.TypeI32 /* exec context */},
Results: []ssa.Type{ssa.TypeI32},
}, false)
e.sharedFunctions.checkModuleExitCode = mmapExecutable(src)
if wazevoapi.PerfMapEnabled {
exe := e.sharedFunctions.checkModuleExitCode
wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "check_module_exit_code_trampoline")
}
}
}, false))
e.be.Init()
{
src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeRefFunc, &ssa.Signature{
addTrampoline(3,
e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeRefFunc, &ssa.Signature{
Params: []ssa.Type{ssa.TypeI64 /* exec context */, ssa.TypeI32 /* function index */},
Results: []ssa.Type{ssa.TypeI64}, // returns the function reference.
}, false)
e.sharedFunctions.refFuncExecutable = mmapExecutable(src)
if wazevoapi.PerfMapEnabled {
exe := e.sharedFunctions.refFuncExecutable
wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "ref_func_trampoline")
}
}
}, false))
e.be.Init()
{
src := e.machine.CompileStackGrowCallSequence()
e.sharedFunctions.stackGrowExecutable = mmapExecutable(src)
if wazevoapi.PerfMapEnabled {
exe := e.sharedFunctions.stackGrowExecutable
wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "stack_grow_trampoline")
}
}
addTrampoline(4, e.machine.CompileStackGrowCallSequence())
e.be.Init()
{
src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeMemoryWait32, &ssa.Signature{
addTrampoline(5,
e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeMemoryWait32, &ssa.Signature{
// exec context, timeout, expected, addr
Params: []ssa.Type{ssa.TypeI64, ssa.TypeI64, ssa.TypeI32, ssa.TypeI64},
// Returns the status.
Results: []ssa.Type{ssa.TypeI32},
}, false)
e.sharedFunctions.memoryWait32Executable = mmapExecutable(src)
if wazevoapi.PerfMapEnabled {
exe := e.sharedFunctions.memoryWait32Executable
wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "memory_wait32_trampoline")
}
}
}, false))
e.be.Init()
{
src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeMemoryWait64, &ssa.Signature{
addTrampoline(6,
e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeMemoryWait64, &ssa.Signature{
// exec context, timeout, expected, addr
Params: []ssa.Type{ssa.TypeI64, ssa.TypeI64, ssa.TypeI64, ssa.TypeI64},
// Returns the status.
Results: []ssa.Type{ssa.TypeI32},
}, false)
e.sharedFunctions.memoryWait64Executable = mmapExecutable(src)
if wazevoapi.PerfMapEnabled {
exe := e.sharedFunctions.memoryWait64Executable
wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "memory_wait64_trampoline")
}
}
}, false))
e.be.Init()
{
src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeMemoryNotify, &ssa.Signature{
addTrampoline(7,
e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeMemoryNotify, &ssa.Signature{
// exec context, count, addr
Params: []ssa.Type{ssa.TypeI64, ssa.TypeI32, ssa.TypeI64},
// Returns the number notified.
Results: []ssa.Type{ssa.TypeI32},
}, false)
e.sharedFunctions.memoryNotifyExecutable = mmapExecutable(src)
if wazevoapi.PerfMapEnabled {
exe := e.sharedFunctions.memoryNotifyExecutable
wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "memory_notify_trampoline")
}
}, false))
fns := &sharedFunctions{
executable: mmapExecutable(trampolines),
listenerTrampolines: make(listenerTrampolines),
}
e.setFinalizer(fns, sharedFunctionsFinalizer)
offset := 0
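// The trampolines were appended back-to-back (16-byte aligned) above; walking
// the size table recovers each entry point inside the single mapping.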
fns.memoryGrowAddress = &fns.executable[offset]
offset += sizes[0]
fns.tableGrowAddress = &fns.executable[offset]
offset += sizes[1]
fns.checkModuleExitCodeAddress = &fns.executable[offset]
offset += sizes[2]
fns.refFuncAddress = &fns.executable[offset]
offset += sizes[3]
fns.stackGrowAddress = &fns.executable[offset]
offset += sizes[4]
fns.memoryWait32Address = &fns.executable[offset]
offset += sizes[5]
fns.memoryWait64Address = &fns.executable[offset]
offset += sizes[6]
fns.memoryNotifyAddress = &fns.executable[offset]
if wazevoapi.PerfMapEnabled {
wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(fns.memoryGrowAddress)), uint64(sizes[0]), "memory_grow_trampoline")
wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(fns.tableGrowAddress)), uint64(sizes[1]), "table_grow_trampoline")
wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(fns.checkModuleExitCodeAddress)), uint64(sizes[2]), "check_module_exit_code_trampoline")
wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(fns.refFuncAddress)), uint64(sizes[3]), "ref_func_trampoline")
wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(fns.stackGrowAddress)), uint64(sizes[4]), "stack_grow_trampoline")
wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(fns.memoryWait32Address)), uint64(sizes[5]), "memory_wait32_trampoline")
wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(fns.memoryWait64Address)), uint64(sizes[6]), "memory_wait64_trampoline")
wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(fns.memoryNotifyAddress)), uint64(sizes[7]), "memory_notify_trampoline")
}
e.setFinalizer(e.sharedFunctions, sharedFunctionsFinalizer)
e.sharedFunctions = fns
}
func sharedFunctionsFinalizer(sf *sharedFunctions) {
if err := platform.MunmapCodeSegment(sf.memoryGrowExecutable); err != nil {
if err := platform.MunmapCodeSegment(sf.executable); err != nil {
panic(err)
}
if err := platform.MunmapCodeSegment(sf.checkModuleExitCode); err != nil {
panic(err)
}
if err := platform.MunmapCodeSegment(sf.stackGrowExecutable); err != nil {
panic(err)
}
if err := platform.MunmapCodeSegment(sf.tableGrowExecutable); err != nil {
panic(err)
}
if err := platform.MunmapCodeSegment(sf.refFuncExecutable); err != nil {
panic(err)
}
if err := platform.MunmapCodeSegment(sf.memoryWait32Executable); err != nil {
panic(err)
}
if err := platform.MunmapCodeSegment(sf.memoryWait64Executable); err != nil {
panic(err)
}
if err := platform.MunmapCodeSegment(sf.memoryNotifyExecutable); err != nil {
panic(err)
}
for _, f := range sf.listenerBeforeTrampolines {
if err := platform.MunmapCodeSegment(f); err != nil {
panic(err)
}
}
for _, f := range sf.listenerAfterTrampolines {
if err := platform.MunmapCodeSegment(f); err != nil {
for _, f := range sf.listenerTrampolines {
if err := platform.MunmapCodeSegment(f.executable); err != nil {
panic(err)
}
}
sf.memoryGrowExecutable = nil
sf.checkModuleExitCode = nil
sf.stackGrowExecutable = nil
sf.tableGrowExecutable = nil
sf.refFuncExecutable = nil
sf.memoryWait32Executable = nil
sf.memoryWait64Executable = nil
sf.memoryNotifyExecutable = nil
sf.listenerBeforeTrampolines = nil
sf.listenerAfterTrampolines = nil
sf.executable = nil
sf.listenerTrampolines = nil
}
func executablesFinalizer(exec *executables) {
@@ -762,12 +860,13 @@ func executablesFinalizer(exec *executables) {
}
exec.executable = nil
for _, f := range exec.entryPreambles {
if err := platform.MunmapCodeSegment(f); err != nil {
if len(exec.entryPreambles) > 0 {
if err := platform.MunmapCodeSegment(exec.entryPreambles); err != nil {
panic(err)
}
}
exec.entryPreambles = nil
exec.entryPreamblesPtrs = nil
}
func mmapExecutable(src []byte) []byte {
@@ -778,11 +877,8 @@ func mmapExecutable(src []byte) []byte {
copy(executable, src)
if runtime.GOARCH == "arm64" {
// On arm64, we cannot give all of rwx at the same time, so we change it to exec.
if err = platform.MprotectRX(executable); err != nil {
panic(err)
}
if err = platform.MprotectRX(executable); err != nil {
panic(err)
}
return executable
}
@@ -804,25 +900,30 @@ func (e *engine) getListenerTrampolineForType(functionType *wasm.FunctionType) (
e.mux.Lock()
defer e.mux.Unlock()
beforeBuf, ok := e.sharedFunctions.listenerBeforeTrampolines[functionType]
afterBuf := e.sharedFunctions.listenerAfterTrampolines[functionType]
if ok {
return &beforeBuf[0], &afterBuf[0]
trampoline, ok := e.sharedFunctions.listenerTrampolines[functionType]
if !ok {
var executable []byte
beforeSig, afterSig := frontend.SignatureForListener(functionType)
e.be.Init()
buf := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeCallListenerBefore, beforeSig, false)
executable = append(executable, buf...)
align := 15 & -len(executable) // Align 16-bytes boundary.
executable = append(executable, make([]byte, align)...)
offset := len(executable)
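// offset marks where the "after" trampoline begins inside the shared mapping.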
e.be.Init()
buf = e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeCallListenerAfter, afterSig, false)
executable = append(executable, buf...)
trampoline.executable = mmapExecutable(executable)
trampoline.before = &trampoline.executable[0]
trampoline.after = &trampoline.executable[offset]
e.sharedFunctions.listenerTrampolines[functionType] = trampoline
}
beforeSig, afterSig := frontend.SignatureForListener(functionType)
e.be.Init()
buf := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeCallListenerBefore, beforeSig, false)
beforeBuf = mmapExecutable(buf)
e.be.Init()
buf = e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeCallListenerAfter, afterSig, false)
afterBuf = mmapExecutable(buf)
e.sharedFunctions.listenerBeforeTrampolines[functionType] = beforeBuf
e.sharedFunctions.listenerAfterTrampolines[functionType] = afterBuf
return &beforeBuf[0], &afterBuf[0]
return trampoline.before, trampoline.after
}
func (cm *compiledModule) getSourceOffset(pc uintptr) uint64 {


@@ -8,7 +8,6 @@ import (
"fmt"
"hash/crc32"
"io"
"runtime"
"unsafe"
"github.com/tetratelabs/wazero/experimental"
@@ -33,7 +32,7 @@ func fileCacheKey(m *wasm.Module) (ret filecache.Key) {
s.Write(magic)
// Write the CPU features so that we can cache the compiled module for the same CPU.
// This prevents incompatible CPU features from being used.
cpu := platform.CpuFeatures.Raw()
cpu := platform.CpuFeatures().Raw()
// Reuse the `ret` buffer to write the first 8 bytes of the CPU features so that we can avoid the allocation.
binary.LittleEndian.PutUint64(ret[:8], cpu)
s.Write(ret[:8])
@@ -51,7 +50,7 @@ func (e *engine) addCompiledModule(module *wasm.Module, cm *compiledModule) (err
}
func (e *engine) getCompiledModule(module *wasm.Module, listeners []experimental.FunctionListener, ensureTermination bool) (cm *compiledModule, ok bool, err error) {
cm, ok = e.getCompiledModuleFromMemory(module)
cm, ok = e.getCompiledModuleFromMemory(module, true)
if ok {
return
}
@@ -88,16 +87,23 @@ func (e *engine) getCompiledModule(module *wasm.Module, listeners []experimental
func (e *engine) addCompiledModuleToMemory(m *wasm.Module, cm *compiledModule) {
e.mux.Lock()
defer e.mux.Unlock()
e.compiledModules[m.ID] = cm
e.compiledModules[m.ID] = &compiledModuleWithCount{compiledModule: cm, refCount: 1}
if len(cm.executable) > 0 {
e.addCompiledModuleToSortedList(cm)
}
}
func (e *engine) getCompiledModuleFromMemory(module *wasm.Module) (cm *compiledModule, ok bool) {
e.mux.RLock()
defer e.mux.RUnlock()
cm, ok = e.compiledModules[module.ID]
func (e *engine) getCompiledModuleFromMemory(module *wasm.Module, increaseRefCount bool) (cm *compiledModule, ok bool) {
e.mux.Lock()
defer e.mux.Unlock()
cmWithCount, ok := e.compiledModules[module.ID]
if ok {
cm = cmWithCount.compiledModule
if increaseRefCount {
cmWithCount.refCount++
}
}
return
}
@@ -246,11 +252,8 @@ func deserializeCompiledModule(wazeroVersion string, reader io.ReadCloser) (cm *
return nil, false, fmt.Errorf("compilationcache: checksum mismatch (expected %d, got %d)", expected, checksum)
}
if runtime.GOARCH == "arm64" {
// On arm64, we cannot give all of rwx at the same time, so we change it to exec.
if err = platform.MprotectRX(executable); err != nil {
return nil, false, err
}
if err = platform.MprotectRX(executable); err != nil {
return nil, false, err
}
cm.executable = executable
}


@@ -469,7 +469,7 @@ func (c *Compiler) allocateVarLengthValues(_cap int, vs ...ssa.Value) ssa.Values
builder := c.ssaBuilder
pool := builder.VarLengthPool()
args := pool.Allocate(_cap)
args = args.Append(builder.VarLengthPool(), vs...)
args = args.Append(pool, vs...)
return args
}


@@ -123,8 +123,7 @@ func (c *Compiler) nPeekDup(n int) ssa.Values {
l := c.state()
tail := len(l.values)
args := c.allocateVarLengthValues(n)
args = args.Append(c.ssaBuilder.VarLengthPool(), l.values[tail-n:tail]...)
args := c.allocateVarLengthValues(n, l.values[tail-n:tail]...)
return args
}
@@ -665,19 +664,22 @@ func (c *Compiler) lowerCurrentOpcode() {
tableBaseAddr := c.loadTableBaseAddr(tableInstancePtr)
addr := builder.AllocateInstruction().AsIadd(tableBaseAddr, offsetInBytes).Insert(builder).Return()
// Prepare the loop and following block.
beforeLoop := builder.AllocateBasicBlock()
loopBlk := builder.AllocateBasicBlock()
loopVar := loopBlk.AddParam(builder, ssa.TypeI64)
followingBlk := builder.AllocateBasicBlock()
// Uses the same copy trick as memory.fill to fill the buffer faster, but in this case we copy 8 bytes at a time.
// Tables are rarely huge, so ignore the 8KB maximum.
// https://github.com/golang/go/blob/go1.24.0/src/slices/slices.go#L514-L517
//
// buf := memoryInst.Buffer[offset : offset+fillSize]
// buf[0:8] = value
// for i := 8; i < fillSize; i *= 2 { // begin with 8 bytes
// copy(buf[i:], buf[:i])
// }
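// e.g. a 40-byte fill writes 8 bytes, copies them into [8:16], copies 16 bytes
// into [16:32], and finally copies only the remaining 8 bytes into [32:40].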
// Prepare the loop and following block.
beforeLoop := builder.AllocateBasicBlock()
loopBlk := builder.AllocateBasicBlock()
loopVar := loopBlk.AddParam(builder, ssa.TypeI64)
followingBlk := builder.AllocateBasicBlock()
// Insert the jump to the beforeLoop block; if fillSize is zero, jump to the following block to skip the fill logic entirely.
zero := builder.AllocateInstruction().AsIconst64(0).Insert(builder).Return()
ifFillSizeZero := builder.AllocateInstruction().AsIcmp(fillSizeExt, zero, ssa.IntegerCmpCondEqual).
@@ -688,32 +690,24 @@ func (c *Compiler) lowerCurrentOpcode() {
// buf[0:8] = value
builder.SetCurrentBlock(beforeLoop)
builder.AllocateInstruction().AsStore(ssa.OpcodeStore, value, addr, 0).Insert(builder)
initValue := builder.AllocateInstruction().AsIconst64(8).Insert(builder).Return()
c.insertJumpToBlock(c.allocateVarLengthValues(1, initValue), loopBlk)
eight := builder.AllocateInstruction().AsIconst64(8).Insert(builder).Return()
c.insertJumpToBlock(c.allocateVarLengthValues(1, eight), loopBlk)
builder.SetCurrentBlock(loopBlk)
dstAddr := builder.AllocateInstruction().AsIadd(addr, loopVar).Insert(builder).Return()
// If loopVar*2 > fillSizeInBytes, then count must be fillSizeInBytes-loopVar.
var count ssa.Value
{
loopVarDoubled := builder.AllocateInstruction().AsIadd(loopVar, loopVar).Insert(builder).Return()
loopVarDoubledLargerThanFillSize := builder.
AllocateInstruction().AsIcmp(loopVarDoubled, fillSizeInBytes, ssa.IntegerCmpCondUnsignedGreaterThanOrEqual).
Insert(builder).Return()
diff := builder.AllocateInstruction().AsIsub(fillSizeInBytes, loopVar).Insert(builder).Return()
count = builder.AllocateInstruction().AsSelect(loopVarDoubledLargerThanFillSize, diff, loopVar).Insert(builder).Return()
}
newLoopVar := builder.AllocateInstruction().AsIadd(loopVar, loopVar).Insert(builder).Return()
newLoopVarLessThanFillSize := builder.AllocateInstruction().
AsIcmp(newLoopVar, fillSizeInBytes, ssa.IntegerCmpCondUnsignedLessThan).Insert(builder).Return()
// On the last iteration, count must be fillSizeInBytes-loopVar.
diff := builder.AllocateInstruction().AsIsub(fillSizeInBytes, loopVar).Insert(builder).Return()
count := builder.AllocateInstruction().AsSelect(newLoopVarLessThanFillSize, loopVar, diff).Insert(builder).Return()
c.callMemmove(dstAddr, addr, count)
shiftAmount := builder.AllocateInstruction().AsIconst64(1).Insert(builder).Return()
newLoopVar := builder.AllocateInstruction().AsIshl(loopVar, shiftAmount).Insert(builder).Return()
loopVarLessThanFillSize := builder.AllocateInstruction().
AsIcmp(newLoopVar, fillSizeInBytes, ssa.IntegerCmpCondUnsignedLessThan).Insert(builder).Return()
builder.AllocateInstruction().
AsBrnz(loopVarLessThanFillSize, c.allocateVarLengthValues(1, newLoopVar), loopBlk).
AsBrnz(newLoopVarLessThanFillSize, c.allocateVarLengthValues(1, newLoopVar), loopBlk).
Insert(builder)
c.insertJumpToBlock(ssa.ValuesNil, followingBlk)
@@ -741,11 +735,15 @@ func (c *Compiler) lowerCurrentOpcode() {
// Calculate the base address:
addr := builder.AllocateInstruction().AsIadd(c.getMemoryBaseValue(false), offset).Insert(builder).Return()
// Uses the copy trick for faster filling buffer: https://gist.github.com/taylorza/df2f89d5f9ab3ffd06865062a4cf015d
// Uses the copy trick to fill the buffer faster, with a maximum chunk size of 8KB.
// https://github.com/golang/go/blob/go1.24.0/src/bytes/bytes.go#L664-L673
//
// buf := memoryInst.Buffer[offset : offset+fillSize]
// buf[0] = value
// for i := 1; i < fillSize; i *= 2 {
// copy(buf[i:], buf[:i])
// for i := 1; i < fillSize; {
// chunk := ((i - 1) & 8191) + 1
// copy(buf[i:], buf[:chunk])
// i += chunk
// }
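// Along the loop's actual sequence (i = 1, 2, 4, ..., 8192, 16384, ...) the mask
// makes chunk = min(i, 8192), i.e. doubling copies capped at 8KiB per memmove.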
// Prepare the loop and following block.
@@ -764,32 +762,31 @@ func (c *Compiler) lowerCurrentOpcode() {
// buf[0] = value
builder.SetCurrentBlock(beforeLoop)
builder.AllocateInstruction().AsStore(ssa.OpcodeIstore8, value, addr, 0).Insert(builder)
initValue := builder.AllocateInstruction().AsIconst64(1).Insert(builder).Return()
c.insertJumpToBlock(c.allocateVarLengthValues(1, initValue), loopBlk)
one := builder.AllocateInstruction().AsIconst64(1).Insert(builder).Return()
c.insertJumpToBlock(c.allocateVarLengthValues(1, one), loopBlk)
builder.SetCurrentBlock(loopBlk)
dstAddr := builder.AllocateInstruction().AsIadd(addr, loopVar).Insert(builder).Return()
// If loopVar*2 > fillSizeExt, then count must be fillSizeExt-loopVar.
var count ssa.Value
{
loopVarDoubled := builder.AllocateInstruction().AsIadd(loopVar, loopVar).Insert(builder).Return()
loopVarDoubledLargerThanFillSize := builder.
AllocateInstruction().AsIcmp(loopVarDoubled, fillSize, ssa.IntegerCmpCondUnsignedGreaterThanOrEqual).
Insert(builder).Return()
diff := builder.AllocateInstruction().AsIsub(fillSize, loopVar).Insert(builder).Return()
count = builder.AllocateInstruction().AsSelect(loopVarDoubledLargerThanFillSize, diff, loopVar).Insert(builder).Return()
}
// chunk := ((i - 1) & 8191) + 1
mask := builder.AllocateInstruction().AsIconst64(8191).Insert(builder).Return()
tmp1 := builder.AllocateInstruction().AsIsub(loopVar, one).Insert(builder).Return()
tmp2 := builder.AllocateInstruction().AsBand(tmp1, mask).Insert(builder).Return()
chunk := builder.AllocateInstruction().AsIadd(tmp2, one).Insert(builder).Return()
// i += chunk
newLoopVar := builder.AllocateInstruction().AsIadd(loopVar, chunk).Insert(builder).Return()
newLoopVarLessThanFillSize := builder.AllocateInstruction().
AsIcmp(newLoopVar, fillSize, ssa.IntegerCmpCondUnsignedLessThan).Insert(builder).Return()
// count = min(chunk, fillSize-loopVar)
diff := builder.AllocateInstruction().AsIsub(fillSize, loopVar).Insert(builder).Return()
count := builder.AllocateInstruction().AsSelect(newLoopVarLessThanFillSize, chunk, diff).Insert(builder).Return()
c.callMemmove(dstAddr, addr, count)
shiftAmount := builder.AllocateInstruction().AsIconst64(1).Insert(builder).Return()
newLoopVar := builder.AllocateInstruction().AsIshl(loopVar, shiftAmount).Insert(builder).Return()
loopVarLessThanFillSize := builder.AllocateInstruction().
AsIcmp(newLoopVar, fillSize, ssa.IntegerCmpCondUnsignedLessThan).Insert(builder).Return()
builder.AllocateInstruction().
AsBrnz(loopVarLessThanFillSize, c.allocateVarLengthValues(1, newLoopVar), loopBlk).
AsBrnz(newLoopVarLessThanFillSize, c.allocateVarLengthValues(1, newLoopVar), loopBlk).
Insert(builder)
c.insertJumpToBlock(ssa.ValuesNil, followingBlk)
@@ -1173,7 +1170,7 @@ func (c *Compiler) lowerCurrentOpcode() {
ssa.TypeI64,
).Insert(builder).Return()
args := c.allocateVarLengthValues(1, c.execCtxPtrValue, pages)
args := c.allocateVarLengthValues(2, c.execCtxPtrValue, pages)
callGrowRet := builder.
AllocateInstruction().
AsCallIndirect(memoryGrowPtr, &c.memoryGrowSig, args).
@@ -1343,8 +1340,7 @@ func (c *Compiler) lowerCurrentOpcode() {
blockType: bt,
})
args := c.allocateVarLengthValues(originalLen)
args = args.Append(builder.VarLengthPool(), state.values[originalLen:]...)
args := c.allocateVarLengthValues(len(bt.Params), state.values[originalLen:]...)
// Insert the jump to the header of loop.
br := builder.AllocateInstruction()
@@ -1383,8 +1379,7 @@ func (c *Compiler) lowerCurrentOpcode() {
// multiple definitions (one in Then and another in Else blocks).
c.addBlockParamsFromWasmTypes(bt.Results, followingBlk)
args := c.allocateVarLengthValues(len(bt.Params))
args = args.Append(builder.VarLengthPool(), state.values[len(state.values)-len(bt.Params):]...)
args := c.allocateVarLengthValues(len(bt.Params), state.values[len(state.values)-len(bt.Params):]...)
// Insert the conditional jump to the Else block.
brz := builder.AllocateInstruction()
@@ -1568,11 +1563,7 @@ func (c *Compiler) lowerCurrentOpcode() {
c.callListenerAfter()
}
results := c.nPeekDup(c.results())
instr := builder.AllocateInstruction()
instr.AsReturn(results)
builder.InsertInstruction(instr)
c.lowerReturn(builder)
state.unreachable = true
case wasm.OpcodeUnreachable:
@@ -1597,66 +1588,7 @@ func (c *Compiler) lowerCurrentOpcode() {
if state.unreachable {
break
}
var typIndex wasm.Index
if fnIndex < c.m.ImportFunctionCount {
// Before transferring control to the callee, we have to store the current module's moduleContextPtr
// into execContext.callerModuleContextPtr in case the callee is a Go function.
c.storeCallerModuleContext()
var fi int
for i := range c.m.ImportSection {
imp := &c.m.ImportSection[i]
if imp.Type == wasm.ExternTypeFunc {
if fi == int(fnIndex) {
typIndex = imp.DescFunc
break
}
fi++
}
}
} else {
typIndex = c.m.FunctionSection[fnIndex-c.m.ImportFunctionCount]
}
typ := &c.m.TypeSection[typIndex]
argN := len(typ.Params)
tail := len(state.values) - argN
vs := state.values[tail:]
state.values = state.values[:tail]
args := c.allocateVarLengthValues(2+len(vs), c.execCtxPtrValue)
sig := c.signatures[typ]
call := builder.AllocateInstruction()
if fnIndex >= c.m.ImportFunctionCount {
args = args.Append(builder.VarLengthPool(), c.moduleCtxPtrValue) // In this case the callee module is the module itself.
args = args.Append(builder.VarLengthPool(), vs...)
call.AsCall(FunctionIndexToFuncRef(fnIndex), sig, args)
builder.InsertInstruction(call)
} else {
// In this case we have to read the address of the imported function from the module context.
moduleCtx := c.moduleCtxPtrValue
loadFuncPtr, loadModuleCtxPtr := builder.AllocateInstruction(), builder.AllocateInstruction()
funcPtrOffset, moduleCtxPtrOffset, _ := c.offset.ImportedFunctionOffset(fnIndex)
loadFuncPtr.AsLoad(moduleCtx, funcPtrOffset.U32(), ssa.TypeI64)
loadModuleCtxPtr.AsLoad(moduleCtx, moduleCtxPtrOffset.U32(), ssa.TypeI64)
builder.InsertInstruction(loadFuncPtr)
builder.InsertInstruction(loadModuleCtxPtr)
args = args.Append(builder.VarLengthPool(), loadModuleCtxPtr.Return())
args = args.Append(builder.VarLengthPool(), vs...)
call.AsCallIndirect(loadFuncPtr.Return(), sig, args)
builder.InsertInstruction(call)
}
first, rest := call.Returns()
if first.Valid() {
state.push(first)
}
for _, v := range rest {
state.push(v)
}
c.reloadAfterCall()
c.lowerCall(fnIndex)
case wasm.OpcodeDrop:
if state.unreachable {
@@ -3190,7 +3122,7 @@ func (c *Compiler) lowerCurrentOpcode() {
ssa.TypeI64,
).Insert(builder).Return()
args := c.allocateVarLengthValues(3, c.execCtxPtrValue, timeout, exp, addr)
args := c.allocateVarLengthValues(4, c.execCtxPtrValue, timeout, exp, addr)
memoryWaitRet := builder.AllocateInstruction().
AsCallIndirect(memoryWaitPtr, sig, args).
Insert(builder).Return()
@@ -3211,7 +3143,7 @@ func (c *Compiler) lowerCurrentOpcode() {
wazevoapi.ExecutionContextOffsetMemoryNotifyTrampolineAddress.U32(),
ssa.TypeI64,
).Insert(builder).Return()
args := c.allocateVarLengthValues(2, c.execCtxPtrValue, count, addr)
args := c.allocateVarLengthValues(3, c.execCtxPtrValue, count, addr)
memoryNotifyRet := builder.AllocateInstruction().
AsCallIndirect(memoryNotifyPtr, &c.memoryNotifySig, args).
Insert(builder).Return()
@@ -3460,6 +3392,25 @@ func (c *Compiler) lowerCurrentOpcode() {
elementAddr := c.lowerAccessTableWithBoundsCheck(tableIndex, targetOffsetInTable)
loaded := builder.AllocateInstruction().AsLoad(elementAddr, 0, ssa.TypeI64).Insert(builder).Return()
state.push(loaded)
case wasm.OpcodeTailCallReturnCallIndirect:
typeIndex := c.readI32u()
tableIndex := c.readI32u()
if state.unreachable {
break
}
c.lowerTailCallReturnCallIndirect(typeIndex, tableIndex)
state.unreachable = true
case wasm.OpcodeTailCallReturnCall:
fnIndex := c.readI32u()
if state.unreachable {
break
}
c.lowerTailCallReturnCall(fnIndex)
state.unreachable = true
default:
panic("TODO: unsupported in wazevo yet: " + wasm.InstructionName(op))
}
@@ -3473,6 +3424,14 @@ func (c *Compiler) lowerCurrentOpcode() {
c.loweringState.pc++
}
func (c *Compiler) lowerReturn(builder ssa.Builder) {
results := c.nPeekDup(c.results())
instr := builder.AllocateInstruction()
instr.AsReturn(results)
builder.InsertInstruction(instr)
}
func (c *Compiler) lowerExtMul(v1, v2 ssa.Value, from, to ssa.VecLane, signed, low bool) ssa.Value {
// TODO: The sequence `Widen; Widen; VIMul` can be substituted for a single instruction on some ISAs.
builder := c.ssaBuilder
@@ -3533,7 +3492,83 @@ func (c *Compiler) lowerAccessTableWithBoundsCheck(tableIndex uint32, elementOff
return calcElementAddressInTable.Return()
}
func (c *Compiler) lowerCallIndirect(typeIndex, tableIndex uint32) {
func (c *Compiler) prepareCall(fnIndex uint32) (isIndirect bool, sig *ssa.Signature, args ssa.Values, funcRefOrPtrValue uint64) {
builder := c.ssaBuilder
state := c.state()
var typIndex wasm.Index
if fnIndex < c.m.ImportFunctionCount {
// Before transferring control to the callee, we have to store the current module's moduleContextPtr
// into execContext.callerModuleContextPtr in case the callee is a Go function.
c.storeCallerModuleContext()
var fi int
for i := range c.m.ImportSection {
imp := &c.m.ImportSection[i]
if imp.Type == wasm.ExternTypeFunc {
if fi == int(fnIndex) {
typIndex = imp.DescFunc
break
}
fi++
}
}
} else {
typIndex = c.m.FunctionSection[fnIndex-c.m.ImportFunctionCount]
}
typ := &c.m.TypeSection[typIndex]
argN := len(typ.Params)
tail := len(state.values) - argN
vs := state.values[tail:]
state.values = state.values[:tail]
args = c.allocateVarLengthValues(2+len(vs), c.execCtxPtrValue)
sig = c.signatures[typ]
if fnIndex >= c.m.ImportFunctionCount {
args = args.Append(builder.VarLengthPool(), c.moduleCtxPtrValue) // In this case the callee module is the module itself.
args = args.Append(builder.VarLengthPool(), vs...)
return false, sig, args, uint64(FunctionIndexToFuncRef(fnIndex))
} else {
// In this case we have to read the address of the imported function from the module context.
moduleCtx := c.moduleCtxPtrValue
loadFuncPtr, loadModuleCtxPtr := builder.AllocateInstruction(), builder.AllocateInstruction()
funcPtrOffset, moduleCtxPtrOffset, _ := c.offset.ImportedFunctionOffset(fnIndex)
loadFuncPtr.AsLoad(moduleCtx, funcPtrOffset.U32(), ssa.TypeI64)
loadModuleCtxPtr.AsLoad(moduleCtx, moduleCtxPtrOffset.U32(), ssa.TypeI64)
builder.InsertInstruction(loadFuncPtr)
builder.InsertInstruction(loadModuleCtxPtr)
args = args.Append(builder.VarLengthPool(), loadModuleCtxPtr.Return())
args = args.Append(builder.VarLengthPool(), vs...)
return true, sig, args, uint64(loadFuncPtr.Return())
}
}
func (c *Compiler) lowerCall(fnIndex uint32) {
builder := c.ssaBuilder
state := c.state()
isIndirect, sig, args, funcRefOrPtrValue := c.prepareCall(fnIndex)
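// funcRefOrPtrValue holds either the callee's ssa.FuncRef (direct call) or the
// ssa.Value of the loaded function pointer (imported callee), per isIndirect.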
call := builder.AllocateInstruction()
if isIndirect {
call.AsCallIndirect(ssa.Value(funcRefOrPtrValue), sig, args)
} else {
call.AsCall(ssa.FuncRef(funcRefOrPtrValue), sig, args)
}
builder.InsertInstruction(call)
first, rest := call.Returns()
if first.Valid() {
state.push(first)
}
for _, v := range rest {
state.push(v)
}
c.reloadAfterCall()
}
func (c *Compiler) prepareCallIndirect(typeIndex, tableIndex uint32) (ssa.Value, *wasm.FunctionType, ssa.Values) {
builder := c.ssaBuilder
state := c.state()
@@ -3601,6 +3636,14 @@ func (c *Compiler) lowerCallIndirect(typeIndex, tableIndex uint32) {
// into execContext.callerModuleContextPtr in case when the callee is a Go function.
c.storeCallerModuleContext()
return executablePtr, typ, args
}
func (c *Compiler) lowerCallIndirect(typeIndex, tableIndex uint32) {
builder := c.ssaBuilder
state := c.state()
executablePtr, typ, args := c.prepareCallIndirect(typeIndex, tableIndex)
call := builder.AllocateInstruction()
call.AsCallIndirect(executablePtr, c.signatures[typ], args)
builder.InsertInstruction(call)
@@ -3616,6 +3659,62 @@ func (c *Compiler) lowerCallIndirect(typeIndex, tableIndex uint32) {
c.reloadAfterCall()
}
func (c *Compiler) lowerTailCallReturnCall(fnIndex uint32) {
isIndirect, sig, args, funcRefOrPtrValue := c.prepareCall(fnIndex)
builder := c.ssaBuilder
state := c.state()
call := builder.AllocateInstruction()
if isIndirect {
call.AsTailCallReturnCallIndirect(ssa.Value(funcRefOrPtrValue), sig, args)
} else {
call.AsTailCallReturnCall(ssa.FuncRef(funcRefOrPtrValue), sig, args)
}
builder.InsertInstruction(call)
// In a proper tail call, the following code is unreachable since execution
// transfers to the callee. However, sometimes the backend might need to fall back to
// a regular call, so we include return handling and let the backend delete it
// when redundant.
// For details, see internal/engine/RATIONALE.md
first, rest := call.Returns()
if first.Valid() {
state.push(first)
}
for _, v := range rest {
state.push(v)
}
c.reloadAfterCall()
c.lowerReturn(builder)
}
func (c *Compiler) lowerTailCallReturnCallIndirect(typeIndex, tableIndex uint32) {
builder := c.ssaBuilder
state := c.state()
executablePtr, typ, args := c.prepareCallIndirect(typeIndex, tableIndex)
call := builder.AllocateInstruction()
call.AsTailCallReturnCallIndirect(executablePtr, c.signatures[typ], args)
builder.InsertInstruction(call)
// In a proper tail call, the following code is unreachable since execution
// transfers to the callee. However, sometimes the backend might need to fall back to
// a regular call, so we include return handling and let the backend delete it
// when redundant.
// For details, see internal/engine/RATIONALE.md
first, rest := call.Returns()
if first.Valid() {
state.push(first)
}
for _, v := range rest {
state.push(v)
}
c.reloadAfterCall()
c.lowerReturn(builder)
}
// memOpSetup inserts the bounds check and calculates the address of the memory operation (loads/stores).
func (c *Compiler) memOpSetup(baseAddr ssa.Value, constOffset, operationSizeInBytes uint64) (address ssa.Value) {
address = ssa.ValueInvalid


@@ -174,20 +174,21 @@ func (m *moduleEngine) NewFunction(index wasm.Index) api.Function {
indexInModule: index,
executable: &p.executable[offset],
parent: m,
preambleExecutable: &m.parent.entryPreambles[typIndex][0],
preambleExecutable: p.entryPreamblesPtrs[typIndex],
sizeOfParamResultSlice: sizeOfParamResultSlice,
requiredParams: typ.ParamNumInUint64,
numberOfResults: typ.ResultNumInUint64,
}
ce.execCtx.memoryGrowTrampolineAddress = &m.parent.sharedFunctions.memoryGrowExecutable[0]
ce.execCtx.stackGrowCallTrampolineAddress = &m.parent.sharedFunctions.stackGrowExecutable[0]
ce.execCtx.checkModuleExitCodeTrampolineAddress = &m.parent.sharedFunctions.checkModuleExitCode[0]
ce.execCtx.tableGrowTrampolineAddress = &m.parent.sharedFunctions.tableGrowExecutable[0]
ce.execCtx.refFuncTrampolineAddress = &m.parent.sharedFunctions.refFuncExecutable[0]
ce.execCtx.memoryWait32TrampolineAddress = &m.parent.sharedFunctions.memoryWait32Executable[0]
ce.execCtx.memoryWait64TrampolineAddress = &m.parent.sharedFunctions.memoryWait64Executable[0]
ce.execCtx.memoryNotifyTrampolineAddress = &m.parent.sharedFunctions.memoryNotifyExecutable[0]
sharedFunctions := p.sharedFunctions
ce.execCtx.memoryGrowTrampolineAddress = sharedFunctions.memoryGrowAddress
ce.execCtx.stackGrowCallTrampolineAddress = sharedFunctions.stackGrowAddress
ce.execCtx.checkModuleExitCodeTrampolineAddress = sharedFunctions.checkModuleExitCodeAddress
ce.execCtx.tableGrowTrampolineAddress = sharedFunctions.tableGrowAddress
ce.execCtx.refFuncTrampolineAddress = sharedFunctions.refFuncAddress
ce.execCtx.memoryWait32TrampolineAddress = sharedFunctions.memoryWait32Address
ce.execCtx.memoryWait64TrampolineAddress = sharedFunctions.memoryWait64Address
ce.execCtx.memoryNotifyTrampolineAddress = sharedFunctions.memoryNotifyAddress
ce.execCtx.memmoveAddress = memmovPtr
ce.init()
return ce


@@ -633,6 +633,14 @@ const (
// OpcodeFence is a memory fence operation.
OpcodeFence
// OpcodeTailCallReturnCall is the equivalent of OpcodeCall (a "near" call)
// for tail calls. Semantically, it combines Call + Return into a single operation.
OpcodeTailCallReturnCall
// OpcodeTailCallReturnCallIndirect is the equivalent of OpcodeCallIndirect (a call to a function address)
// for tail calls. Semantically, it combines CallIndirect + Return into a single operation.
OpcodeTailCallReturnCallIndirect
// opcodeEnd marks the end of the opcode list.
opcodeEnd
)
@@ -679,12 +687,44 @@ func (op AtomicRmwOp) String() string {
type returnTypesFn func(b *builder, instr *Instruction) (t1 Type, ts []Type)
var (
returnTypesFnNoReturns returnTypesFn = func(b *builder, instr *Instruction) (t1 Type, ts []Type) { return typeInvalid, nil }
returnTypesFnSingle = func(b *builder, instr *Instruction) (t1 Type, ts []Type) { return instr.typ, nil }
returnTypesFnI32 = func(b *builder, instr *Instruction) (t1 Type, ts []Type) { return TypeI32, nil }
returnTypesFnF32 = func(b *builder, instr *Instruction) (t1 Type, ts []Type) { return TypeF32, nil }
returnTypesFnF64 = func(b *builder, instr *Instruction) (t1 Type, ts []Type) { return TypeF64, nil }
returnTypesFnV128 = func(b *builder, instr *Instruction) (t1 Type, ts []Type) { return TypeV128, nil }
returnTypesFnNoReturns returnTypesFn = func(b *builder, instr *Instruction) (t1 Type, ts []Type) { return typeInvalid, nil }
returnTypesFnSingle = func(b *builder, instr *Instruction) (t1 Type, ts []Type) { return instr.typ, nil }
returnTypesFnI32 = func(b *builder, instr *Instruction) (t1 Type, ts []Type) { return TypeI32, nil }
returnTypesFnF32 = func(b *builder, instr *Instruction) (t1 Type, ts []Type) { return TypeF32, nil }
returnTypesFnF64 = func(b *builder, instr *Instruction) (t1 Type, ts []Type) { return TypeF64, nil }
returnTypesFnV128 = func(b *builder, instr *Instruction) (t1 Type, ts []Type) { return TypeV128, nil }
returnTypesFnCallIndirect = func(b *builder, instr *Instruction) (t1 Type, ts []Type) {
sigID := SignatureID(instr.u1)
sig, ok := b.signatures[sigID]
if !ok {
panic("BUG")
}
switch len(sig.Results) {
case 0:
t1 = typeInvalid
case 1:
t1 = sig.Results[0]
default:
t1, ts = sig.Results[0], sig.Results[1:]
}
return
}
returnTypesFnCall = func(b *builder, instr *Instruction) (t1 Type, ts []Type) {
sigID := SignatureID(instr.u2)
sig, ok := b.signatures[sigID]
if !ok {
panic("BUG")
}
switch len(sig.Results) {
case 0:
t1 = typeInvalid
case 1:
t1 = sig.Results[0]
default:
t1, ts = sig.Results[0], sig.Results[1:]
}
return
}
)
// sideEffect provides the info to determine if an instruction has side effects which
@@ -846,6 +886,8 @@ var instructionSideEffects = [opcodeEnd]sideEffect{
OpcodeAtomicStore: sideEffectStrict,
OpcodeAtomicCas: sideEffectStrict,
OpcodeFence: sideEffectStrict,
OpcodeTailCallReturnCall: sideEffectStrict,
OpcodeTailCallReturnCallIndirect: sideEffectStrict,
OpcodeWideningPairwiseDotProductS: sideEffectNone,
}
@@ -860,105 +902,75 @@ func (i *Instruction) sideEffect() sideEffect {
// instructionReturnTypes provides the function to determine the return types of an instruction.
var instructionReturnTypes = [opcodeEnd]returnTypesFn{
OpcodeExtIaddPairwise: returnTypesFnV128,
OpcodeVbor: returnTypesFnV128,
OpcodeVbxor: returnTypesFnV128,
OpcodeVband: returnTypesFnV128,
OpcodeVbnot: returnTypesFnV128,
OpcodeVbandnot: returnTypesFnV128,
OpcodeVbitselect: returnTypesFnV128,
OpcodeVanyTrue: returnTypesFnI32,
OpcodeVallTrue: returnTypesFnI32,
OpcodeVhighBits: returnTypesFnI32,
OpcodeVIadd: returnTypesFnV128,
OpcodeVSaddSat: returnTypesFnV128,
OpcodeVUaddSat: returnTypesFnV128,
OpcodeVIsub: returnTypesFnV128,
OpcodeVSsubSat: returnTypesFnV128,
OpcodeVUsubSat: returnTypesFnV128,
OpcodeVIcmp: returnTypesFnV128,
OpcodeVImin: returnTypesFnV128,
OpcodeVUmin: returnTypesFnV128,
OpcodeVImax: returnTypesFnV128,
OpcodeVUmax: returnTypesFnV128,
OpcodeVImul: returnTypesFnV128,
OpcodeVAvgRound: returnTypesFnV128,
OpcodeVIabs: returnTypesFnV128,
OpcodeVIneg: returnTypesFnV128,
OpcodeVIpopcnt: returnTypesFnV128,
OpcodeVIshl: returnTypesFnV128,
OpcodeVSshr: returnTypesFnV128,
OpcodeVUshr: returnTypesFnV128,
OpcodeExtractlane: returnTypesFnSingle,
OpcodeInsertlane: returnTypesFnV128,
OpcodeBand: returnTypesFnSingle,
OpcodeFcopysign: returnTypesFnSingle,
OpcodeBitcast: returnTypesFnSingle,
OpcodeBor: returnTypesFnSingle,
OpcodeBxor: returnTypesFnSingle,
OpcodeRotl: returnTypesFnSingle,
OpcodeRotr: returnTypesFnSingle,
OpcodeIshl: returnTypesFnSingle,
OpcodeSshr: returnTypesFnSingle,
OpcodeSdiv: returnTypesFnSingle,
OpcodeSrem: returnTypesFnSingle,
OpcodeUdiv: returnTypesFnSingle,
OpcodeUrem: returnTypesFnSingle,
OpcodeUshr: returnTypesFnSingle,
OpcodeJump: returnTypesFnNoReturns,
OpcodeUndefined: returnTypesFnNoReturns,
OpcodeIconst: returnTypesFnSingle,
OpcodeSelect: returnTypesFnSingle,
OpcodeSExtend: returnTypesFnSingle,
OpcodeUExtend: returnTypesFnSingle,
OpcodeSwidenLow: returnTypesFnV128,
OpcodeUwidenLow: returnTypesFnV128,
OpcodeSwidenHigh: returnTypesFnV128,
OpcodeUwidenHigh: returnTypesFnV128,
OpcodeSnarrow: returnTypesFnV128,
OpcodeUnarrow: returnTypesFnV128,
OpcodeSwizzle: returnTypesFnSingle,
OpcodeShuffle: returnTypesFnV128,
OpcodeSplat: returnTypesFnV128,
OpcodeIreduce: returnTypesFnSingle,
OpcodeFabs: returnTypesFnSingle,
OpcodeSqrt: returnTypesFnSingle,
OpcodeCeil: returnTypesFnSingle,
OpcodeFloor: returnTypesFnSingle,
OpcodeTrunc: returnTypesFnSingle,
OpcodeNearest: returnTypesFnSingle,
OpcodeCallIndirect: func(b *builder, instr *Instruction) (t1 Type, ts []Type) {
sigID := SignatureID(instr.u1)
sig, ok := b.signatures[sigID]
if !ok {
panic("BUG")
}
switch len(sig.Results) {
case 0:
t1 = typeInvalid
case 1:
t1 = sig.Results[0]
default:
t1, ts = sig.Results[0], sig.Results[1:]
}
return
},
OpcodeCall: func(b *builder, instr *Instruction) (t1 Type, ts []Type) {
sigID := SignatureID(instr.u2)
sig, ok := b.signatures[sigID]
if !ok {
panic("BUG")
}
switch len(sig.Results) {
case 0:
t1 = typeInvalid
case 1:
t1 = sig.Results[0]
default:
t1, ts = sig.Results[0], sig.Results[1:]
}
return
},
OpcodeExtIaddPairwise: returnTypesFnV128,
OpcodeVbor: returnTypesFnV128,
OpcodeVbxor: returnTypesFnV128,
OpcodeVband: returnTypesFnV128,
OpcodeVbnot: returnTypesFnV128,
OpcodeVbandnot: returnTypesFnV128,
OpcodeVbitselect: returnTypesFnV128,
OpcodeVanyTrue: returnTypesFnI32,
OpcodeVallTrue: returnTypesFnI32,
OpcodeVhighBits: returnTypesFnI32,
OpcodeVIadd: returnTypesFnV128,
OpcodeVSaddSat: returnTypesFnV128,
OpcodeVUaddSat: returnTypesFnV128,
OpcodeVIsub: returnTypesFnV128,
OpcodeVSsubSat: returnTypesFnV128,
OpcodeVUsubSat: returnTypesFnV128,
OpcodeVIcmp: returnTypesFnV128,
OpcodeVImin: returnTypesFnV128,
OpcodeVUmin: returnTypesFnV128,
OpcodeVImax: returnTypesFnV128,
OpcodeVUmax: returnTypesFnV128,
OpcodeVImul: returnTypesFnV128,
OpcodeVAvgRound: returnTypesFnV128,
OpcodeVIabs: returnTypesFnV128,
OpcodeVIneg: returnTypesFnV128,
OpcodeVIpopcnt: returnTypesFnV128,
OpcodeVIshl: returnTypesFnV128,
OpcodeVSshr: returnTypesFnV128,
OpcodeVUshr: returnTypesFnV128,
OpcodeExtractlane: returnTypesFnSingle,
OpcodeInsertlane: returnTypesFnV128,
OpcodeBand: returnTypesFnSingle,
OpcodeFcopysign: returnTypesFnSingle,
OpcodeBitcast: returnTypesFnSingle,
OpcodeBor: returnTypesFnSingle,
OpcodeBxor: returnTypesFnSingle,
OpcodeRotl: returnTypesFnSingle,
OpcodeRotr: returnTypesFnSingle,
OpcodeIshl: returnTypesFnSingle,
OpcodeSshr: returnTypesFnSingle,
OpcodeSdiv: returnTypesFnSingle,
OpcodeSrem: returnTypesFnSingle,
OpcodeUdiv: returnTypesFnSingle,
OpcodeUrem: returnTypesFnSingle,
OpcodeUshr: returnTypesFnSingle,
OpcodeJump: returnTypesFnNoReturns,
OpcodeUndefined: returnTypesFnNoReturns,
OpcodeIconst: returnTypesFnSingle,
OpcodeSelect: returnTypesFnSingle,
OpcodeSExtend: returnTypesFnSingle,
OpcodeUExtend: returnTypesFnSingle,
OpcodeSwidenLow: returnTypesFnV128,
OpcodeUwidenLow: returnTypesFnV128,
OpcodeSwidenHigh: returnTypesFnV128,
OpcodeUwidenHigh: returnTypesFnV128,
OpcodeSnarrow: returnTypesFnV128,
OpcodeUnarrow: returnTypesFnV128,
OpcodeSwizzle: returnTypesFnSingle,
OpcodeShuffle: returnTypesFnV128,
OpcodeSplat: returnTypesFnV128,
OpcodeIreduce: returnTypesFnSingle,
OpcodeFabs: returnTypesFnSingle,
OpcodeSqrt: returnTypesFnSingle,
OpcodeCeil: returnTypesFnSingle,
OpcodeFloor: returnTypesFnSingle,
OpcodeTrunc: returnTypesFnSingle,
OpcodeNearest: returnTypesFnSingle,
OpcodeCallIndirect: returnTypesFnCallIndirect,
OpcodeCall: returnTypesFnCall,
OpcodeLoad: returnTypesFnSingle,
OpcodeVZeroExtLoad: returnTypesFnV128,
OpcodeLoadSplat: returnTypesFnV128,
@@ -1032,6 +1044,8 @@ var instructionReturnTypes = [opcodeEnd]returnTypesFn{
OpcodeAtomicStore: returnTypesFnNoReturns,
OpcodeAtomicCas: returnTypesFnSingle,
OpcodeFence: returnTypesFnNoReturns,
OpcodeTailCallReturnCallIndirect: returnTypesFnCallIndirect,
OpcodeTailCallReturnCall: returnTypesFnCall,
OpcodeWideningPairwiseDotProductS: returnTypesFnV128,
}
@@ -2038,6 +2052,25 @@ func (i *Instruction) AtomicTargetSize() (size uint64) {
return i.u1
}
// AsTailCallReturnCall initializes this instruction as a call instruction with OpcodeTailCallReturnCall.
func (i *Instruction) AsTailCallReturnCall(ref FuncRef, sig *Signature, args Values) {
i.opcode = OpcodeTailCallReturnCall
i.u1 = uint64(ref)
i.vs = args
i.u2 = uint64(sig.ID)
sig.used = true
}
// AsTailCallReturnCallIndirect initializes this instruction as a call-indirect instruction with OpcodeTailCallReturnCallIndirect.
func (i *Instruction) AsTailCallReturnCallIndirect(funcPtr Value, sig *Signature, args Values) *Instruction {
i.opcode = OpcodeTailCallReturnCallIndirect
i.vs = args
i.v = funcPtr
i.u1 = uint64(sig.ID)
sig.used = true
return i
}
// ReturnVals returns the return values of OpcodeReturn.
func (i *Instruction) ReturnVals() []Value {
return i.vs.View()
@@ -2166,7 +2199,7 @@ func (i *Instruction) AsCall(ref FuncRef, sig *Signature, args Values) {
// CallData returns the call data for this instruction necessary for backends.
func (i *Instruction) CallData() (ref FuncRef, sigID SignatureID, args []Value) {
if i.opcode != OpcodeCall {
if i.opcode != OpcodeCall && i.opcode != OpcodeTailCallReturnCall {
panic("BUG: CallData only available for OpcodeCall")
}
ref = FuncRef(i.u1)
@@ -2195,8 +2228,8 @@ func (i *Instruction) AsCallGoRuntimeMemmove(funcPtr Value, sig *Signature, args
// CallIndirectData returns the call indirect data for this instruction necessary for backends.
func (i *Instruction) CallIndirectData() (funcPtr Value, sigID SignatureID, args []Value, isGoMemmove bool) {
if i.opcode != OpcodeCallIndirect {
panic("BUG: CallIndirectData only available for OpcodeCallIndirect")
if i.opcode != OpcodeCallIndirect && i.opcode != OpcodeTailCallReturnCallIndirect {
panic("BUG: CallIndirectData only available for OpcodeCallIndirect and OpcodeTailCallReturnCallIndirect")
}
funcPtr = i.v
sigID = SignatureID(i.u1)
@@ -2620,6 +2653,17 @@ func (i *Instruction) Format(b Builder) string {
instSuffix = fmt.Sprintf("_%d, %s, %s, %s", 8*i.u1, i.v.Format(b), i.v2.Format(b), i.v3.Format(b))
case OpcodeFence:
instSuffix = fmt.Sprintf(" %d", i.u1)
case OpcodeTailCallReturnCall, OpcodeTailCallReturnCallIndirect:
view := i.vs.View()
vs := make([]string, len(view))
for idx := range vs {
vs[idx] = view[idx].Format(b)
}
if i.opcode == OpcodeTailCallReturnCallIndirect {
instSuffix = fmt.Sprintf(" %s:%s, %s", i.v.Format(b), SignatureID(i.u1), strings.Join(vs, ", "))
} else {
instSuffix = fmt.Sprintf(" %s:%s, %s", FuncRef(i.u1), SignatureID(i.u2), strings.Join(vs, ", "))
}
case OpcodeWideningPairwiseDotProductS:
instSuffix = fmt.Sprintf(" %s, %s", i.v.Format(b), i.v2.Format(b))
default:
@@ -2879,6 +2923,10 @@ func (o Opcode) String() (ret string) {
return "AtomicStore"
case OpcodeFence:
return "Fence"
case OpcodeTailCallReturnCall:
return "ReturnCall"
case OpcodeTailCallReturnCallIndirect:
return "ReturnCallIndirect"
case OpcodeVbor:
return "Vbor"
case OpcodeVbxor:

View File

@@ -6,6 +6,7 @@ import (
"fmt"
"math/rand"
"os"
"sync"
"time"
)
@@ -91,7 +92,7 @@ type (
initialCompilationDone bool
maybeRandomizedIndexes []int
r *rand.Rand
values map[string]string
values sync.Map
}
verifierStateContextKey struct{}
currentFunctionNameKey struct{}
@@ -106,31 +107,24 @@ func NewDeterministicCompilationVerifierContext(ctx context.Context, localFuncti
}
r := rand.New(rand.NewSource(time.Now().UnixNano()))
return context.WithValue(ctx, verifierStateContextKey{}, &verifierState{
r: r, maybeRandomizedIndexes: maybeRandomizedIndexes, values: map[string]string{},
r: r, maybeRandomizedIndexes: maybeRandomizedIndexes, values: sync.Map{},
})
}
// DeterministicCompilationVerifierRandomizeIndexes randomizes the indexes for the deterministic compilation verifier.
// To get the randomized index, use DeterministicCompilationVerifierGetRandomizedLocalFunctionIndex.
func DeterministicCompilationVerifierRandomizeIndexes(ctx context.Context) {
// Returns a slice that maps an index to the randomized index.
func DeterministicCompilationVerifierRandomizeIndexes(ctx context.Context) []int {
state := ctx.Value(verifierStateContextKey{}).(*verifierState)
if !state.initialCompilationDone {
// If this is the first attempt, use the indexes in their original order.
state.initialCompilationDone = true
return
return state.maybeRandomizedIndexes
}
r := state.r
r.Shuffle(len(state.maybeRandomizedIndexes), func(i, j int) {
state.maybeRandomizedIndexes[i], state.maybeRandomizedIndexes[j] = state.maybeRandomizedIndexes[j], state.maybeRandomizedIndexes[i]
})
}
// DeterministicCompilationVerifierGetRandomizedLocalFunctionIndex returns the randomized index for the given `index`
// which is assigned by DeterministicCompilationVerifierRandomizeIndexes.
func DeterministicCompilationVerifierGetRandomizedLocalFunctionIndex(ctx context.Context, index int) int {
state := ctx.Value(verifierStateContextKey{}).(*verifierState)
ret := state.maybeRandomizedIndexes[index]
return ret
return state.maybeRandomizedIndexes
}
// VerifyOrSetDeterministicCompilationContextValue verifies that the `newValue` is the same as the previous value for the given `scope`
@@ -141,9 +135,8 @@ func VerifyOrSetDeterministicCompilationContextValue(ctx context.Context, scope
fn := ctx.Value(currentFunctionNameKey{}).(string)
key := fn + ": " + scope
verifierCtx := ctx.Value(verifierStateContextKey{}).(*verifierState)
oldValue, ok := verifierCtx.values[key]
if !ok {
verifierCtx.values[key] = newValue
oldValue, loaded := verifierCtx.values.LoadOrStore(key, newValue)
if !loaded {
return
}
if oldValue != newValue {

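The move from a plain map to sync.Map makes this verify-or-set check safe when functions are compiled concurrently; a small self-contained illustration of the LoadOrStore semantics it relies on (the key and value strings here are made up):

package main

import (
	"fmt"
	"sync"
)

func main() {
	var values sync.Map

	verifyOrSet := func(key, newValue string) error {
		oldValue, loaded := values.LoadOrStore(key, newValue)
		if !loaded {
			return nil // first observation: store and move on.
		}
		if oldValue != newValue {
			return fmt.Errorf("compilation was not deterministic: %v vs %v", oldValue, newValue)
		}
		return nil
	}

	fmt.Println(verifyOrSet("f[0]: after regalloc", "v1 -> x0")) // <nil>
	fmt.Println(verifyOrSet("f[0]: after regalloc", "v1 -> x0")) // <nil>
	fmt.Println(verifyOrSet("f[0]: after regalloc", "v1 -> x1")) // error
}
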
View File

@@ -69,7 +69,7 @@ type IDedPool[T any] struct {
// NewIDedPool returns a new IDedPool.
func NewIDedPool[T any](resetFn func(*T)) IDedPool[T] {
return IDedPool[T]{pool: NewPool[T](resetFn), maxIDEncountered: -1}
return IDedPool[T]{pool: NewPool(resetFn), maxIDEncountered: -1}
}
// GetOrAllocate returns the T with the given id.
@@ -134,10 +134,10 @@ type VarLength[T any] struct {
// NewVarLengthPool returns a new VarLengthPool.
func NewVarLengthPool[T any]() VarLengthPool[T] {
return VarLengthPool[T]{
arrayPool: NewPool[varLengthPoolArray[T]](func(v *varLengthPoolArray[T]) {
arrayPool: NewPool(func(v *varLengthPoolArray[T]) {
v.next = 0
}),
slicePool: NewPool[[]T](func(i *[]T) {
slicePool: NewPool(func(i *[]T) {
*i = (*i)[:0]
}),
}
@@ -155,6 +155,9 @@ func (p *VarLengthPool[T]) Allocate(knownMin int) VarLength[T] {
return VarLength[T]{arr: arr}
}
slc := p.slicePool.Allocate()
if cap(*slc) < knownMin {
*slc = make([]T, 0, knownMin)
}
return VarLength[T]{slc: slc}
}
@@ -166,39 +169,36 @@ func (p *VarLengthPool[T]) Reset() {
// Append appends items to the backing slice just like the `append` builtin function in Go.
func (i VarLength[T]) Append(p *VarLengthPool[T], items ...T) VarLength[T] {
if i.slc != nil {
*i.slc = append(*i.slc, items...)
slc := i.slc
if slc != nil {
*slc = append(*slc, items...)
return i
}
if i.arr == nil {
i.arr = p.arrayPool.Allocate()
arr := i.arr
if arr == nil {
arr = p.arrayPool.Allocate()
i.arr = arr
}
arr := i.arr
if arr.next+len(items) <= arraySize {
for _, item := range items {
arr.arr[arr.next] = item
arr.next++
}
arr.next += copy(arr.arr[arr.next:], items)
} else {
slc := p.slicePool.Allocate()
slc = p.slicePool.Allocate()
// Copy the array to the slice.
for ptr := 0; ptr < arr.next; ptr++ {
*slc = append(*slc, arr.arr[ptr])
}
*slc = append(*slc, arr.arr[:arr.next]...)
*slc = append(*slc, items...)
i.slc = slc
*i.slc = append(*i.slc, items...)
}
return i
}
// View returns the backing slice.
func (i VarLength[T]) View() []T {
if i.slc != nil {
if slc := i.slc; slc != nil {
return *i.slc
} else if i.arr != nil {
arr := i.arr
}
if arr := i.arr; arr != nil {
return arr.arr[:arr.next]
}
return nil
@@ -207,9 +207,9 @@ func (i VarLength[T]) View() []T {
// Cut cuts the backing slice to the given length.
// Precondition: n <= len(i.backing).
func (i VarLength[T]) Cut(n int) {
if i.slc != nil {
*i.slc = (*i.slc)[:n]
} else if i.arr != nil {
i.arr.next = n
if slc := i.slc; slc != nil {
*slc = (*slc)[:n]
} else if arr := i.arr; arr != nil {
arr.next = n
}
}
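
Taken together, the VarLength API is used roughly as below. This is a usage sketch based only on the signatures visible in this hunk (NewVarLengthPool, Allocate, Append, View, Cut, Reset), with int chosen arbitrarily as the element type; it is not runnable outside the wazevoapi package.

pool := NewVarLengthPool[int]()

vl := pool.Allocate(4)         // capacity hint; small runs typically stay in the fixed backing array
vl = vl.Append(&pool, 1, 2, 3) // Append returns the (possibly re-backed) value, like the built-in append
fmt.Println(vl.View())         // [1 2 3]

vl.Cut(2)                      // truncate to the first two elements
fmt.Println(vl.View())         // [1 2]

pool.Reset()                   // recycle arrays and slices for the next compilation
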

View File

@@ -0,0 +1,6 @@
package expctxkeys
// CompilationWorkers is a context.Context Value key.
// Its associated value should be an int representing the number of workers
// we want to spawn to compile a given wasm input.
type CompilationWorkers struct{}
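
A hedged sketch of how this key is meant to be consumed; expctxkeys is an internal package, so real callers would go through whatever helper the experimental package exposes (not shown in this diff), and the key is set directly here only to make the "value should be an int" contract concrete.

// Illustration only; assumes imports of context, runtime and the internal expctxkeys package.
ctx := context.WithValue(context.Background(), expctxkeys.CompilationWorkers{}, runtime.NumCPU())

// The engine side would read it back along these lines:
if n, ok := ctx.Value(expctxkeys.CompilationWorkers{}).(int); ok && n > 1 {
	// compile functions with n parallel workers (sketch).
	_ = n
}
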

View File

@@ -2,8 +2,10 @@
package platform
import "sync"
// CpuFeatures exposes the capabilities for this CPU, queried via the Has, HasExtra methods.
var CpuFeatures = loadCpuFeatureFlags()
var CpuFeatures = sync.OnceValue(loadCpuFeatureFlags)
// cpuFeatureFlags implements CpuFeatureFlags interface.
type cpuFeatureFlags struct {

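CpuFeatures changes from an eagerly-initialized variable to a sync.OnceValue-wrapped function, so feature detection runs lazily, exactly once, and is safe for concurrent callers; call sites become CpuFeatures(), as seen later in this diff. A standalone illustration of the stdlib primitive:

package main

import (
	"fmt"
	"sync"
)

func main() {
	detect := func() string {
		fmt.Println("probing CPU features...") // runs at most once
		return "sse4.1"
	}
	features := sync.OnceValue(detect)

	fmt.Println(features()) // probes, then prints "sse4.1"
	fmt.Println(features()) // prints the cached "sse4.1" without probing again
}
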
View File

@@ -2,10 +2,13 @@
package platform
import "runtime"
import (
"runtime"
"sync"
)
// CpuFeatures exposes the capabilities for this CPU, queried via the Has, HasExtra methods.
var CpuFeatures = loadCpuFeatureFlags()
var CpuFeatures = sync.OnceValue(loadCpuFeatureFlags)
// cpuFeatureFlags implements CpuFeatureFlags interface.
type cpuFeatureFlags struct {

View File

@@ -2,7 +2,7 @@
package platform
var CpuFeatures CpuFeatureFlags = &cpuFeatureFlags{}
var CpuFeatures = func() CpuFeatureFlags { return &cpuFeatureFlags{} }
// cpuFeatureFlags implements CpuFeatureFlags for unsupported platforms.
type cpuFeatureFlags struct{}

View File

@@ -59,12 +59,16 @@ func init() {
})
}
func mmapCodeSegment(size, prot int) ([]byte, error) {
flags := syscall.MAP_ANON | syscall.MAP_PRIVATE
func mmapCodeSegment(size int) ([]byte, error) {
flag := syscall.MAP_ANON | syscall.MAP_PRIVATE
prot := syscall.PROT_READ | syscall.PROT_WRITE
if noopMprotectRX {
prot = syscall.PROT_READ | syscall.PROT_WRITE | syscall.PROT_EXEC
}
for _, hugePagesConfig := range hugePagesConfigs {
if hugePagesConfig.match(size) {
b, err := syscall.Mmap(-1, 0, size, prot, flags|hugePagesConfig.flag)
b, err := syscall.Mmap(-1, 0, size, prot, flag|hugePagesConfig.flag)
if err != nil {
continue
}
@@ -72,5 +76,5 @@ func mmapCodeSegment(size, prot int) ([]byte, error) {
}
}
return syscall.Mmap(-1, 0, size, prot, flags)
return syscall.Mmap(-1, 0, size, prot, flag)
}

View File

@@ -5,7 +5,11 @@ package platform
import "syscall"
func mmapCodeSegment(size, prot int) ([]byte, error) {
func mmapCodeSegment(size int) ([]byte, error) {
prot := syscall.PROT_READ | syscall.PROT_WRITE
if noopMprotectRX {
prot = syscall.PROT_READ | syscall.PROT_WRITE | syscall.PROT_EXEC
}
return syscall.Mmap(
-1,
0,

View File

@@ -2,31 +2,8 @@
package platform
import (
"syscall"
)
const (
mmapProtAMD64 = syscall.PROT_READ | syscall.PROT_WRITE | syscall.PROT_EXEC
mmapProtARM64 = syscall.PROT_READ | syscall.PROT_WRITE
)
import "syscall"
func munmapCodeSegment(code []byte) error {
return syscall.Munmap(code)
}
// mmapCodeSegmentAMD64 gives all read-write-exec permission to the mmap region
// to enter the function. Otherwise, segmentation fault exception is raised.
func mmapCodeSegmentAMD64(size int) ([]byte, error) {
// The region must be RWX: RW for writing native codes, X for executing the region.
return mmapCodeSegment(size, mmapProtAMD64)
}
// mmapCodeSegmentARM64 cannot give all read-write-exec permission to the mmap region.
// Otherwise, the mmap systemcall would raise an error. Here we give read-write
// to the region so that we can write contents at call-sites. Callers are responsible to
// execute MprotectRX on the returned buffer.
func mmapCodeSegmentARM64(size int) ([]byte, error) {
// The region must be RW: RW for writing native codes.
return mmapCodeSegment(size, mmapProtARM64)
}

View File

@@ -13,11 +13,7 @@ func munmapCodeSegment(code []byte) error {
panic(errUnsupported)
}
func mmapCodeSegmentAMD64(size int) ([]byte, error) {
panic(errUnsupported)
}
func mmapCodeSegmentARM64(size int) ([]byte, error) {
func mmapCodeSegment(size int) ([]byte, error) {
panic(errUnsupported)
}

View File

@@ -56,16 +56,7 @@ func virtualProtect(address, size, newprotect uintptr, oldprotect *uint32) error
return nil
}
func mmapCodeSegmentAMD64(size int) ([]byte, error) {
p, err := allocateMemory(uintptr(size), windows_PAGE_EXECUTE_READWRITE)
if err != nil {
return nil, err
}
return unsafe.Slice((*byte)(unsafe.Pointer(p)), size), nil
}
func mmapCodeSegmentARM64(size int) ([]byte, error) {
func mmapCodeSegment(size int) ([]byte, error) {
p, err := allocateMemory(uintptr(size), windows_PAGE_READWRITE)
if err != nil {
return nil, err

View File

@@ -7,6 +7,8 @@ import (
"unsafe"
)
const noopMprotectRX = false
// MprotectRX is like syscall.Mprotect with RX permission, defined locally so that BSD compiles.
func MprotectRX(b []byte) (err error) {
var _p0 unsafe.Pointer

View File

@@ -4,6 +4,8 @@ package platform
import "syscall"
const noopMprotectRX = false
// MprotectRX is like syscall.Mprotect with RX permission.
func MprotectRX(b []byte) (err error) {
return syscall.Mprotect(b, syscall.PROT_READ|syscall.PROT_EXEC)

View File

@@ -2,8 +2,9 @@
package platform
import "syscall"
const noopMprotectRX = true
func MprotectRX(b []byte) error {
return syscall.ENOTSUP
// Assume we already called mmap with at least RX.
return nil
}

View File

@@ -21,13 +21,13 @@ func CompilerSupports(features api.CoreFeatures) bool {
case "linux", "darwin", "freebsd", "netbsd", "dragonfly", "windows":
if runtime.GOARCH == "arm64" {
if features.IsEnabled(experimental.CoreFeaturesThreads) {
return CpuFeatures.Has(CpuFeatureArm64Atomic)
return CpuFeatures().Has(CpuFeatureArm64Atomic)
}
return true
}
fallthrough
case "solaris", "illumos":
return runtime.GOARCH == "amd64" && CpuFeatures.Has(CpuFeatureAmd64SSE4_1)
return runtime.GOARCH == "amd64" && CpuFeatures().Has(CpuFeatureAmd64SSE4_1)
default:
return false
}
@@ -40,11 +40,7 @@ func MmapCodeSegment(size int) ([]byte, error) {
if size == 0 {
panic("BUG: MmapCodeSegment with zero length")
}
if runtime.GOARCH == "amd64" {
return mmapCodeSegmentAMD64(size)
} else {
return mmapCodeSegmentARM64(size)
}
return mmapCodeSegment(size)
}
// MunmapCodeSegment unmaps the given memory region.

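With a single mmapCodeSegment per platform, the read/write versus execute split is now decided by noopMprotectRX. The overall flow, sketched with only the exported helpers that appear in this diff (MmapCodeSegment, MprotectRX, MunmapCodeSegment) and a hypothetical machineCode byte slice:

// Sketch of the intended W^X flow; error handling trimmed, machineCode is a placeholder.
buf, err := platform.MmapCodeSegment(len(machineCode)) // RW, or RWX where noopMprotectRX is true
if err != nil {
	return err
}
copy(buf, machineCode)                           // write the generated native code
if err := platform.MprotectRX(buf); err != nil { // flip to RX; a no-op where the mapping is already RWX
	return err
}
defer platform.MunmapCodeSegment(buf)
// ... jump into buf ...
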
View File

@@ -36,7 +36,9 @@ func openFile(path string, oflag sys.Oflag, perm fs.FileMode) (*os.File, sys.Err
// To match expectations of WASI, e.g. TinyGo TestStatBadDir, return
// ENOENT, not ENOTDIR.
case sys.ENOTDIR:
errno = sys.ENOENT
if !strings.HasSuffix(path, "/") {
errno = sys.ENOENT
}
case sys.ENOENT:
if isSymlink(path) {
// Either symlink or hard link not found. We change the returned

View File

@@ -1,4 +1,4 @@
//go:build (amd64 || arm64 || riscv64) && linux
//go:build (amd64 || arm64 || ppc64le || riscv64 || s390x) && linux
// Note: This expression is not the same as compiler support, even if it looks
// similar. Platform functions here are used in interpreter mode as well.

View File

@@ -1,4 +1,4 @@
//go:build (!((amd64 || arm64 || riscv64) && linux) && !((amd64 || arm64) && (darwin || freebsd)) && !((amd64 || arm64) && windows)) || js
//go:build (!((amd64 || arm64 || ppc64le || riscv64 || s390x) && linux) && !((amd64 || arm64) && (darwin || freebsd)) && !((amd64 || arm64) && windows)) || js
package sysfs

View File

@@ -4,6 +4,7 @@ import (
"bytes"
"errors"
"fmt"
"slices"
"strconv"
"strings"
@@ -480,11 +481,9 @@ func (m *Module) validateFunctionWithMaxStackValues(
// function type might result in invalid value types if the block is the outermost label
// which equals the function's type.
if lnLabel.op != OpcodeLoop { // Loop operation doesn't require results since the continuation is the beginning of the loop.
defaultLabelType = make([]ValueType, len(lnLabel.blockType.Results))
copy(defaultLabelType, lnLabel.blockType.Results)
defaultLabelType = slices.Clone(lnLabel.blockType.Results)
} else {
defaultLabelType = make([]ValueType, len(lnLabel.blockType.Params))
copy(defaultLabelType, lnLabel.blockType.Params)
defaultLabelType = slices.Clone(lnLabel.blockType.Params)
}
if enabledFeatures.IsEnabled(api.CoreFeatureReferenceTypes) {
@@ -534,7 +533,7 @@ func (m *Module) validateFunctionWithMaxStackValues(
// br_table instruction is stack-polymorphic.
valueTypeStack.unreachable()
} else if op == OpcodeCall {
} else if op == OpcodeCall || op == OpcodeTailCallReturnCall {
pc++
index, num, err := leb128.LoadUint32(body[pc:])
if err != nil {
@@ -544,16 +543,35 @@ func (m *Module) validateFunctionWithMaxStackValues(
if int(index) >= len(functions) {
return fmt.Errorf("invalid function index")
}
var opcodeName string
if op == OpcodeCall {
opcodeName = OpcodeCallName
} else {
opcodeName = OpcodeTailCallReturnCallName
}
funcType := &m.TypeSection[functions[index]]
for i := 0; i < len(funcType.Params); i++ {
if err := valueTypeStack.popAndVerifyType(funcType.Params[len(funcType.Params)-1-i]); err != nil {
return fmt.Errorf("type mismatch on %s operation param type: %v", OpcodeCallName, err)
return fmt.Errorf("type mismatch on %s operation param type: %v", opcodeName, err)
}
}
for _, exp := range funcType.Results {
valueTypeStack.push(exp)
}
} else if op == OpcodeCallIndirect {
if op == OpcodeTailCallReturnCall {
if err := enabledFeatures.RequireEnabled(experimental.CoreFeaturesTailCall); err != nil {
return fmt.Errorf("%s invalid as %v", OpcodeTailCallReturnCallName, err)
}
// Same formatting as OpcodeEnd on the outer-most block
if err := valueTypeStack.requireStackValues(false, "", functionType.Results, false); err != nil {
return err
}
// behaves as a jump.
valueTypeStack.unreachable()
}
} else if op == OpcodeCallIndirect || op == OpcodeTailCallReturnCallIndirect {
pc++
typeIndex, num, err := leb128.LoadUint32(body[pc:])
if err != nil {
@@ -561,8 +579,15 @@ func (m *Module) validateFunctionWithMaxStackValues(
}
pc += num
var opcodeName string
if op == OpcodeCallIndirect {
opcodeName = OpcodeCallIndirectName
} else {
opcodeName = OpcodeTailCallReturnCallIndirectName
}
if int(typeIndex) >= len(m.TypeSection) {
return fmt.Errorf("invalid type index at %s: %d", OpcodeCallIndirectName, typeIndex)
return fmt.Errorf("invalid type index at %s: %d", opcodeName, typeIndex)
}
tableIndex, num, err := leb128.LoadUint32(body[pc:])
@@ -582,21 +607,33 @@ func (m *Module) validateFunctionWithMaxStackValues(
table := tables[tableIndex]
if table.Type != RefTypeFuncref {
return fmt.Errorf("table is not funcref type but was %s for %s", RefTypeName(table.Type), OpcodeCallIndirectName)
return fmt.Errorf("table is not funcref type but was %s for %s", RefTypeName(table.Type), opcodeName)
}
if err = valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil {
return fmt.Errorf("cannot pop the offset in table for %s", OpcodeCallIndirectName)
return fmt.Errorf("cannot pop the offset in table for %s", opcodeName)
}
funcType := &m.TypeSection[typeIndex]
for i := 0; i < len(funcType.Params); i++ {
if err = valueTypeStack.popAndVerifyType(funcType.Params[len(funcType.Params)-1-i]); err != nil {
return fmt.Errorf("type mismatch on %s operation input type", OpcodeCallIndirectName)
return fmt.Errorf("type mismatch on %s operation input type", opcodeName)
}
}
for _, exp := range funcType.Results {
valueTypeStack.push(exp)
}
if op == OpcodeTailCallReturnCallIndirect {
if err := enabledFeatures.RequireEnabled(experimental.CoreFeaturesTailCall); err != nil {
return fmt.Errorf("%s invalid as %v", OpcodeTailCallReturnCallIndirectName, err)
}
// Same formatting as OpcodeEnd on the outer-most block
if err := valueTypeStack.requireStackValues(false, "", functionType.Results, false); err != nil {
return err
}
// behaves as a jump.
valueTypeStack.unreachable()
}
} else if OpcodeI32Eqz <= op && op <= OpcodeI64Extend32S {
switch op {
case OpcodeI32Eqz:

View File

@@ -777,6 +777,16 @@ const (
OpcodeAtomicI64Rmw32CmpxchgU OpcodeAtomic = 0x4e
)
// OpcodeTailCall represents an opcode of a tail call instruction.
//
// These opcodes are toggled with CoreFeaturesTailCall.
type OpcodeTailCall = byte
const (
OpcodeTailCallReturnCall OpcodeTailCall = 0x12
OpcodeTailCallReturnCallIndirect OpcodeTailCall = 0x13
)
const (
OpcodeUnreachableName = "unreachable"
OpcodeNopName = "nop"
@@ -1864,3 +1874,18 @@ var atomicInstructionName = map[OpcodeAtomic]string{
func AtomicInstructionName(oc OpcodeAtomic) (ret string) {
return atomicInstructionName[oc]
}
const (
OpcodeTailCallReturnCallName = "return_call"
OpcodeTailCallReturnCallIndirectName = "return_call_indirect"
)
var tailCallInstructionName = map[OpcodeTailCall]string{
OpcodeTailCallReturnCall: OpcodeTailCallReturnCallName,
OpcodeTailCallReturnCallIndirect: OpcodeTailCallReturnCallIndirectName,
}
// TailCallInstructionName returns the instruction name corresponding to the tail call Opcode.
func TailCallInstructionName(oc OpcodeTailCall) (ret string) {
return tailCallInstructionName[oc]
}
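
For symmetry with AtomicInstructionName, a quick illustration of the new helper using the 0x12/0x13 opcodes defined above (fmt import assumed):

fmt.Println(TailCallInstructionName(OpcodeTailCallReturnCall))         // return_call
fmt.Println(TailCallInstructionName(OpcodeTailCallReturnCallIndirect)) // return_call_indirect
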

View File

@@ -326,10 +326,14 @@ func (t *TableInstance) Grow(delta uint32, initialRef Reference) (currentLen uin
newLen >= math.MaxUint32 || (t.Max != nil && newLen > int64(*t.Max)) {
return 0xffffffff // = -1 in signed 32-bit integer.
}
t.References = append(t.References, make([]uintptr, delta)...)
if initialRef == 0 {
return
}
// Uses the copy trick to quickly fill the new region with the initial value.
// https://gist.github.com/taylorza/df2f89d5f9ab3ffd06865062a4cf015d
// https://github.com/golang/go/blob/go1.24.0/src/slices/slices.go#L514-L517
newRegion := t.References[currentLen:]
newRegion[0] = initialRef
for i := 1; i < len(newRegion); i *= 2 {

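The doubling copy referenced in the comments fills the new region with a handful of memmove-backed copy calls instead of an element-by-element loop; a standalone version of the same trick:

package main

import "fmt"

// fill sets every element of s to v using the doubling-copy trick:
// write the first element, then repeatedly copy the already-filled
// prefix onto the rest, doubling the filled length each iteration.
func fill[T any](s []T, v T) {
	if len(s) == 0 {
		return
	}
	s[0] = v
	for i := 1; i < len(s); i *= 2 {
		copy(s[i:], s[:i])
	}
}

func main() {
	refs := make([]uintptr, 10)
	fill(refs, 0xdeadbeef)
	fmt.Println(refs)
}
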
View File

@@ -1,4 +1,4 @@
//go:build (amd64 || arm64 || riscv64) && linux
//go:build (amd64 || arm64 || ppc64le || riscv64 || s390x) && linux
// Note: This expression is not the same as compiler support, even if it looks
// similar. Platform functions here are used in interpreter mode as well.

View File

@@ -1,4 +1,4 @@
//go:build (!((amd64 || arm64 || riscv64) && linux) && !((amd64 || arm64) && (darwin || freebsd)) && !((amd64 || arm64) && windows)) || js
//go:build (!((amd64 || arm64 || ppc64le || riscv64 || s390x) && linux) && !((amd64 || arm64) && (darwin || freebsd)) && !((amd64 || arm64) && windows)) || js
package sys

4
vendor/modules.txt vendored
View File

@@ -1326,8 +1326,8 @@ github.com/stretchr/testify/assert
github.com/stretchr/testify/assert/yaml
github.com/stretchr/testify/require
github.com/stretchr/testify/suite
# github.com/tetratelabs/wazero v1.9.0
## explicit; go 1.22.0
# github.com/tetratelabs/wazero v1.10.1
## explicit; go 1.23.0
github.com/tetratelabs/wazero
github.com/tetratelabs/wazero/api
github.com/tetratelabs/wazero/experimental