diff --git a/builder/musl.go b/builder/musl.go index 3c79c7c43a..54ec2c83ed 100644 --- a/builder/musl.go +++ b/builder/musl.go @@ -113,6 +113,7 @@ var libMusl = Library{ librarySources: func(target string) ([]string, error) { arch := compileopts.MuslArchitecture(target) globs := []string{ + "conf/*.c", "env/*.c", "errno/*.c", "exit/*.c", diff --git a/builder/sizes.go b/builder/sizes.go index 485a652d97..9aa74ffa82 100644 --- a/builder/sizes.go +++ b/builder/sizes.go @@ -490,9 +490,9 @@ func loadProgramSize(path string, packagePathMap map[string]string) (*programSiz continue } if section.Type == elf.SHT_NOBITS { - if section.Name == ".stack" { + if section.Name == ".stack" || section.Name == ".stack1" { // TinyGo emits stack sections on microcontroller using the - // ".stack" name. + // ".stack" (or ".stack1") name. // This is a bit ugly, but I don't think there is a way to // mark the stack section in a linker script. sections = append(sections, memorySection{ diff --git a/compileopts/config.go b/compileopts/config.go index ee5c34537c..1c88519cd6 100644 --- a/compileopts/config.go +++ b/compileopts/config.go @@ -99,6 +99,11 @@ func (c *Config) BuildTags() []string { "math_big_pure_go", // to get math/big to work "gc." + c.GC(), "scheduler." + c.Scheduler(), // used inside the runtime package "serial." + c.Serial()}...) // used inside the machine package + switch c.Scheduler() { + case "threads", "cores": + default: + tags = append(tags, "tinygo.unicore") + } for i := 1; i <= c.GoMinorVersion; i++ { tags = append(tags, fmt.Sprintf("go1.%d", i)) } diff --git a/compileopts/options.go b/compileopts/options.go index 30e0e4dbed..4366dd5c11 100644 --- a/compileopts/options.go +++ b/compileopts/options.go @@ -10,7 +10,7 @@ import ( var ( validBuildModeOptions = []string{"default", "c-shared"} validGCOptions = []string{"none", "leaking", "conservative", "custom", "precise"} - validSchedulerOptions = []string{"none", "tasks", "asyncify"} + validSchedulerOptions = []string{"none", "tasks", "asyncify", "threads", "cores"} validSerialOptions = []string{"none", "uart", "usb", "rtt"} validPrintSizeOptions = []string{"none", "short", "full", "html"} validPanicStrategyOptions = []string{"print", "trap"} diff --git a/compileopts/options_test.go b/compileopts/options_test.go index ee63c4c46d..d52ef5690e 100644 --- a/compileopts/options_test.go +++ b/compileopts/options_test.go @@ -10,7 +10,7 @@ import ( func TestVerifyOptions(t *testing.T) { expectedGCError := errors.New(`invalid gc option 'incorrect': valid values are none, leaking, conservative, custom, precise`) - expectedSchedulerError := errors.New(`invalid scheduler option 'incorrect': valid values are none, tasks, asyncify`) + expectedSchedulerError := errors.New(`invalid scheduler option 'incorrect': valid values are none, tasks, asyncify, threads`) expectedPrintSizeError := errors.New(`invalid size option 'incorrect': valid values are none, short, full, html`) expectedPanicStrategyError := errors.New(`invalid panic option 'incorrect': valid values are print, trap`) diff --git a/compileopts/target.go b/compileopts/target.go index 7893e58290..961e69a5d8 100644 --- a/compileopts/target.go +++ b/compileopts/target.go @@ -248,7 +248,6 @@ func defaultTarget(options *Options) (*TargetSpec, error) { GOARCH: options.GOARCH, BuildTags: []string{options.GOOS, options.GOARCH}, GC: "precise", - Scheduler: "tasks", Linker: "cc", DefaultStackSize: 1024 * 64, // 64kB GDB: []string{"gdb"}, @@ -381,6 +380,7 @@ func defaultTarget(options *Options) (*TargetSpec, error) 
{ platformVersion = "11.0.0" // first macosx platform with arm64 support } llvmvendor = "apple" + spec.Scheduler = "tasks" spec.Linker = "ld.lld" spec.Libc = "darwin-libSystem" // Use macosx* instead of darwin, otherwise darwin/arm64 will refer to @@ -398,6 +398,7 @@ func defaultTarget(options *Options) (*TargetSpec, error) { "src/runtime/runtime_unix.c", "src/runtime/signal.c") case "linux": + spec.Scheduler = "threads" spec.Linker = "ld.lld" spec.RTLib = "compiler-rt" spec.Libc = "musl" @@ -418,9 +419,11 @@ func defaultTarget(options *Options) (*TargetSpec, error) { } spec.ExtraFiles = append(spec.ExtraFiles, "src/internal/futex/futex_linux.c", + "src/internal/task/task_threads.c", "src/runtime/runtime_unix.c", "src/runtime/signal.c") case "windows": + spec.Scheduler = "tasks" spec.Linker = "ld.lld" spec.Libc = "mingw-w64" // Note: using a medium code model, low image base and no ASLR @@ -498,6 +501,7 @@ func defaultTarget(options *Options) (*TargetSpec, error) { } spec.ExtraFiles = append(spec.ExtraFiles, "src/runtime/asm_"+asmGoarch+suffix+".S") spec.ExtraFiles = append(spec.ExtraFiles, "src/internal/task/task_stack_"+asmGoarch+suffix+".S") + spec.ExtraFiles = append(spec.ExtraFiles, "src/internal/task/task_cores.c") } // Configure the emulator. diff --git a/compiler/symbol.go b/compiler/symbol.go index ff7ef05508..8ad2184d39 100644 --- a/compiler/symbol.go +++ b/compiler/symbol.go @@ -208,6 +208,8 @@ func (c *compilerContext) getFunction(fn *ssa.Function) (llvm.Type, llvm.Value) // > circumstances, and should not be exposed to source languages. llvmutil.AppendToGlobal(c.mod, "llvm.compiler.used", llvmFn) } + case "tinygo_exitTask", "tinygo_schedulerUnlock": + llvmutil.AppendToGlobal(c.mod, "llvm.used", llvmFn) } // External/exported functions may not retain pointer values. diff --git a/src/internal/task/atomic-cooperative.go b/src/internal/task/atomic-cooperative.go index 60eb917a8e..e05ea7de0d 100644 --- a/src/internal/task/atomic-cooperative.go +++ b/src/internal/task/atomic-cooperative.go @@ -1,3 +1,5 @@ +//go:build tinygo.unicore + package task // Atomics implementation for cooperative systems. The atomic types here aren't diff --git a/src/internal/task/atomic-preemptive.go b/src/internal/task/atomic-preemptive.go new file mode 100644 index 0000000000..b395ef48a3 --- /dev/null +++ b/src/internal/task/atomic-preemptive.go @@ -0,0 +1,14 @@ +//go:build !tinygo.unicore + +package task + +// Atomics implementation for non-cooperative systems (multithreaded, etc). +// These atomic types use real atomic instructions. + +import "sync/atomic" + +type ( + Uintptr = atomic.Uintptr + Uint32 = atomic.Uint32 + Uint64 = atomic.Uint64 +) diff --git a/src/internal/task/futex-cooperative.go b/src/internal/task/futex-cooperative.go index 8351f88774..ae9efb5a73 100644 --- a/src/internal/task/futex-cooperative.go +++ b/src/internal/task/futex-cooperative.go @@ -1,3 +1,5 @@ +//go:build tinygo.unicore + package task // A futex is a way for userspace to wait with the pointer as the key, and for diff --git a/src/internal/task/futex-cores.go b/src/internal/task/futex-cores.go new file mode 100644 index 0000000000..0ffccba2e4 --- /dev/null +++ b/src/internal/task/futex-cores.go @@ -0,0 +1,64 @@ +//go:build scheduler.cores + +package task + +import "runtime/interrupt" + +// A futex is a way for userspace to wait with the pointer as the key, and for +// another thread to wake one or all waiting threads keyed on the same pointer. 
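A rough illustration (not part of this patch) of how the new tinygo.unicore build tag is meant to be consumed: a package provides one file per flavour, so single-core cooperative builds keep the cheap non-atomic code paths while threads/cores builds get real atomics. The file names and the Counter type below are hypothetical.

//go:build tinygo.unicore

// counter_unicore.go (hypothetical): one core, cooperative scheduling, no preemption.
package task

type Counter struct{ v uint32 }

func (c *Counter) Add(n uint32) uint32 { c.v += n; return c.v }

// ---- and its hypothetical preemptive counterpart ----

//go:build !tinygo.unicore

// counter_preemptive.go (hypothetical): threads or cores may run in parallel.
package task

import "sync/atomic"

type Counter struct{ v atomic.Uint32 }

func (c *Counter) Add(n uint32) uint32 { return c.v.Add(n) }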
+// +// A futex does not change the underlying value, it only reads it before to prevent +// lost wake-ups. +type Futex struct { + Uint32 + + waiters Stack +} + +// Atomically check for cmp to still be equal to the futex value and if so, go +// to sleep. Return true if we were definitely awoken by a call to Wake or +// WakeAll, and false if we can't be sure of that. +func (f *Futex) Wait(cmp uint32) (awoken bool) { + mask := futexLock() + + if f.Uint32.Load() != cmp { + futexUnlock(mask) + return false + } + + // Push the current goroutine onto the waiter stack. + f.waiters.Push(Current()) + + futexUnlock(mask) + + // Pause until this task is awoken by Wake/WakeAll. + Pause() + + // We were awoken by a call to Wake or WakeAll. There is no chance for + // spurious wakeups. + return true +} + +// Wake a single waiter. +func (f *Futex) Wake() { + mask := futexLock() + if t := f.waiters.Pop(); t != nil { + scheduleTask(t) + } + futexUnlock(mask) +} + +// Wake all waiters. +func (f *Futex) WakeAll() { + mask := futexLock() + for t := f.waiters.Pop(); t != nil; t = f.waiters.Pop() { + scheduleTask(t) + } + futexUnlock(mask) +} + +//go:linkname futexLock runtime.futexLock +func futexLock() interrupt.State + +//go:linkname futexUnlock runtime.futexUnlock +func futexUnlock(interrupt.State) diff --git a/src/internal/task/futex-threads.go b/src/internal/task/futex-threads.go new file mode 100644 index 0000000000..7f9e89580c --- /dev/null +++ b/src/internal/task/futex-threads.go @@ -0,0 +1,7 @@ +//go:build scheduler.threads + +package task + +import "internal/futex" + +type Futex = futex.Futex diff --git a/src/internal/task/linux.go b/src/internal/task/linux.go new file mode 100644 index 0000000000..7d28f708c4 --- /dev/null +++ b/src/internal/task/linux.go @@ -0,0 +1,9 @@ +//go:build linux && !baremetal + +package task + +import "unsafe" + +// Musl uses a pointer (or unsigned long for C++) so unsafe.Pointer should be +// fine. +type threadID unsafe.Pointer diff --git a/src/internal/task/mutex-cooperative.go b/src/internal/task/mutex-cooperative.go index e40966bed4..90274df2bb 100644 --- a/src/internal/task/mutex-cooperative.go +++ b/src/internal/task/mutex-cooperative.go @@ -1,3 +1,5 @@ +//go:build tinygo.unicore + package task type Mutex struct { diff --git a/src/internal/task/mutex-preemptive.go b/src/internal/task/mutex-preemptive.go new file mode 100644 index 0000000000..ec83a6135d --- /dev/null +++ b/src/internal/task/mutex-preemptive.go @@ -0,0 +1,71 @@ +//go:build !tinygo.unicore + +package task + +// Futex-based mutex. +// This is largely based on the paper "Futexes are Tricky" by Ulrich Drepper. +// It describes a few ways to implement mutexes using a futex, and how some +// seemingly-obvious implementations don't exactly work as intended. +// Unfortunately, Go atomic operations work slightly differently so we can't +// copy the algorithm verbatim. +// +// The implementation works like this. The futex can have 3 different values, +// depending on the state: +// +// - 0: the futex is currently unlocked. +// - 1: the futex is locked, but is uncontended. There is one special case: if +// a contended futex is unlocked, it is set to 0. It is possible for another +// thread to lock the futex before the next waiter is woken. But because a +// waiter will be woken (if there is one), it will always change to 2 +// regardless. So this is not a problem. +// - 2: the futex is locked, and is contended. At least one thread is trying +// to obtain the lock (and is in the contended loop, see below). 
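// Illustrative trace of these three states, with goroutines A and B contending
// for the same Mutex (a sketch derived from the Lock/Unlock code below):
//
//	A: CompareAndSwap(0, 1) succeeds       -> state 1, A holds the lock
//	B: CompareAndSwap(0, 1) fails
//	B: Swap(2) returns 1 (non-zero)        -> state 2, B sleeps in Wait(2)
//	A: Unlock: Swap(0) returns 2           -> state 0, A calls Wake()
//	B: wakes, Swap(2) returns 0            -> state 2, B now holds the lock
//	B: Unlock: Swap(0) returns 2, Wake()   -> state 0 (no waiters, so the wake is a no-op)
//
// Note that B holds the lock with the futex at 2 even though nobody is waiting;
// that only means its Unlock performs one extra (harmless) Wake.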
+// +// For the paper, see: +// https://dept-info.labri.fr/~denis/Enseignement/2008-IR/Articles/01-futex.pdf) + +type Mutex struct { + futex Futex +} + +func (m *Mutex) Lock() { + // Fast path: try to take an uncontended lock. + if m.futex.CompareAndSwap(0, 1) { + // We obtained the mutex. + return + } + + // The futex is contended, so we enter the contended loop. + // If we manage to change the futex from 0 to 2, we managed to take the + // lock. Else, we have to wait until a call to Unlock unlocks this mutex. + // (Unlock will wake one waiter when it finds the futex is set to 2 when + // unlocking). + for m.futex.Swap(2) != 0 { + // Wait until we get resumed in Unlock. + m.futex.Wait(2) + } +} + +func (m *Mutex) Unlock() { + if old := m.futex.Swap(0); old == 0 { + // Mutex wasn't locked before. + panic("sync: unlock of unlocked Mutex") + } else if old == 2 { + // Mutex was a contended lock, so we need to wake the next waiter. + m.futex.Wake() + } +} + +// TryLock tries to lock m and reports whether it succeeded. +// +// Note that while correct uses of TryLock do exist, they are rare, +// and use of TryLock is often a sign of a deeper problem +// in a particular use of mutexes. +func (m *Mutex) TryLock() bool { + // Fast path: try to take an uncontended lock. + if m.futex.CompareAndSwap(0, 1) { + // We obtained the mutex. + return true + } + return false +} diff --git a/src/internal/task/pmutex-cooperative.go b/src/internal/task/pmutex-cooperative.go index ae2aa4bad8..b61e92d829 100644 --- a/src/internal/task/pmutex-cooperative.go +++ b/src/internal/task/pmutex-cooperative.go @@ -1,3 +1,5 @@ +//go:build tinygo.unicore + package task // PMutex is a real mutex on systems that can be either preemptive or threaded, diff --git a/src/internal/task/pmutex-preemptive.go b/src/internal/task/pmutex-preemptive.go new file mode 100644 index 0000000000..92263ed256 --- /dev/null +++ b/src/internal/task/pmutex-preemptive.go @@ -0,0 +1,11 @@ +//go:build !tinygo.unicore + +package task + +// PMutex is a real mutex on systems that can be either preemptive or threaded, +// and a dummy lock on other (purely cooperative) systems. +// +// It is mainly useful for short operations that need a lock when threading may +// be involved, but which do not need a lock with a purely cooperative +// scheduler. +type PMutex = Mutex diff --git a/src/internal/task/semaphore.go b/src/internal/task/semaphore.go new file mode 100644 index 0000000000..914f09bc5e --- /dev/null +++ b/src/internal/task/semaphore.go @@ -0,0 +1,32 @@ +package task + +// Barebones semaphore implementation. +// The main limitation is that if there are multiple waiters, a single Post() +// call won't do anything. Only when Post() has been called to awaken all +// waiters will the waiters proceed. +// This limitation is not a problem when there will only be a single waiter. +type Semaphore struct { + futex Futex +} + +// Post (unlock) the semaphore, incrementing the value in the semaphore. +func (s *Semaphore) Post() { + newValue := s.futex.Add(1) + if newValue == 0 { + s.futex.WakeAll() + } +} + +// Wait (lock) the semaphore, decrementing the value in the semaphore. +func (s *Semaphore) Wait() { + delta := int32(-1) + value := s.futex.Add(uint32(delta)) + for { + if int32(value) >= 0 { + // Semaphore unlocked! 
+ return + } + s.futex.Wait(value) + value = s.futex.Load() + } +} diff --git a/src/internal/task/task.go b/src/internal/task/task.go index 546f5ba117..3f2a36cda9 100644 --- a/src/internal/task/task.go +++ b/src/internal/task/task.go @@ -21,11 +21,19 @@ type Task struct { // state is the underlying running state of the task. state state + RunState uint8 + // DeferFrame stores a pointer to the (stack allocated) defer frame of the // goroutine that is used for the recover builtin. DeferFrame unsafe.Pointer } +const ( + RunStatePaused = iota + RunStateRunning + RunStateResuming +) + // DataUint32 returns the Data field as a uint32. The value is only valid after // setting it through SetDataUint32 or by storing to it using DataAtomicUint32. func (t *Task) DataUint32() uint32 { @@ -53,3 +61,11 @@ func runtime_alloc(size uintptr, layout unsafe.Pointer) unsafe.Pointer //go:linkname scheduleTask runtime.scheduleTask func scheduleTask(*Task) + +//go:linkname runtimePanic runtime.runtimePanic +func runtimePanic(str string) + +// Stack canary, to detect a stack overflow. The number is a random number +// generated by random.org. The bit fiddling dance is necessary because +// otherwise Go wouldn't allow the cast to a smaller integer size. +const stackCanary = uintptr(uint64(0x670c1333b83bf575) & uint64(^uintptr(0))) diff --git a/src/internal/task/task_asyncify.go b/src/internal/task/task_asyncify.go index 637a6b2237..bc4197995d 100644 --- a/src/internal/task/task_asyncify.go +++ b/src/internal/task/task_asyncify.go @@ -6,14 +6,6 @@ import ( "unsafe" ) -// Stack canary, to detect a stack overflow. The number is a random number -// generated by random.org. The bit fiddling dance is necessary because -// otherwise Go wouldn't allow the cast to a smaller integer size. -const stackCanary = uintptr(uint64(0x670c1333b83bf575) & uint64(^uintptr(0))) - -//go:linkname runtimePanic runtime.runtimePanic -func runtimePanic(str string) - // state is a structure which holds a reference to the state of the task. // When the task is suspended, the stack pointers are saved here. type state struct { diff --git a/src/internal/task/task_none.go b/src/internal/task/task_none.go index 280f1c4a81..8b667eec38 100644 --- a/src/internal/task/task_none.go +++ b/src/internal/task/task_none.go @@ -7,9 +7,6 @@ import "unsafe" // There is only one goroutine so the task struct can be a global. var mainTask Task -//go:linkname runtimePanic runtime.runtimePanic -func runtimePanic(str string) - func Pause() { runtimePanic("scheduler is disabled") } diff --git a/src/internal/task/task_stack.go b/src/internal/task/task_stack.go index 88a0970685..cbba8e3807 100644 --- a/src/internal/task/task_stack.go +++ b/src/internal/task/task_stack.go @@ -1,4 +1,4 @@ -//go:build scheduler.tasks +//go:build scheduler.tasks || scheduler.cores package task @@ -7,14 +7,6 @@ import ( "unsafe" ) -//go:linkname runtimePanic runtime.runtimePanic -func runtimePanic(str string) - -// Stack canary, to detect a stack overflow. The number is a random number -// generated by random.org. The bit fiddling dance is necessary because -// otherwise Go wouldn't allow the cast to a smaller integer size. -const stackCanary = uintptr(uint64(0x670c1333b83bf575) & uint64(^uintptr(0))) - // state is a structure which holds a reference to the state of the task. // When the task is suspended, the registers are stored onto the stack and the stack pointer is stored into sp. 
type state struct { @@ -33,40 +25,54 @@ type state struct { } // currentTask is the current running task, or nil if currently in the scheduler. -var currentTask *Task +//var currentTask *Task // Current returns the current active task. -func Current() *Task { - return currentTask +//func Current() *Task { +// return currentTask +//} + +//go:linkname Current runtime.currentTask +func Current() *Task + +//go:linkname schedulerLock runtime.schedulerLock +func schedulerLock() + +func Pause() { + schedulerLock() + PauseLocked() } // Pause suspends the current task and returns to the scheduler. // This function may only be called when running on a goroutine stack, not when running on the system stack or in an interrupt. -func Pause() { +func PauseLocked() { // Check whether the canary (the lowest address of the stack) is still // valid. If it is not, a stack overflow has occurred. - if *currentTask.state.canaryPtr != stackCanary { + if *Current().state.canaryPtr != stackCanary { runtimePanic("goroutine stack overflow") } if interrupt.In() { runtimePanic("blocked inside interrupt") } - currentTask.state.pause() + current := Current() + current.RunState = RunStatePaused + current.state.pause() } -//export tinygo_pause -func pause() { +//export tinygo_task_exit +func taskExit() { + println("-- exiting task") Pause() } // Resume the task until it pauses or completes. // This may only be called from the scheduler. func (t *Task) Resume() { - currentTask = t - t.gcData.swap() + //currentTask = t + //t.gcData.swap() t.state.resume() - t.gcData.swap() - currentTask = nil + //t.gcData.swap() + //currentTask = nil } // initialize the state and prepare to call the specified function with the specified argument bundle. diff --git a/src/internal/task/task_stack_cortexm.S b/src/internal/task/task_stack_cortexm.S index dfe713552d..8e6520106e 100644 --- a/src/internal/task/task_stack_cortexm.S +++ b/src/internal/task/task_stack_cortexm.S @@ -28,7 +28,7 @@ tinygo_startTask: blx r4 // After return, exit this goroutine. This is a tail call. - bl tinygo_pause + bl tinygo_task_exit .cfi_endproc .size tinygo_startTask, .-tinygo_startTask diff --git a/src/internal/task/task_stack_cortexm.go b/src/internal/task/task_stack_cortexm.go index 226a088c88..653dc06e17 100644 --- a/src/internal/task/task_stack_cortexm.go +++ b/src/internal/task/task_stack_cortexm.go @@ -1,4 +1,4 @@ -//go:build scheduler.tasks && cortexm +//go:build (scheduler.tasks || scheduler.cores) && cortexm package task diff --git a/src/internal/task/task_threads.c b/src/internal/task/task_threads.c new file mode 100644 index 0000000000..6ada95fa84 --- /dev/null +++ b/src/internal/task/task_threads.c @@ -0,0 +1,113 @@ +//go:build none + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include + +// BDWGC also uses SIGRTMIN+6 on Linux, which seems like a reasonable choice. +#ifdef __linux__ +#define taskPauseSignal (SIGRTMIN + 6) +#endif + +// Pointer to the current task.Task structure. +// Ideally the entire task.Task structure would be a thread-local variable but +// this also works. +static __thread void *current_task; + +struct state_pass { + void *(*start)(void*); + void *args; + void *task; + uintptr_t *stackTop; + sem_t startlock; +}; + +// Handle the GC pause in Go. +void tinygo_task_gc_pause(int sig); + +// Initialize the main thread. +void tinygo_task_init(void *mainTask, pthread_t *thread, int *numCPU, void *context) { + // Make sure the current task pointer is set correctly for the main + // goroutine as well. 
+ current_task = mainTask; + + // Store the thread ID of the main thread. + *thread = pthread_self(); + + // Register the "GC pause" signal for the entire process. + // Using pthread_kill, we can still send the signal to a specific thread. + struct sigaction act = { 0 }; + act.sa_flags = SA_SIGINFO; + act.sa_handler = &tinygo_task_gc_pause; + sigaction(taskPauseSignal, &act, NULL); + + // Obtain the number of CPUs available on program start (for NumCPU). + int num = sysconf(_SC_NPROCESSORS_ONLN); + if (num <= 0) { + // Fallback in case there is an error. + num = 1; + } + *numCPU = num; +} + +void tinygo_task_exited(void*); + +// Helper to start a goroutine while also storing the 'task' structure. +static void* start_wrapper(void *arg) { + struct state_pass *state = arg; + void *(*start)(void*) = state->start; + void *args = state->args; + current_task = state->task; + + // Save the current stack pointer in the goroutine state, for the GC. + int stackAddr; + *(state->stackTop) = (uintptr_t)(&stackAddr); + + // Notify the caller that the thread has successfully started and + // initialized. + sem_post(&state->startlock); + + // Run the goroutine function. + start(args); + + // Notify the Go side this thread will exit. + tinygo_task_exited(current_task); + + return NULL; +}; + +// Start a new goroutine in an OS thread. +int tinygo_task_start(uintptr_t fn, void *args, void *task, pthread_t *thread, uintptr_t *stackTop, void *context) { + // Sanity check. Should get optimized away. + if (sizeof(pthread_t) != sizeof(void*)) { + __builtin_trap(); + } + + struct state_pass state = { + .start = (void*)fn, + .args = args, + .task = task, + .stackTop = stackTop, + }; + sem_init(&state.startlock, 0, 0); + int result = pthread_create(thread, NULL, &start_wrapper, &state); + + // Wait until the thread has been created and read all state_pass variables. + sem_wait(&state.startlock); + + return result; +} + +// Return the current task (for task.Current()). +void* tinygo_task_current(void) { + return current_task; +} + +// Send a signal to cause the task to pause for the GC mark phase. +void tinygo_task_send_gc_signal(pthread_t thread) { + pthread_kill(thread, taskPauseSignal); +} diff --git a/src/internal/task/task_threads.go b/src/internal/task/task_threads.go new file mode 100644 index 0000000000..327b2df46e --- /dev/null +++ b/src/internal/task/task_threads.go @@ -0,0 +1,268 @@ +//go:build scheduler.threads + +package task + +import ( + "sync/atomic" + "unsafe" +) + +// If true, print verbose debug logs. +const verbose = false + +// Scheduler-specific state. +type state struct { + // Goroutine ID. The number here is not really significant and after a while + // it could wrap around. But it is useful for debugging. + id uintptr + + // Thread ID, pthread_t or similar (typically implemented as a pointer). + thread threadID + + // Highest address of the stack. It is stored when the goroutine starts, and + // is needed to be able to scan the stack. + stackTop uintptr + + // Next task in the activeTasks queue. + QueueNext *Task + + // Semaphore to pause/resume the thread atomically. + pauseSem Semaphore + + // Semaphore used for stack scanning. + // We can't reuse pauseSem here since the thread might have been paused for + // other reasons (for example, because it was waiting on a channel). + gcSem Semaphore +} + +// Goroutine counter, starting at 0 for the main goroutine. 
+var goroutineID uintptr + +var numCPU int32 + +var mainTask Task + +// Queue of tasks (see QueueNext) that currently exist in the program. +var activeTasks = &mainTask +var activeTaskLock PMutex + +func OnSystemStack() bool { + runtimePanic("todo: task.OnSystemStack") + return false +} + +// Initialize the main goroutine state. Must be called by the runtime on +// startup, before starting any other goroutines. +func Init(sp uintptr) { + mainTask.state.stackTop = sp + tinygo_task_init(&mainTask, &mainTask.state.thread, &numCPU) +} + +// Return the task struct for the current thread. +func Current() *Task { + t := (*Task)(tinygo_task_current()) + if t == nil { + runtimePanic("unknown current task") + } + return t +} + +// Pause pauses the current task, until it is resumed by another task. +// It is possible that another task has called Resume() on the task before it +// hits Pause(), in which case the task won't be paused but continues +// immediately. +func Pause() { + // Wait until resumed + t := Current() + if verbose { + println("*** pause: ", t.state.id) + } + t.state.pauseSem.Wait() +} + +// Resume the given task. +// It is legal to resume a task before it gets paused, it means that the next +// call to Pause() won't pause but will continue immediately. This happens in +// practice sometimes in channel operations, where the Resume() might get called +// between the channel unlock and the call to Pause(). +func (t *Task) Resume() { + if verbose { + println("*** resume: ", t.state.id) + } + // Increment the semaphore counter. + // If the task is currently paused in Wait(), it will resume. + // If the task is not yet paused, the next call to Wait() will continue + // immediately. + t.state.pauseSem.Post() +} + +// Start a new OS thread. +func start(fn uintptr, args unsafe.Pointer, stackSize uintptr) { + t := &Task{} + t.state.id = atomic.AddUintptr(&goroutineID, 1) + if verbose { + println("*** start: ", t.state.id, "from", Current().state.id) + } + + // Start the new thread, and add it to the list of threads. + // Do this with a lock so that only started threads are part of the queue + // and the stop-the-world GC won't see threads that haven't started yet or + // are not fully started yet. + activeTaskLock.Lock() + errCode := tinygo_task_start(fn, args, t, &t.state.thread, &t.state.stackTop) + if errCode != 0 { + runtimePanic("could not start thread") + } + t.state.QueueNext = activeTasks + activeTasks = t + activeTaskLock.Unlock() +} + +//export tinygo_task_exited +func taskExited(t *Task) { + if verbose { + println("*** exit:", t.state.id) + } + + // Remove from the queue. + // TODO: this can be made more efficient by using a doubly linked list. + activeTaskLock.Lock() + found := false + for q := &activeTasks; *q != nil; q = &(*q).state.QueueNext { + if *q == t { + *q = t.state.QueueNext + found = true + break + } + } + activeTaskLock.Unlock() + + // Sanity check. + if !found { + runtimePanic("taskExited failed") + } +} + +// Futex to wait on until all tasks have finished scanning the stack. +// This is basically a sync.WaitGroup. +var scanDoneFutex Futex + +// GC scan phase. Because we need to stop the world while scanning, this kinda +// needs to be done in the tasks package. +func GCScan() { + current := Current() + + // Don't allow new goroutines to be started while pausing/resuming threads + // in the stop-the-world phase. + activeTaskLock.Lock() + + // Pause all other threads. 
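A small sketch (editorial, not part of the patch) of the Resume-before-Pause property described above: because pauseSem is a counting semaphore, a Post that happens before the matching Wait simply makes that Wait return immediately. A buffered channel stands in for the semaphore here; all names are made up for the example.

package main

func main() {
	pauseSem := make(chan struct{}, 1) // stand-in for the task's pauseSem

	resume := func() { // like t.Resume(): post (increment) the semaphore
		select {
		case pauseSem <- struct{}{}:
		default: // already posted; a real counting semaphore would keep counting
		}
	}
	pause := func() { <-pauseSem } // like task.Pause(): wait (decrement or block)

	resume() // the Resume() arrives before the Pause()...
	pause()  // ...so this returns immediately instead of blocking forever
	println("pause returned without blocking")
}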
+ numOtherThreads := uint32(0) + for t := activeTasks; t != nil; t = t.state.QueueNext { + if t != current { + numOtherThreads++ + tinygo_task_send_gc_signal(t.state.thread) + } + } + + // Store the number of threads to wait for in the futex. + // This is the equivalent of doing an initial wg.Add(numOtherThreads). + scanDoneFutex.Store(numOtherThreads) + + // Scan the current stack, and all current registers. + scanCurrentStack() + + // Wake each paused thread for the first time so it will scan the stack. + for t := activeTasks; t != nil; t = t.state.QueueNext { + if t != current { + t.state.gcSem.Post() + } + } + + // Wait until all threads have finished scanning their stack. + // This is the equivalent of wg.Wait() + for { + val := scanDoneFutex.Load() + if val == 0 { + break + } + scanDoneFutex.Wait(val) + } + + // Scan all globals (implemented in the runtime). + gcScanGlobals() + + // Wake each paused thread for the second time, so they will resume normal + // operation. + for t := activeTasks; t != nil; t = t.state.QueueNext { + if t != current { + t.state.gcSem.Post() + } + } + + // Allow goroutines to start and exit again. + activeTaskLock.Unlock() +} + +// Scan globals, implemented in the runtime package. +func gcScanGlobals() + +var stackScanLock PMutex + +//export tinygo_task_gc_pause +func tingyo_task_gc_pause() { + // Wait until we get the signal to start scanning the stack. + Current().state.gcSem.Wait() + + // Scan the thread stack. + // Only scan a single thread stack at a time, because the GC marking phase + // doesn't support parallelism. + // TODO: it may be possible to call markRoots directly (without saving + // registers) since we are in a signal handler that already saved a bunch of + // registers. This is an optimization left for a future time. + stackScanLock.Lock() + scanCurrentStack() + stackScanLock.Unlock() + + // Equivalent of wg.Done(): subtract one from the futex and if the result is + // 0 (meaning we were the last in the waitgroup), wake the waiting thread. + n := uint32(1) + if scanDoneFutex.Add(-n) == 0 { + scanDoneFutex.Wake() + } + + // Wait until we get the signal we can resume normally (after the mark phase + // has finished). + Current().state.gcSem.Wait() +} + +//go:export tinygo_scanCurrentStack +func scanCurrentStack() + +// Return the highest address of the current stack. +func StackTop() uintptr { + return Current().state.stackTop +} + +// Using //go:linkname instead of //export so that we don't tell the compiler +// that the 't' parameter won't escape (because it will). +// +//go:linkname tinygo_task_init tinygo_task_init +func tinygo_task_init(t *Task, thread *threadID, numCPU *int32) + +// Here same as for tinygo_task_init. +// +//go:linkname tinygo_task_start tinygo_task_start +func tinygo_task_start(fn uintptr, args unsafe.Pointer, t *Task, thread *threadID, stackTop *uintptr) int32 + +// Pause the thread by sending it a signal. 
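The scanDoneFutex protocol above is essentially a hand-rolled sync.WaitGroup (the runtime cannot use package sync here because sync itself is built on these primitives). A rough stand-alone equivalent of the same stop-the-world rendezvous, with hypothetical stand-ins for the runtime hooks:

package main

import "sync"

// scanOwnStack is a hypothetical stand-in for what each paused thread does in its
// signal handler (tinygo_task_gc_pause -> scanCurrentStack).
func scanOwnStack(id int) { println("scanned stack of thread", id) }

func main() {
	const numOtherThreads = 3

	var done sync.WaitGroup
	done.Add(numOtherThreads) // scanDoneFutex.Store(numOtherThreads)

	for i := 0; i < numOtherThreads; i++ {
		go func(id int) {
			scanOwnStack(id) // each paused thread scans its own stack...
			done.Done()      // ...then does the futex Add(-1) / Wake when it hits zero
		}(i)
	}

	done.Wait() // the Load/Wait loop in GCScan
	println("all stacks scanned; continue with the mark phase")
}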
+// +//export tinygo_task_send_gc_signal +func tinygo_task_send_gc_signal(threadID) + +//export tinygo_task_current +func tinygo_task_current() unsafe.Pointer + +func NumCPU() int { + return int(numCPU) +} diff --git a/src/runtime/atomics_critical.go b/src/runtime/atomics_critical.go index 2d98881a10..edc6a82a3d 100644 --- a/src/runtime/atomics_critical.go +++ b/src/runtime/atomics_critical.go @@ -6,7 +6,6 @@ package runtime import ( - "runtime/interrupt" _ "unsafe" ) @@ -23,27 +22,27 @@ import ( func __atomic_load_2(ptr *uint16, ordering uintptr) uint16 { // The LLVM docs for this say that there is a val argument after the pointer. // That is a typo, and the GCC docs omit it. - mask := interrupt.Disable() + mask := atomicLock() val := *ptr - interrupt.Restore(mask) + atomicUnlock(mask) return val } //export __atomic_store_2 func __atomic_store_2(ptr *uint16, val uint16, ordering uintptr) { - mask := interrupt.Disable() + mask := atomicLock() *ptr = val - interrupt.Restore(mask) + atomicUnlock(mask) } //go:inline func doAtomicCAS16(ptr *uint16, expected, desired uint16) uint16 { - mask := interrupt.Disable() + mask := atomicLock() old := *ptr if old == expected { *ptr = desired } - interrupt.Restore(mask) + atomicUnlock(mask) return old } @@ -61,10 +60,10 @@ func __atomic_compare_exchange_2(ptr, expected *uint16, desired uint16, successO //go:inline func doAtomicSwap16(ptr *uint16, new uint16) uint16 { - mask := interrupt.Disable() + mask := atomicLock() old := *ptr *ptr = new - interrupt.Restore(mask) + atomicUnlock(mask) return old } @@ -80,11 +79,11 @@ func __atomic_exchange_2(ptr *uint16, new uint16, ordering uintptr) uint16 { //go:inline func doAtomicAdd16(ptr *uint16, value uint16) (old, new uint16) { - mask := interrupt.Disable() + mask := atomicLock() old = *ptr new = old + value *ptr = new - interrupt.Restore(mask) + atomicUnlock(mask) return old, new } @@ -112,27 +111,27 @@ func __atomic_add_fetch_2(ptr *uint16, value uint16, ordering uintptr) uint16 { func __atomic_load_4(ptr *uint32, ordering uintptr) uint32 { // The LLVM docs for this say that there is a val argument after the pointer. // That is a typo, and the GCC docs omit it. 
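// Editorial note on the change below: on a single core, masking interrupts is
// enough to make these read-modify-write helpers atomic. With the new cores
// scheduler a second core can be running at the same time, so atomicLock and
// atomicUnlock are used instead: unicore builds still just disable and restore
// interrupts (baremetal-unicore.go), while multicore targets such as the RP2040
// additionally take a hardware spinlock (SPINLOCK0 in runtime_rp2040.go).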
- mask := interrupt.Disable() + mask := atomicLock() val := *ptr - interrupt.Restore(mask) + atomicUnlock(mask) return val } //export __atomic_store_4 func __atomic_store_4(ptr *uint32, val uint32, ordering uintptr) { - mask := interrupt.Disable() + mask := atomicLock() *ptr = val - interrupt.Restore(mask) + atomicUnlock(mask) } //go:inline func doAtomicCAS32(ptr *uint32, expected, desired uint32) uint32 { - mask := interrupt.Disable() + mask := atomicLock() old := *ptr if old == expected { *ptr = desired } - interrupt.Restore(mask) + atomicUnlock(mask) return old } @@ -150,10 +149,10 @@ func __atomic_compare_exchange_4(ptr, expected *uint32, desired uint32, successO //go:inline func doAtomicSwap32(ptr *uint32, new uint32) uint32 { - mask := interrupt.Disable() + mask := atomicLock() old := *ptr *ptr = new - interrupt.Restore(mask) + atomicUnlock(mask) return old } @@ -169,11 +168,11 @@ func __atomic_exchange_4(ptr *uint32, new uint32, ordering uintptr) uint32 { //go:inline func doAtomicAdd32(ptr *uint32, value uint32) (old, new uint32) { - mask := interrupt.Disable() + mask := atomicLock() old = *ptr new = old + value *ptr = new - interrupt.Restore(mask) + atomicUnlock(mask) return old, new } @@ -201,27 +200,27 @@ func __atomic_add_fetch_4(ptr *uint32, value uint32, ordering uintptr) uint32 { func __atomic_load_8(ptr *uint64, ordering uintptr) uint64 { // The LLVM docs for this say that there is a val argument after the pointer. // That is a typo, and the GCC docs omit it. - mask := interrupt.Disable() + mask := atomicLock() val := *ptr - interrupt.Restore(mask) + atomicUnlock(mask) return val } //export __atomic_store_8 func __atomic_store_8(ptr *uint64, val uint64, ordering uintptr) { - mask := interrupt.Disable() + mask := atomicLock() *ptr = val - interrupt.Restore(mask) + atomicUnlock(mask) } //go:inline func doAtomicCAS64(ptr *uint64, expected, desired uint64) uint64 { - mask := interrupt.Disable() + mask := atomicLock() old := *ptr if old == expected { *ptr = desired } - interrupt.Restore(mask) + atomicUnlock(mask) return old } @@ -239,10 +238,10 @@ func __atomic_compare_exchange_8(ptr, expected *uint64, desired uint64, successO //go:inline func doAtomicSwap64(ptr *uint64, new uint64) uint64 { - mask := interrupt.Disable() + mask := atomicLock() old := *ptr *ptr = new - interrupt.Restore(mask) + atomicUnlock(mask) return old } @@ -258,11 +257,11 @@ func __atomic_exchange_8(ptr *uint64, new uint64, ordering uintptr) uint64 { //go:inline func doAtomicAdd64(ptr *uint64, value uint64) (old, new uint64) { - mask := interrupt.Disable() + mask := atomicLock() old = *ptr new = old + value *ptr = new - interrupt.Restore(mask) + atomicUnlock(mask) return old, new } diff --git a/src/runtime/baremetal-multicore.go b/src/runtime/baremetal-multicore.go new file mode 100644 index 0000000000..8395d8f735 --- /dev/null +++ b/src/runtime/baremetal-multicore.go @@ -0,0 +1,13 @@ +//go:build baremetal && !tinygo.unicore + +package runtime + +import "runtime/interrupt" + +func atomicLock() interrupt.State { + return atomicLockImpl() +} + +func atomicUnlock(mask interrupt.State) { + atomicUnlockImpl(mask) +} diff --git a/src/runtime/baremetal-unicore.go b/src/runtime/baremetal-unicore.go new file mode 100644 index 0000000000..17b8f10212 --- /dev/null +++ b/src/runtime/baremetal-unicore.go @@ -0,0 +1,13 @@ +//go:build baremetal && tinygo.unicore + +package runtime + +import "runtime/interrupt" + +func atomicLock() interrupt.State { + return interrupt.Disable() +} + +func atomicUnlock(mask interrupt.State) 
{ + interrupt.Restore(mask) +} diff --git a/src/runtime/debug.go b/src/runtime/debug.go index 139e18bcd2..230515908f 100644 --- a/src/runtime/debug.go +++ b/src/runtime/debug.go @@ -1,14 +1,5 @@ package runtime -// NumCPU returns the number of logical CPUs usable by the current process. -// -// The set of available CPUs is checked by querying the operating system -// at process startup. Changes to operating system CPU allocation after -// process startup are not reflected. -func NumCPU() int { - return 1 -} - // Stub for NumCgoCall, does not return the real value func NumCgoCall() int { return 0 diff --git a/src/runtime/gc_blocks.go b/src/runtime/gc_blocks.go index d58bfd92a2..bce87ef271 100644 --- a/src/runtime/gc_blocks.go +++ b/src/runtime/gc_blocks.go @@ -57,6 +57,7 @@ var ( gcMallocs uint64 // total number of allocations gcFrees uint64 // total number of objects freed gcFreedBlocks uint64 // total number of freed blocks + gcLock task.PMutex // lock to avoid race conditions on multicore systems ) // zeroSizedAlloc is just a sentinel that gets returned when allocating 0 bytes. @@ -317,6 +318,10 @@ func alloc(size uintptr, layout unsafe.Pointer) unsafe.Pointer { runtimePanicAt(returnAddress(0), "heap alloc in interrupt") } + // Make sure there are no concurrent allocations. The heap is not currently + // designed for concurrent alloc/GC. + gcLock.Lock() + gcTotalAlloc += uint64(size) gcMallocs++ @@ -399,6 +404,9 @@ func alloc(size uintptr, layout unsafe.Pointer) unsafe.Pointer { i.setState(blockStateTail) } + // We've claimed this allocation, now we can unlock the heap. + gcLock.Unlock() + // Return a pointer to this allocation. pointer := thisAlloc.pointer() if preciseHeap { @@ -444,7 +452,9 @@ func free(ptr unsafe.Pointer) { // GC performs a garbage collection cycle. func GC() { + gcLock.Lock() runGC() + gcLock.Unlock() } // runGC performs a garbage collection cycle. It is the internal implementation @@ -456,8 +466,7 @@ func runGC() (freeBytes uintptr) { } // Mark phase: mark all reachable objects, recursively. - markStack() - findGlobals(markRoots) + gcMarkReachable() if baremetal && hasScheduler { // Channel operations in interrupts may move task pointers around while we are marking. @@ -714,6 +723,7 @@ func dumpHeap() { // The returned memory statistics are up to date as of the // call to ReadMemStats. This would not do GC implicitly for you. 
func ReadMemStats(m *MemStats) { + gcLock.Lock() m.HeapIdle = 0 m.HeapInuse = 0 for block := gcBlock(0); block < endBlock; block++ { @@ -733,6 +743,7 @@ func ReadMemStats(m *MemStats) { m.Sys = uint64(heapEnd - heapStart) m.HeapAlloc = (gcTotalBlocks - gcFreedBlocks) * uint64(bytesPerBlock) m.Alloc = m.HeapAlloc + gcLock.Unlock() } func SetFinalizer(obj interface{}, finalizer interface{}) { diff --git a/src/runtime/gc_stack_portable.go b/src/runtime/gc_stack_portable.go index d35e16e30c..750a34ec2c 100644 --- a/src/runtime/gc_stack_portable.go +++ b/src/runtime/gc_stack_portable.go @@ -8,6 +8,11 @@ import ( "unsafe" ) +func gcMarkReachable() { + markStack() + findGlobals(markRoots) +} + //go:extern runtime.stackChainStart var stackChainStart *stackChainObject diff --git a/src/runtime/gc_stack_raw.go b/src/runtime/gc_stack_raw.go index 5ee18622db..f4483e58c5 100644 --- a/src/runtime/gc_stack_raw.go +++ b/src/runtime/gc_stack_raw.go @@ -1,9 +1,14 @@ -//go:build (gc.conservative || gc.precise) && !tinygo.wasm +//go:build (gc.conservative || gc.precise) && !tinygo.wasm && !scheduler.threads && !scheduler.cores package runtime import "internal/task" +func gcMarkReachable() { + markStack() + findGlobals(markRoots) +} + // markStack marks all root pointers found on the stack. // // This implementation is conservative and relies on the stack top (provided by diff --git a/src/runtime/gc_stack_threads.go b/src/runtime/gc_stack_threads.go new file mode 100644 index 0000000000..9c77fa0c7b --- /dev/null +++ b/src/runtime/gc_stack_threads.go @@ -0,0 +1,25 @@ +//go:build scheduler.threads + +package runtime + +import "internal/task" + +func gcMarkReachable() { + task.GCScan() +} + +// Scan globals inside the stop-the-world phase. Called from the STW +// implementation in the internal/task package. +// +//go:linkname gcScanGlobals internal/task.gcScanGlobals +func gcScanGlobals() { + findGlobals(markRoots) +} + +// Function called from assembly with all registers pushed, to actually scan the +// stack. +// +//go:export tinygo_scanstack +func scanstack(sp uintptr) { + markRoots(sp, task.StackTop()) +} diff --git a/src/runtime/print.go b/src/runtime/print.go index a4de460253..ef5c8a7dcc 100644 --- a/src/runtime/print.go +++ b/src/runtime/print.go @@ -13,11 +13,15 @@ type stringer interface { // This is a no-op lock on systems that do not have parallelism. 
var printLock task.PMutex +//var printLocked interrupt.State + func printlock() { + //printLocked = serialLock() printLock.Lock() } func printunlock() { + //serialUnlock(printLocked) printLock.Unlock() } diff --git a/src/runtime/runtime_nrf.go b/src/runtime/runtime_nrf.go index 729c6bb20f..30a6cda9cb 100644 --- a/src/runtime/runtime_nrf.go +++ b/src/runtime/runtime_nrf.go @@ -15,6 +15,8 @@ type timeUnit int64 //go:linkname systemInit SystemInit func systemInit() +const numCPU = 1 + //export Reset_Handler func main() { if nrf.FPUPresent { @@ -145,3 +147,31 @@ func rtc_sleep(ticks uint32) { waitForEvents() } } + +func atomicLockImpl() interrupt.State { + mask := interrupt.Disable() + return mask +} + +func atomicUnlockImpl(mask interrupt.State) { + interrupt.Restore(mask) +} + +func futexLock() interrupt.State { + mask := interrupt.Disable() + return mask +} + +func futexUnlock(mask interrupt.State) { + interrupt.Restore(mask) +} + +func schedulerLock() { +} + +func schedulerUnlock() { +} + +func currentCPU() uint32 { + return 0 +} diff --git a/src/runtime/runtime_rp2040.go b/src/runtime/runtime_rp2040.go index 1d36a771e5..3c9e8935a0 100644 --- a/src/runtime/runtime_rp2040.go +++ b/src/runtime/runtime_rp2040.go @@ -4,8 +4,13 @@ package runtime import ( "device/arm" + "device/rp" + "internal/task" "machine" "machine/usb/cdc" + "reflect" + "runtime/interrupt" + "unsafe" ) // machineTicks is provided by package machine. @@ -16,6 +21,8 @@ func machineLightSleep(uint64) type timeUnit int64 +const numCPU = 2 + // ticks returns the number of ticks (microseconds) elapsed since power up. func ticks() timeUnit { t := machineTicks() @@ -50,14 +57,18 @@ func waitForEvents() { } func putchar(c byte) { + //mask := serialLock() machine.Serial.WriteByte(c) + //serialUnlock(mask) } func getchar() byte { + mask := serialLock() for machine.Serial.Buffered() == 0 { Gosched() } v, _ := machine.Serial.ReadByte() + serialUnlock(mask) return v } @@ -71,9 +82,11 @@ func machineInit() func init() { machineInit() + mask := serialLock() cdc.EnableUSBCDC() machine.USBDev.Configure(machine.UARTConfig{}) machine.InitSerial() + serialUnlock(mask) } //export Reset_Handler @@ -82,3 +95,174 @@ func main() { run() exit(0) } + +func multicore_fifo_rvalid() bool { + return rp.SIO.FIFO_ST.Get()&rp.SIO_FIFO_ST_VLD != 0 +} + +func multicore_fifo_wready() bool { + return rp.SIO.FIFO_ST.Get()&rp.SIO_FIFO_ST_RDY != 0 +} + +func multicore_fifo_drain() { + for multicore_fifo_rvalid() { + rp.SIO.FIFO_RD.Get() + } +} + +func multicore_fifo_push_blocking(data uint32) { + for !multicore_fifo_wready() { + } + rp.SIO.FIFO_WR.Set(data) + arm.Asm("sev") +} + +func multicore_fifo_pop_blocking() uint32 { + for !multicore_fifo_rvalid() { + arm.Asm("wfe") + } + + return rp.SIO.FIFO_RD.Get() +} + +//go:extern __isr_vector +var __isr_vector [0]uint32 + +//go:extern _stack1_top +var _stack1_top [0]uint32 + +var core1StartSequence = [...]uint32{ + 0, 0, 1, + uint32(uintptr(unsafe.Pointer(&__isr_vector))), + uint32(uintptr(unsafe.Pointer(&_stack1_top))), + uint32(uintptr(reflect.ValueOf(runCore1).Pointer())), +} + +func startSecondaryCores() { + // Start the second core of the RP2040. + // See section 2.8.2 in the datasheet. 
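	// The loop below follows the standard RP2040 core 1 launch handshake: each
	// word of core1StartSequence is pushed to core 1 (still waiting in the boot
	// ROM) through the inter-core FIFO, and core 1 echoes it back; on any mismatch
	// the whole sequence restarts from the beginning. Before sending a zero,
	// core 0 drains its own receive FIFO and issues an SEV so core 1 wakes from
	// WFE. The last three words hand over the vector table, the core 1 stack top
	// (_stack1_top) and the entry point (runCore1).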
+ seq := 0 + for { + cmd := core1StartSequence[seq] + if cmd == 0 { + multicore_fifo_drain() + arm.Asm("sev") + } + multicore_fifo_push_blocking(cmd) + response := multicore_fifo_pop_blocking() + if cmd != response { + seq = 0 + continue + } + seq = seq + 1 + if seq >= len(core1StartSequence) { + break + } + } +} + +var core1Task task.Task + +func runCore1() { + //until := ticks() + nanosecondsToTicks(1900e6) + //for ticks() < until { + //} + println("starting core 1") + + runSecondary(1, &core1Task) + + // Just blink a LED to show that this core is running. + // TODO: use a real scheduler. + //led := machine.GP16 + //led.Configure(machine.PinConfig{Mode: machine.PinOutput}) + //const cycles = 7000_000 + //for { + // for i := 0; i < cycles; i++ { + // led.Low() + // } + + // for i := 0; i < cycles; i++ { + // led.High() + // } + //} +} + +func currentCPU() uint32 { + return rp.SIO.CPUID.Get() +} + +const ( + spinlockAtomic = iota + spinlockFutex + spinlockScheduler +) + +func atomicLockImpl() interrupt.State { + mask := interrupt.Disable() + for rp.SIO.SPINLOCK0.Get() == 0 { + } + return mask +} + +func atomicUnlockImpl(mask interrupt.State) { + rp.SIO.SPINLOCK0.Set(0) + interrupt.Restore(mask) +} + +func futexLock() interrupt.State { + // Disable interrupts. + // This is necessary since we might do some futex operations (like Wake) + // inside an interrupt and we don't want to deadlock with a non-interrupt + // goroutine that has taken the spinlock at the same time. + mask := interrupt.Disable() + + // Acquire the spinlock. + for rp.SIO.SPINLOCK1.Get() == 0 { + // Spin, until the lock is released. + } + + return mask +} + +func futexUnlock(mask interrupt.State) { + // Release the spinlock. + rp.SIO.SPINLOCK1.Set(0) + + // Restore interrupts. + interrupt.Restore(mask) +} + +var schedulerLockMasks [numCPU]interrupt.State +var schedulerLocked = false + +// WARNING: doesn't check for deadlocks! +func schedulerLock() { + //schedulerLockMasks[currentCPU()] = interrupt.Disable() + for rp.SIO.SPINLOCK2.Get() == 0 { + } + schedulerLocked = true +} + +func schedulerUnlock() { + if !schedulerLocked { + println("!!! not locked at unlock") + for { + } + } + schedulerLocked = false + rp.SIO.SPINLOCK2.Set(0) + //interrupt.Restore(schedulerLockMasks[currentCPU()]) +} + +func serialLock() interrupt.State { + //mask := interrupt.Disable() + for rp.SIO.SPINLOCK3.Get() == 0 { + } + //return mask + return 0 +} + +func serialUnlock(mask interrupt.State) { + rp.SIO.SPINLOCK3.Set(0) + //interrupt.Restore(mask) +} diff --git a/src/runtime/runtime_unix.go b/src/runtime/runtime_unix.go index 08e3e74269..fd71d7c488 100644 --- a/src/runtime/runtime_unix.go +++ b/src/runtime/runtime_unix.go @@ -11,6 +11,12 @@ import ( "unsafe" ) +const numCPU = 1 + +func currentCPU() int { + return 0 +} + //export write func libc_write(fd int32, buf unsafe.Pointer, count uint) int @@ -73,6 +79,7 @@ type timespec struct { tv_nsec int64 // unsigned 64-bit integer on all time64 platforms } +// Highest address of the stack of the main thread. var stackTop uintptr // Entry point for Go. Initialize all packages and call main.main(). diff --git a/src/runtime/scheduler.go b/src/runtime/scheduler.go index 727c7f5f2c..7461c966ed 100644 --- a/src/runtime/scheduler.go +++ b/src/runtime/scheduler.go @@ -6,6 +6,8 @@ const schedulerDebug = false var mainExited bool +var timerQueue *timerNode + // Simple logging, for debugging. 
func scheduleLog(msg string) { if schedulerDebug { @@ -27,6 +29,34 @@ func scheduleLogChan(msg string, ch *channel, t *task.Task) { } } +func timerQueueAdd(tn *timerNode) { + q := &timerQueue + for ; *q != nil; q = &(*q).next { + if tn.whenTicks() < (*q).whenTicks() { + // this will finish earlier than the next - insert here + break + } + } + tn.next = *q + *q = tn +} + +func timerQueueRemove(t *timer) bool { + removedTimer := false + for q := &timerQueue; *q != nil; q = &(*q).next { + if (*q).timer == t { + scheduleLog("removed timer") + *q = (*q).next + removedTimer = true + break + } + } + if !removedTimer { + scheduleLog("did not remove timer") + } + return removedTimer +} + // Goexit terminates the currently running goroutine. No other goroutines are affected. func Goexit() { panicOrGoexit(nil, panicGoexit) diff --git a/src/runtime/scheduler_cooperative.go b/src/runtime/scheduler_cooperative.go index 91ba86409f..bffda72284 100644 --- a/src/runtime/scheduler_cooperative.go +++ b/src/runtime/scheduler_cooperative.go @@ -32,7 +32,6 @@ var ( runqueue task.Queue sleepQueue *task.Task sleepQueueBaseTime timeUnit - timerQueue *timerNode ) // deadlock is called when a goroutine cannot proceed any more, but is in theory @@ -58,6 +57,11 @@ func Gosched() { task.Pause() } +// NumCPU returns the number of logical CPUs usable by the current process. +func NumCPU() int { + return 1 +} + // Add this task to the sleep queue, assuming its state is set to sleeping. func addSleepTask(t *task.Task, duration timeUnit) { if schedulerDebug { @@ -100,36 +104,15 @@ func addSleepTask(t *task.Task, duration timeUnit) { // sleepQueue. func addTimer(tim *timerNode) { mask := interrupt.Disable() - - // Add to timer queue. - q := &timerQueue - for ; *q != nil; q = &(*q).next { - if tim.whenTicks() < (*q).whenTicks() { - // this will finish earlier than the next - insert here - break - } - } - tim.next = *q - *q = tim + timerQueueAdd(tim) interrupt.Restore(mask) } // removeTimer is the implementation of time.stopTimer. It removes a timer from // the timer queue, returning true if the timer is present in the timer queue. func removeTimer(tim *timer) bool { - removedTimer := false mask := interrupt.Disable() - for t := &timerQueue; *t != nil; t = &(*t).next { - if (*t).timer == tim { - scheduleLog("removed timer") - *t = (*t).next - removedTimer = true - break - } - } - if !removedTimer { - scheduleLog("did not remove timer") - } + removedTimer := timerQueueRemove(tim) interrupt.Restore(mask) return removedTimer } diff --git a/src/runtime/scheduler_cores.go b/src/runtime/scheduler_cores.go new file mode 100644 index 0000000000..4a664acf80 --- /dev/null +++ b/src/runtime/scheduler_cores.go @@ -0,0 +1,262 @@ +//go:build scheduler.cores + +package runtime + +import ( + "device/arm" + "internal/task" +) + +const hasScheduler = true + +const hasParallelism = true + +const coresVerbose = false + +var ( + mainTask task.Task + cpuTasks [numCPU]*task.Task + sleepQueue *task.Task + runQueue *task.Task +) + +func deadlock() { + // Call yield without requesting a wakeup. + task.Pause() + trap() +} + +func scheduleTask(t *task.Task) { + schedulerLock() + switch t.RunState { + case task.RunStatePaused: + // Paused, state is saved on the stack. + + if coresVerbose { + println("## schedule: add to runQueue") + } + addToRunQueue(t) + arm.Asm("sev") + case task.RunStateRunning: + // Not yet paused (probably going to pause very soon), so let the + // Pause() function know it can resume immediately. 
+ t.RunState = task.RunStateResuming + if coresVerbose { + println("## schedule: mark as resuming") + } + default: + println("Unknown run state??") + for { + } + } + schedulerUnlock() +} + +// Add task to runQueue. +// Scheduler lock must be held when calling this function. +func addToRunQueue(t *task.Task) { + t.Next = runQueue + runQueue = t +} + +func addSleepTask(t *task.Task, wakeup timeUnit) { + // Save the timestamp when the task should be woken up. + t.Data = uint64(wakeup) + + // Find the position where we should insert this task in the queue. + q := &sleepQueue + for { + if *q == nil { + // Found the end of the time queue. Insert it here, at the end. + break + } + if timeUnit((*q).Data) > timeUnit(t.Data) { + // Found a task in the queue that has a timeout before the + // to-be-sleeping task. Insert our task right before. + break + } + q = &(*q).Next + } + + // Insert the task into the queue (this could be at the end, if *q is nil). + t.Next = *q + *q = t +} + +func Gosched() { + // TODO +} + +// NumCPU returns the number of logical CPUs usable by the current process. +func NumCPU() int { + // Return the hardcoded number of physical CPU cores. + return numCPU +} + +func addTimer(tn *timerNode) { + runtimePanic("todo: timers") +} + +func removeTimer(t *timer) bool { + runtimePanic("todo: timers") + return false +} + +func schedulerRunQueue() *task.Queue { + println("todo: schedulerRunQueue") + for { + } + return nil +} + +// Pause the current task for a given time. +// +//go:linkname sleep time.Sleep +func sleep(duration int64) { + if duration <= 0 { + return + } + + wakeup := ticks() + nanosecondsToTicks(duration) + + schedulerLock() + addSleepTask(task.Current(), wakeup) + task.PauseLocked() +} + +func run() { + initHeap() + cpuTasks[0] = &mainTask + initAll() // TODO: move into main goroutine! + + until := ticks() + nanosecondsToTicks(200e6) + for ticks() < until { + } + println("\n\n=====") + + go func() { + //initAll() + startSecondaryCores() + callMain() + mainExited = true + }() + schedulerLock() + scheduler() +} + +func runSecondary(core uint32, t *task.Task) { + println("-- runSecondary") + cpuTasks[core] = t + println("-- locking for 2nd core") + schedulerLock() + println("-- locked!") + scheduler() +} + +var schedulerIsRunning = false + +func scheduler() { + if coresVerbose { + println("** scheduler on core:", currentCPU()) + } + for { + //until := ticks() + nanosecondsToTicks(100e6) + //for ticks() < until { + //} + + // Check for ready-to-run tasks. + if runnable := runQueue; runnable != nil { + if coresVerbose { + println("** scheduler", currentCPU(), "run runnable") + } + // Pop off the run queue. + runQueue = runnable.Next + runnable.Next = nil + + // Resume it now. + setCurrentTask(runnable) + schedulerUnlock() + runnable.Resume() + if coresVerbose { + println("** scheduler", currentCPU(), " returned (from runqueue resume)") + } + setCurrentTask(nil) + + continue + } + + // If another core is using the clock, let it handle the sleep queue. + if schedulerIsRunning { + if coresVerbose { + println("** scheduler", currentCPU(), "wait for other core") + } + schedulerUnlock() + waitForEvents() + schedulerLock() + continue + } + + if sleepingTask := sleepQueue; sleepingTask != nil { + now := ticks() + if now >= timeUnit(sleepingTask.Data) { + if coresVerbose { + println("** scheduler", currentCPU(), "run sleeping") + } + // This task is done sleeping. + // Resume it now. 
+ sleepQueue = sleepQueue.Next + sleepingTask.Next = nil + + setCurrentTask(sleepingTask) + schedulerUnlock() + sleepingTask.Resume() + if coresVerbose { + println("** scheduler", currentCPU(), " returned (from sleepQueue resume)") + } + setCurrentTask(nil) + continue + } + + delay := timeUnit(sleepingTask.Data) - now + if coresVerbose { + println("** scheduler", currentCPU(), "sleep", ticksToNanoseconds(delay)/1e6) + } + + // Sleep for a bit until the next task is ready to run. + schedulerIsRunning = true + schedulerUnlock() + sleepTicks(delay) + schedulerLock() + schedulerIsRunning = false + continue + } + + if coresVerbose { + println("** scheduler", currentCPU(), "wait for events") + } + schedulerUnlock() + waitForEvents() + schedulerLock() + } +} + +func currentTask() *task.Task { + return cpuTasks[currentCPU()] +} + +func setCurrentTask(task *task.Task) { + cpuTasks[currentCPU()] = task +} + +func runtimeTicks() uint64 { + return uint64(ticks()) +} + +func runtimeSleepTicks(delay uint64) { + sleepTicks(timeUnit(delay)) +} + +//export tinygo_schedulerUnlock +func tinygo_schedulerUnlock() { + schedulerUnlock() +} diff --git a/src/runtime/scheduler_none.go b/src/runtime/scheduler_none.go index a5acfd4309..7e2ddeb668 100644 --- a/src/runtime/scheduler_none.go +++ b/src/runtime/scheduler_none.go @@ -40,6 +40,11 @@ func Gosched() { // There are no other goroutines, so there's nothing to schedule. } +// NumCPU returns the number of logical CPUs usable by the current process. +func NumCPU() int { + return 1 +} + func addTimer(tim *timerNode) { runtimePanic("timers not supported without a scheduler") } diff --git a/src/runtime/scheduler_threads.go b/src/runtime/scheduler_threads.go new file mode 100644 index 0000000000..292499354e --- /dev/null +++ b/src/runtime/scheduler_threads.go @@ -0,0 +1,129 @@ +//go:build scheduler.threads + +package runtime + +import "internal/task" + +const hasScheduler = false // not using the cooperative scheduler + +// We use threads, so yes there is parallelism. +const hasParallelism = true + +var ( + timerQueueLock task.PMutex + timerQueueStarted bool + timerFutex task.Futex +) + +// Because we just use OS threads, we don't need to do anything special here. We +// can just initialize everything and run main.main on the main thread. +func run() { + initHeap() + task.Init(stackTop) + initAll() + callMain() +} + +// Pause the current task for a given time. +// +//go:linkname sleep time.Sleep +func sleep(duration int64) { + if duration <= 0 { + return + } + + sleepTicks(nanosecondsToTicks(duration)) +} + +func deadlock() { + // TODO: exit the thread via pthread_exit. + task.Pause() +} + +func scheduleTask(t *task.Task) { + t.Resume() +} + +func Gosched() { + // Each goroutine runs in a thread, so there's not much we can do here. + // There is sched_yield but it's only really intended for realtime + // operation, so is probably best not to use. +} + +// NumCPU returns the number of logical CPUs usable by the current process. +func NumCPU() int { + return task.NumCPU() +} + +// Separate goroutine (thread) that runs timer callbacks when they expire. +func timerRunner() { + for { + timerQueueLock.Lock() + + if timerQueue == nil { + // No timer in the queue, so wait until one becomes available. + val := timerFutex.Load() + timerQueueLock.Unlock() + timerFutex.Wait(val) + continue + } + + now := ticks() + if now < timerQueue.whenTicks() { + // There is a timer in the queue, but we need to wait until it + // expires. 
+ // Using a futex, so that the wait is exited early when adding a new + // (sooner-to-expire) timer. + val := timerFutex.Load() + timerQueueLock.Unlock() + timeout := ticksToNanoseconds(timerQueue.whenTicks() - now) + timerFutex.WaitUntil(val, uint64(timeout)) + continue + } + + // Pop timer from queue. + tn := timerQueue + timerQueue = tn.next + tn.next = nil + + timerQueueLock.Unlock() + + // Run the callback stored in this timer node. + delay := ticksToNanoseconds(now - tn.whenTicks()) + tn.callback(tn, delay) + } +} + +func addTimer(tim *timerNode) { + timerQueueLock.Lock() + + if !timerQueueStarted { + timerQueueStarted = true + go timerRunner() + } + + timerQueueAdd(tim) + + timerFutex.Add(1) + timerFutex.Wake() + + timerQueueLock.Unlock() +} + +func removeTimer(tim *timer) bool { + timerQueueLock.Lock() + removed := timerQueueRemove(tim) + timerQueueLock.Unlock() + return removed +} + +func schedulerRunQueue() *task.Queue { + // This function is not actually used, it is only called when hasScheduler + // is true. So we can just return nil here. + return nil +} + +func runqueueForGC() *task.Queue { + // There is only a runqueue when using the cooperative scheduler. + return nil +} diff --git a/src/sync/mutex.go b/src/sync/mutex.go index 08c674d7ea..890af78606 100644 --- a/src/sync/mutex.go +++ b/src/sync/mutex.go @@ -6,131 +6,96 @@ import ( type Mutex = task.Mutex -type RWMutex struct { - // waitingWriters are all of the tasks waiting for write locks. - waitingWriters task.Stack - - // waitingReaders are all of the tasks waiting for a read lock. - waitingReaders task.Stack +//go:linkname runtimePanic runtime.runtimePanic +func runtimePanic(msg string) - // state is the current state of the RWMutex. - // Iff the mutex is completely unlocked, it contains rwMutexStateUnlocked (aka 0). - // Iff the mutex is write-locked, it contains rwMutexStateWLocked. - // While the mutex is read-locked, it contains the current number of readers. - state uint32 +type RWMutex struct { + // Reader count, with the number of readers that currently have read-locked + // this mutex. + // The value can be in two states: one where 0 means no readers and another + // where -rwMutexMaxReaders means no readers. A base of 0 is normal + // uncontended operation, a base of -rwMutexMaxReaders means a writer has + // the lock or is trying to get the lock. In the second case, readers should + // wait until the reader count becomes non-negative again to give the writer + // a chance to obtain the lock. + readers task.Futex + + // Writer futex, normally 0. If there is a writer waiting until all readers + // have unlocked, this value is 1. It will be changed to a 2 (and get a + // wake) when the last reader unlocks. + writer task.Futex + + // Writer lock. Held between Lock() and Unlock(). + writerLock Mutex } -const ( - rwMutexStateUnlocked = uint32(0) - rwMutexStateWLocked = ^uint32(0) - rwMutexMaxReaders = rwMutexStateWLocked - 1 -) +const rwMutexMaxReaders = 1 << 30 func (rw *RWMutex) Lock() { - if rw.state == 0 { - // The mutex is completely unlocked. - // Lock without waiting. - rw.state = rwMutexStateWLocked + // Exclusive lock for writers. + rw.writerLock.Lock() + + // Flag that we need to be awakened after the last read-lock unlocks. + rw.writer.Store(1) + + // Signal to readers that they can't lock this mutex anymore. + n := uint32(rwMutexMaxReaders) + waiting := rw.readers.Add(-n) + if int32(waiting) == -rwMutexMaxReaders { + // All readers were already unlocked, so we don't need to wait for them. 
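+ // Clear the writer flag again: no reader is left that could wake us.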
+ rw.writer.Store(0) return } - // Wait for the lock to be released. - rw.waitingWriters.Push(task.Current()) - task.Pause() + // There is at least one reader. + // Wait until all readers are unlocked. The last reader to unlock will set + // rw.writer to 2 and awaken us. + for rw.writer.Load() == 1 { + rw.writer.Wait(1) + } + rw.writer.Store(0) } func (rw *RWMutex) Unlock() { - switch rw.state { - case rwMutexStateWLocked: - // This is correct. - - case rwMutexStateUnlocked: - // The mutex is already unlocked. - panic("sync: unlock of unlocked RWMutex") - - default: - // The mutex is read-locked instead of write-locked. - panic("sync: write-unlock of read-locked RWMutex") + // Signal that new readers can lock this mutex. + waiting := rw.readers.Add(rwMutexMaxReaders) + if waiting != 0 { + // Awaken all waiting readers. + rw.readers.WakeAll() } - switch { - case rw.maybeUnblockReaders(): - // Switched over to read mode. - - case rw.maybeUnblockWriter(): - // Transferred to another writer. - - default: - // Nothing is waiting for the lock. - rw.state = rwMutexStateUnlocked - } + // Done with this lock (next writer can try to get a lock). + rw.writerLock.Unlock() } func (rw *RWMutex) RLock() { - if rw.state == rwMutexStateWLocked { - // Wait for the write lock to be released. - rw.waitingReaders.Push(task.Current()) - task.Pause() - return - } + // Add us as a reader. + newVal := rw.readers.Add(1) - if rw.state == rwMutexMaxReaders { - panic("sync: too many readers on RWMutex") + // Wait until the RWMutex is available for readers. + for int32(newVal) <= 0 { + rw.readers.Wait(newVal) + newVal = rw.readers.Load() } - - // Increase the reader count. - rw.state++ } func (rw *RWMutex) RUnlock() { - switch rw.state { - case rwMutexStateUnlocked: - // The mutex is already unlocked. - panic("sync: unlock of unlocked RWMutex") - - case rwMutexStateWLocked: - // The mutex is write-locked instead of read-locked. - panic("sync: read-unlock of write-locked RWMutex") - } - - rw.state-- + // Remove us as a reader. + one := uint32(1) + readers := int32(rw.readers.Add(-one)) - if rw.state == rwMutexStateUnlocked { - // This was the last reader. - // Try to unblock a writer. - rw.maybeUnblockWriter() + // Check whether RUnlock was called too often. + if readers == -1 || readers == (-rwMutexMaxReaders)-1 { + runtimePanic("sync: RUnlock of unlocked RWMutex") } -} -func (rw *RWMutex) maybeUnblockReaders() bool { - var n uint32 - for { - t := rw.waitingReaders.Pop() - if t == nil { - break + if readers == -rwMutexMaxReaders { + // This was the last read lock. Check whether we need to wake up a write + // lock. + if rw.writer.CompareAndSwap(1, 2) { + rw.writer.Wake() } - - n++ - scheduleTask(t) - } - if n == 0 { - return false } - - rw.state = n - return true -} - -func (rw *RWMutex) maybeUnblockWriter() bool { - t := rw.waitingWriters.Pop() - if t == nil { - return false - } - - rw.state = rwMutexStateWLocked - scheduleTask(t) - - return true } type Locker interface { diff --git a/targets/arm.ld b/targets/arm.ld index cdf5b1dd43..631ec9cab2 100644 --- a/targets/arm.ld +++ b/targets/arm.ld @@ -32,6 +32,15 @@ SECTIONS _stack_top = .; } >RAM + /* Stack for second core (core 1), if there is one. This memory area won't be + * reserved if there is no second core. */ + .stack1 (NOLOAD) : + { + . = ALIGN(4); + . += _stack_size; + _stack1_top = .; + } >RAM + /* Start address (in flash) of .data, used by startup code. 
*/ _sidata = LOADADDR(.data); diff --git a/testdata/channel.go b/testdata/channel.go index 9c0fee5b73..a247c808f2 100644 --- a/testdata/channel.go +++ b/testdata/channel.go @@ -12,6 +12,7 @@ var wg sync.WaitGroup type intchan chan int func main() { + time.Sleep(time.Second * 2) ch := make(chan int, 2) ch <- 1 println("len, cap of channel:", len(ch), cap(ch), ch == nil) diff --git a/testdata/goroutines.go b/testdata/goroutines.go index cf19cc3ca0..bed2ecb1a7 100644 --- a/testdata/goroutines.go +++ b/testdata/goroutines.go @@ -5,19 +5,27 @@ import ( "time" ) -func init() { - println("init") - go println("goroutine in init") - time.Sleep(1 * time.Millisecond) -} +//func init() { +// println("\n----") +// println("init") +// go println("goroutine in init") +// time.Sleep(1 * time.Millisecond) +//} func main() { + //for i := 0; i < 2; i++ { + // println("...") + // time.Sleep(time.Second) + //} + println("main 1") go sub() time.Sleep(1 * time.Millisecond) println("main 2") time.Sleep(2 * time.Millisecond) println("main 3") + //time.Sleep(2 * time.Millisecond) + //println("main 4") // Await a blocking call. println("wait:") @@ -101,6 +109,8 @@ func sub() { println("sub 1") time.Sleep(2 * time.Millisecond) println("sub 2") + //time.Sleep(2 * time.Millisecond) + //println("sub 3") } func wait() { diff --git a/tools/gen-critical-atomics/gen-critical-atomics.go b/tools/gen-critical-atomics/gen-critical-atomics.go index 75ea327076..f949a7933a 100644 --- a/tools/gen-critical-atomics/gen-critical-atomics.go +++ b/tools/gen-critical-atomics/gen-critical-atomics.go @@ -26,7 +26,6 @@ package runtime import ( _ "unsafe" - "runtime/interrupt" ) // Documentation: @@ -41,29 +40,29 @@ import ( func __atomic_load_{{.}}(ptr *uint{{$bits}}, ordering uintptr) uint{{$bits}} { // The LLVM docs for this say that there is a val argument after the pointer. // That is a typo, and the GCC docs omit it. - mask := interrupt.Disable() + mask := atomicLock() val := *ptr - interrupt.Restore(mask) + atomicUnlock(mask) return val } {{end}} {{- define "store"}}{{$bits := mul . 8 -}} //export __atomic_store_{{.}} func __atomic_store_{{.}}(ptr *uint{{$bits}}, val uint{{$bits}}, ordering uintptr) { - mask := interrupt.Disable() + mask := atomicLock() *ptr = val - interrupt.Restore(mask) + atomicUnlock(mask) } {{end}} {{- define "cas"}}{{$bits := mul . 8 -}} //go:inline func doAtomicCAS{{$bits}}(ptr *uint{{$bits}}, expected, desired uint{{$bits}}) uint{{$bits}} { - mask := interrupt.Disable() + mask := atomicLock() old := *ptr if old == expected { *ptr = desired } - interrupt.Restore(mask) + atomicUnlock(mask) return old } @@ -82,10 +81,10 @@ func __atomic_compare_exchange_{{.}}(ptr, expected *uint{{$bits}}, desired uint{ {{- define "swap"}}{{$bits := mul . 8 -}} //go:inline func doAtomicSwap{{$bits}}(ptr *uint{{$bits}}, new uint{{$bits}}) uint{{$bits}} { - mask := interrupt.Disable() + mask := atomicLock() old := *ptr *ptr = new - interrupt.Restore(mask) + atomicUnlock(mask) return old } @@ -111,11 +110,11 @@ func __atomic_exchange_{{.}}(ptr *uint{{$bits}}, new uint{{$bits}}, ordering uin //go:inline func {{$opfn}}(ptr *{{$type}}, value {{$type}}) (old, new {{$type}}) { - mask := interrupt.Disable() + mask := atomicLock() old = *ptr {{$opdef}} *ptr = new - interrupt.Restore(mask) + atomicUnlock(mask) return old, new }
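
A note on the atomics template above: the generated helpers now go through atomicLock/atomicUnlock instead of calling runtime/interrupt directly, so the same generated code can serve both single-core and multi-core targets. Those two helpers are not shown in this diff; the following is only a rough sketch of what a single-core fallback could look like (an assumption, not the actual implementation), where masking interrupts is sufficient because no second core can touch the value concurrently:

package runtime

import "device/arm"

// Hypothetical single-core implementation of the lock used by the generated
// atomics: disable interrupts around the read-modify-write sequence and return
// the previous interrupt state.
func atomicLock() uintptr {
	return arm.DisableInterrupts()
}

// atomicUnlock restores the interrupt state saved by atomicLock.
func atomicUnlock(mask uintptr) {
	arm.EnableInterrupts(mask)
}

On a dual-core chip such as the RP2040, these helpers would additionally have to take a lock shared between the cores (for example one of the SIO hardware spinlocks) so that the other core is excluded as well.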