From 21bb0dd6c4d40397e154eebcf1d6cefdcc213fc7 Mon Sep 17 00:00:00 2001 From: rdon Date: Mon, 11 May 2026 21:48:31 +0900 Subject: [PATCH 1/5] usb/cdc: serialize TX pump across cores --- builder/sizes_test.go | 2 +- src/machine/usb/cdc/usbcdc.go | 59 +++++++++++++++++++++++++---------- 2 files changed, 43 insertions(+), 18 deletions(-) diff --git a/builder/sizes_test.go b/builder/sizes_test.go index fd985a8977..8a85ee292d 100644 --- a/builder/sizes_test.go +++ b/builder/sizes_test.go @@ -44,7 +44,7 @@ func TestBinarySize(t *testing.T) { // microcontrollers {"hifive1b", "examples/echo", 3680, 280, 0, 2252}, {"microbit", "examples/serial", 2694, 342, 8, 2248}, - {"wioterminal", "examples/pininterrupt", 7074, 1510, 120, 7248}, + {"wioterminal", "examples/pininterrupt", 7184, 1508, 120, 7256}, // TODO: also check wasm. Right now this is difficult, because // wasm binaries are run through wasm-opt and therefore the diff --git a/src/machine/usb/cdc/usbcdc.go b/src/machine/usb/cdc/usbcdc.go index b9e68369a2..f749950f5e 100644 --- a/src/machine/usb/cdc/usbcdc.go +++ b/src/machine/usb/cdc/usbcdc.go @@ -26,11 +26,19 @@ type cdcLineInfo struct { // USBCDC is the USB CDC aka serial over USB interface. type USBCDC struct { - tx ring512 - rx ring512 + tx ring512 + rx ring512 + + // inflight is the number of bytes currently submitted to the USB IN endpoint. inflight atomic.Uint32 - rbuf [1]byte - wbuf [1]byte + + // txActive serializes the USB CDC TX pump between Write and the TX + // completion handler. This matters on multicore targets where they can run + // concurrently. + txActive atomic.Uint32 + + rbuf [1]byte + wbuf [1]byte } var ( @@ -81,7 +89,7 @@ func (usbcdc *USBCDC) Configure(config machine.UARTConfig) error { // Flush flushes buffered data. func (usbcdc *USBCDC) Flush() { - for usbcdc.tx.Used() > 0 { + for usbcdc.tx.Used() > 0 || usbcdc.txActive.Load() != 0 { gosched() } } @@ -105,33 +113,50 @@ func (usbcdc *USBCDC) Write(data []byte) (n int, err error) { return n, nil } -// kickTx starts a transfer if none is in flight. Called from main context only. +// kickTx starts the TX pump if it is idle. func (usbcdc *USBCDC) kickTx() { - if usbcdc.inflight.Load() > 0 { - return // txhandler will chain the next packet. + if !usbcdc.txActive.CompareAndSwap(0, 1) { + return } usbcdc.sendFromRing() } func (usbcdc *USBCDC) txhandler() { inflight := usbcdc.inflight.Load() - usbcdc.inflight.Store(0) + if inflight == 0 { + return + } usbcdc.tx.Discard(inflight) + usbcdc.inflight.Store(0) usbcdc.sendFromRing() } // sendFromRing sends one USB packet from the ring and sets inflight. -// Called from kickTx (main) or txhandler (ISR), but never concurrently -// because kickTx only runs when inflight==0 and txhandler only runs -// when inflight>0. +// +// The caller must own txActive. Ownership starts in kickTx and is kept across +// TX completion interrupts until the TX ring is empty. func (usbcdc *USBCDC) sendFromRing() { - d1, _ := usbcdc.tx.Peek() - if len(d1) == 0 { + for { + d1, _ := usbcdc.tx.Peek() + if len(d1) == 0 { + usbcdc.txActive.Store(0) + + // Avoid a missed wakeup: Write may append data while txActive is + // still set, causing kickTx to return without starting a transfer. + if usbcdc.tx.Used() == 0 { + return + } + if !usbcdc.txActive.CompareAndSwap(0, 1) { + return + } + continue + } + + chunk := d1[:min(usb.EndpointPacketSize, len(d1))] + usbcdc.inflight.Store(uint32(len(chunk))) + machine.SendUSBInPacket(cdcEndpointIn, chunk) return } - chunk := d1[:min(usb.EndpointPacketSize, len(d1))] - usbcdc.inflight.Store(uint32(len(chunk))) - machine.SendUSBInPacket(cdcEndpointIn, chunk) } // WriteByte writes a byte of data to the USB CDC interface. From c690c2cc9eabbb3d5daaaa76e87fd1af9ad33b77 Mon Sep 17 00:00:00 2001 From: rdon Date: Sun, 17 May 2026 23:25:03 +0900 Subject: [PATCH 2/5] usb/cdc: apply review suggestion --- src/machine/usb/cdc/usbcdc.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/machine/usb/cdc/usbcdc.go b/src/machine/usb/cdc/usbcdc.go index f749950f5e..d3718ad913 100644 --- a/src/machine/usb/cdc/usbcdc.go +++ b/src/machine/usb/cdc/usbcdc.go @@ -143,10 +143,10 @@ func (usbcdc *USBCDC) sendFromRing() { // Avoid a missed wakeup: Write may append data while txActive is // still set, causing kickTx to return without starting a transfer. - if usbcdc.tx.Used() == 0 { + switch { + case usbcdc.tx.Used() == 0: return - } - if !usbcdc.txActive.CompareAndSwap(0, 1) { + case !usbcdc.txActive.CompareAndSwap(0, 1): return } continue From f2aae75370591146289109ed4afd8d0158fadef4 Mon Sep 17 00:00:00 2001 From: rdon Date: Fri, 29 May 2026 21:20:16 +0900 Subject: [PATCH 3/5] Clarify TX pump loop comment --- src/machine/usb/cdc/usbcdc.go | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/machine/usb/cdc/usbcdc.go b/src/machine/usb/cdc/usbcdc.go index d3718ad913..4e7893ce2d 100644 --- a/src/machine/usb/cdc/usbcdc.go +++ b/src/machine/usb/cdc/usbcdc.go @@ -134,8 +134,14 @@ func (usbcdc *USBCDC) txhandler() { // sendFromRing sends one USB packet from the ring and sets inflight. // // The caller must own txActive. Ownership starts in kickTx and is kept across -// TX completion interrupts until the TX ring is empty. +// TX completion handling until the TX ring is empty. func (usbcdc *USBCDC) sendFromRing() { + // This is the main TX pump loop. While txActive is held, each iteration + // peeks the TX ring and sends one packet if data is available. + // + // The empty-ring case below is the shutdown path: release txActive, then + // re-check the ring to avoid missing a Write that appended data while + // txActive was still set. for { d1, _ := usbcdc.tx.Peek() if len(d1) == 0 { @@ -149,6 +155,9 @@ func (usbcdc *USBCDC) sendFromRing() { case !usbcdc.txActive.CompareAndSwap(0, 1): return } + + // New data appeared while txActive was set, and this TX pump owner + // re-acquired ownership. Continue the pump and re-peek the ring. continue } From 93fa56177d791ffa55d236a7a53bc8b558394930 Mon Sep 17 00:00:00 2001 From: rdon Date: Mon, 1 Jun 2026 10:45:46 +0900 Subject: [PATCH 4/5] clarify txActive comments --- src/machine/usb/cdc/usbcdc.go | 44 +++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/src/machine/usb/cdc/usbcdc.go b/src/machine/usb/cdc/usbcdc.go index 4e7893ce2d..520a5d31a6 100644 --- a/src/machine/usb/cdc/usbcdc.go +++ b/src/machine/usb/cdc/usbcdc.go @@ -131,36 +131,50 @@ func (usbcdc *USBCDC) txhandler() { usbcdc.sendFromRing() } -// sendFromRing sends one USB packet from the ring and sets inflight. +// sendFromRing submits one USB IN packet from the TX ring, or shuts down +// the TX pump if the ring is empty. // -// The caller must own txActive. Ownership starts in kickTx and is kept across -// TX completion handling until the TX ring is empty. +// The caller must own txActive (either having just acquired it via CAS, +// or continuing ownership from a previous packet submission). +// For kickTx: newly acquired via CAS. +// For txhandler: inherited from the previous packet submission. +// +// While the caller owns txActive: +// - If data is available, one packet is sent and txActive remains set +// for the in-flight packet (ownership continues to txhandler). +// - If the ring is empty, txActive is cleared and the pump shuts down +// (with a final re-check to avoid a missed wakeup from Write). func (usbcdc *USBCDC) sendFromRing() { - // This is the main TX pump loop. While txActive is held, each iteration - // peeks the TX ring and sends one packet if data is available. + // This loop sends at most one USB IN packet per entry. + // + // If the TX ring has data, one packet is handed to the USB hardware and + // txActive remains set until txhandler continues the pump after + // completion. // - // The empty-ring case below is the shutdown path: release txActive, then - // re-check the ring to avoid missing a Write that appended data while - // txActive was still set. + // If the TX ring appears empty, this is the shutdown path. Clear + // txActive, then re-check the ring to avoid missing a Write that added + // data while txActive was still set. for { d1, _ := usbcdc.tx.Peek() if len(d1) == 0 { usbcdc.txActive.Store(0) - - // Avoid a missed wakeup: Write may append data while txActive is - // still set, causing kickTx to return without starting a transfer. + // Avoid a missed wakeup. + // + // A concurrent Write may have appended data while txActive was + // still set. In that case kickTx would see an active pump and + // return without starting another transfer. After clearing + // txActive, re-check the ring and reclaim txActive if this pump + // must continue. switch { case usbcdc.tx.Used() == 0: return case !usbcdc.txActive.CompareAndSwap(0, 1): return } - - // New data appeared while txActive was set, and this TX pump owner - // re-acquired ownership. Continue the pump and re-peek the ring. + // New data appeared during shutdown, and this caller reclaimed + // txActive. Continue and re-peek the ring. continue } - chunk := d1[:min(usb.EndpointPacketSize, len(d1))] usbcdc.inflight.Store(uint32(len(chunk))) machine.SendUSBInPacket(cdcEndpointIn, chunk) From a7c291acf0bc94533a0dd1c3986c87f6f64778d5 Mon Sep 17 00:00:00 2001 From: rdon Date: Thu, 4 Jun 2026 12:28:40 +0900 Subject: [PATCH 5/5] usb/cdc: document txActive ownership and lost-wakeup recheck --- src/machine/usb/cdc/usbcdc.go | 74 +++++++++++++++-------------------- 1 file changed, 32 insertions(+), 42 deletions(-) diff --git a/src/machine/usb/cdc/usbcdc.go b/src/machine/usb/cdc/usbcdc.go index 520a5d31a6..11b5a8f2e2 100644 --- a/src/machine/usb/cdc/usbcdc.go +++ b/src/machine/usb/cdc/usbcdc.go @@ -32,9 +32,13 @@ type USBCDC struct { // inflight is the number of bytes currently submitted to the USB IN endpoint. inflight atomic.Uint32 - // txActive serializes the USB CDC TX pump between Write and the TX - // completion handler. This matters on multicore targets where they can run - // concurrently. + // txActive is the TX-pump ownership flag: 0 = idle, 1 = a pump owns the TX + // path. Claimed once (kickTx, CAS 0->1), held across every in-flight packet + // and the TX-complete IRQ, and released only when the ring drains. While it + // is set, kickTx's CAS fails and no second pump starts, which serializes the + // pump against Write across cores. Same model as Linux NAPI_STATE_SCHED: + // held across completion, dropped only with a recheck + // (Documentation/networking/napi.rst). txActive atomic.Uint32 rbuf [1]byte @@ -113,7 +117,9 @@ func (usbcdc *USBCDC) Write(data []byte) (n int, err error) { return n, nil } -// kickTx starts the TX pump if it is idle. +// kickTx claims the TX pump for a producer. This CAS is the only start-from-idle +// edge; if it fails, a pump already owns the path and will drain what we just +// enqueued -- see the recheck in sendFromRing. func (usbcdc *USBCDC) kickTx() { if !usbcdc.txActive.CompareAndSwap(0, 1) { return @@ -122,6 +128,9 @@ func (usbcdc *USBCDC) kickTx() { } func (usbcdc *USBCDC) txhandler() { + // TX-complete IRQ. The pump is still owned here (txActive stayed 1 across the + // in-flight packet), so continue WITHOUT re-claiming -- pairs with the CAS in + // kickTx. A CAS here would see the flag already set, bail, and stall the chain. inflight := usbcdc.inflight.Load() if inflight == 0 { return @@ -131,54 +140,35 @@ func (usbcdc *USBCDC) txhandler() { usbcdc.sendFromRing() } -// sendFromRing submits one USB IN packet from the TX ring, or shuts down -// the TX pump if the ring is empty. -// -// The caller must own txActive (either having just acquired it via CAS, -// or continuing ownership from a previous packet submission). -// For kickTx: newly acquired via CAS. -// For txhandler: inherited from the previous packet submission. -// -// While the caller owns txActive: -// - If data is available, one packet is sent and txActive remains set -// for the in-flight packet (ownership continues to txhandler). -// - If the ring is empty, txActive is cleared and the pump shuts down -// (with a final re-check to avoid a missed wakeup from Write). +// sendFromRing runs one step of the TX pump: submit one IN packet, or release the +// pump if the ring is empty. Precondition: txActive == 1 (from kickTx's CAS, or +// still held from the previous packet when entered via txhandler). func (usbcdc *USBCDC) sendFromRing() { - // This loop sends at most one USB IN packet per entry. - // - // If the TX ring has data, one packet is handed to the USB hardware and - // txActive remains set until txhandler continues the pump after - // completion. - // - // If the TX ring appears empty, this is the shutdown path. Clear - // txActive, then re-check the ring to avoid missing a Write that added - // data while txActive was still set. for { d1, _ := usbcdc.tx.Peek() if len(d1) == 0 { + // Release the pump, then re-scan the ring: closes the missed-wakeup + // race where Write Put()s data and kickTx's CAS then fails (txActive + // still set), leaving the data for this pump to drain. The Store(0) + // is ordered before the Used() load -- and, in the producer, Put() + // before its CAS -- by the sequential consistency of Go's atomics, so + // neither side misses the other (assumes the ring's accesses are + // atomic too). cf. napi_complete_done() clearing NAPI_STATE_SCHED + // then rechecking. usbcdc.txActive.Store(0) - // Avoid a missed wakeup. - // - // A concurrent Write may have appended data while txActive was - // still set. In that case kickTx would see an active pump and - // return without starting another transfer. After clearing - // txActive, re-check the ring and reclaim txActive if this pump - // must continue. - switch { - case usbcdc.tx.Used() == 0: - return - case !usbcdc.txActive.CompareAndSwap(0, 1): - return + if usbcdc.tx.Used() == 0 { + return // ring empty and pump released; done } - // New data appeared during shutdown, and this caller reclaimed - // txActive. Continue and re-peek the ring. - continue + if !usbcdc.txActive.CompareAndSwap(0, 1) { + return // another producer re-claimed the pump; let it run + } + continue // re-claimed; re-peek and keep pumping } + chunk := d1[:min(usb.EndpointPacketSize, len(d1))] usbcdc.inflight.Store(uint32(len(chunk))) machine.SendUSBInPacket(cdcEndpointIn, chunk) - return + return // in flight; txActive stays set, txhandler continues } }