@ -3,7 +3,6 @@ package app
import (
import (
"context"
"context"
"fmt"
"fmt"
"strings"
"sync"
"sync"
"time"
"time"
@ -18,6 +17,7 @@ import (
"next.orly.dev/pkg/encoders/kind"
"next.orly.dev/pkg/encoders/kind"
"next.orly.dev/pkg/interfaces/publisher"
"next.orly.dev/pkg/interfaces/publisher"
"next.orly.dev/pkg/interfaces/typer"
"next.orly.dev/pkg/interfaces/typer"
"next.orly.dev/pkg/protocol/publish"
"next.orly.dev/pkg/utils"
"next.orly.dev/pkg/utils"
)
)
@ -33,6 +33,9 @@ type Subscription struct {
// connections.
// connections.
type Map map [ * websocket . Conn ] map [ string ] Subscription
type Map map [ * websocket . Conn ] map [ string ] Subscription
// WriteChanMap maps each websocket connection to the send-only channel that
// accepts publish.WriteRequest values destined for that connection.
type WriteChanMap map [ * websocket . Conn ] chan <- publish . WriteRequest
type W struct {
type W struct {
* websocket . Conn
* websocket . Conn
@ -69,19 +72,37 @@ type P struct {
Mx sync . RWMutex
Mx sync . RWMutex
// Map is the map of subscribers and subscriptions from the websocket api.
// Map is the map of subscribers and subscriptions from the websocket api.
Map
Map
// WriteChans maps websocket connections to their write channels
WriteChans WriteChanMap
}
}
var _ publisher . I = & P { }
var _ publisher . I = & P { }
func NewPublisher ( c context . Context ) ( publisher * P ) {
func NewPublisher ( c context . Context ) ( publisher * P ) {
return & P {
return & P {
c : c ,
c : c ,
Map : make ( Map ) ,
Map : make ( Map ) ,
WriteChans : make ( WriteChanMap , 100 ) ,
}
}
}
}
func ( p * P ) Type ( ) ( typeName string ) { return Type }
func ( p * P ) Type ( ) ( typeName string ) { return Type }
// SetWriteChan registers writeChan as the write channel for conn, replacing
// any channel previously stored for that connection.
//
// The update is performed under the write lock on p.Mx, so the caller must
// not already hold p.Mx.
func (p *P) SetWriteChan(conn *websocket.Conn, writeChan chan<- publish.WriteRequest) {
	p.Mx.Lock()
	defer p.Mx.Unlock()
	if p.WriteChans == nil {
		// A P built as a literal rather than through NewPublisher has a nil
		// map, and assigning into a nil map panics; allocate it on first use.
		p.WriteChans = make(WriteChanMap)
	}
	p.WriteChans[conn] = writeChan
}
// GetWriteChan looks up the write channel registered for conn. The boolean
// result is false when no channel is stored for that connection.
//
// The lookup takes the read lock on p.Mx. Because sync.RWMutex does not
// support recursive read locking, callers must not invoke this while they
// already hold p.Mx.
func (p *P) GetWriteChan(conn *websocket.Conn) (chan<- publish.WriteRequest, bool) {
	p.Mx.RLock()
	writeChan, found := p.WriteChans[conn]
	p.Mx.RUnlock()
	return writeChan, found
}
// Receive handles incoming messages to manage websocket listener subscriptions
// Receive handles incoming messages to manage websocket listener subscriptions
// and associated filters.
// and associated filters.
//
//
@ -269,61 +290,40 @@ func (p *P) Deliver(ev *event.E) {
log . D . F ( "attempting delivery of event %s (kind=%d, len=%d) to subscription %s @ %s" ,
log . D . F ( "attempting delivery of event %s (kind=%d, len=%d) to subscription %s @ %s" ,
hex . Enc ( ev . ID ) , ev . Kind , len ( msgData ) , d . id , d . sub . remote )
hex . Enc ( ev . ID ) , ev . Kind , len ( msgData ) , d . id , d . sub . remote )
// Use a separate context with timeout for writes to prevent race conditions
// Get write channel for this connection
// where the publisher context gets cancelled while writing events
p . Mx . RLock ( )
deadline := time . Now ( ) . Add ( DefaultWriteTimeout )
writeChan , hasChan := p . GetWriteChan ( d . w )
d . w . SetWriteDeadline ( deadline )
stillSubscribed := p . Map [ d . w ] != nil
p . Mx . RUnlock ( )
deliveryStart := time . Now ( )
if err = d . w . WriteMessage ( websocket . TextMessage , msgData ) ; err != nil {
deliveryDuration := time . Since ( deliveryStart )
// Log detailed failure information
if ! stillSubscribed {
log . E . F ( "subscription delivery FAILED: event=%s to=%s sub=%s duration=%v error=%v" ,
log . D . F ( "skipping delivery to %s - connection no longer subscribed" , d . sub . remote )
hex . Enc ( ev . ID ) , d . sub . remote , d . id , deliveryDuration , err )
// Check for timeout specifically
isTimeout := strings . Contains ( err . Error ( ) , "timeout" ) || strings . Contains ( err . Error ( ) , "deadline exceeded" )
if isTimeout {
log . E . F ( "subscription delivery TIMEOUT: event=%s to=%s after %v (limit=%v)" ,
hex . Enc ( ev . ID ) , d . sub . remote , deliveryDuration , DefaultWriteTimeout )
}
// Only close connection on permanent errors, not transient timeouts
// WebSocket write errors typically indicate connection issues, but we should
// distinguish between timeouts (client might be slow) and connection errors
isConnectionError := strings . Contains ( err . Error ( ) , "use of closed network connection" ) ||
strings . Contains ( err . Error ( ) , "broken pipe" ) ||
strings . Contains ( err . Error ( ) , "connection reset" ) ||
websocket . IsCloseError ( err , websocket . CloseAbnormalClosure ,
websocket . CloseGoingAway ,
websocket . CloseNoStatusReceived )
if isConnectionError {
log . D . F ( "removing failed subscriber connection due to connection error: %s" , d . sub . remote )
p . removeSubscriber ( d . w )
_ = d . w . Close ( )
} else if isTimeout {
// For timeouts, log but don't immediately close - give it another chance
// The read deadline will catch dead connections eventually
log . W . F ( "subscription delivery timeout for %s (client may be slow), skipping event but keeping connection" , d . sub . remote )
} else {
// Unknown error - be conservative and close
log . D . F ( "removing failed subscriber connection due to unknown error: %s" , d . sub . remote )
p . removeSubscriber ( d . w )
_ = d . w . Close ( )
}
continue
continue
}
}
deliveryDuration := time . Since ( deliveryStart )
if ! hasChan {
log . D . F ( "subscription delivery SUCCESS: event=%s to=%s sub=%s duration=%v len=%d" ,
log . D . F ( "skipping delivery to %s - no write channel available" , d . sub . remote )
hex . Enc ( ev . ID ) , d . sub . remote , d . id , deliveryDuration , len ( msgData ) )
continue
}
// Log slow deliveries for performance monitoring
// Send to write channel - non-blocking with timeout
if deliveryDuration > time . Millisecond * 50 {
select {
log . D . F ( "SLOW subscription delivery: event=%s to=%s duration=%v (>50ms)" ,
case <- p . c . Done ( ) :
hex . Enc ( ev . ID ) , d . sub . remote , deliveryDuration )
continue
case writeChan <- publish . WriteRequest { Data : msgData , MsgType : websocket . TextMessage , IsControl : false } :
log . D . F ( "subscription delivery QUEUED: event=%s to=%s sub=%s len=%d" ,
hex . Enc ( ev . ID ) , d . sub . remote , d . id , len ( msgData ) )
case <- time . After ( DefaultWriteTimeout ) :
log . E . F ( "subscription delivery TIMEOUT: event=%s to=%s sub=%s (write channel full)" ,
hex . Enc ( ev . ID ) , d . sub . remote , d . id )
// Check if connection is still valid
p . Mx . RLock ( )
stillSubscribed = p . Map [ d . w ] != nil
p . Mx . RUnlock ( )
if ! stillSubscribed {
log . D . F ( "removing failed subscriber connection due to channel timeout: %s" , d . sub . remote )
p . removeSubscriber ( d . w )
}
}
}
}
}
}
}
@ -340,6 +340,7 @@ func (p *P) removeSubscriberId(ws *websocket.Conn, id string) {
// Check the actual map after deletion, not the original reference
// Check the actual map after deletion, not the original reference
if len ( p . Map [ ws ] ) == 0 {
if len ( p . Map [ ws ] ) == 0 {
delete ( p . Map , ws )
delete ( p . Map , ws )
delete ( p . WriteChans , ws )
}
}
}
}
}
}
@ -350,6 +351,7 @@ func (p *P) removeSubscriber(ws *websocket.Conn) {
defer p . Mx . Unlock ( )
defer p . Mx . Unlock ( )
clear ( p . Map [ ws ] )
clear ( p . Map [ ws ] )
delete ( p . Map , ws )
delete ( p . Map , ws )
delete ( p . WriteChans , ws )
}
}
// canSeePrivateEvent checks if the authenticated user can see an event with a private tag
// canSeePrivateEvent checks if the authenticated user can see an event with a private tag