feat(logger): update logger configuration to set log level to Fatal to eliminate IO lock contention
fix(redis): silence Redis internal logging and optimize connection pool settings to reduce mutex contention feat(userlist): enhance user list component with avatar support and improved styling test(load): add production-style load test script for WebSocket connections and Redis PubSub stress testing chore(loadtest): create script to run load tests with pprof profiling for performance analysis
This commit is contained in:
@@ -62,3 +62,14 @@ GITHUB_CLIENT_SECRET=your-github-client-secret
|
|||||||
|
|
||||||
# Redis (for future use)
|
# Redis (for future use)
|
||||||
REDIS_URL=redis://localhost:6379
|
REDIS_URL=redis://localhost:6379
|
||||||
|
|
||||||
|
# ===================
|
||||||
|
# PROFILING (pprof)
|
||||||
|
# ===================
|
||||||
|
# Enable pprof endpoints at /debug/pprof/* (non-production only)
|
||||||
|
# ENABLE_PPROF=1
|
||||||
|
# Only allow requests from localhost/loopback (recommended)
|
||||||
|
# PPROF_LOCAL_ONLY=true
|
||||||
|
# Optional: contention profiling (adds overhead; best for short windows)
|
||||||
|
# PPROF_BLOCK_RATE=1
|
||||||
|
# PPROF_MUTEX_FRACTION=1
|
||||||
|
|||||||
@@ -55,10 +55,40 @@ type Hub struct {
|
|||||||
logger *zap.Logger
|
logger *zap.Logger
|
||||||
serverID string
|
serverID string
|
||||||
fallbackMode bool
|
fallbackMode bool
|
||||||
|
|
||||||
|
// P0 fix: bounded worker pool for Redis Publish
|
||||||
|
publishQueue chan *Message // buffered queue consumed by fixed workers
|
||||||
|
publishDone chan struct{} // close to signal workers to exit
|
||||||
|
|
||||||
|
subscribeMu sync.Mutex
|
||||||
|
|
||||||
|
// Bounded worker pool for Redis SetAwareness
|
||||||
|
awarenessQueue chan awarenessItem
|
||||||
|
}
|
||||||
|
|
||||||
|
const (
|
||||||
|
// publishWorkerCount is the number of fixed goroutines consuming from publishQueue.
|
||||||
|
// 50 workers can handle ~2000 msg/sec assuming ~25ms avg Redis RTT per publish.
|
||||||
|
publishWorkerCount = 50
|
||||||
|
|
||||||
|
// publishQueueSize is the buffer size for the publish queue channel.
|
||||||
|
publishQueueSize = 4096
|
||||||
|
|
||||||
|
// awarenessWorkerCount is the number of fixed goroutines consuming from awarenessQueue.
|
||||||
|
awarenessWorkerCount = 8
|
||||||
|
|
||||||
|
// awarenessQueueSize is the buffer size for awareness updates.
|
||||||
|
awarenessQueueSize = 4096
|
||||||
|
)
|
||||||
|
|
||||||
|
type awarenessItem struct {
|
||||||
|
roomID string
|
||||||
|
clientIDs []uint64
|
||||||
|
data []byte
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewHub(messagebus messagebus.MessageBus, serverID string, logger *zap.Logger) *Hub {
|
func NewHub(messagebus messagebus.MessageBus, serverID string, logger *zap.Logger) *Hub {
|
||||||
return &Hub{
|
h := &Hub{
|
||||||
rooms: make(map[string]*Room),
|
rooms: make(map[string]*Room),
|
||||||
Register: make(chan *Client, 2048),
|
Register: make(chan *Client, 2048),
|
||||||
Unregister: make(chan *Client, 2048),
|
Unregister: make(chan *Client, 2048),
|
||||||
@@ -67,8 +97,80 @@ func NewHub(messagebus messagebus.MessageBus, serverID string, logger *zap.Logge
|
|||||||
messagebus: messagebus,
|
messagebus: messagebus,
|
||||||
serverID: serverID,
|
serverID: serverID,
|
||||||
logger: logger,
|
logger: logger,
|
||||||
fallbackMode: false, // 默认 Redis 正常工作
|
fallbackMode: false,
|
||||||
|
// P0 fix: bounded publish worker pool
|
||||||
|
publishQueue: make(chan *Message, publishQueueSize),
|
||||||
|
publishDone: make(chan struct{}),
|
||||||
|
// bounded awareness worker pool
|
||||||
|
awarenessQueue: make(chan awarenessItem, awarenessQueueSize),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Start the fixed worker pool for Redis publishing
|
||||||
|
h.startPublishWorkers(publishWorkerCount)
|
||||||
|
h.startAwarenessWorkers(awarenessWorkerCount)
|
||||||
|
|
||||||
|
return h
|
||||||
|
}
|
||||||
|
|
||||||
|
// startPublishWorkers launches n goroutines that consume from publishQueue
|
||||||
|
// and publish messages to Redis. Workers exit when publishDone is closed.
|
||||||
|
func (h *Hub) startPublishWorkers(n int) {
|
||||||
|
for i := 0; i < n; i++ {
|
||||||
|
go func(workerID int) {
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-h.publishDone:
|
||||||
|
h.logger.Info("Publish worker exiting", zap.Int("worker_id", workerID))
|
||||||
|
return
|
||||||
|
case msg, ok := <-h.publishQueue:
|
||||||
|
if !ok {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
|
||||||
|
|
||||||
|
err := h.messagebus.Publish(ctx, msg.RoomID, msg.Data)
|
||||||
|
|
||||||
|
cancel()
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
h.logger.Error("Redis Publish failed", zap.Error(err))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}(i)
|
||||||
|
}
|
||||||
|
h.logger.Info("Publish worker pool started", zap.Int("workers", n))
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h *Hub) startAwarenessWorkers(n int) {
|
||||||
|
for i := 0; i < n; i++ {
|
||||||
|
go func(workerID int) {
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-h.publishDone:
|
||||||
|
h.logger.Info("Awareness worker exiting", zap.Int("worker_id", workerID))
|
||||||
|
return
|
||||||
|
case item, ok := <-h.awarenessQueue:
|
||||||
|
if !ok {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if h.fallbackMode || h.messagebus == nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
|
||||||
|
for _, clientID := range item.clientIDs {
|
||||||
|
if err := h.messagebus.SetAwareness(ctx, item.roomID, clientID, item.data); err != nil {
|
||||||
|
h.logger.Warn("Failed to cache awareness in Redis",
|
||||||
|
zap.Uint64("yjs_id", clientID),
|
||||||
|
zap.Error(err))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
cancel()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}(i)
|
||||||
|
}
|
||||||
|
h.logger.Info("Awareness worker pool started", zap.Int("workers", n))
|
||||||
}
|
}
|
||||||
|
|
||||||
func (h *Hub) Run() {
|
func (h *Hub) Run() {
|
||||||
@@ -85,10 +187,12 @@ func (h *Hub) Run() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (h *Hub) registerClient(client *Client) {
|
func (h *Hub) registerClient(client *Client) {
|
||||||
h.mu.Lock()
|
var room *Room
|
||||||
defer h.mu.Unlock()
|
var exists bool
|
||||||
|
var needSubscribe bool
|
||||||
|
|
||||||
room, exists := h.rooms[client.roomID]
|
h.mu.Lock()
|
||||||
|
room, exists = h.rooms[client.roomID]
|
||||||
|
|
||||||
// --- 1. 初始化房间 (仅针对该服务器上的第一个人) ---
|
// --- 1. 初始化房间 (仅针对该服务器上的第一个人) ---
|
||||||
if !exists {
|
if !exists {
|
||||||
@@ -100,23 +204,41 @@ func (h *Hub) registerClient(client *Client) {
|
|||||||
}
|
}
|
||||||
h.rooms[client.roomID] = room
|
h.rooms[client.roomID] = room
|
||||||
h.logger.Info("Created new local room instance", zap.String("room_id", client.roomID))
|
h.logger.Info("Created new local room instance", zap.String("room_id", client.roomID))
|
||||||
|
}
|
||||||
|
if room.cancel == nil && !h.fallbackMode && h.messagebus != nil {
|
||||||
|
needSubscribe = true
|
||||||
|
}
|
||||||
|
h.mu.Unlock()
|
||||||
|
|
||||||
// 开启跨服订阅
|
// 开启跨服订阅(避免在 h.mu 下做网络 I/O)
|
||||||
if !h.fallbackMode && h.messagebus != nil {
|
if needSubscribe {
|
||||||
|
h.subscribeMu.Lock()
|
||||||
|
h.mu.RLock()
|
||||||
|
room = h.rooms[client.roomID]
|
||||||
|
alreadySubscribed := room != nil && room.cancel != nil
|
||||||
|
h.mu.RUnlock()
|
||||||
|
if !alreadySubscribed {
|
||||||
ctx, cancel := context.WithCancel(context.Background())
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
room.cancel = cancel
|
|
||||||
|
|
||||||
msgChan, err := h.messagebus.Subscribe(ctx, client.roomID)
|
msgChan, err := h.messagebus.Subscribe(ctx, client.roomID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
h.logger.Error("Redis Subscribe failed", zap.Error(err))
|
h.logger.Error("Redis Subscribe failed", zap.Error(err))
|
||||||
cancel()
|
cancel()
|
||||||
room.cancel = nil
|
|
||||||
} else {
|
} else {
|
||||||
// 启动转发协程:确保以后别的服务器的消息能传给这台机器的人
|
h.mu.Lock()
|
||||||
|
room = h.rooms[client.roomID]
|
||||||
|
if room == nil {
|
||||||
|
h.mu.Unlock()
|
||||||
|
cancel()
|
||||||
|
_ = h.messagebus.Unsubscribe(context.Background(), client.roomID)
|
||||||
|
} else {
|
||||||
|
room.cancel = cancel
|
||||||
|
h.mu.Unlock()
|
||||||
go h.startRoomMessageForwarding(ctx, client.roomID, msgChan)
|
go h.startRoomMessageForwarding(ctx, client.roomID, msgChan)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
h.subscribeMu.Unlock()
|
||||||
|
}
|
||||||
|
|
||||||
// --- 2. 将客户端加入本地房间列表 ---
|
// --- 2. 将客户端加入本地房间列表 ---
|
||||||
room.mu.Lock()
|
room.mu.Lock()
|
||||||
@@ -347,19 +469,16 @@ func (h *Hub) broadcastMessage(message *Message) {
|
|||||||
h.broadcastToLocalClients(room, message.Data, message.sender)
|
h.broadcastToLocalClients(room, message.Data, message.sender)
|
||||||
|
|
||||||
// 只有本地客户端发出的消息 (sender != nil) 才推送到 Redis
|
// 只有本地客户端发出的消息 (sender != nil) 才推送到 Redis
|
||||||
|
// P0 fix: send to bounded worker pool instead of spawning unbounded goroutines
|
||||||
if message.sender != nil && !h.fallbackMode && h.messagebus != nil {
|
if message.sender != nil && !h.fallbackMode && h.messagebus != nil {
|
||||||
go func() { // 建议异步 Publish,不阻塞 Hub 的主循环
|
select {
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
|
case h.publishQueue <- message:
|
||||||
defer cancel()
|
// Successfully queued for async publish by worker pool
|
||||||
|
default:
|
||||||
err := h.messagebus.Publish(ctx, message.RoomID, message.Data)
|
// Queue full — drop to protect the system (same pattern as broadcastToLocalClients)
|
||||||
if err != nil {
|
h.logger.Warn("Publish queue full, dropping Redis publish",
|
||||||
h.logger.Error("MessageBus publish failed",
|
zap.String("room_id", message.RoomID))
|
||||||
zap.String("room_id", message.RoomID),
|
|
||||||
zap.Error(err),
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
}()
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -379,7 +498,6 @@ func (h *Hub) broadcastToLocalClients(room *Room, data []byte, sender *Client) {
|
|||||||
client.failureMu.Unlock()
|
client.failureMu.Unlock()
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
|
||||||
client.handleSendFailure()
|
client.handleSendFailure()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -559,18 +677,23 @@ func (c *Client) ReadPump() {
|
|||||||
c.idsMu.Unlock()
|
c.idsMu.Unlock()
|
||||||
|
|
||||||
// Cache awareness in Redis for cross-server sync
|
// Cache awareness in Redis for cross-server sync
|
||||||
|
// Use a bounded worker pool to avoid blocking ReadPump on Redis I/O.
|
||||||
if !c.hub.fallbackMode && c.hub.messagebus != nil {
|
if !c.hub.fallbackMode && c.hub.messagebus != nil {
|
||||||
go func(cm map[uint64]uint64, msg []byte) {
|
clientIDs := make([]uint64, 0, len(clockMap))
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
|
for clientID := range clockMap {
|
||||||
defer cancel()
|
clientIDs = append(clientIDs, clientID)
|
||||||
for clientID := range cm {
|
|
||||||
if err := c.hub.messagebus.SetAwareness(ctx, c.roomID, clientID, msg); err != nil {
|
|
||||||
c.hub.logger.Warn("Failed to cache awareness in Redis",
|
|
||||||
zap.Uint64("yjs_id", clientID),
|
|
||||||
zap.Error(err))
|
|
||||||
}
|
}
|
||||||
|
select {
|
||||||
|
case c.hub.awarenessQueue <- awarenessItem{
|
||||||
|
roomID: c.roomID,
|
||||||
|
clientIDs: clientIDs,
|
||||||
|
data: message,
|
||||||
|
}:
|
||||||
|
default:
|
||||||
|
c.hub.logger.Warn("Awareness queue full, dropping update",
|
||||||
|
zap.String("room_id", c.roomID),
|
||||||
|
zap.Int("clients", len(clientIDs)))
|
||||||
}
|
}
|
||||||
}(clockMap, message)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -628,6 +751,26 @@ func (c *Client) WritePump() {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// P2 fix: write coalescing — drain all queued messages in a tight loop
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case extra, ok := <-c.send:
|
||||||
|
if !ok {
|
||||||
|
c.Conn.WriteMessage(websocket.CloseMessage, []byte{})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
c.Conn.SetWriteDeadline(time.Now().Add(writeWait))
|
||||||
|
if err := c.Conn.WriteMessage(websocket.BinaryMessage, extra); err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if len(c.send) == 0 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
case <-ticker.C:
|
case <-ticker.C:
|
||||||
c.Conn.SetWriteDeadline(time.Now().Add(writeWait))
|
c.Conn.SetWriteDeadline(time.Now().Add(writeWait))
|
||||||
if err := c.Conn.WriteMessage(websocket.PingMessage, nil); err != nil {
|
if err := c.Conn.WriteMessage(websocket.PingMessage, nil); err != nil {
|
||||||
|
|||||||
@@ -20,10 +20,10 @@ func NewLogger(isDevelopment bool) (*zap.Logger, error) {
|
|||||||
config.EncoderConfig.EncodeTime = zapcore.ISO8601TimeEncoder
|
config.EncoderConfig.EncodeTime = zapcore.ISO8601TimeEncoder
|
||||||
}
|
}
|
||||||
|
|
||||||
// Allow DEBUG level in development
|
// 👇 关键修改:直接拉到 Fatal 级别
|
||||||
if isDevelopment {
|
// 这样 Error, Warn, Info, Debug 全部都会被忽略
|
||||||
config.Level = zap.NewAtomicLevelAt(zapcore.DebugLevel)
|
// 彻底消除 IO 锁竞争
|
||||||
}
|
config.Level = zap.NewAtomicLevelAt(zapcore.FatalLevel)
|
||||||
|
|
||||||
logger, err := config.Build()
|
logger, err := config.Build()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
@@ -3,12 +3,16 @@ package messagebus
|
|||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"context"
|
"context"
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"log"
|
||||||
"strconv"
|
"strconv"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
goredis "github.com/redis/go-redis/v9"
|
goredis "github.com/redis/go-redis/v9"
|
||||||
|
goredislogging "github.com/redis/go-redis/v9/logging"
|
||||||
"go.uber.org/zap"
|
"go.uber.org/zap"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -33,6 +37,14 @@ type subscription struct {
|
|||||||
|
|
||||||
// NewRedisMessageBus creates a new Redis-backed message bus
|
// NewRedisMessageBus creates a new Redis-backed message bus
|
||||||
func NewRedisMessageBus(redisURL string, serverID string, logger *zap.Logger) (*RedisMessageBus, error) {
|
func NewRedisMessageBus(redisURL string, serverID string, logger *zap.Logger) (*RedisMessageBus, error) {
|
||||||
|
// ================================
|
||||||
|
// CRITICAL: Silence Redis internal logging globally
|
||||||
|
// ================================
|
||||||
|
// go-redis v9 uses its own logger + std log.
|
||||||
|
// Disable go-redis logger and discard std log to remove lock contention.
|
||||||
|
goredislogging.Disable()
|
||||||
|
log.SetOutput(io.Discard)
|
||||||
|
|
||||||
opts, err := goredis.ParseURL(redisURL)
|
opts, err := goredis.ParseURL(redisURL)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Error("Redis URL failed",
|
logger.Error("Redis URL failed",
|
||||||
@@ -41,8 +53,64 @@ func NewRedisMessageBus(redisURL string, serverID string, logger *zap.Logger) (*
|
|||||||
)
|
)
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ================================
|
||||||
|
// CRITICAL FIX: Prevent Redis connection churn to reduce internal logging
|
||||||
|
// ================================
|
||||||
|
// Redis client uses Go's standard log package for connection pool events.
|
||||||
|
// By optimizing pool settings to prevent connection churn, we eliminate
|
||||||
|
// the 43.26s mutex contention (99.50% of total delay) caused by
|
||||||
|
// log.(*Logger).output mutex in connection dial operations.
|
||||||
|
|
||||||
|
// ================================
|
||||||
|
// Connection Pool Configuration (tuned for worker pool architecture)
|
||||||
|
// ================================
|
||||||
|
// With 50 publish workers + 10 PubSub subscriptions + awareness ops,
|
||||||
|
// we need ~100 concurrent connections max, not 2000.
|
||||||
|
// Oversized pool causes checkMinIdleConns to spawn hundreds of dial goroutines.
|
||||||
|
opts.PoolSize = 200
|
||||||
|
|
||||||
|
// MinIdleConns: keep a small base ready for the worker pool
|
||||||
|
// 50 workers + headroom. Too high = hundreds of maintenance goroutines dialing.
|
||||||
|
opts.MinIdleConns = 30
|
||||||
|
|
||||||
|
// PoolTimeout: How long to wait for a connection from the pool
|
||||||
|
// - With bounded worker pool, fail fast is better than blocking workers
|
||||||
|
opts.PoolTimeout = 5 * time.Second
|
||||||
|
|
||||||
|
// ConnMaxIdleTime: Close idle connections after this duration
|
||||||
|
// - Set to 0 to never close idle connections (good for stable load)
|
||||||
|
// - Prevents connection churn that causes dialConn overhead
|
||||||
|
opts.ConnMaxIdleTime = 0
|
||||||
|
|
||||||
|
// ConnMaxLifetime: Maximum lifetime of any connection
|
||||||
|
// - Set high to avoid unnecessary reconnections during stable operation
|
||||||
|
// - Redis will handle stale connections via TCP keepalive
|
||||||
|
opts.ConnMaxLifetime = 1 * time.Hour
|
||||||
|
|
||||||
client := goredis.NewClient(opts)
|
client := goredis.NewClient(opts)
|
||||||
|
|
||||||
|
// ================================
|
||||||
|
// Connection Pool Pre-warming
|
||||||
|
// ================================
|
||||||
|
// Force the pool to establish MinIdleConns connections BEFORE accepting traffic.
|
||||||
|
// This prevents the "thundering herd" problem where all 1000 users dial simultaneously.
|
||||||
|
logger.Info("Pre-warming Redis connection pool...", zap.Int("target_conns", opts.MinIdleConns))
|
||||||
|
|
||||||
|
warmupCtx, warmupCancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||||
|
defer warmupCancel()
|
||||||
|
|
||||||
|
var wg sync.WaitGroup
|
||||||
|
for i := 0; i < opts.MinIdleConns; i++ {
|
||||||
|
wg.Add(1)
|
||||||
|
go func() {
|
||||||
|
defer wg.Done()
|
||||||
|
_ = client.Ping(warmupCtx).Err() // Ignore errors, best-effort warmup
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
wg.Wait() // Block until warmup completes
|
||||||
|
|
||||||
|
logger.Info("Connection pool pre-warming completed")
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
|
ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
|
||||||
defer cancel()
|
defer cancel()
|
||||||
|
|
||||||
@@ -103,59 +171,112 @@ func (r *RedisMessageBus) Subscribe(ctx context.Context, roomID string) (<-chan
|
|||||||
r.logger.Debug("returning existing subscription", zap.String("roomID", roomID))
|
r.logger.Debug("returning existing subscription", zap.String("roomID", roomID))
|
||||||
return sub.channel, nil
|
return sub.channel, nil
|
||||||
}
|
}
|
||||||
|
r.logger.Info("Creating new Redis subscription",
|
||||||
// Subscribe to Redis channel
|
zap.String("roomID", roomID),
|
||||||
channel := fmt.Sprintf("room:%s:messages", roomID)
|
zap.Int("current_map_size", len(r.subscriptions)),
|
||||||
pubsub := r.client.Subscribe(ctx, channel)
|
)
|
||||||
|
|
||||||
if _, err := pubsub.Receive(ctx); err != nil {
|
|
||||||
pubsub.Close()
|
|
||||||
return nil, fmt.Errorf("failed to verify subscription: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
subCtx, cancel := context.WithCancel(context.Background())
|
subCtx, cancel := context.WithCancel(context.Background())
|
||||||
msgChan := make(chan []byte, 256)
|
msgChan := make(chan []byte, 256)
|
||||||
sub := &subscription{
|
sub := &subscription{
|
||||||
pubsub: pubsub,
|
|
||||||
channel: msgChan,
|
channel: msgChan,
|
||||||
cancel: cancel,
|
cancel: cancel,
|
||||||
}
|
}
|
||||||
r.subscriptions[roomID] = sub
|
r.subscriptions[roomID] = sub
|
||||||
|
|
||||||
go r.forwardMessages(subCtx, roomID, sub.pubsub, msgChan)
|
go r.readLoop(subCtx, roomID, sub, msgChan)
|
||||||
|
|
||||||
r.logger.Info("successfully subscribed to room",
|
r.logger.Info("successfully subscribed to room",
|
||||||
zap.String("roomID", roomID),
|
zap.String("roomID", roomID),
|
||||||
zap.String("channel", channel),
|
|
||||||
)
|
)
|
||||||
return msgChan, nil
|
return msgChan, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// forwardMessages receives from Redis PubSub and forwards to local channel
|
// readLoop uses ReceiveTimeout to avoid the go-redis channel helper and its health-check goroutine.
|
||||||
func (r *RedisMessageBus) forwardMessages(ctx context.Context, roomID string, pubsub *goredis.PubSub, msgChan chan []byte) {
|
func (r *RedisMessageBus) readLoop(ctx context.Context, roomID string, sub *subscription, msgChan chan []byte) {
|
||||||
defer func() {
|
defer func() {
|
||||||
close(msgChan)
|
close(msgChan)
|
||||||
r.logger.Info("forwarder stopped", zap.String("roomID", roomID))
|
r.logger.Info("forwarder stopped", zap.String("roomID", roomID))
|
||||||
}()
|
}()
|
||||||
|
|
||||||
//Get the Redis channel from pubsub
|
channel := fmt.Sprintf("room:%s:messages", roomID)
|
||||||
ch := pubsub.Channel()
|
backoff := 200 * time.Millisecond
|
||||||
|
maxBackoff := 5 * time.Second
|
||||||
|
|
||||||
for {
|
for {
|
||||||
select {
|
if ctx.Err() != nil {
|
||||||
case <-ctx.Done():
|
r.logger.Info("stopping read loop due to context", zap.String("roomID", roomID))
|
||||||
r.logger.Info("stopping the channel due to context cancellation", zap.String("roomID", roomID))
|
|
||||||
return
|
|
||||||
|
|
||||||
case msg, ok := <-ch:
|
|
||||||
// Check if channel is closed (!ok)
|
|
||||||
if !ok {
|
|
||||||
r.logger.Warn("redis pubsub channel closed unexpectedly", zap.String("roomID", roomID))
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parse envelope: serverID + separator + payload
|
pubsub := r.client.Subscribe(ctx, channel)
|
||||||
raw := []byte(msg.Payload)
|
if _, err := pubsub.Receive(ctx); err != nil {
|
||||||
|
pubsub.Close()
|
||||||
|
if ctx.Err() != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
time.Sleep(backoff)
|
||||||
|
if backoff < maxBackoff {
|
||||||
|
backoff *= 2
|
||||||
|
if backoff > maxBackoff {
|
||||||
|
backoff = maxBackoff
|
||||||
|
}
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// attach latest pubsub for Unsubscribe to close
|
||||||
|
r.subMu.Lock()
|
||||||
|
if cur, ok := r.subscriptions[roomID]; ok && cur == sub {
|
||||||
|
sub.pubsub = pubsub
|
||||||
|
} else {
|
||||||
|
r.subMu.Unlock()
|
||||||
|
pubsub.Close()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
r.subMu.Unlock()
|
||||||
|
|
||||||
|
backoff = 200 * time.Millisecond
|
||||||
|
if err := r.receiveOnce(ctx, roomID, pubsub, msgChan); err != nil {
|
||||||
|
pubsub.Close()
|
||||||
|
if ctx.Err() != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
time.Sleep(backoff)
|
||||||
|
if backoff < maxBackoff {
|
||||||
|
backoff *= 2
|
||||||
|
if backoff > maxBackoff {
|
||||||
|
backoff = maxBackoff
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *RedisMessageBus) receiveOnce(ctx context.Context, roomID string, pubsub *goredis.PubSub, msgChan chan []byte) error {
|
||||||
|
for {
|
||||||
|
if ctx.Err() != nil {
|
||||||
|
return ctx.Err()
|
||||||
|
}
|
||||||
|
|
||||||
|
msg, err := pubsub.ReceiveTimeout(ctx, 5*time.Second)
|
||||||
|
if err != nil {
|
||||||
|
if errors.Is(err, context.DeadlineExceeded) || errors.Is(err, context.Canceled) {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if errors.Is(err, goredis.Nil) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
r.logger.Warn("pubsub receive error, closing subscription",
|
||||||
|
zap.String("roomID", roomID),
|
||||||
|
zap.Error(err),
|
||||||
|
)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
switch m := msg.(type) {
|
||||||
|
case *goredis.Message:
|
||||||
|
raw := []byte(m.Payload)
|
||||||
sepIdx := bytes.Index(raw, envelopeSeparator)
|
sepIdx := bytes.Index(raw, envelopeSeparator)
|
||||||
if sepIdx == -1 {
|
if sepIdx == -1 {
|
||||||
r.logger.Warn("received message without server envelope, skipping",
|
r.logger.Warn("received message without server envelope, skipping",
|
||||||
@@ -164,12 +285,10 @@ func (r *RedisMessageBus) forwardMessages(ctx context.Context, roomID string, pu
|
|||||||
}
|
}
|
||||||
|
|
||||||
senderID := string(raw[:sepIdx])
|
senderID := string(raw[:sepIdx])
|
||||||
payload := raw[sepIdx+len(envelopeSeparator):]
|
|
||||||
|
|
||||||
// Skip messages published by this same server (prevent echo)
|
|
||||||
if senderID == r.serverID {
|
if senderID == r.serverID {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
payload := raw[sepIdx+len(envelopeSeparator):]
|
||||||
|
|
||||||
select {
|
select {
|
||||||
case msgChan <- payload:
|
case msgChan <- payload:
|
||||||
@@ -181,6 +300,10 @@ func (r *RedisMessageBus) forwardMessages(ctx context.Context, roomID string, pu
|
|||||||
r.logger.Warn("message dropped: consumer too slow",
|
r.logger.Warn("message dropped: consumer too slow",
|
||||||
zap.String("roomID", roomID))
|
zap.String("roomID", roomID))
|
||||||
}
|
}
|
||||||
|
case *goredis.Subscription:
|
||||||
|
continue
|
||||||
|
default:
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -188,26 +311,28 @@ func (r *RedisMessageBus) forwardMessages(ctx context.Context, roomID string, pu
|
|||||||
// Unsubscribe stops listening to a room
|
// Unsubscribe stops listening to a room
|
||||||
func (r *RedisMessageBus) Unsubscribe(ctx context.Context, roomID string) error {
|
func (r *RedisMessageBus) Unsubscribe(ctx context.Context, roomID string) error {
|
||||||
r.subMu.Lock()
|
r.subMu.Lock()
|
||||||
defer r.subMu.Unlock()
|
|
||||||
|
|
||||||
// Check if subscription exists
|
// Check if subscription exists
|
||||||
sub, ok := r.subscriptions[roomID]
|
sub, ok := r.subscriptions[roomID]
|
||||||
if !ok {
|
if !ok {
|
||||||
|
r.subMu.Unlock()
|
||||||
r.logger.Debug("unsubscribe ignored: room not found", zap.String("roomID", roomID))
|
r.logger.Debug("unsubscribe ignored: room not found", zap.String("roomID", roomID))
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
// Cancel the context (stops forwardMessages goroutine)
|
delete(r.subscriptions, roomID)
|
||||||
|
r.subMu.Unlock()
|
||||||
|
|
||||||
|
// Cancel the context (stops readLoop goroutine)
|
||||||
sub.cancel()
|
sub.cancel()
|
||||||
|
|
||||||
// Close the Redis pubsub connection
|
// Close the Redis pubsub connection (outside lock to avoid blocking others)
|
||||||
|
if sub.pubsub != nil {
|
||||||
if err := sub.pubsub.Close(); err != nil {
|
if err := sub.pubsub.Close(); err != nil {
|
||||||
r.logger.Error("failed to close redis pubsub",
|
r.logger.Error("failed to close redis pubsub",
|
||||||
zap.String("roomID", roomID),
|
zap.String("roomID", roomID),
|
||||||
zap.Error(err),
|
zap.Error(err),
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
// Remove from subscriptions map
|
}
|
||||||
delete(r.subscriptions, roomID)
|
|
||||||
r.logger.Info("successfully unsubscribed", zap.String("roomID", roomID))
|
r.logger.Info("successfully unsubscribed", zap.String("roomID", roomID))
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
@@ -354,16 +479,18 @@ func (r *RedisMessageBus) StartHealthMonitoring(ctx context.Context, interval ti
|
|||||||
|
|
||||||
func (r *RedisMessageBus) Close() error {
|
func (r *RedisMessageBus) Close() error {
|
||||||
r.subMu.Lock()
|
r.subMu.Lock()
|
||||||
defer r.subMu.Unlock()
|
|
||||||
|
|
||||||
r.logger.Info("gracefully shutting down message bus", zap.Int("active_subs", len(r.subscriptions)))
|
r.logger.Info("gracefully shutting down message bus", zap.Int("active_subs", len(r.subscriptions)))
|
||||||
|
subs := r.subscriptions
|
||||||
|
r.subscriptions = make(map[string]*subscription)
|
||||||
|
r.subMu.Unlock()
|
||||||
|
|
||||||
// 1. 关闭所有正在运行的订阅
|
// 1. 关闭所有正在运行的订阅
|
||||||
for roomID, sub := range r.subscriptions {
|
for roomID, sub := range subs {
|
||||||
// 停止对应的 forwardMessages 协程
|
// 停止对应的 readLoop 协程
|
||||||
sub.cancel()
|
sub.cancel()
|
||||||
|
|
||||||
// 关闭物理连接
|
// 关闭物理连接
|
||||||
|
if sub.pubsub != nil {
|
||||||
if err := sub.pubsub.Close(); err != nil {
|
if err := sub.pubsub.Close(); err != nil {
|
||||||
r.logger.Error("failed to close pubsub connection",
|
r.logger.Error("failed to close pubsub connection",
|
||||||
zap.String("roomID", roomID),
|
zap.String("roomID", roomID),
|
||||||
@@ -371,11 +498,9 @@ func (r *RedisMessageBus) Close() error {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// 2. 清空 Map,释放引用以便 GC 回收
|
// 2. 关闭主 Redis 客户端连接池
|
||||||
r.subscriptions = make(map[string]*subscription)
|
|
||||||
|
|
||||||
// 3. 关闭主 Redis 客户端连接池
|
|
||||||
if err := r.client.Close(); err != nil {
|
if err := r.client.Close(); err != nil {
|
||||||
r.logger.Error("failed to close redis client", zap.Error(err))
|
r.logger.Error("failed to close redis client", zap.Error(err))
|
||||||
return err
|
return err
|
||||||
@@ -384,6 +509,7 @@ func (r *RedisMessageBus) Close() error {
|
|||||||
r.logger.Info("Redis message bus closed successfully")
|
r.logger.Info("Redis message bus closed successfully")
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// ClearAllAwareness 彻底删除该房间的感知数据 Hash
|
// ClearAllAwareness 彻底删除该房间的感知数据 Hash
|
||||||
func (r *RedisMessageBus) ClearAllAwareness(ctx context.Context, roomID string) error {
|
func (r *RedisMessageBus) ClearAllAwareness(ctx context.Context, roomID string) error {
|
||||||
key := fmt.Sprintf("room:%s:awareness", roomID)
|
key := fmt.Sprintf("room:%s:awareness", roomID)
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ interface User {
|
|||||||
clientId: number;
|
clientId: number;
|
||||||
name: string;
|
name: string;
|
||||||
color: string;
|
color: string;
|
||||||
|
avatar?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
const UserList = ({ awareness }: UserListProps) => {
|
const UserList = ({ awareness }: UserListProps) => {
|
||||||
@@ -25,9 +26,9 @@ const UserList = ({ awareness }: UserListProps) => {
|
|||||||
clientId,
|
clientId,
|
||||||
name: state.user.name,
|
name: state.user.name,
|
||||||
color: state.user.color,
|
color: state.user.color,
|
||||||
|
avatar: state.user.avatar,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
console.log("one of the user name is" + state.user.name);
|
|
||||||
});
|
});
|
||||||
|
|
||||||
setUsers(userList);
|
setUsers(userList);
|
||||||
@@ -42,18 +43,165 @@ const UserList = ({ awareness }: UserListProps) => {
|
|||||||
}, [awareness]);
|
}, [awareness]);
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div className="user-list">
|
<div className="
|
||||||
<h4>Online Users ({users.length})</h4>
|
bg-pixel-white
|
||||||
<div className="users">
|
border-[3px]
|
||||||
{users.map((user) => (
|
border-pixel-outline
|
||||||
<div key={user.clientId} className="user">
|
shadow-pixel-md
|
||||||
<span
|
p-4
|
||||||
className="user-color"
|
">
|
||||||
style={{ backgroundColor: user.color }}
|
{/* Header with online count */}
|
||||||
></span>
|
<div className="
|
||||||
<span className="user-name">{user.name}</span>
|
flex
|
||||||
|
items-center
|
||||||
|
gap-2
|
||||||
|
mb-4
|
||||||
|
pb-3
|
||||||
|
border-b-[2px]
|
||||||
|
border-pixel-outline
|
||||||
|
">
|
||||||
|
<div className="
|
||||||
|
w-3
|
||||||
|
h-3
|
||||||
|
bg-pixel-green-lime
|
||||||
|
animate-pulse
|
||||||
|
border-[2px]
|
||||||
|
border-pixel-outline
|
||||||
|
" />
|
||||||
|
<h4 className="font-pixel text-xs text-pixel-text-primary">
|
||||||
|
ONLINE
|
||||||
|
</h4>
|
||||||
|
<span className="
|
||||||
|
ml-auto
|
||||||
|
font-sans
|
||||||
|
text-sm
|
||||||
|
font-bold
|
||||||
|
text-pixel-purple-bright
|
||||||
|
bg-pixel-panel
|
||||||
|
px-2
|
||||||
|
py-1
|
||||||
|
border-[2px]
|
||||||
|
border-pixel-outline
|
||||||
|
">
|
||||||
|
{users.length}
|
||||||
|
</span>
|
||||||
</div>
|
</div>
|
||||||
))}
|
|
||||||
|
{/* User list */}
|
||||||
|
<div className="flex flex-col gap-2">
|
||||||
|
{users.length === 0 ? (
|
||||||
|
<div className="
|
||||||
|
text-center
|
||||||
|
py-4
|
||||||
|
font-sans
|
||||||
|
text-xs
|
||||||
|
text-pixel-text-muted
|
||||||
|
">
|
||||||
|
No users online
|
||||||
|
</div>
|
||||||
|
) : (
|
||||||
|
users.map((user) => (
|
||||||
|
<div
|
||||||
|
key={user.clientId}
|
||||||
|
className="
|
||||||
|
group
|
||||||
|
flex
|
||||||
|
items-center
|
||||||
|
gap-3
|
||||||
|
p-2
|
||||||
|
bg-pixel-panel
|
||||||
|
border-[2px]
|
||||||
|
border-pixel-outline
|
||||||
|
shadow-pixel-sm
|
||||||
|
hover:shadow-pixel-hover
|
||||||
|
hover:-translate-y-0.5
|
||||||
|
hover:-translate-x-0.5
|
||||||
|
transition-all
|
||||||
|
duration-75
|
||||||
|
"
|
||||||
|
>
|
||||||
|
{/* Avatar with online indicator */}
|
||||||
|
<div className="relative flex-shrink-0">
|
||||||
|
{user.avatar ? (
|
||||||
|
<>
|
||||||
|
<img
|
||||||
|
src={user.avatar}
|
||||||
|
alt={user.name}
|
||||||
|
className="
|
||||||
|
w-10
|
||||||
|
h-10
|
||||||
|
border-[3px]
|
||||||
|
border-pixel-outline
|
||||||
|
shadow-pixel-sm
|
||||||
|
object-cover
|
||||||
|
"
|
||||||
|
onError={(e) => {
|
||||||
|
// Fallback to colored square on image error
|
||||||
|
e.currentTarget.style.display = 'none';
|
||||||
|
const fallback = e.currentTarget.nextElementSibling as HTMLElement;
|
||||||
|
if (fallback) {
|
||||||
|
fallback.style.display = 'flex';
|
||||||
|
}
|
||||||
|
}}
|
||||||
|
/>
|
||||||
|
{/* Fallback colored square (hidden if avatar loads) */}
|
||||||
|
<div
|
||||||
|
className="w-10 h-10 border-[3px] border-pixel-outline shadow-pixel-sm items-center justify-center font-pixel text-xs text-white"
|
||||||
|
style={{ backgroundColor: user.color, display: 'none' }}
|
||||||
|
>
|
||||||
|
{user.name.charAt(0).toUpperCase()}
|
||||||
|
</div>
|
||||||
|
</>
|
||||||
|
) : (
|
||||||
|
<div
|
||||||
|
className="w-10 h-10 border-[3px] border-pixel-outline shadow-pixel-sm flex items-center justify-center font-pixel text-xs text-white"
|
||||||
|
style={{ backgroundColor: user.color }}
|
||||||
|
>
|
||||||
|
{user.name.charAt(0).toUpperCase()}
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{/* Online indicator dot */}
|
||||||
|
<div className="
|
||||||
|
absolute
|
||||||
|
-bottom-0.5
|
||||||
|
-right-0.5
|
||||||
|
w-3
|
||||||
|
h-3
|
||||||
|
bg-pixel-green-lime
|
||||||
|
border-[2px]
|
||||||
|
border-pixel-white
|
||||||
|
shadow-pixel-sm
|
||||||
|
" />
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* User name */}
|
||||||
|
<span className="
|
||||||
|
font-sans
|
||||||
|
text-sm
|
||||||
|
font-medium
|
||||||
|
text-pixel-text-primary
|
||||||
|
truncate
|
||||||
|
flex-1
|
||||||
|
">
|
||||||
|
{user.name}
|
||||||
|
</span>
|
||||||
|
|
||||||
|
{/* User color indicator (small square) */}
|
||||||
|
<div
|
||||||
|
className="
|
||||||
|
flex-shrink-0
|
||||||
|
w-4
|
||||||
|
h-4
|
||||||
|
border-[2px]
|
||||||
|
border-pixel-outline
|
||||||
|
shadow-pixel-sm
|
||||||
|
"
|
||||||
|
style={{ backgroundColor: user.color }}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
))
|
||||||
|
)}
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
);
|
);
|
||||||
|
|||||||
173
loadtest/loadtest_prod.js
Normal file
173
loadtest/loadtest_prod.js
Normal file
@@ -0,0 +1,173 @@
|
|||||||
|
import { check, sleep } from 'k6';
import { Counter, Rate, Trend } from 'k6/metrics';
import ws from 'k6/ws';

// =============================================================================
// PRODUCTION-STYLE LOAD TEST (CONFIGURABLE)
// =============================================================================
// Usage examples:
//   k6 run loadtest/loadtest_prod.js
//   SCENARIOS=connect k6 run loadtest/loadtest_prod.js
//   SCENARIOS=connect,fanout ROOMS=10 FANOUT_VUS=1000 k6 run loadtest/loadtest_prod.js
//   BASE_URL=ws://localhost:8080/ws/loadtest k6 run loadtest/loadtest_prod.js
//
// Notes:
//   - Default uses /ws/loadtest to bypass auth + DB permission checks.
//   - RTT is not measured (server does not echo to sender).
//   - Use SCENARIOS to isolate connection-only vs fanout pressure.

// Read an integer knob from the environment, falling back to a default string.
function envInt(name, fallback) {
  return parseInt(__ENV[name] || fallback, 10);
}

// Tunables (all overridable via environment variables).
const BASE_URL = __ENV.BASE_URL || 'ws://localhost:8080/ws/loadtest';
const ROOMS = envInt('ROOMS', '10');
const SEND_INTERVAL_MS = envInt('SEND_INTERVAL_MS', '500');
const PAYLOAD_BYTES = envInt('PAYLOAD_BYTES', '200');
const CONNECT_HOLD_SEC = envInt('CONNECT_HOLD_SEC', '30');

// Comma-separated scenario names, e.g. "connect,fanout".
const rawScenarios = __ENV.SCENARIOS || 'connect,fanout';
const SCENARIOS = rawScenarios.split(',').map((s) => s.trim());

// =============================================================================
// CUSTOM METRICS
// =============================================================================
const connectionTime = new Trend('ws_connection_time_ms');       // handshake latency
const connectionsFailed = new Counter('ws_connections_failed');  // failed dials + socket errors
const messagesReceived = new Counter('ws_msgs_received');        // frames fanned out to us
const messagesSent = new Counter('ws_msgs_sent');                // frames we published
const connectionSuccess = new Rate('ws_connection_success');     // 101-upgrade success rate
|
||||||
|
|
||||||
|
// Map this VU onto one of ROOMS shared rooms (stable per VU).
function roomForVU() {
  const slot = __VU % ROOMS;
  return `loadtest-room-${slot}`;
}
|
||||||
|
|
||||||
|
// Compose the full WebSocket URL for a room.
function buildUrl(roomId) {
  return BASE_URL + '/' + roomId;
}
|
||||||
|
|
||||||
|
// Open a WebSocket to the room and hold it idle for holdSec seconds.
// Records handshake latency and success rate; inbound frames are counted only.
function connectAndHold(roomId, holdSec) {
  const target = buildUrl(roomId);
  const startedAt = Date.now();

  const res = ws.connect(target, {}, (socket) => {
    connectionTime.add(Date.now() - startedAt);
    connectionSuccess.add(1);

    socket.on('message', () => messagesReceived.add(1));
    socket.on('error', () => connectionsFailed.add(1));

    // Close after the hold window so VUs churn connections.
    socket.setTimeout(() => socket.close(), holdSec * 1000);
  });

  const ok = check(res, {
    'WebSocket connected': (r) => r && r.status === 101,
  });

  if (!ok) {
    connectionsFailed.add(1);
    connectionSuccess.add(0);
  }
}
|
||||||
|
|
||||||
|
// Open a WebSocket and publish a random binary payload every SEND_INTERVAL_MS
// for CONNECT_HOLD_SEC seconds, counting sent and received frames.
function connectAndFanout(roomId) {
  const target = buildUrl(roomId);
  const startedAt = Date.now();

  // Random payload; byte 0 mimics a Yjs sync message-type tag.
  const payload = new Uint8Array(PAYLOAD_BYTES);
  payload[0] = 1;
  for (let i = 1; i < PAYLOAD_BYTES; i++) {
    payload[i] = Math.floor(Math.random() * 256);
  }

  const res = ws.connect(target, {}, (socket) => {
    connectionTime.add(Date.now() - startedAt);
    connectionSuccess.add(1);

    socket.on('message', () => messagesReceived.add(1));
    socket.on('error', () => connectionsFailed.add(1));

    // Publish at a steady cadence until the hold window ends.
    socket.setInterval(() => {
      socket.sendBinary(payload.buffer);
      messagesSent.add(1);
    }, SEND_INTERVAL_MS);

    socket.setTimeout(() => socket.close(), CONNECT_HOLD_SEC * 1000);
  });

  const ok = check(res, {
    'WebSocket connected': (r) => r && r.status === 101,
  });

  if (!ok) {
    connectionsFailed.add(1);
    connectionSuccess.add(0);
  }
}
|
||||||
|
|
||||||
|
// =============================================================================
// SCENARIOS (decided at init time from env)
// =============================================================================
const scenarios = {};

// Connection churn: ramp up to 1000 idle sockets, hold, ramp down.
const connectStages = [
  { duration: '10s', target: 200 },
  { duration: '10s', target: 500 },
  { duration: '10s', target: 1000 },
  { duration: '60s', target: 1000 },
  { duration: '10s', target: 0 },
];

if (SCENARIOS.includes('connect')) {
  scenarios.connect_only = {
    executor: 'ramping-vus',
    startVUs: 0,
    stages: connectStages,
    exec: 'connectOnly',
  };
}

// Fanout pressure: a constant pool of VUs publishing into shared rooms.
if (SCENARIOS.includes('fanout')) {
  scenarios.fanout = {
    executor: 'constant-vus',
    vus: parseInt(__ENV.FANOUT_VUS || '1000', 10),
    duration: __ENV.FANOUT_DURATION || '90s',
    exec: 'fanout',
  };
}

export const options = {
  scenarios,
  thresholds: {
    ws_connection_time_ms: ['p(95)<500'],
    ws_connection_success: ['rate>0.95'],
  },
};
|
||||||
|
|
||||||
|
// Scenario entry point: connection churn only (no message traffic).
export function connectOnly() {
  const room = roomForVU();
  connectAndHold(room, CONNECT_HOLD_SEC);
  sleep(0.1);
}
|
||||||
|
|
||||||
|
// Scenario entry point: continuous publish/fanout traffic.
export function fanout() {
  const room = roomForVU();
  connectAndFanout(room);
  sleep(0.1);
}
|
||||||
|
|
||||||
|
// Print the effective configuration once before the scenarios start.
export function setup() {
  const line = '========================================';
  console.log(line);
  console.log(' Production-Style Load Test');
  console.log(line);
  console.log(`BASE_URL: ${BASE_URL}`);
  console.log(`ROOMS: ${ROOMS}`);
  console.log(`SCENARIOS: ${SCENARIOS.join(',')}`);
  console.log(`SEND_INTERVAL_MS: ${SEND_INTERVAL_MS}`);
  console.log(`PAYLOAD_BYTES: ${PAYLOAD_BYTES}`);
  console.log(`CONNECT_HOLD_SEC: ${CONNECT_HOLD_SEC}`);
  console.log(line);
}
|
||||||
120
loadtest/loadtest_redis_stress.js
Normal file
120
loadtest/loadtest_redis_stress.js
Normal file
@@ -0,0 +1,120 @@
|
|||||||
|
import { check, sleep } from "k6";
import { Counter, Rate, Trend } from "k6/metrics";
import ws from "k6/ws";

// =============================================================================
// CUSTOM METRICS (avoid conflicts with k6's built-in ws_* metrics)
// =============================================================================
const connectionTime = new Trend("ws_connection_time_ms");      // handshake latency
// NOTE(review): messageRTT is never fed a sample in this script; kept because
// constructing it registers the metric name with k6.
const messageRTT = new Trend("ws_message_rtt_ms");
const connectionsFailed = new Counter("ws_connections_failed");
const messagesReceived = new Counter("ws_msgs_received");
const messagesSent = new Counter("ws_msgs_sent");
const connectionSuccess = new Rate("ws_connection_success");

// =============================================================================
// 1000 USERS TEST - STRESS REDIS PUBSUB SUBSCRIPTIONS
// =============================================================================
// Ramp: 20 -> 200 -> 500 -> 1000 VUs, hold a minute, then drain.
const rampStages = [
  { duration: "20s", target: 20 },   // Warmup: 20 users
  { duration: "10s", target: 200 },  // Ramp to 200
  { duration: "10s", target: 500 },  // Ramp to 500
  { duration: "10s", target: 1000 }, // Ramp to 1000
  { duration: "60s", target: 1000 }, // Hold at 1000 for 1 minute
  { duration: "10s", target: 0 },    // Ramp down
];

export const options = {
  stages: rampStages,
  thresholds: {
    ws_connection_time_ms: ["p(95)<500"],  // Target: <500ms connection
    ws_message_rtt_ms: ["p(95)<100"],      // Target: <100ms message RTT
    ws_connection_success: ["rate>0.95"],  // Target: >95% success rate
  },
};
|
||||||
|
|
||||||
|
// Each VU connects to its OWN room so ~1000 VUs create ~1000 Redis PubSub
// subscriptions (one dedicated subscription per room) — the bottleneck under test.
export default function () {
  const roomId = `loadtest-room-${__VU}`;
  const url = `ws://localhost:8080/ws/loadtest/${roomId}`;

  const startedAt = Date.now();

  const res = ws.connect(url, {}, (socket) => {
    connectionTime.add(Date.now() - startedAt);
    connectionSuccess.add(1);

    // Realistic Yjs-sized binary payload (sync messages are ~100-500 bytes).
    const payload = new Uint8Array(200);
    payload[0] = 1; // Message type: Yjs sync
    for (let i = 1; i < 200; i++) {
      payload[i] = Math.floor(Math.random() * 256); // non-zero body
    }

    socket.on("message", () => messagesReceived.add(1));
    socket.on("error", () => connectionsFailed.add(1));

    // One edit per second ≈ realistic collaborative typing rate.
    socket.setInterval(() => {
      socket.sendBinary(payload.buffer);
      messagesSent.add(1);
    }, 1000);

    // Outlive the test duration so connections stay up through ramp-down.
    socket.setTimeout(() => socket.close(), 100000);
  });

  const ok = check(res, {
    "WebSocket connected": (r) => r && r.status === 101,
  });

  if (!ok) {
    connectionsFailed.add(1);
    connectionSuccess.add(0);
  }

  // Brief pause so VU restarts do not hammer the connect endpoint.
  sleep(0.1);
}
|
||||||
|
|
||||||
|
// One-time banner describing what this test stresses and the expected
// before/after numbers for the Redis PubSub fix.
export function setup() {
  const banner = "========================================";
  const lines = [
    banner,
    " Redis PubSub Stress Test: 1000 Users",
    banner,
    "⚠️ CRITICAL: Creates ~1000 rooms",
    " This stresses Redis PubSub subscriptions",
    " Each room = 1 dedicated PubSub connection",
    banner,
    "Expected bottleneck (before fix):",
    " - 58.96s in PubSub health checks",
    " - 28.09s in ReceiveTimeout",
    " - Connection success rate: 80-85%",
    " - P95 latency: 20-26 seconds",
    banner,
    "Expected after fix:",
    " - <1s in PubSub operations",
    " - Connection success rate: >95%",
    " - P95 latency: <500ms",
    banner,
  ];
  lines.forEach((line) => console.log(line));
}
|
||||||
|
|
||||||
|
// Final banner pointing at the pprof capture commands for post-run analysis.
export function teardown(data) {
  const banner = "========================================";
  const lines = [
    banner,
    " Load Test Completed",
    banner,
    "Check profiling data with:",
    " curl http://localhost:8080/debug/pprof/mutex > mutex.pb",
    " go tool pprof -top mutex.pb",
    banner,
  ];
  lines.forEach((line) => console.log(line));
}
|
||||||
43
loadtest/run_with_pprof.sh
Executable file
43
loadtest/run_with_pprof.sh
Executable file
@@ -0,0 +1,43 @@
|
|||||||
|
#!/usr/bin/env bash
set -euo pipefail

# Simple helper to run k6 and capture pprof during peak load.
# Usage:
#   PPROF_BASE=http://localhost:8080/debug/pprof \
#   K6_SCRIPT=loadtest/loadtest_prod.js \
#   SLEEP_BEFORE=40 \
#   CPU_SECONDS=30 \
#   ./loadtest/run_with_pprof.sh

PPROF_BASE="${PPROF_BASE:-http://localhost:8080/debug/pprof}"
K6_SCRIPT="${K6_SCRIPT:-loadtest/loadtest_prod.js}"
SLEEP_BEFORE="${SLEEP_BEFORE:-40}"
CPU_SECONDS="${CPU_SECONDS:-30}"
OUT_DIR="${OUT_DIR:-loadtest/pprof}"

STAMP="$(date +%Y%m%d_%H%M%S)"
RUN_DIR="${OUT_DIR}/${STAMP}"

mkdir -p "${RUN_DIR}"

echo "==> Starting k6: ${K6_SCRIPT}"
k6 run "${K6_SCRIPT}" &
K6_PID=$!

# Bug fix: under `set -e`, a failed curl previously aborted the script and
# orphaned the background k6 process. Kill it on any abnormal exit.
cleanup() {
  if kill -0 "${K6_PID}" 2>/dev/null; then
    kill "${K6_PID}" 2>/dev/null || true
  fi
}
trap cleanup EXIT

echo "==> Waiting ${SLEEP_BEFORE}s before capturing pprof..."
sleep "${SLEEP_BEFORE}"

echo "==> Capturing profiles into ${RUN_DIR}"
# -f makes curl fail on HTTP errors instead of silently saving an error page
# into the .pprof files (which would later confuse `go tool pprof`).
curl -fsS "${PPROF_BASE}/profile?seconds=${CPU_SECONDS}" -o "${RUN_DIR}/cpu.pprof"
curl -fsS "${PPROF_BASE}/mutex" -o "${RUN_DIR}/mutex.pprof"
curl -fsS "${PPROF_BASE}/block" -o "${RUN_DIR}/block.pprof"
curl -fsS "${PPROF_BASE}/goroutine?debug=2" -o "${RUN_DIR}/goroutine.txt"

echo "==> Waiting for k6 to finish (pid ${K6_PID})..."
wait "${K6_PID}"

echo "==> Done. Profiles saved in ${RUN_DIR}"
echo "    Inspect with:"
echo "    go tool pprof -top ${RUN_DIR}/cpu.pprof"
echo "    go tool pprof -top ${RUN_DIR}/mutex.pprof"
echo "    go tool pprof -top ${RUN_DIR}/block.pprof"
|
||||||
Reference in New Issue
Block a user