feat(logger): update logger configuration to set log level to Fatal to eliminate IO lock contention
fix(redis): silence Redis internal logging and optimize connection pool settings to reduce mutex contention
feat(userlist): enhance user list component with avatar support and improved styling
test(load): add production-style load test script for WebSocket connections and Redis PubSub stress testing
chore(loadtest): create script to run load tests with pprof profiling for performance analysis
This commit is contained in:
173
loadtest/loadtest_prod.js
Normal file
173
loadtest/loadtest_prod.js
Normal file
@@ -0,0 +1,173 @@
|
||||
import ws from 'k6/ws';
|
||||
import { check, sleep } from 'k6';
|
||||
import { Counter, Trend, Rate } from 'k6/metrics';
|
||||
|
||||
// =============================================================================
|
||||
// PRODUCTION-STYLE LOAD TEST (CONFIGURABLE)
|
||||
// =============================================================================
|
||||
// Usage examples:
|
||||
// k6 run loadtest/loadtest_prod.js
|
||||
// SCENARIOS=connect k6 run loadtest/loadtest_prod.js
|
||||
// SCENARIOS=connect,fanout ROOMS=10 FANOUT_VUS=1000 k6 run loadtest/loadtest_prod.js
|
||||
// BASE_URL=ws://localhost:8080/ws/loadtest k6 run loadtest/loadtest_prod.js
|
||||
//
|
||||
// Notes:
|
||||
// - Default uses /ws/loadtest to bypass auth + DB permission checks.
|
||||
// - RTT is not measured (server does not echo to sender).
|
||||
// - Use SCENARIOS to isolate connection-only vs fanout pressure.
|
||||
|
||||
/**
 * Reads an integer setting from the k6 environment.
 *
 * Uses `fallback` when the variable is unset or empty. Aborts at init time
 * when the value does not parse to an integer >= `min`, so a typo cannot
 * silently become NaN (for example `__VU % NaN` would put every VU in the
 * room "loadtest-room-NaN").
 *
 * @param {string} name - Environment variable name.
 * @param {string} fallback - Default value, as a string.
 * @param {number} [min=1] - Smallest accepted value.
 * @returns {number} The parsed integer.
 * @throws {Error} When the value is not an integer >= `min`.
 */
function envInt(name, fallback, min = 1) {
  const raw = __ENV[name] || fallback;
  const value = parseInt(raw, 10);
  if (Number.isNaN(value) || value < min) {
    throw new Error(`${name} must be an integer >= ${min}, got "${raw}"`);
  }
  return value;
}

// Base WebSocket URL; the room id is appended as the last path segment.
const BASE_URL = __ENV.BASE_URL || 'ws://localhost:8080/ws/loadtest';
// Number of distinct rooms the VUs are spread across.
const ROOMS = envInt('ROOMS', '10');
// Delay between binary messages sent by each fanout VU.
const SEND_INTERVAL_MS = envInt('SEND_INTERVAL_MS', '500');
// Size of each binary message; 0 is still accepted, as it was before.
const PAYLOAD_BYTES = envInt('PAYLOAD_BYTES', '200', 0);
// How long each connection is kept open before the client closes it.
const CONNECT_HOLD_SEC = envInt('CONNECT_HOLD_SEC', '30');
// Comma-separated scenario names; blank entries (e.g. a trailing comma) are dropped.
const SCENARIOS = (__ENV.SCENARIOS || 'connect,fanout')
  .split(',')
  .map((s) => s.trim())
  .filter((s) => s.length > 0);
|
||||
|
||||
// =============================================================================
|
||||
// CUSTOM METRICS
|
||||
// =============================================================================
|
||||
// Connection-level metrics.
const connectionTime = new Trend('ws_connection_time_ms');
const connectionSuccess = new Rate('ws_connection_success');
const connectionsFailed = new Counter('ws_connections_failed');

// Message counters, one per direction.
const messagesSent = new Counter('ws_msgs_sent');
const messagesReceived = new Counter('ws_msgs_received');
|
||||
|
||||
/**
 * Chooses the room for the current virtual user by taking the VU id
 * modulo ROOMS.
 *
 * @returns {string} Room id of the form `loadtest-room-<n>`.
 */
function roomForVU() {
  const bucket = __VU % ROOMS;
  return `loadtest-room-${bucket}`;
}
|
||||
|
||||
/**
 * Builds the WebSocket URL for a room by appending the room id to BASE_URL.
 *
 * Trailing slashes on BASE_URL are stripped first, so a value such as
 * `ws://host/ws/loadtest/` does not produce a `//` before the room id.
 *
 * @param {string} roomId - Room identifier used as the last path segment.
 * @returns {string} The full connection URL.
 */
function buildUrl(roomId) {
  const base = BASE_URL.replace(/\/+$/, '');
  return `${base}/${roomId}`;
}
|
||||
|
||||
/**
 * Opens a WebSocket to the given room, keeps it open for `holdSec` seconds
 * while counting incoming messages, then closes it.
 *
 * Connection latency and success are recorded from the socket's 'open' event,
 * so they are only emitted once the socket is actually open. A failed attempt
 * contributes exactly one `ws_connections_failed` sample: either from the
 * 'error' handler or, if no error event fired, from the status check below.
 *
 * @param {string} roomId - Room to join; passed to buildUrl().
 * @param {number} holdSec - Seconds to keep the connection open.
 */
function connectAndHold(roomId, holdSec) {
  const url = buildUrl(roomId);
  const connectStart = Date.now();
  // Set by the 'error' handler so a failed handshake is not counted twice.
  let errorCounted = false;

  const res = ws.connect(url, {}, (socket) => {
    socket.on('open', () => {
      connectionTime.add(Date.now() - connectStart);
      connectionSuccess.add(1);
    });

    socket.on('message', () => {
      messagesReceived.add(1);
    });

    socket.on('error', () => {
      errorCounted = true;
      connectionsFailed.add(1);
    });

    // Client-side close after the hold period ends this iteration.
    socket.setTimeout(() => {
      socket.close();
    }, holdSec * 1000);
  });

  const connected = check(res, {
    'WebSocket connected': (r) => r && r.status === 101,
  });

  if (!connected) {
    if (!errorCounted) {
      connectionsFailed.add(1);
    }
    connectionSuccess.add(0);
  }
}
|
||||
|
||||
/**
 * Opens a WebSocket to the given room and sends a fixed binary payload every
 * SEND_INTERVAL_MS until CONNECT_HOLD_SEC seconds have elapsed, counting sent
 * and received messages.
 *
 * The payload is PAYLOAD_BYTES long: byte 0 is set to 1 and the rest is random
 * filler generated once per call.
 *
 * Connection latency and success are recorded from the socket's 'open' event,
 * so they are only emitted once the socket is actually open. A failed attempt
 * contributes exactly one `ws_connections_failed` sample: either from the
 * 'error' handler or, if no error event fired, from the status check below.
 *
 * @param {string} roomId - Room to join; passed to buildUrl().
 */
function connectAndFanout(roomId) {
  const url = buildUrl(roomId);

  const payload = new Uint8Array(PAYLOAD_BYTES);
  payload[0] = 1;
  for (let i = 1; i < PAYLOAD_BYTES; i++) {
    payload[i] = Math.floor(Math.random() * 256);
  }

  const connectStart = Date.now();
  // Set by the 'error' handler so a failed handshake is not counted twice.
  let errorCounted = false;

  const res = ws.connect(url, {}, (socket) => {
    socket.on('open', () => {
      connectionTime.add(Date.now() - connectStart);
      connectionSuccess.add(1);
    });

    socket.on('message', () => {
      messagesReceived.add(1);
    });

    socket.on('error', () => {
      errorCounted = true;
      connectionsFailed.add(1);
    });

    socket.setInterval(() => {
      socket.sendBinary(payload.buffer);
      messagesSent.add(1);
    }, SEND_INTERVAL_MS);

    // Client-side close after the hold period ends this iteration.
    socket.setTimeout(() => {
      socket.close();
    }, CONNECT_HOLD_SEC * 1000);
  });

  const connected = check(res, {
    'WebSocket connected': (r) => r && r.status === 101,
  });

  if (!connected) {
    if (!errorCounted) {
      connectionsFailed.add(1);
    }
    connectionSuccess.add(0);
  }
}
|
||||
|
||||
// =============================================================================
|
||||
// SCENARIOS (decided at init time from env)
|
||||
// =============================================================================
|
||||
// Builds the k6 scenario map once at init time. Only the scenario names listed
// in SCENARIOS are included; 'connect' maps to `connect_only`, 'fanout' to
// `fanout`.
const scenarios = (() => {
  const enabled = {};

  if (SCENARIOS.includes('connect')) {
    // [duration, target VUs] pairs for the connection-only ramp.
    const rampPlan = [
      ['10s', 200],
      ['10s', 500],
      ['10s', 1000],
      ['60s', 1000],
      ['10s', 0],
    ];
    enabled.connect_only = {
      executor: 'ramping-vus',
      startVUs: 0,
      stages: rampPlan.map(([duration, target]) => ({ duration, target })),
      exec: 'connectOnly',
    };
  }

  if (SCENARIOS.includes('fanout')) {
    const fanoutVus = parseInt(__ENV.FANOUT_VUS || '1000', 10);
    const fanoutDuration = __ENV.FANOUT_DURATION || '90s';
    enabled.fanout = {
      executor: 'constant-vus',
      vus: fanoutVus,
      duration: fanoutDuration,
      exec: 'fanout',
    };
  }

  return enabled;
})();
|
||||
|
||||
// Thresholds evaluated against the custom connection metrics.
const thresholds = {
  ws_connection_time_ms: ['p(95)<500'],
  ws_connection_success: ['rate>0.95'],
};

// k6 run options: the env-selected scenarios plus the thresholds above.
export const options = { scenarios, thresholds };
|
||||
|
||||
/**
 * Scenario entry point: connect to this VU's room, hold the connection for
 * CONNECT_HOLD_SEC seconds, then pause briefly before the next iteration.
 */
export function connectOnly() {
  const room = roomForVU();
  connectAndHold(room, CONNECT_HOLD_SEC);
  sleep(0.1);
}
|
||||
|
||||
/**
 * Scenario entry point: connect to this VU's room and send messages until the
 * connection is closed, then pause briefly before the next iteration.
 */
export function fanout() {
  const room = roomForVU();
  connectAndFanout(room);
  sleep(0.1);
}
|
||||
|
||||
/**
 * k6 setup hook: prints a banner with the effective configuration.
 */
export function setup() {
  const rule = '========================================';
  const banner = [
    rule,
    ' Production-Style Load Test',
    rule,
    `BASE_URL: ${BASE_URL}`,
    `ROOMS: ${ROOMS}`,
    `SCENARIOS: ${SCENARIOS.join(',')}`,
    `SEND_INTERVAL_MS: ${SEND_INTERVAL_MS}`,
    `PAYLOAD_BYTES: ${PAYLOAD_BYTES}`,
    `CONNECT_HOLD_SEC: ${CONNECT_HOLD_SEC}`,
    rule,
  ];

  for (const line of banner) {
    console.log(line);
  }
}
|
||||
120
loadtest/loadtest_redis_stress.js
Normal file
120
loadtest/loadtest_redis_stress.js
Normal file
@@ -0,0 +1,120 @@
|
||||
import { check, sleep } from "k6";
|
||||
import { Counter, Rate, Trend } from "k6/metrics";
|
||||
import ws from "k6/ws";
|
||||
|
||||
// =============================================================================
|
||||
// CUSTOM METRICS (avoid conflicts with k6's built-in ws_* metrics)
|
||||
// =============================================================================
|
||||
// Connection-level metrics.
const connectionTime = new Trend("ws_connection_time_ms");
const connectionSuccess = new Rate("ws_connection_success");
const connectionsFailed = new Counter("ws_connections_failed");

// Message-level metrics.
// NOTE(review): nothing in this script calls messageRTT.add(), so the
// ws_message_rtt_ms trend appears to stay empty - confirm whether RTT
// measurement is still intended.
const messageRTT = new Trend("ws_message_rtt_ms");
const messagesSent = new Counter("ws_msgs_sent");
const messagesReceived = new Counter("ws_msgs_received");
|
||||
|
||||
// =============================================================================
|
||||
// 1000 USERS TEST - STRESS REDIS PUBSUB SUBSCRIPTIONS
|
||||
// =============================================================================
|
||||
// [duration, target VUs] pairs describing the load profile (120s in total).
const loadProfile = [
  ["20s", 20], // Warmup: 20 users
  ["10s", 200], // Ramp to 200
  ["10s", 500], // Ramp to 500
  ["10s", 1000], // Ramp to 1000
  ["60s", 1000], // Hold at 1000 for 1 minute
  ["10s", 0], // Ramp down
];

export const options = {
  stages: loadProfile.map(([duration, target]) => ({ duration, target })),

  thresholds: {
    // Target: <500ms connection
    ws_connection_time_ms: ["p(95)<500"],
    // Target: <100ms message RTT
    ws_message_rtt_ms: ["p(95)<100"],
    // Target: >95% success rate
    ws_connection_success: ["rate>0.95"],
  },
};
|
||||
|
||||
/**
 * Default VU function: joins a room unique to this VU, sends a 200-byte
 * binary message every second, and closes the socket after 100 seconds.
 *
 * The target defaults to ws://localhost:8080/ws/loadtest and can be overridden
 * with the BASE_URL environment variable.
 *
 * Connection latency and success are recorded from the socket's 'open' event,
 * so they are only emitted once the socket is actually open. A failed attempt
 * contributes exactly one `ws_connections_failed` sample: either from the
 * 'error' handler or, if no error event fired, from the status check below.
 */
export default function redisPubSubStress() {
  // CRITICAL: one room per VU. The intent is to create one PubSub
  // subscription per room (~1000 at peak) to trigger the bottleneck.
  const roomId = `loadtest-room-${__VU}`;
  const baseUrl = (__ENV.BASE_URL || "ws://localhost:8080/ws/loadtest").replace(/\/+$/, "");
  const url = `${baseUrl}/${roomId}`;

  // Yjs-sized message (200 bytes): byte 0 is the message type (1 = Yjs sync),
  // the rest is random filler rather than zeros.
  const payload = new Uint8Array(200);
  payload[0] = 1;
  for (let i = 1; i < 200; i++) {
    payload[i] = Math.floor(Math.random() * 256);
  }

  const connectStart = Date.now();
  // Set by the 'error' handler so a failed handshake is not counted twice.
  let errorCounted = false;

  const res = ws.connect(url, {}, (socket) => {
    socket.on("open", () => {
      connectionTime.add(Date.now() - connectStart);
      connectionSuccess.add(1);
    });

    socket.on("message", () => {
      messagesReceived.add(1);
    });

    socket.on("error", () => {
      errorCounted = true;
      connectionsFailed.add(1);
    });

    // Send one message per second.
    socket.setInterval(() => {
      socket.sendBinary(payload.buffer);
      messagesSent.add(1);
    }, 1000);

    // Close after 100 seconds. The configured stages add up to 120 seconds,
    // so a VU started early in the run can reach this timeout and reconnect.
    socket.setTimeout(() => {
      socket.close();
    }, 100000);
  });

  const connectCheck = check(res, {
    "WebSocket connected": (r) => r && r.status === 101,
  });

  if (!connectCheck) {
    if (!errorCounted) {
      connectionsFailed.add(1);
    }
    connectionSuccess.add(0);
  }

  // Small sleep to avoid hammering the connection endpoint.
  sleep(0.1);
}
|
||||
|
||||
/**
 * k6 setup hook: prints a banner describing the test and the results expected
 * before and after the fix.
 */
export function setup() {
  const rule = "========================================";
  const banner = [
    rule,
    " Redis PubSub Stress Test: 1000 Users",
    rule,
    "⚠️ CRITICAL: Creates ~1000 rooms",
    " This stresses Redis PubSub subscriptions",
    " Each room = 1 dedicated PubSub connection",
    rule,
    "Expected bottleneck (before fix):",
    " - 58.96s in PubSub health checks",
    " - 28.09s in ReceiveTimeout",
    " - Connection success rate: 80-85%",
    " - P95 latency: 20-26 seconds",
    rule,
    "Expected after fix:",
    " - <1s in PubSub operations",
    " - Connection success rate: >95%",
    " - P95 latency: <500ms",
    rule,
  ];

  for (const line of banner) {
    console.log(line);
  }
}
|
||||
|
||||
/**
 * k6 teardown hook: prints a completion banner with the commands for pulling
 * and inspecting the mutex profile.
 *
 * @param {*} data - Value passed in by k6; not used here.
 */
export function teardown(data) {
  const rule = "========================================";
  const banner = [
    rule,
    " Load Test Completed",
    rule,
    "Check profiling data with:",
    " curl http://localhost:8080/debug/pprof/mutex > mutex.pb",
    " go tool pprof -top mutex.pb",
    rule,
  ];

  for (const line of banner) {
    console.log(line);
  }
}
|
||||
43
loadtest/run_with_pprof.sh
Executable file
43
loadtest/run_with_pprof.sh
Executable file
@@ -0,0 +1,43 @@
|
||||
#!/usr/bin/env bash
set -euo pipefail

# Simple helper to run k6 and capture pprof during peak load.
# Usage:
#   PPROF_BASE=http://localhost:8080/debug/pprof \
#   K6_SCRIPT=loadtest/loadtest_prod.js \
#   SLEEP_BEFORE=40 \
#   CPU_SECONDS=30 \
#   ./loadtest/run_with_pprof.sh
#
# OUT_DIR (default: loadtest/pprof) selects where the timestamped run
# directory is created. The script exits with k6's exit status.

PPROF_BASE="${PPROF_BASE:-http://localhost:8080/debug/pprof}"
K6_SCRIPT="${K6_SCRIPT:-loadtest/loadtest_prod.js}"
SLEEP_BEFORE="${SLEEP_BEFORE:-40}"
CPU_SECONDS="${CPU_SECONDS:-30}"
OUT_DIR="${OUT_DIR:-loadtest/pprof}"

STAMP="$(date +%Y%m%d_%H%M%S)"
RUN_DIR="${OUT_DIR}/${STAMP}"

mkdir -p "${RUN_DIR}"

K6_PID=""

# Stop the background k6 run if the script exits early (for example when a
# curl below fails under `set -e`), so the load test is not left orphaned.
cleanup() {
  if [[ -n "${K6_PID}" ]] && kill -0 "${K6_PID}" 2>/dev/null; then
    kill "${K6_PID}" 2>/dev/null || true
  fi
}
trap cleanup EXIT

echo "==> Starting k6: ${K6_SCRIPT}"
k6 run "${K6_SCRIPT}" &
K6_PID=$!

echo "==> Waiting ${SLEEP_BEFORE}s before capturing pprof..."
sleep "${SLEEP_BEFORE}"

echo "==> Capturing profiles into ${RUN_DIR}"
# --fail (-f) makes curl exit non-zero on an HTTP error instead of saving the
# error page as if it were a profile.
curl -fsS "${PPROF_BASE}/profile?seconds=${CPU_SECONDS}" -o "${RUN_DIR}/cpu.pprof"
curl -fsS "${PPROF_BASE}/mutex" -o "${RUN_DIR}/mutex.pprof"
curl -fsS "${PPROF_BASE}/block" -o "${RUN_DIR}/block.pprof"
curl -fsS "${PPROF_BASE}/goroutine?debug=2" -o "${RUN_DIR}/goroutine.txt"

echo "==> Waiting for k6 to finish (pid ${K6_PID})..."
# Capture k6's status instead of letting `set -e` abort here, so the summary
# below is still printed when k6 exits non-zero.
K6_STATUS=0
wait "${K6_PID}" || K6_STATUS=$?
K6_PID=""

echo "==> Done. Profiles saved in ${RUN_DIR}"
echo " Inspect with:"
echo " go tool pprof -top ${RUN_DIR}/cpu.pprof"
echo " go tool pprof -top ${RUN_DIR}/mutex.pprof"
echo " go tool pprof -top ${RUN_DIR}/block.pprof"

exit "${K6_STATUS}"
|
||||
Reference in New Issue
Block a user