package relay

import (
	"fmt"
	"runtime"
	"sync"
	"testing"

	"github.com/dolonet/mtg-multi/mtglib/internal/tls"
)

// BenchmarkStackMemory measures memory consumption when N goroutines each
// hold a stack-allocated buffer (the BenchmarkPoolMemory_* benchmarks below
// measure the pool-allocated counterpart). Each goroutine simulates one pump
// direction of a relay connection. Real connections have 2 pumps each, so
// N goroutines ≈ N/2 connections.
func BenchmarkStackMemory(b *testing.B) {
	for _, numGoroutines := range []int{100, 500, 1000, 2000} {
		b.Run(fmt.Sprintf("goroutines=%d", numGoroutines), func(b *testing.B) {
			for i := 0; i < b.N; i++ {
				var memBefore, memAfter runtime.MemStats

				runtime.GC()
				runtime.ReadMemStats(&memBefore)

				var wg sync.WaitGroup

				ready := make(chan struct{}, numGoroutines)
				stop := make(chan struct{})

				wg.Add(numGoroutines)

				for j := 0; j < numGoroutines; j++ {
					go blockingReadStack(&wg, ready, stop)
				}

				// Wait for all goroutines to be ready (holding their buffers).
				for j := 0; j < numGoroutines; j++ {
					<-ready
				}

				runtime.ReadMemStats(&memAfter)

				stackDelta := memAfter.StackInuse - memBefore.StackInuse
				heapDelta := memAfter.HeapInuse - memBefore.HeapInuse
				totalDelta := stackDelta + heapDelta

				b.ReportMetric(float64(stackDelta), "stack_bytes")
				b.ReportMetric(float64(heapDelta), "heap_bytes")
				b.ReportMetric(float64(totalDelta), "total_bytes")
				b.ReportMetric(float64(stackDelta)/float64(numGoroutines), "stack_per_goroutine")

				close(stop)
				wg.Wait()
			}
		})
	}
}

func BenchmarkPoolMemory_16KB(b *testing.B) {
	benchmarkPoolMemory(b, tls.MaxRecordPayloadSize)
}

func BenchmarkPoolMemory_4KB(b *testing.B) {
	benchmarkPoolMemory(b, 4096)
}

func benchmarkPoolMemory(b *testing.B, poolBufSize int) {
	b.Helper()

	pool := &sync.Pool{
		New: func() any {
			buf := make([]byte, poolBufSize)

			return &buf
		},
	}

	for _, numGoroutines := range []int{100, 500, 1000, 2000} {
		b.Run(fmt.Sprintf("goroutines=%d", numGoroutines), func(b *testing.B) {
			for i := 0; i < b.N; i++ {
				var memBefore, memAfter runtime.MemStats

				// Ensure pool is empty.
				runtime.GC()
				runtime.ReadMemStats(&memBefore)

				var wg sync.WaitGroup

				ready := make(chan struct{}, numGoroutines)
				stop := make(chan struct{})

				wg.Add(numGoroutines)

				for j := 0; j < numGoroutines; j++ {
					go blockingReadPool(&wg, ready, stop, pool)
				}

				for j := 0; j < numGoroutines; j++ {
					<-ready
				}

				runtime.ReadMemStats(&memAfter)

				stackDelta := memAfter.StackInuse - memBefore.StackInuse
				heapDelta := memAfter.HeapInuse - memBefore.HeapInuse
				totalDelta := stackDelta + heapDelta

				b.ReportMetric(float64(stackDelta), "stack_bytes")
				b.ReportMetric(float64(heapDelta), "heap_bytes")
				b.ReportMetric(float64(totalDelta), "total_bytes")
				b.ReportMetric(float64(stackDelta)/float64(numGoroutines), "stack_per_goroutine")

				close(stop)
				wg.Wait()
			}
		})
	}
}

// BenchmarkPoolMemory_Burst tests the scenario that 9seconds described:
// connections come in bursts, and the pool holds unused buffers between
// bursts.
func BenchmarkPoolMemory_Burst(b *testing.B) {
	for _, poolBufSize := range []int{4096, tls.MaxRecordPayloadSize} {
		b.Run(fmt.Sprintf("poolBuf=%d", poolBufSize), func(b *testing.B) {
			pool := &sync.Pool{
				New: func() any {
					buf := make([]byte, poolBufSize)

					return &buf
				},
			}

			for i := 0; i < b.N; i++ {
				// Burst 1: 500 goroutines.
				var wg sync.WaitGroup

				ready := make(chan struct{}, 500)
				stop := make(chan struct{})

				wg.Add(500)

				for j := 0; j < 500; j++ {
					go blockingReadPool(&wg, ready, stop, pool)
				}

				for j := 0; j < 500; j++ {
					<-ready
				}

				close(stop)
				wg.Wait()

				// Between bursts: measure idle pool memory.
				var memAfterBurst runtime.MemStats

				runtime.ReadMemStats(&memAfterBurst)

				// Burst 2: 500 goroutines again (pool should reuse buffers).
				ready2 := make(chan struct{}, 500)
				stop2 := make(chan struct{})

				wg.Add(500)

				for j := 0; j < 500; j++ {
					go blockingReadPool(&wg, ready2, stop2, pool)
				}

				for j := 0; j < 500; j++ {
					<-ready2
				}

				var memDuringBurst2 runtime.MemStats

				runtime.ReadMemStats(&memDuringBurst2)

				b.ReportMetric(float64(memAfterBurst.HeapInuse), "idle_heap_bytes")
				b.ReportMetric(float64(memDuringBurst2.HeapInuse), "burst2_heap_bytes")
				b.ReportMetric(float64(memDuringBurst2.StackInuse), "burst2_stack_bytes")

				close(stop2)
				wg.Wait()
			}
		})
	}
}
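
// NOTE: blockingReadStack and blockingReadPool are referenced by the
// benchmarks above but are not defined in this section. The sketches below
// are hypothetical minimal implementations, added to make the section
// self-contained; they assume tls.MaxRecordPayloadSize is a compile-time
// constant and that each pump holds its buffer for the goroutine's lifetime.

// blockingReadStack simulates one pump direction that keeps its read buffer
// on the goroutine stack. It signals readiness once the buffer is live, then
// blocks until the benchmark closes stop.
func blockingReadStack(wg *sync.WaitGroup, ready chan<- struct{}, stop <-chan struct{}) {
	defer wg.Done()

	// A fixed-size array lets the compiler place the buffer on the
	// goroutine stack instead of the heap.
	var buf [tls.MaxRecordPayloadSize]byte

	ready <- struct{}{}
	<-stop

	// Keep the buffer live until stop closes so the benchmark measures
	// memory that is actually held.
	runtime.KeepAlive(&buf)
}

// blockingReadPool simulates one pump direction that borrows its read buffer
// from the shared pool and returns it on exit, making the buffer available
// for the next burst.
func blockingReadPool(wg *sync.WaitGroup, ready chan<- struct{}, stop <-chan struct{}, pool *sync.Pool) {
	defer wg.Done()

	buf := pool.Get().(*[]byte)
	defer pool.Put(buf)

	ready <- struct{}{}
	<-stop
}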