Highly-opinionated (ex-bullshit-free) MTPROTO proxy for Telegram. If you use v1.0, or an upgrade broke your proxy, please read the chapter "Version 2".
Вы не можете выбрать более 25 тем Темы должны начинаться с буквы или цифры, могут содержать дефисы(-) и должны содержать не более 35 символов.

doppel_buf_test.go 8.3KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343
  1. package benchmarks
  2. import (
  3. "fmt"
  4. "runtime"
  5. "runtime/debug"
  6. "sync"
  7. "testing"
  8. "time"
  9. )
const (
	maxRecordSize = 16384 // tls.MaxRecordSize — one full TLS record payload buffer
	sizeHeader    = 5     // tls.SizeHeader — bytes reserved for the TLS record header
)

// sink is a package-level escape hatch: benchmark results are written here so
// the compiler cannot dead-code-eliminate the measured copy in the loop body.
var sink byte
  15. // stackGoroutineRealistic simulates doppel start() with realistic buffer USE.
  16. // The key: merely declaring [16384]byte doesn't grow the stack. Actually
  17. // writing into it (via copy in the write loop) triggers the lazy stack growth
  18. // from 2KB -> 32KB.
  19. func stackGoroutineRealistic(done <-chan struct{}, wg *sync.WaitGroup, payload []byte) {
  20. // goroutine 1: start() with 16KB stack buffer, actually used
  21. wg.Add(1)
  22. go func() {
  23. defer wg.Done()
  24. var buf [maxRecordSize]byte
  25. // Simulate the write path in doppel start():
  26. // n, _ := c.p.writeStream.Read(buf[tls.SizeHeader : tls.SizeHeader+size])
  27. // tls.WriteRecordInPlace(c.Conn, buf[:], n)
  28. copy(buf[sizeHeader:], payload)
  29. <-done
  30. runtime.KeepAlive(&buf)
  31. }()
  32. // goroutine 2: clock tick loop
  33. wg.Add(1)
  34. go func() {
  35. defer wg.Done()
  36. ticker := time.NewTicker(50 * time.Millisecond)
  37. defer ticker.Stop()
  38. for {
  39. select {
  40. case <-done:
  41. return
  42. case <-ticker.C:
  43. }
  44. }
  45. }()
  46. }
// bufPool recycles 16KB record buffers across goroutines. A *[]byte is stored
// rather than []byte so that Put/Get do not allocate when boxing the slice
// header into the pool's `any`.
var bufPool = sync.Pool{
	New: func() any {
		b := make([]byte, maxRecordSize)
		return &b
	},
}
  53. // poolGoroutineRealistic simulates the same pair with pool-based buffer.
  54. func poolGoroutineRealistic(done <-chan struct{}, wg *sync.WaitGroup, payload []byte) {
  55. // goroutine 1: start() with pooled buffer
  56. wg.Add(1)
  57. go func() {
  58. defer wg.Done()
  59. bp := bufPool.Get().(*[]byte)
  60. buf := *bp
  61. copy(buf[sizeHeader:], payload)
  62. defer bufPool.Put(bp)
  63. <-done
  64. runtime.KeepAlive(&buf)
  65. }()
  66. // goroutine 2: clock tick loop
  67. wg.Add(1)
  68. go func() {
  69. defer wg.Done()
  70. ticker := time.NewTicker(50 * time.Millisecond)
  71. defer ticker.Stop()
  72. for {
  73. select {
  74. case <-done:
  75. return
  76. case <-ticker.C:
  77. }
  78. }
  79. }()
  80. }
  81. // measureMem forces GC and returns MemStats.
  82. func measureMem() runtime.MemStats {
  83. runtime.GC()
  84. runtime.GC()
  85. var m runtime.MemStats
  86. runtime.ReadMemStats(&m)
  87. return m
  88. }
  89. // TestDoppelStackGrowthMechanism demonstrates that [16384]byte on the goroutine
  90. // stack only triggers growth when the buffer is ACTUALLY WRITTEN TO (not just
  91. // declared). Go's lazy stack growth means the stack guard page must be hit.
  92. func TestDoppelStackGrowthMechanism(t *testing.T) {
  93. debug.SetGCPercent(-1)
  94. defer debug.SetGCPercent(100)
  95. const N = 2000
  96. payload := make([]byte, 1400) // typical TLS payload
  97. for i := range payload {
  98. payload[i] = byte(i)
  99. }
  100. // Phase 1: goroutines that declare [16384]byte but only touch buf[0]
  101. {
  102. runtime.GC()
  103. time.Sleep(50 * time.Millisecond)
  104. before := measureMem()
  105. done := make(chan struct{})
  106. var wg sync.WaitGroup
  107. for i := 0; i < N; i++ {
  108. wg.Add(1)
  109. go func() {
  110. defer wg.Done()
  111. var buf [maxRecordSize]byte
  112. buf[0] = 1
  113. <-done
  114. runtime.KeepAlive(&buf)
  115. }()
  116. }
  117. time.Sleep(200 * time.Millisecond)
  118. after := measureMem()
  119. stackPerG := (after.StackInuse - before.StackInuse) / N
  120. t.Logf("DECLARE-ONLY: stack/goroutine = %d bytes (stack not grown)", stackPerG)
  121. close(done)
  122. wg.Wait()
  123. }
  124. runtime.GC()
  125. time.Sleep(100 * time.Millisecond)
  126. // Phase 2: goroutines that actually copy() into the buffer (realistic)
  127. {
  128. runtime.GC()
  129. time.Sleep(50 * time.Millisecond)
  130. before := measureMem()
  131. done := make(chan struct{})
  132. var wg sync.WaitGroup
  133. for i := 0; i < N; i++ {
  134. wg.Add(1)
  135. go func() {
  136. defer wg.Done()
  137. var buf [maxRecordSize]byte
  138. copy(buf[sizeHeader:], payload)
  139. <-done
  140. runtime.KeepAlive(&buf)
  141. }()
  142. }
  143. time.Sleep(200 * time.Millisecond)
  144. after := measureMem()
  145. stackPerG := (after.StackInuse - before.StackInuse) / N
  146. t.Logf("COPY-INTO: stack/goroutine = %d bytes (stack grown to 32KB)", stackPerG)
  147. close(done)
  148. wg.Wait()
  149. }
  150. runtime.GC()
  151. time.Sleep(100 * time.Millisecond)
  152. // Phase 3: pool-based with copy (realistic alternative)
  153. {
  154. runtime.GC()
  155. time.Sleep(50 * time.Millisecond)
  156. before := measureMem()
  157. done := make(chan struct{})
  158. var wg sync.WaitGroup
  159. for i := 0; i < N; i++ {
  160. wg.Add(1)
  161. go func() {
  162. defer wg.Done()
  163. bp := bufPool.Get().(*[]byte)
  164. buf := *bp
  165. copy(buf[sizeHeader:], payload)
  166. defer bufPool.Put(bp)
  167. <-done
  168. runtime.KeepAlive(&buf)
  169. }()
  170. }
  171. time.Sleep(200 * time.Millisecond)
  172. after := measureMem()
  173. stackPerG := (after.StackInuse - before.StackInuse) / N
  174. heapPerG := (after.HeapInuse - before.HeapInuse) / N
  175. t.Logf("POOL-BASED: stack/goroutine = %d bytes, heap/goroutine = %d bytes",
  176. stackPerG, heapPerG)
  177. close(done)
  178. wg.Wait()
  179. }
  180. }
  181. // TestDoppelCombinedOverhead measures the memory of the full doppel Conn pair
  182. // (start goroutine + clock goroutine) at various concurrency levels.
  183. // Uses realistic buffer usage pattern that triggers stack growth.
  184. func TestDoppelCombinedOverhead(t *testing.T) {
  185. payload := make([]byte, 1400)
  186. for i := range payload {
  187. payload[i] = byte(i)
  188. }
  189. for _, n := range []int{500, 1000, 2000} {
  190. t.Run(fmt.Sprintf("N=%d", n), func(t *testing.T) {
  191. debug.SetGCPercent(-1)
  192. defer debug.SetGCPercent(100)
  193. // Stack-allocated approach (current code pattern)
  194. var stackTotal uint64
  195. {
  196. runtime.GC()
  197. time.Sleep(50 * time.Millisecond)
  198. before := measureMem()
  199. done := make(chan struct{})
  200. var wg sync.WaitGroup
  201. for i := 0; i < n; i++ {
  202. stackGoroutineRealistic(done, &wg, payload)
  203. }
  204. time.Sleep(200 * time.Millisecond)
  205. after := measureMem()
  206. stackMem := after.StackInuse - before.StackInuse
  207. heapMem := after.HeapInuse - before.HeapInuse
  208. stackTotal = stackMem + heapMem
  209. t.Logf("STACK: %d conns (2 goroutines each = %d goroutines)", n, n*2)
  210. t.Logf(" StackInuse: %d KB (%d bytes/conn)", stackMem/1024, stackMem/uint64(n))
  211. t.Logf(" HeapInuse: %d KB (%d bytes/conn)", heapMem/1024, heapMem/uint64(n))
  212. t.Logf(" Total: %d KB (%.1f MB)", (stackMem+heapMem)/1024,
  213. float64(stackMem+heapMem)/(1024*1024))
  214. close(done)
  215. wg.Wait()
  216. }
  217. runtime.GC()
  218. time.Sleep(100 * time.Millisecond)
  219. // Pool-based approach
  220. {
  221. runtime.GC()
  222. time.Sleep(50 * time.Millisecond)
  223. before := measureMem()
  224. done := make(chan struct{})
  225. var wg sync.WaitGroup
  226. for i := 0; i < n; i++ {
  227. poolGoroutineRealistic(done, &wg, payload)
  228. }
  229. time.Sleep(200 * time.Millisecond)
  230. after := measureMem()
  231. stackMem := after.StackInuse - before.StackInuse
  232. heapMem := after.HeapInuse - before.HeapInuse
  233. poolTotal := stackMem + heapMem
  234. t.Logf("POOL: %d conns (2 goroutines each = %d goroutines)", n, n*2)
  235. t.Logf(" StackInuse: %d KB (%d bytes/conn)", stackMem/1024, stackMem/uint64(n))
  236. t.Logf(" HeapInuse: %d KB (%d bytes/conn)", heapMem/1024, heapMem/uint64(n))
  237. t.Logf(" Total: %d KB (%.1f MB)", (stackMem+heapMem)/1024,
  238. float64(stackMem+heapMem)/(1024*1024))
  239. savings := int64(stackTotal) - int64(poolTotal)
  240. t.Logf("SAVINGS: %d KB total (%d bytes/conn), %.0f%% reduction",
  241. savings/1024, savings/int64(n),
  242. float64(savings)/float64(stackTotal)*100)
  243. close(done)
  244. wg.Wait()
  245. }
  246. })
  247. }
  248. }
  249. // BenchmarkDoppelBufStack benchmarks goroutine pair lifecycle with stack buffer.
  250. func BenchmarkDoppelBufStack(b *testing.B) {
  251. payload := make([]byte, 1400)
  252. for b.Loop() {
  253. done := make(chan struct{})
  254. var wg sync.WaitGroup
  255. stackGoroutineRealistic(done, &wg, payload)
  256. close(done)
  257. wg.Wait()
  258. }
  259. }
  260. // BenchmarkDoppelBufPool benchmarks goroutine pair lifecycle with pool buffer.
  261. func BenchmarkDoppelBufPool(b *testing.B) {
  262. payload := make([]byte, 1400)
  263. for b.Loop() {
  264. done := make(chan struct{})
  265. var wg sync.WaitGroup
  266. poolGoroutineRealistic(done, &wg, payload)
  267. close(done)
  268. wg.Wait()
  269. }
  270. }
  271. // BenchmarkDoppelThroughputStack simulates write throughput with stack buffer.
  272. func BenchmarkDoppelThroughputStack(b *testing.B) {
  273. payload := make([]byte, 1400)
  274. for i := range payload {
  275. payload[i] = byte(i)
  276. }
  277. b.SetBytes(int64(len(payload)))
  278. for b.Loop() {
  279. var buf [maxRecordSize]byte
  280. copy(buf[sizeHeader:], payload)
  281. sink = buf[sizeHeader]
  282. }
  283. }
  284. // BenchmarkDoppelThroughputPool simulates write throughput with pooled buffer.
  285. func BenchmarkDoppelThroughputPool(b *testing.B) {
  286. payload := make([]byte, 1400)
  287. for i := range payload {
  288. payload[i] = byte(i)
  289. }
  290. b.SetBytes(int64(len(payload)))
  291. for b.Loop() {
  292. bp := bufPool.Get().(*[]byte)
  293. buf := *bp
  294. copy(buf[sizeHeader:], payload)
  295. sink = buf[sizeHeader]
  296. bufPool.Put(bp)
  297. }
  298. }