Highly-opinionated (ex-bullshit-free) MTPROTO proxy for Telegram. If you use v1.0 or an upgrade broke your proxy, please read the chapter Version 2
Nelze vybrat více než 25 témat Téma musí začínat písmenem nebo číslem, může obsahovat pomlčky („-“) a může být dlouhé až 35 znaků.

relay_bench_test.go 15KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564
  1. package relay
  2. import (
  3. "bytes"
  4. "crypto/aes"
  5. "crypto/cipher"
  6. "crypto/rand"
  7. "encoding/binary"
  8. "fmt"
  9. "io"
  10. "net"
  11. "sync"
  12. "sync/atomic"
  13. "testing"
  14. "github.com/dolonet/mtg-multi/essentials"
  15. "github.com/dolonet/mtg-multi/mtglib/internal/tls"
  16. )
  17. // mockConn wraps a net.Conn to satisfy essentials.Conn.
  18. type mockConn struct {
  19. net.Conn
  20. }
  21. func (m mockConn) CloseRead() error { return nil }
  22. func (m mockConn) CloseWrite() error { return nil }
  23. // countingReader wraps an io.Reader and counts Read calls.
  24. type countingReader struct {
  25. r io.Reader
  26. calls atomic.Int64
  27. }
  28. func (c *countingReader) Read(p []byte) (int, error) {
  29. c.calls.Add(1)
  30. return c.r.Read(p)
  31. }
  32. // countingConn wraps essentials.Conn and counts Read calls on the underlying conn.
  33. type countingConn struct {
  34. essentials.Conn
  35. readCalls atomic.Int64
  36. }
  37. func (c *countingConn) Read(p []byte) (int, error) {
  38. c.readCalls.Add(1)
  39. return c.Conn.Read(p)
  40. }
  41. // makeTLSRecord creates a single TLS application data record with the given payload.
  42. func makeTLSRecord(payload []byte) []byte {
  43. rec := make([]byte, tls.SizeHeader+len(payload))
  44. rec[0] = tls.TypeApplicationData
  45. copy(rec[1:3], tls.TLSVersion[:])
  46. binary.BigEndian.PutUint16(rec[3:5], uint16(len(payload)))
  47. copy(rec[5:], payload)
  48. return rec
  49. }
  50. // makeTLSStream creates a stream of TLS records totaling approximately totalBytes of payload.
  51. func makeTLSStream(totalBytes int, recordPayloadSize int) []byte {
  52. var buf bytes.Buffer
  53. payload := make([]byte, recordPayloadSize)
  54. rand.Read(payload)
  55. for buf.Len() < totalBytes+tls.SizeHeader {
  56. remaining := totalBytes - (buf.Len() - (buf.Len()/(recordPayloadSize+tls.SizeHeader))*tls.SizeHeader)
  57. if remaining <= 0 {
  58. break
  59. }
  60. pSize := recordPayloadSize
  61. if remaining < pSize {
  62. pSize = remaining
  63. }
  64. rec := makeTLSRecord(payload[:pSize])
  65. buf.Write(rec)
  66. }
  67. return buf.Bytes()
  68. }
  69. // makeXORCipher creates a simple AES-CTR cipher for obfuscation testing.
  70. func makeXORCipher() cipher.Stream {
  71. key := make([]byte, 32)
  72. rand.Read(key)
  73. iv := make([]byte, aes.BlockSize)
  74. rand.Read(iv)
  75. block, _ := aes.NewCipher(key)
  76. return cipher.NewCTR(block, iv)
  77. }
  78. // obfuscatedConn mirrors the obfuscation layer: XOR on read.
  79. type obfuscatedConn struct {
  80. essentials.Conn
  81. recvCipher cipher.Stream
  82. }
  83. func (c obfuscatedConn) Read(p []byte) (int, error) {
  84. n, err := c.Conn.Read(p)
  85. if err != nil {
  86. return n, err
  87. }
  88. c.recvCipher.XORKeyStream(p[:n], p[:n])
  89. return n, nil
  90. }
  91. // ============================================================
  92. // Test A: client→telegram direction (through TLS layer)
  93. // Relay buffer reads from tls.Conn.Read() → readBuf (memcpy).
  94. // Buffer size should NOT affect underlying read calls.
  95. // ============================================================
  96. func BenchmarkClientToTelegram_TLSRead(b *testing.B) {
  97. for _, bufSize := range []int{4096, 8192, 16379} {
  98. b.Run(fmt.Sprintf("buf=%d", bufSize), func(b *testing.B) {
  99. // Create TLS stream: full records with max payload
  100. totalPayload := 10 * 1024 * 1024 // 10 MB
  101. stream := makeTLSStream(totalPayload, tls.MaxRecordPayloadSize)
  102. b.ResetTimer()
  103. b.SetBytes(int64(totalPayload))
  104. for i := 0; i < b.N; i++ {
  105. reader := bytes.NewReader(stream)
  106. counter := &countingReader{r: reader}
  107. // Simulate: raw tcp → tls.New(read=true)
  108. serverConn, clientConn := net.Pipe()
  109. mConn := mockConn{clientConn}
  110. tlsConn := tls.New(mConn, true, false)
  111. // Feed data in background
  112. go func() {
  113. io.Copy(serverConn, counter)
  114. serverConn.Close()
  115. }()
  116. buf := make([]byte, bufSize)
  117. io.CopyBuffer(io.Discard, tlsConn, buf)
  118. clientConn.Close()
  119. b.ReportMetric(float64(counter.calls.Load()), "underlying_reads")
  120. }
  121. })
  122. }
  123. }
  124. // ============================================================
  125. // Test B: telegram→client direction (raw TCP, no TLS)
  126. // Relay buffer directly determines read(2) size.
  127. // Buffer size DOES affect read calls.
  128. // ============================================================
  129. func BenchmarkTelegramToClient_RawRead(b *testing.B) {
  130. for _, bufSize := range []int{4096, 8192, 16379} {
  131. b.Run(fmt.Sprintf("buf=%d", bufSize), func(b *testing.B) {
  132. totalPayload := 10 * 1024 * 1024 // 10 MB
  133. b.ResetTimer()
  134. b.SetBytes(int64(totalPayload))
  135. for i := 0; i < b.N; i++ {
  136. serverConn, clientConn := net.Pipe()
  137. mConn := mockConn{clientConn}
  138. cipherStream := makeXORCipher()
  139. obfConn := obfuscatedConn{Conn: mConn, recvCipher: cipherStream}
  140. // Wrap in counting at the raw conn level
  141. cc := &countingConn{Conn: mConn}
  142. obfConnCounted := obfuscatedConn{Conn: cc, recvCipher: cipherStream}
  143. _ = obfConn // unused, use counted version
  144. // Feed data
  145. data := make([]byte, totalPayload)
  146. rand.Read(data)
  147. go func() {
  148. // Encrypt before sending (to match obfuscation XOR)
  149. sendCipher := makeXORCipher()
  150. sendCipher.XORKeyStream(data, data)
  151. serverConn.Write(data)
  152. serverConn.Close()
  153. }()
  154. buf := make([]byte, bufSize)
  155. io.CopyBuffer(io.Discard, obfConnCounted, buf)
  156. clientConn.Close()
  157. b.ReportMetric(float64(cc.readCalls.Load()), "underlying_reads")
  158. }
  159. })
  160. }
  161. }
  162. // ============================================================
  163. // Test C: Media/file streaming (10 MB burst and realistic MTU)
  164. // ============================================================
  165. // BenchmarkMediaDownload_Burst simulates downloading media from Telegram.
  166. // telegram→client direction, data available in large chunks.
  167. func BenchmarkMediaDownload_Burst(b *testing.B) {
  168. for _, bufSize := range []int{4096, 8192, 16379} {
  169. b.Run(fmt.Sprintf("buf=%d", bufSize), func(b *testing.B) {
  170. totalPayload := 10 * 1024 * 1024
  171. data := make([]byte, totalPayload)
  172. rand.Read(data)
  173. b.ResetTimer()
  174. b.SetBytes(int64(totalPayload))
  175. for i := 0; i < b.N; i++ {
  176. serverConn, clientConn := net.Pipe()
  177. cc := &countingConn{Conn: mockConn{clientConn}}
  178. go func() {
  179. serverConn.Write(data)
  180. serverConn.Close()
  181. }()
  182. buf := make([]byte, bufSize)
  183. io.CopyBuffer(io.Discard, cc, buf)
  184. clientConn.Close()
  185. b.ReportMetric(float64(cc.readCalls.Load()), "underlying_reads")
  186. }
  187. })
  188. }
  189. }
  190. // BenchmarkMediaDownload_MTU simulates realistic TCP behavior where data arrives
  191. // in MTU-sized chunks (~1460 bytes per segment).
  192. func BenchmarkMediaDownload_MTU(b *testing.B) {
  193. for _, bufSize := range []int{4096, 8192, 16379} {
  194. b.Run(fmt.Sprintf("buf=%d", bufSize), func(b *testing.B) {
  195. totalPayload := 10 * 1024 * 1024
  196. mtuSize := 1460
  197. b.ResetTimer()
  198. b.SetBytes(int64(totalPayload))
  199. for i := 0; i < b.N; i++ {
  200. serverConn, clientConn := net.Pipe()
  201. cc := &countingConn{Conn: mockConn{clientConn}}
  202. go func() {
  203. data := make([]byte, mtuSize)
  204. rand.Read(data)
  205. written := 0
  206. for written < totalPayload {
  207. toWrite := mtuSize
  208. if totalPayload-written < toWrite {
  209. toWrite = totalPayload - written
  210. }
  211. serverConn.Write(data[:toWrite])
  212. written += toWrite
  213. }
  214. serverConn.Close()
  215. }()
  216. buf := make([]byte, bufSize)
  217. io.CopyBuffer(io.Discard, cc, buf)
  218. clientConn.Close()
  219. b.ReportMetric(float64(cc.readCalls.Load()), "underlying_reads")
  220. }
  221. })
  222. }
  223. }
  224. // BenchmarkMediaUpload_TLS simulates uploading media through the TLS layer
  225. // (client→telegram direction). Buffer size should not matter.
  226. func BenchmarkMediaUpload_TLS(b *testing.B) {
  227. for _, bufSize := range []int{4096, 8192, 16379} {
  228. b.Run(fmt.Sprintf("buf=%d", bufSize), func(b *testing.B) {
  229. totalPayload := 10 * 1024 * 1024
  230. stream := makeTLSStream(totalPayload, tls.MaxRecordPayloadSize)
  231. b.ResetTimer()
  232. b.SetBytes(int64(totalPayload))
  233. for i := 0; i < b.N; i++ {
  234. reader := bytes.NewReader(stream)
  235. counter := &countingReader{r: reader}
  236. serverConn, clientConn := net.Pipe()
  237. mConn := mockConn{clientConn}
  238. tlsConn := tls.New(mConn, true, false)
  239. go func() {
  240. io.Copy(serverConn, counter)
  241. serverConn.Close()
  242. }()
  243. buf := make([]byte, bufSize)
  244. io.CopyBuffer(io.Discard, tlsConn, buf)
  245. clientConn.Close()
  246. b.ReportMetric(float64(counter.calls.Load()), "underlying_reads")
  247. }
  248. })
  249. }
  250. }
  251. // ============================================================
  252. // Test D: Small messages (chat traffic)
  253. // ============================================================
  254. func BenchmarkSmallMessages_TelegramToClient(b *testing.B) {
  255. for _, bufSize := range []int{4096, 8192, 16379} {
  256. b.Run(fmt.Sprintf("buf=%d", bufSize), func(b *testing.B) {
  257. // 10000 messages of 200 bytes each = 2 MB
  258. msgSize := 200
  259. numMsgs := 10000
  260. totalPayload := msgSize * numMsgs
  261. b.ResetTimer()
  262. b.SetBytes(int64(totalPayload))
  263. for i := 0; i < b.N; i++ {
  264. serverConn, clientConn := net.Pipe()
  265. cc := &countingConn{Conn: mockConn{clientConn}}
  266. go func() {
  267. msg := make([]byte, msgSize)
  268. rand.Read(msg)
  269. for j := 0; j < numMsgs; j++ {
  270. serverConn.Write(msg)
  271. }
  272. serverConn.Close()
  273. }()
  274. buf := make([]byte, bufSize)
  275. io.CopyBuffer(io.Discard, cc, buf)
  276. clientConn.Close()
  277. b.ReportMetric(float64(cc.readCalls.Load()), "underlying_reads")
  278. }
  279. })
  280. }
  281. }
  282. func BenchmarkSmallMessages_ClientToTelegram(b *testing.B) {
  283. for _, bufSize := range []int{4096, 8192, 16379} {
  284. b.Run(fmt.Sprintf("buf=%d", bufSize), func(b *testing.B) {
  285. msgSize := 200
  286. numMsgs := 10000
  287. totalPayload := msgSize * numMsgs
  288. // Wrap small messages in TLS records
  289. var streamBuf bytes.Buffer
  290. msg := make([]byte, msgSize)
  291. rand.Read(msg)
  292. for j := 0; j < numMsgs; j++ {
  293. streamBuf.Write(makeTLSRecord(msg))
  294. }
  295. stream := streamBuf.Bytes()
  296. b.ResetTimer()
  297. b.SetBytes(int64(totalPayload))
  298. for i := 0; i < b.N; i++ {
  299. reader := bytes.NewReader(stream)
  300. counter := &countingReader{r: reader}
  301. serverConn, clientConn := net.Pipe()
  302. mConn := mockConn{clientConn}
  303. tlsConn := tls.New(mConn, true, false)
  304. go func() {
  305. io.Copy(serverConn, counter)
  306. serverConn.Close()
  307. }()
  308. buf := make([]byte, bufSize)
  309. io.CopyBuffer(io.Discard, tlsConn, buf)
  310. clientConn.Close()
  311. b.ReportMetric(float64(counter.calls.Load()), "underlying_reads")
  312. }
  313. })
  314. }
  315. }
  316. // ============================================================
  317. // CPU overhead benchmarks: stack vs pool allocation
  318. // ============================================================
  319. // BenchmarkCPU_StackVsPool_Relay measures the CPU overhead of using sync.Pool
  320. // vs stack-allocated buffers in a realistic relay scenario.
  321. // This is the core question: does Pool.Get/Put add measurable CPU cost?
  322. func BenchmarkCPU_StackVsPool_Relay(b *testing.B) {
  323. totalPayload := 10 * 1024 * 1024 // 10 MB
  324. b.Run("stack_16KB", func(b *testing.B) {
  325. b.SetBytes(int64(totalPayload))
  326. for i := 0; i < b.N; i++ {
  327. serverConn, clientConn := net.Pipe()
  328. go func() {
  329. data := make([]byte, totalPayload)
  330. serverConn.Write(data)
  331. serverConn.Close()
  332. }()
  333. var buf [tls.MaxRecordPayloadSize]byte
  334. io.CopyBuffer(io.Discard, clientConn, buf[:])
  335. clientConn.Close()
  336. }
  337. })
  338. pool16 := &sync.Pool{New: func() any { b := make([]byte, tls.MaxRecordPayloadSize); return &b }}
  339. b.Run("pool_16KB", func(b *testing.B) {
  340. b.SetBytes(int64(totalPayload))
  341. for i := 0; i < b.N; i++ {
  342. serverConn, clientConn := net.Pipe()
  343. go func() {
  344. data := make([]byte, totalPayload)
  345. serverConn.Write(data)
  346. serverConn.Close()
  347. }()
  348. bp := pool16.Get().(*[]byte)
  349. io.CopyBuffer(io.Discard, clientConn, *bp)
  350. pool16.Put(bp)
  351. clientConn.Close()
  352. }
  353. })
  354. pool4 := &sync.Pool{New: func() any { b := make([]byte, 4096); return &b }}
  355. b.Run("pool_4KB", func(b *testing.B) {
  356. b.SetBytes(int64(totalPayload))
  357. for i := 0; i < b.N; i++ {
  358. serverConn, clientConn := net.Pipe()
  359. go func() {
  360. data := make([]byte, totalPayload)
  361. serverConn.Write(data)
  362. serverConn.Close()
  363. }()
  364. bp := pool4.Get().(*[]byte)
  365. io.CopyBuffer(io.Discard, clientConn, *bp)
  366. pool4.Put(bp)
  367. clientConn.Close()
  368. }
  369. })
  370. }
  371. // BenchmarkCPU_PoolGetPut measures the raw overhead of sync.Pool.Get/Put
  372. // operations (without any I/O), to isolate pool machinery cost.
  373. func BenchmarkCPU_PoolGetPut(b *testing.B) {
  374. pool := &sync.Pool{New: func() any { buf := make([]byte, tls.MaxRecordPayloadSize); return &buf }}
  375. // Warm up the pool
  376. items := make([]*[]byte, 100)
  377. for i := range items {
  378. items[i] = pool.Get().(*[]byte)
  379. }
  380. for _, item := range items {
  381. pool.Put(item)
  382. }
  383. b.ResetTimer()
  384. for i := 0; i < b.N; i++ {
  385. bp := pool.Get().(*[]byte)
  386. pool.Put(bp)
  387. }
  388. }
  389. // BenchmarkCPU_StackAlloc measures the cost of stack-allocating the buffer.
  390. func BenchmarkCPU_StackAlloc(b *testing.B) {
  391. for i := 0; i < b.N; i++ {
  392. var buf [tls.MaxRecordPayloadSize]byte
  393. sinkByte = buf[0]
  394. sinkByte = buf[len(buf)-1]
  395. }
  396. }
  397. // BenchmarkCPU_TLSRelay_StackVsPool measures CPU for the full TLS path
  398. // (client→telegram direction) with stack vs pool buffers.
  399. func BenchmarkCPU_TLSRelay_StackVsPool(b *testing.B) {
  400. totalPayload := 10 * 1024 * 1024
  401. stream := makeTLSStream(totalPayload, tls.MaxRecordPayloadSize)
  402. b.Run("stack_16KB", func(b *testing.B) {
  403. b.SetBytes(int64(totalPayload))
  404. for i := 0; i < b.N; i++ {
  405. reader := bytes.NewReader(stream)
  406. serverConn, clientConn := net.Pipe()
  407. tlsConn := tls.New(mockConn{clientConn}, true, false)
  408. go func() {
  409. io.Copy(serverConn, reader)
  410. serverConn.Close()
  411. }()
  412. var buf [tls.MaxRecordPayloadSize]byte
  413. io.CopyBuffer(io.Discard, tlsConn, buf[:])
  414. clientConn.Close()
  415. }
  416. })
  417. pool16 := &sync.Pool{New: func() any { b := make([]byte, tls.MaxRecordPayloadSize); return &b }}
  418. b.Run("pool_16KB", func(b *testing.B) {
  419. b.SetBytes(int64(totalPayload))
  420. for i := 0; i < b.N; i++ {
  421. reader := bytes.NewReader(stream)
  422. serverConn, clientConn := net.Pipe()
  423. tlsConn := tls.New(mockConn{clientConn}, true, false)
  424. go func() {
  425. io.Copy(serverConn, reader)
  426. serverConn.Close()
  427. }()
  428. bp := pool16.Get().(*[]byte)
  429. io.CopyBuffer(io.Discard, tlsConn, *bp)
  430. pool16.Put(bp)
  431. clientConn.Close()
  432. }
  433. })
  434. pool4 := &sync.Pool{New: func() any { b := make([]byte, 4096); return &b }}
  435. b.Run("pool_4KB", func(b *testing.B) {
  436. b.SetBytes(int64(totalPayload))
  437. for i := 0; i < b.N; i++ {
  438. reader := bytes.NewReader(stream)
  439. serverConn, clientConn := net.Pipe()
  440. tlsConn := tls.New(mockConn{clientConn}, true, false)
  441. go func() {
  442. io.Copy(serverConn, reader)
  443. serverConn.Close()
  444. }()
  445. bp := pool4.Get().(*[]byte)
  446. io.CopyBuffer(io.Discard, tlsConn, *bp)
  447. pool4.Put(bp)
  448. clientConn.Close()
  449. }
  450. })
  451. }
  452. // ============================================================
  453. // Concurrent memory measurement helpers for stack_bench_test.go
  454. // ============================================================
  455. var sinkByte byte // prevent compiler optimization
  456. // blockingRead simulates a long-lived relay pump with stack buffer.
  457. func blockingReadStack(wg *sync.WaitGroup, ready chan struct{}, stop chan struct{}) {
  458. defer wg.Done()
  459. var buf [tls.MaxRecordPayloadSize]byte
  460. sinkByte = buf[0] // ensure buf is used
  461. ready <- struct{}{}
  462. <-stop
  463. sinkByte = buf[len(buf)-1]
  464. }
  465. // blockingReadPool simulates relay pump with pooled buffer.
  466. func blockingReadPool(wg *sync.WaitGroup, ready chan struct{}, stop chan struct{}, pool *sync.Pool) {
  467. defer wg.Done()
  468. bp := pool.Get().(*[]byte)
  469. defer pool.Put(bp)
  470. sinkByte = (*bp)[0]
  471. ready <- struct{}{}
  472. <-stop
  473. sinkByte = (*bp)[len(*bp)-1]
  474. }