@@ -12,6 +12,7 @@ import (
12
12
"context"
13
13
"encoding/binary"
14
14
"fmt"
15
+ "io"
15
16
"net"
16
17
"os"
17
18
"os/exec"
@@ -47,13 +48,11 @@ type Aggregator struct {
47
48
48
49
// listen to events from different sources
49
50
k8sChan <- chan interface {}
50
- ebpfChan chan interface {}
51
+ ebpfChan <- chan interface {}
51
52
ebpfProcChan <- chan interface {}
52
53
ebpfTcpChan <- chan interface {}
53
54
tlsAttachSignalChan chan uint32
54
55
55
- ec * ebpf.EbpfCollector
56
-
57
56
// store the service map
58
57
clusterInfo * ClusterInfo
59
58
@@ -118,18 +117,7 @@ type ClusterInfo struct {
118
117
119
118
// Pid -> SocketMap
120
119
// pid -> fd -> {saddr, sport, daddr, dport}
121
-
122
- // shard pidToSocketMap by pid to reduce lock contention
123
- mu0 sync.RWMutex
124
- mu1 sync.RWMutex
125
- mu2 sync.RWMutex
126
- mu3 sync.RWMutex
127
- mu4 sync.RWMutex
128
- PidToSocketMap0 map [uint32 ]* SocketMap `json:"pidToSocketMap0"` // pid ending with 0-1
129
- PidToSocketMap1 map [uint32 ]* SocketMap `json:"pidToSocketMap1"` // pid ending with 2-3
130
- PidToSocketMap2 map [uint32 ]* SocketMap `json:"pidToSocketMap2"` // pid ending with 4-5
131
- PidToSocketMap3 map [uint32 ]* SocketMap `json:"pidToSocketMap3"` // pid ending with 6-7
132
- PidToSocketMap4 map [uint32 ]* SocketMap `json:"pidToSocketMap4"` // pid ending with 8-9
120
+ SocketMaps []* SocketMap // index symbolizes pid
133
121
}
134
122
135
123
// If we have information from the container runtimes
@@ -154,10 +142,7 @@ var (
154
142
purgeTime = 10 * time .Minute
155
143
)
156
144
157
- var usePgDs bool = false
158
- var useBackendDs bool = true // default to true
159
145
var reverseDnsCache * cache.Cache
160
-
161
146
var re * regexp.Regexp
162
147
163
148
func init () {
@@ -179,13 +164,24 @@ func NewAggregator(parentCtx context.Context, k8sChan <-chan interface{},
179
164
clusterInfo := & ClusterInfo {
180
165
PodIPToPodUid : map [string ]types.UID {},
181
166
ServiceIPToServiceUid : map [string ]types.UID {},
182
- PidToSocketMap0 : make (map [uint32 ]* SocketMap ),
183
- PidToSocketMap1 : make (map [uint32 ]* SocketMap ),
184
- PidToSocketMap2 : make (map [uint32 ]* SocketMap ),
185
- PidToSocketMap3 : make (map [uint32 ]* SocketMap ),
186
- PidToSocketMap4 : make (map [uint32 ]* SocketMap ),
187
167
}
188
168
169
+ maxPid , err := getPidMax ()
170
+ if err != nil {
171
+ log .Logger .Fatal ().Err (err ).Msg ("error getting max pid" )
172
+ }
173
+ sockMaps := make ([]* SocketMap , maxPid + 1 ) // index=pid
174
+
175
+ // initialize sockMaps
176
+ for i := range sockMaps {
177
+ sockMaps [i ] = & SocketMap {
178
+ M : nil , // initialized on demand later
179
+ mu : sync.RWMutex {},
180
+ }
181
+ }
182
+
183
+ clusterInfo .SocketMaps = sockMaps
184
+
189
185
a := & Aggregator {
190
186
ctx : ctx ,
191
187
k8sChan : k8sChan ,
@@ -289,6 +285,7 @@ func (a *Aggregator) Run() {
289
285
}()
290
286
go a .processk8s ()
291
287
288
+ // TODO: determine the number of workers with benchmarking
292
289
cpuCount := runtime .NumCPU ()
293
290
numWorker := 5 * cpuCount
294
291
if numWorker < 50 {
@@ -300,7 +297,6 @@ func (a *Aggregator) Run() {
300
297
go a .processEbpfTcp (a .ctx )
301
298
}
302
299
303
- // TODO: pod number may be ideal
304
300
for i := 0 ; i < 2 * cpuCount ; i ++ {
305
301
go a .processHttp2Frames ()
306
302
go a .processEbpfProc (a .ctx )
@@ -472,23 +468,15 @@ func (a *Aggregator) processTcpConnect(d *tcp_state.TcpConnectEvent) {
472
468
var sockMap * SocketMap
473
469
var ok bool
474
470
475
- mu , pidToSocketMap := a .getShard (d .Pid )
476
- mu .Lock ()
477
- sockMap , ok = pidToSocketMap [d .Pid ]
478
- if ! ok {
479
- sockMap = & SocketMap {
480
- M : make (map [uint64 ]* SocketLine ),
481
- mu : sync.RWMutex {},
482
- }
483
- pidToSocketMap [d .Pid ] = sockMap
484
- }
485
- mu .Unlock () // unlock for writing
486
-
471
+ sockMap = a .clusterInfo .SocketMaps [d .Pid ]
487
472
var skLine * SocketLine
488
473
489
474
sockMap .mu .Lock () // lock for reading
490
- skLine , ok = sockMap .M [d .Fd ]
475
+ if sockMap .M == nil {
476
+ sockMap .M = make (map [uint64 ]* SocketLine )
477
+ }
491
478
479
+ skLine , ok = sockMap .M [d .Fd ]
492
480
if ! ok {
493
481
skLine = NewSocketLine (d .Pid , d .Fd )
494
482
sockMap .M [d .Fd ] = skLine
@@ -512,24 +500,14 @@ func (a *Aggregator) processTcpConnect(d *tcp_state.TcpConnectEvent) {
512
500
var sockMap * SocketMap
513
501
var ok bool
514
502
515
- mu , pidToSocketMap := a .getShard (d .Pid )
516
- mu .Lock ()
517
- sockMap , ok = pidToSocketMap [d .Pid ]
518
- if ! ok {
519
- sockMap = & SocketMap {
520
- M : make (map [uint64 ]* SocketLine ),
521
- mu : sync.RWMutex {},
522
- }
523
-
524
- pidToSocketMap [d .Pid ] = sockMap
525
- mu .Unlock () // unlock for writing
526
- return
527
- }
528
- mu .Unlock ()
503
+ sockMap = a .clusterInfo .SocketMaps [d .Pid ]
529
504
530
505
var skLine * SocketLine
531
506
532
507
sockMap .mu .Lock () // lock for reading
508
+ if sockMap .M == nil {
509
+ sockMap .M = make (map [uint64 ]* SocketLine )
510
+ }
533
511
skLine , ok = sockMap .M [d .Fd ]
534
512
if ! ok {
535
513
sockMap .mu .Unlock () // unlock for reading
@@ -1068,17 +1046,9 @@ func (a *Aggregator) fetchSkLine(sockMap *SocketMap, pid uint32, fd uint64) *Soc
1068
1046
// add it to the socket map
1069
1047
func (a * Aggregator ) getAlreadyExistingSockets (pid uint32 ) {
1070
1048
// no need for locking because this is called firstmost and no other goroutine is running
1071
- _ , pidToSocketMap := a .getShard (pid )
1072
- sockMap , ok := pidToSocketMap [pid ]
1073
- if ! ok {
1074
- sockMap = & SocketMap {
1075
- M : make (map [uint64 ]* SocketLine ),
1076
- mu : sync.RWMutex {},
1077
- }
1078
- pidToSocketMap [pid ] = sockMap
1079
- }
1080
1049
1081
1050
socks := map [string ]sock {}
1051
+ sockMap := a .fetchSocketMap (pid )
1082
1052
1083
1053
// Get the sockets for the process.
1084
1054
var err error
@@ -1140,7 +1110,12 @@ func (a *Aggregator) getAlreadyExistingSockets(pid uint32) {
1140
1110
skLine := NewSocketLine (pid , fd .Fd )
1141
1111
skLine .AddValue (0 , sockInfo )
1142
1112
1113
+ sockMap .mu .Lock ()
1114
+ if sockMap .M == nil {
1115
+ sockMap .M = make (map [uint64 ]* SocketLine )
1116
+ }
1143
1117
sockMap .M [fd .Fd ] = skLine
1118
+ sockMap .mu .Unlock ()
1144
1119
}
1145
1120
}
1146
1121
@@ -1178,56 +1153,20 @@ func (a *Aggregator) fetchSkInfo(ctx context.Context, skLine *SocketLine, d *l7_
1178
1153
return skInfo
1179
1154
}
1180
1155
1181
- func (a * Aggregator ) getShard (pid uint32 ) (* sync.RWMutex , map [uint32 ]* SocketMap ) {
1182
- lastDigit := pid % 10
1183
- var mu * sync.RWMutex
1184
- var pidToSocketMap map [uint32 ]* SocketMap
1185
- switch lastDigit {
1186
- case 0 , 1 :
1187
- mu = & a .clusterInfo .mu0
1188
- pidToSocketMap = a .clusterInfo .PidToSocketMap0
1189
- case 2 , 3 :
1190
- mu = & a .clusterInfo .mu1
1191
- pidToSocketMap = a .clusterInfo .PidToSocketMap1
1192
- case 4 , 5 :
1193
- mu = & a .clusterInfo .mu2
1194
- pidToSocketMap = a .clusterInfo .PidToSocketMap2
1195
- case 6 , 7 :
1196
- mu = & a .clusterInfo .mu3
1197
- pidToSocketMap = a .clusterInfo .PidToSocketMap3
1198
- case 8 , 9 :
1199
- mu = & a .clusterInfo .mu4
1200
- pidToSocketMap = a .clusterInfo .PidToSocketMap4
1201
- }
1202
-
1203
- return mu , pidToSocketMap
1204
- }
1205
-
1206
1156
// removeFromClusterInfo discards every socket line tracked for pid by
// resetting that pid's slot in clusterInfo.SocketMaps. The slot (and its
// mutex) is kept; only the fd map is released, and it is treated as
// "not yet created" (nil) from then on.
func (a *Aggregator) removeFromClusterInfo(pid uint32) {
	// SocketMaps is indexed directly by pid. A pid past the end of the slice
	// has no slot, so there is nothing to clear — and indexing it would panic.
	if uint64(pid) >= uint64(len(a.clusterInfo.SocketMaps)) {
		return
	}

	sockMap := a.clusterInfo.SocketMaps[pid]
	sockMap.mu.Lock()
	sockMap.M = nil
	sockMap.mu.Unlock()
}
1212
1162
1213
1163
func (a * Aggregator ) fetchSocketMap (pid uint32 ) * SocketMap {
1214
- var sockMap * SocketMap
1215
- var ok bool
1216
-
1217
- mu , pidToSocketMap := a .getShard (pid ) // create shard if not exists
1218
- mu .Lock () // lock for reading
1219
- sockMap , ok = pidToSocketMap [pid ]
1220
- if ! ok {
1221
- // initialize socket map
1222
- sockMap = & SocketMap {
1223
- M : make (map [uint64 ]* SocketLine ),
1224
- mu : sync.RWMutex {},
1225
- }
1226
- pidToSocketMap [pid ] = sockMap
1227
-
1228
- go a .signalTlsAttachment (pid )
1164
+ sockMap := a .clusterInfo .SocketMaps [pid ]
1165
+ sockMap .mu .Lock ()
1166
+ if sockMap .M == nil {
1167
+ sockMap .M = make (map [uint64 ]* SocketLine )
1229
1168
}
1230
- mu .Unlock () // unlock for writing
1169
+ sockMap . mu .Unlock ()
1231
1170
1232
1171
return sockMap
1233
1172
}
@@ -1408,44 +1347,36 @@ func (a *Aggregator) clearSocketLines(ctx context.Context) {
1408
1347
}()
1409
1348
1410
1349
for range ticker .C {
1411
- a .clusterInfo .mu0 .RLock ()
1412
- for _ , socketMap := range a .clusterInfo .PidToSocketMap0 {
1413
- for _ , socketLine := range socketMap .M {
1414
- skLineCh <- socketLine
1415
- }
1416
- }
1417
- a .clusterInfo .mu0 .RUnlock ()
1418
-
1419
- a .clusterInfo .mu1 .RLock ()
1420
- for _ , socketMap := range a .clusterInfo .PidToSocketMap1 {
1421
- for _ , socketLine := range socketMap .M {
1422
- skLineCh <- socketLine
1423
- }
1424
- }
1425
- a .clusterInfo .mu1 .RUnlock ()
1426
-
1427
- a .clusterInfo .mu2 .RLock ()
1428
- for _ , socketMap := range a .clusterInfo .PidToSocketMap2 {
1429
- for _ , socketLine := range socketMap .M {
1430
- skLineCh <- socketLine
1350
+ for _ , sockMap := range a .clusterInfo .SocketMaps {
1351
+ sockMap .mu .Lock ()
1352
+ if sockMap .M != nil {
1353
+ for _ , skLine := range sockMap .M {
1354
+ skLineCh <- skLine
1355
+ }
1431
1356
}
1357
+ sockMap .mu .Unlock ()
1432
1358
}
1433
- a .clusterInfo .mu2 .RUnlock ()
1359
+ }
1360
+ }
1434
1361
1435
- a .clusterInfo .mu3 .RLock ()
1436
- for _ , socketMap := range a .clusterInfo .PidToSocketMap3 {
1437
- for _ , socketLine := range socketMap .M {
1438
- skLineCh <- socketLine
1439
- }
1440
- }
1441
- a .clusterInfo .mu3 .RUnlock ()
1362
// getPidMax reads /proc/sys/kernel/pid_max and returns its value as an int.
//
// It returns a wrapped error when the file cannot be opened or read, or when
// its content is not a decimal integer. Errors are only returned, not
// printed; reporting them is left to the caller.
func getPidMax() (int, error) {
	const pidMaxPath = "/proc/sys/kernel/pid_max"

	f, err := os.Open(pidMaxPath)
	if err != nil {
		return 0, fmt.Errorf("opening %s: %w", pidMaxPath, err)
	}
	// Release the descriptor on every return path.
	defer f.Close()

	content, err := io.ReadAll(f)
	if err != nil {
		return 0, fmt.Errorf("reading %s: %w", pidMaxPath, err)
	}

	// Drop trailing whitespace (normally a single newline) instead of blindly
	// slicing off the last byte: that would panic on empty content and would
	// cut a digit if the newline were ever absent.
	end := len(content)
	for end > 0 {
		c := content[end-1]
		if c != '\n' && c != '\r' && c != ' ' && c != '\t' {
			break
		}
		end--
	}

	pidMax, err := strconv.Atoi(string(content[:end]))
	if err != nil {
		return 0, fmt.Errorf("parsing %s: %w", pidMaxPath, err)
	}
	return pidMax, nil
}
0 commit comments