From d1378d10c3bc20599475cd505ce3b970ccc4160c Mon Sep 17 00:00:00 2001 From: byte-msft <163155971+byte-msft@users.noreply.github.com> Date: Wed, 5 Feb 2025 18:13:19 +0000 Subject: [PATCH] fix: Clean up interfaceLockMap entries on endpoint deletion (#1249) # Description The packetParser was creating entries in interfaceLockMap for each new interface but failing to remove them when interfaces were deleted. In environments with high pod counts and frequent churn, this caused a memory leak because the map grew without bound. ## Related Issue [Potential memory leak in packetparser's interfaceLockMap #1236](https://github.com/microsoft/retina/issues/1236) ## Checklist - [X] I have read the [contributing documentation](https://retina.sh/docs/contributing). - [X] I signed and signed-off the commits (`git commit -S -s ...`). See [this documentation](https://docs.github.com/en/authentication/managing-commit-signature-verification/about-commit-signature-verification) on signing commits. - [X] I have correctly attributed the author(s) of the code. - [X] I have tested the changes locally. - [X] I have followed the project's style guidelines. - [ ] I have updated the documentation, if necessary. - [X] I have added tests, if applicable. ## Screenshots (if applicable) or Testing Completed Please add any relevant screenshots or GIFs to showcase the changes made. ## Additional Notes ### Solution - Added cleanup of interfaceLockMap entries in the EndpointDeleted case - Improved mutex handling: the per-interface mutex is now dropped together with its interfaceLockMap entry, preventing resource leaks - Updated test cases to verify proper cleanup of both tcMap and interfaceLockMap ### Testing - Added comprehensive test coverage for the interface deletion scenario - Verified cleanup of both maps in test cases - Tested with high pod churn scenarios ### Impact This fix prevents memory leaks in environments with frequent pod creation/deletion, improving the overall stability and resource usage of the system. 
--- Please refer to the [CONTRIBUTING.md](../CONTRIBUTING.md) file for more information on how to contribute to this project. --------- Signed-off-by: Yerlan Baiturinov --- pkg/plugin/packetparser/packetparser_linux.go | 2 ++ .../packetparser/packetparser_linux_test.go | 18 +++++++++++++++--- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/pkg/plugin/packetparser/packetparser_linux.go b/pkg/plugin/packetparser/packetparser_linux.go index fb54c8bb5e..c11ef9cef9 100644 --- a/pkg/plugin/packetparser/packetparser_linux.go +++ b/pkg/plugin/packetparser/packetparser_linux.go @@ -400,6 +400,8 @@ func (p *packetParser) endpointWatcherCallbackFn(obj interface{}) { // Delete from map. p.tcMap.Delete(ifaceKey) } + // Delete from lock map. + p.interfaceLockMap.Delete(ifaceKey) default: // Unknown. p.l.Debug("Unknown event", zap.String("type", event.Type.String())) diff --git a/pkg/plugin/packetparser/packetparser_linux_test.go b/pkg/plugin/packetparser/packetparser_linux_test.go index aa27cfa356..50903852a1 100644 --- a/pkg/plugin/packetparser/packetparser_linux_test.go +++ b/pkg/plugin/packetparser/packetparser_linux_test.go @@ -162,18 +162,24 @@ func TestEndpointWatcherCallbackFn_EndpointDeleted(t *testing.T) { ctrl := gomock.NewController(t) defer ctrl.Finish() + // Initialize packetParser with both maps. p := &packetParser{ cfg: cfgPodLevelEnabled, l: log.Logger().Named("test"), interfaceLockMap: &sync.Map{}, + tcMap: &sync.Map{}, } - p.tcMap = &sync.Map{} + + // Create test interface attributes. linkAttr := netlink.LinkAttrs{ Name: "test", HardwareAddr: []byte("test"), NetNsID: 1, } key := ifaceToKey(linkAttr) + + // Pre-populate both maps to simulate existing interface + p.interfaceLockMap.Store(key, &sync.Mutex{}) p.tcMap.Store(key, &tcValue{nil, &tc.Object{}}) // Create EndpointDeleted event. @@ -182,10 +188,16 @@ func TestEndpointWatcherCallbackFn_EndpointDeleted(t *testing.T) { Obj: linkAttr, } + // Execute the callback. 
p.endpointWatcherCallbackFn(e) - _, ok := p.tcMap.Load(key) - assert.False(t, ok) + // Verify both maps are cleaned up. + _, tcMapExists := p.tcMap.Load(key) + _, lockMapExists := p.interfaceLockMap.Load(key) + + // Assert both maps are cleaned up + assert.False(t, tcMapExists, "tcMap entry should be deleted") + assert.False(t, lockMapExists, "interfaceLockMap entry should be deleted") } func TestCreateQdiscAndAttach(t *testing.T) {