From 6a15bacc1867110bf1d82a6a7f60d3ce8716272a Mon Sep 17 00:00:00 2001 From: Edouard Schweisguth Date: Mon, 19 Aug 2024 14:40:01 +0200 Subject: [PATCH] WIP - Moving to metadata file instead of meaningful filepath --- pkg/cmd/dump.go | 4 +- pkg/dump/ingestor.go | 23 +++++ pkg/dump/result.go | 72 ++++++---------- pkg/dump/result_test.go | 107 ++++-------------------- pkg/dump/writer/file_writer.go | 4 + pkg/dump/writer/mockwriter/writer.go | 75 +++++++++++++++-- pkg/dump/writer/tar_writer.go | 4 + pkg/dump/writer/writer.go | 1 + pkg/kubehound/core/core_dump.go | 5 ++ pkg/kubehound/core/core_ingest_local.go | 11 ++- 10 files changed, 159 insertions(+), 147 deletions(-) diff --git a/pkg/cmd/dump.go b/pkg/cmd/dump.go index dd9367936..a42ef7459 100644 --- a/pkg/cmd/dump.go +++ b/pkg/cmd/dump.go @@ -61,6 +61,6 @@ func InitRemoteIngestCmd(cmd *cobra.Command, standalone bool) { func InitLocalIngestCmd(cmd *cobra.Command) { cmd.PersistentFlags().String("cluster", "", "Cluster name to ingest (e.g.: my-cluster-1)") - viper.BindPFlag(config.IngestorClusterName, cmd.Flags().Lookup("cluster")) //nolint: errcheck - cmd.MarkFlagRequired("cluster") //nolint: errcheck + viper.BindPFlag(config.IngestorClusterName, cmd.PersistentFlags().Lookup("cluster")) //nolint: errcheck + cmd.MarkFlagRequired("cluster") //nolint: errcheck } diff --git a/pkg/dump/ingestor.go b/pkg/dump/ingestor.go index 6765c9086..688748b8f 100644 --- a/pkg/dump/ingestor.go +++ b/pkg/dump/ingestor.go @@ -2,7 +2,10 @@ package dump import ( "context" + "encoding/json" "fmt" + "os" + "path/filepath" "github.com/DataDog/KubeHound/pkg/collector" "github.com/DataDog/KubeHound/pkg/config" @@ -50,10 +53,30 @@ func getClusterName(ctx context.Context, collector collector.CollectorClient) (s return cluster.Name, nil } +func (d *DumpIngestor) Metadata() (Metadata, error) { + path := filepath.Join(d.writer.OutputPath(), "metadata.json") + data, err := os.ReadFile(path) + if err != nil { + return Metadata{}, err + } + + md := Metadata{} + err = json.Unmarshal(data, &md) + if err != nil { + return Metadata{}, err + } + + return md, nil +} + func (d *DumpIngestor) OutputPath() string { return d.writer.OutputPath() } +func (d *DumpIngestor) SaveMetadata(ctx context.Context) error { + return d.writer.WriteMetadata(ctx) +} + func (d *DumpIngestor) DumpK8sObjects(ctx context.Context) error { spanDump, ctx := tracer.StartSpanFromContext(ctx, span.CollectorDump, tracer.Measured()) var err error diff --git a/pkg/dump/result.go b/pkg/dump/result.go index 3c0955983..fae9ed990 100644 --- a/pkg/dump/result.go +++ b/pkg/dump/result.go @@ -4,13 +4,19 @@ import ( "fmt" "path" "regexp" + "time" ) type DumpResult struct { - clusterName string - RunID string - isDir bool - extension string + isDir bool + extension string + Metadata Metadata +} + +type Metadata struct { + RunID string `json:"run_id"` + ClusterName string `json:"cluster"` + DumpTime time.Time `json:"dump_time"` } const ( @@ -18,17 +24,17 @@ const ( DumpResultRunIDRegex = `([a-z0-9]{26})` DumpResultExtensionRegex = `\.?([a-z0-9\.]+)?` DumpResultPrefix = "kubehound_" - DumpResultFilenameRegex = DumpResultPrefix + DumpResultClusterNameRegex + "_" + DumpResultRunIDRegex + DumpResultExtensionRegex - DumpResultPathRegex = DumpResultClusterNameRegex + "/" + DumpResultFilenameRegex DumpResultTarWriterExtension = "tar.gz" ) func NewDumpResult(clusterName, runID string, isCompressed bool) (*DumpResult, error) { dumpResult := &DumpResult{ - clusterName: clusterName, - RunID: runID, - isDir: true, + Metadata: Metadata{ + ClusterName: clusterName, + RunID: runID, + }, + isDir: true, } if isCompressed { dumpResult.Compressed() @@ -44,18 +50,18 @@ func NewDumpResult(clusterName, runID string, isCompressed bool) (*DumpResult, e func (i *DumpResult) Validate() error { re := regexp.MustCompile(DumpResultClusterNameRegex) - if !re.MatchString(i.clusterName) { - return fmt.Errorf("Invalid clustername: %q", i.clusterName) + if !re.MatchString(i.Metadata.ClusterName) { + return fmt.Errorf("Invalid clustername: %q", i.Metadata.ClusterName) } - matches := re.FindStringSubmatch(i.clusterName) - if len(matches) == 2 && matches[1] != i.clusterName { - return fmt.Errorf("Invalid clustername: %q", i.clusterName) + matches := re.FindStringSubmatch(i.Metadata.ClusterName) + if len(matches) == 2 && matches[1] != i.Metadata.ClusterName { + return fmt.Errorf("Invalid clustername: %q", i.Metadata.ClusterName) } re = regexp.MustCompile(DumpResultRunIDRegex) - if !re.MatchString(i.RunID) { - return fmt.Errorf("Invalid runID: %q", i.RunID) + if !re.MatchString(i.Metadata.RunID) { + return fmt.Errorf("Invalid runID: %q", i.Metadata.RunID) } return nil @@ -70,44 +76,14 @@ func (i *DumpResult) Compressed() { func (i *DumpResult) GetFullPath() string { filename := i.GetFilename() - return path.Join(i.clusterName, filename) + return path.Join(i.Metadata.ClusterName, filename) } func (i *DumpResult) GetFilename() string { - filename := fmt.Sprintf("%s%s_%s", DumpResultPrefix, i.clusterName, i.RunID) + filename := fmt.Sprintf("%s%s_%s", DumpResultPrefix, i.Metadata.ClusterName, i.Metadata.RunID) if i.isDir { return filename } return fmt.Sprintf("%s.%s", filename, i.extension) } - -func ParsePath(path string) (*DumpResult, error) { - // .//kubehound__[.tar.gz] - // re := regexp.MustCompile(`([a-z0-9\.\-_]+)/kubehound_([a-z0-9\.-_]+)_([a-z0-9]{26})\.?([a-z0-9\.]+)?`) - re := regexp.MustCompile(DumpResultPathRegex) - if !re.MatchString(path) { - return nil, fmt.Errorf("Invalid path provided: %q", path) - } - - matches := re.FindStringSubmatch(path) - // The cluster name should match (parent dir and in the filename) - if matches[1] != matches[2] { - return nil, fmt.Errorf("Cluster name does not match in the path provided: %q", path) - } - - clusterName := matches[1] - runID := matches[3] - extension := matches[4] - - isCompressed := false - if extension != "" { - isCompressed = true - } - result, err := NewDumpResult(clusterName, runID, isCompressed) - if err != nil { - return nil, err - } - - return result, nil -} diff --git a/pkg/dump/result_test.go b/pkg/dump/result_test.go index e084711d5..0b64ea7dc 100644 --- a/pkg/dump/result_test.go +++ b/pkg/dump/result_test.go @@ -16,85 +16,6 @@ const ( nonValidRunID = "01j2qs8TH6yarr5hkafysekn0j" ) -func TestParsePath(t *testing.T) { - t.Parallel() - type args struct { - path string - } - tests := []struct { - name string - args args - want *DumpResult - wantErr bool - }{ - { - name: "valid path with no compression", - args: args{ - path: "/tmp/cluster1.k8s.local/kubehound_cluster1.k8s.local_01j2qs8th6yarr5hkafysekn0j", - }, - want: &DumpResult{ - clusterName: validClusterName, - RunID: validRunID, - isDir: true, - extension: "", - }, - wantErr: false, - }, - { - name: "valid path with compressed data", - args: args{ - path: "/tmp/cluster1.k8s.local/kubehound_cluster1.k8s.local_01j2qs8th6yarr5hkafysekn0j.tar.gz", - }, - want: &DumpResult{ - clusterName: validClusterName, - RunID: validRunID, - isDir: false, - extension: "tar.gz", - }, - wantErr: false, - }, - { - name: "invalid path", - args: args{ - path: "/tmp/cluster1.k8s.local/cluster1.k8s.local_01j2qs8th6yarr5hkafysekn0j", - }, - want: nil, - wantErr: true, - }, - { - name: "not matching clustername ", - args: args{ - path: "/tmp/cluster1.k8s.local/kubehound_cluster2.k8s.local_01j2qs8th6yarr5hkafysekn0j", - }, - want: nil, - wantErr: true, - }, - { - name: "invalid runID", - args: args{ - path: "/tmp/cluster1.k8s.local/kubehound_cluster1.k8s.local_01j2qs8TH6yarr5hkafysekn0j", - }, - want: nil, - wantErr: true, - }, - } - for _, tt := range tests { - tt := tt - t.Run(tt.name, func(t *testing.T) { - t.Parallel() - got, err := ParsePath(tt.args.path) - if (err != nil) != tt.wantErr { - t.Errorf("ParsePath() error = %v, wantErr %v", err, tt.wantErr) - - return - } - if !reflect.DeepEqual(got, tt.want) { - t.Errorf("ParsePath() = %v, want %v", got, tt.want) - } - }) - } -} - func TestDumpResult_GetFilename(t *testing.T) { t.Parallel() @@ -135,10 +56,12 @@ func TestDumpResult_GetFilename(t *testing.T) { t.Run(tt.name, func(t *testing.T) { t.Parallel() i := &DumpResult{ - clusterName: tt.fields.ClusterName, - RunID: tt.fields.RunID, - isDir: tt.fields.IsDir, - extension: tt.fields.Extension, + Metadata: Metadata{ + ClusterName: tt.fields.ClusterName, + RunID: tt.fields.RunID, + }, + isDir: tt.fields.IsDir, + extension: tt.fields.Extension, } if got := i.GetFilename(); got != tt.want { t.Errorf("DumpResult.GetFilename() = %v, want %v", got, tt.want) @@ -187,10 +110,12 @@ func TestDumpResult_GetFullPath(t *testing.T) { t.Run(tt.name, func(t *testing.T) { t.Parallel() i := &DumpResult{ - clusterName: tt.fields.ClusterName, - RunID: tt.fields.RunID, - isDir: tt.fields.IsDir, - extension: tt.fields.Extension, + Metadata: Metadata{ + ClusterName: tt.fields.ClusterName, + RunID: tt.fields.RunID, + }, + isDir: tt.fields.IsDir, + extension: tt.fields.Extension, } if got := i.GetFullPath(); got != tt.want { t.Errorf("DumpResult.GetFullPath() = %v, want %v", got, tt.want) @@ -221,9 +146,11 @@ func TestNewDumpResult(t *testing.T) { isCompressed: false, }, want: &DumpResult{ - clusterName: validClusterName, - RunID: validRunID, - isDir: true, + Metadata: Metadata{ + ClusterName: validClusterName, + RunID: validRunID, + }, + isDir: true, }, wantErr: false, }, diff --git a/pkg/dump/writer/file_writer.go b/pkg/dump/writer/file_writer.go index 09bea82ee..dadae1333 100644 --- a/pkg/dump/writer/file_writer.go +++ b/pkg/dump/writer/file_writer.go @@ -56,6 +56,10 @@ func (f *FileWriter) WorkerNumber() int { return FileWriterWorkerNumber } +func (f *FileWriter) WriteMetadata(ctx context.Context) error { + return nil +} + // Write function writes the Kubernetes object to a buffer // All buffer are stored in a map which is flushed at the end of every type processed func (f *FileWriter) Write(ctx context.Context, k8sObj []byte, pathObj string) error { diff --git a/pkg/dump/writer/mockwriter/writer.go b/pkg/dump/writer/mockwriter/writer.go index 08f9836e9..951369f5a 100644 --- a/pkg/dump/writer/mockwriter/writer.go +++ b/pkg/dump/writer/mockwriter/writer.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.20.0. DO NOT EDIT. +// Code generated by mockery v2.43.0. DO NOT EDIT. package mocks @@ -25,6 +25,10 @@ func (_m *DumperWriter) EXPECT() *DumperWriter_Expecter { func (_m *DumperWriter) Close(_a0 context.Context) error { ret := _m.Called(_a0) + if len(ret) == 0 { + panic("no return value specified for Close") + } + var r0 error if rf, ok := ret.Get(0).(func(context.Context) error); ok { r0 = rf(_a0) @@ -67,6 +71,10 @@ func (_c *DumperWriter_Close_Call) RunAndReturn(run func(context.Context) error) func (_m *DumperWriter) Flush(_a0 context.Context) error { ret := _m.Called(_a0) + if len(ret) == 0 { + panic("no return value specified for Flush") + } + var r0 error if rf, ok := ret.Get(0).(func(context.Context) error); ok { r0 = rf(_a0) @@ -109,6 +117,10 @@ func (_c *DumperWriter_Flush_Call) RunAndReturn(run func(context.Context) error) func (_m *DumperWriter) OutputPath() string { ret := _m.Called() + if len(ret) == 0 { + panic("no return value specified for OutputPath") + } + var r0 string if rf, ok := ret.Get(0).(func() string); ok { r0 = rf() @@ -150,6 +162,10 @@ func (_c *DumperWriter_OutputPath_Call) RunAndReturn(run func() string) *DumperW func (_m *DumperWriter) WorkerNumber() int { ret := _m.Called() + if len(ret) == 0 { + panic("no return value specified for WorkerNumber") + } + var r0 int if rf, ok := ret.Get(0).(func() int); ok { r0 = rf() @@ -191,6 +207,10 @@ func (_c *DumperWriter_WorkerNumber_Call) RunAndReturn(run func() int) *DumperWr func (_m *DumperWriter) Write(_a0 context.Context, _a1 []byte, _a2 string) error { ret := _m.Called(_a0, _a1, _a2) + if len(ret) == 0 { + panic("no return value specified for Write") + } + var r0 error if rf, ok := ret.Get(0).(func(context.Context, []byte, string) error); ok { r0 = rf(_a0, _a1, _a2) @@ -231,13 +251,58 @@ func (_c *DumperWriter_Write_Call) RunAndReturn(run func(context.Context, []byte return _c } -type mockConstructorTestingTNewDumperWriter interface { - mock.TestingT - Cleanup(func()) +// WriteMetadata provides a mock function with given fields: _a0 +func (_m *DumperWriter) WriteMetadata(_a0 context.Context) error { + ret := _m.Called(_a0) + + if len(ret) == 0 { + panic("no return value specified for WriteMetadata") + } + + var r0 error + if rf, ok := ret.Get(0).(func(context.Context) error); ok { + r0 = rf(_a0) + } else { + r0 = ret.Error(0) + } + + return r0 +} + +// DumperWriter_WriteMetadata_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'WriteMetadata' +type DumperWriter_WriteMetadata_Call struct { + *mock.Call +} + +// WriteMetadata is a helper method to define mock.On call +// - _a0 context.Context +func (_e *DumperWriter_Expecter) WriteMetadata(_a0 interface{}) *DumperWriter_WriteMetadata_Call { + return &DumperWriter_WriteMetadata_Call{Call: _e.mock.On("WriteMetadata", _a0)} +} + +func (_c *DumperWriter_WriteMetadata_Call) Run(run func(_a0 context.Context)) *DumperWriter_WriteMetadata_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(context.Context)) + }) + return _c +} + +func (_c *DumperWriter_WriteMetadata_Call) Return(_a0 error) *DumperWriter_WriteMetadata_Call { + _c.Call.Return(_a0) + return _c +} + +func (_c *DumperWriter_WriteMetadata_Call) RunAndReturn(run func(context.Context) error) *DumperWriter_WriteMetadata_Call { + _c.Call.Return(run) + return _c } // NewDumperWriter creates a new instance of DumperWriter. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. -func NewDumperWriter(t mockConstructorTestingTNewDumperWriter) *DumperWriter { +// The first argument is typically a *testing.T value. +func NewDumperWriter(t interface { + mock.TestingT + Cleanup(func()) +}) *DumperWriter { mock := &DumperWriter{} mock.Mock.Test(t) diff --git a/pkg/dump/writer/tar_writer.go b/pkg/dump/writer/tar_writer.go index 89107a128..ee4b5cb32 100644 --- a/pkg/dump/writer/tar_writer.go +++ b/pkg/dump/writer/tar_writer.go @@ -78,6 +78,10 @@ func (f *TarWriter) WorkerNumber() int { return TarWorkerNumber } +func (f *TarWriter) WriteMetadata(ctx context.Context) error { + return nil +} + // Write function writes the Kubernetes object to a buffer // All buffer are stored in a map which is flushed at the end of every type processed func (t *TarWriter) Write(ctx context.Context, k8sObj []byte, filePath string) error { diff --git a/pkg/dump/writer/writer.go b/pkg/dump/writer/writer.go index 36535ad5c..26127dd62 100644 --- a/pkg/dump/writer/writer.go +++ b/pkg/dump/writer/writer.go @@ -18,6 +18,7 @@ const ( //go:generate mockery --name DumperWriter --output mockwriter --case underscore --filename writer.go --with-expecter type DumperWriter interface { Write(context.Context, []byte, string) error + WriteMetadata(context.Context) error Flush(context.Context) error Close(context.Context) error diff --git a/pkg/kubehound/core/core_dump.go b/pkg/kubehound/core/core_dump.go index 9a140ce19..83dbaae25 100644 --- a/pkg/kubehound/core/core_dump.go +++ b/pkg/kubehound/core/core_dump.go @@ -109,5 +109,10 @@ func runLocalDump(ctx context.Context, khCfg *config.KubehoundConfig) (string, e return "", fmt.Errorf("dump k8s object: %w", err) } + err = dumpIngestor.SaveMetadata(ctx) + if err != nil { + return "", fmt.Errorf("save metadata file: %w", err) + } + return dumpIngestor.OutputPath(), nil } diff --git a/pkg/kubehound/core/core_ingest_local.go b/pkg/kubehound/core/core_ingest_local.go index 5f69982ea..e3bc7aba8 100644 --- a/pkg/kubehound/core/core_ingest_local.go +++ b/pkg/kubehound/core/core_ingest_local.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "os" + "path/filepath" "github.com/DataDog/KubeHound/pkg/config" "github.com/DataDog/KubeHound/pkg/ingestor/puller" @@ -13,7 +14,8 @@ func CoreLocalIngest(ctx context.Context, khCfg *config.KubehoundConfig, resultP // Using the collector config to ingest the data khCfg.Collector.Type = config.CollectorTypeFile // Applying the clusterName from the dynamic config (from CLI or env var) to the collector config - khCfg.Collector.File.ClusterName = khCfg.Dynamic.ClusterName + // khCfg.Collector.File.ClusterName = khCfg.Dynamic.ClusterName + // Treating by default as data not compressed (directory of the results) khCfg.Collector.File.Directory = resultPath @@ -22,7 +24,7 @@ func CoreLocalIngest(ctx context.Context, khCfg *config.KubehoundConfig, resultP if err != nil { return err } - + metadataFilePath := filepath.Join(resultPath, "metadata.json") if compress { tmpDir, err := os.MkdirTemp("/tmp/", "kh-local-ingest-*") if err != nil { @@ -35,6 +37,11 @@ func CoreLocalIngest(ctx context.Context, khCfg *config.KubehoundConfig, resultP if err != nil { return err } + metadataFilePath = filepath.Join(tmpDir, "metadata.json") + } + md, err := GetDumpMetadata(metadataFilePath) + if err != nil { + return err } return CoreLive(ctx, khCfg)