Skip to content

Commit

Permalink
WIP - Moving to metadata file instead of meaningful filepath
Browse files Browse the repository at this point in the history
  • Loading branch information
edznux-dd committed Aug 19, 2024
1 parent 7341940 commit 6a15bac
Show file tree
Hide file tree
Showing 10 changed files with 159 additions and 147 deletions.
4 changes: 2 additions & 2 deletions pkg/cmd/dump.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,6 @@ func InitRemoteIngestCmd(cmd *cobra.Command, standalone bool) {

// InitLocalIngestCmd registers the flags used by the local ingest command:
// the required --cluster flag, bound to the ingestor cluster-name config key.
func InitLocalIngestCmd(cmd *cobra.Command) {
	cmd.PersistentFlags().String("cluster", "", "Cluster name to ingest (e.g.: my-cluster-1)")
	// The flag was declared on PersistentFlags(), so the viper binding must
	// look it up there as well — presumably Flags() does not yet contain it
	// at registration time (TODO confirm against cobra's flag merging).
	viper.BindPFlag(config.IngestorClusterName, cmd.PersistentFlags().Lookup("cluster")) //nolint: errcheck
	// NOTE(review): since the flag is persistent, MarkPersistentFlagRequired
	// may be the intended call — verify it is enforced on subcommands too.
	cmd.MarkFlagRequired("cluster") //nolint: errcheck
}
23 changes: 23 additions & 0 deletions pkg/dump/ingestor.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@ package dump

import (
"context"
"encoding/json"
"fmt"
"os"
"path/filepath"

"github.com/DataDog/KubeHound/pkg/collector"
"github.com/DataDog/KubeHound/pkg/config"
Expand Down Expand Up @@ -50,10 +53,30 @@ func getClusterName(ctx context.Context, collector collector.CollectorClient) (s
return cluster.Name, nil
}

// Metadata loads the dump metadata from the metadata.json file stored at the
// root of the writer's output path and returns it decoded.
func (d *DumpIngestor) Metadata() (Metadata, error) {
	path := filepath.Join(d.writer.OutputPath(), "metadata.json")

	data, err := os.ReadFile(path)
	if err != nil {
		// Wrap with the file path so callers can tell which dump failed to load.
		return Metadata{}, fmt.Errorf("reading metadata file %q: %w", path, err)
	}

	md := Metadata{}
	if err := json.Unmarshal(data, &md); err != nil {
		return Metadata{}, fmt.Errorf("unmarshalling metadata file %q: %w", path, err)
	}

	return md, nil
}

// OutputPath returns the filesystem location of the dump, as reported by the
// underlying writer.
func (d *DumpIngestor) OutputPath() string {
	return d.writer.OutputPath()
}

// SaveMetadata delegates persisting the dump metadata to the underlying
// writer (each writer implementation decides how/whether to store it).
func (d *DumpIngestor) SaveMetadata(ctx context.Context) error {
	return d.writer.WriteMetadata(ctx)
}

func (d *DumpIngestor) DumpK8sObjects(ctx context.Context) error {
spanDump, ctx := tracer.StartSpanFromContext(ctx, span.CollectorDump, tracer.Measured())
var err error
Expand Down
72 changes: 24 additions & 48 deletions pkg/dump/result.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,31 +4,37 @@ import (
"fmt"
"path"
"regexp"
"time"
)

type DumpResult struct {
clusterName string
RunID string
isDir bool
extension string
isDir bool
extension string
Metadata Metadata
}

// Metadata identifies a dump: the run that produced it, the cluster it was
// collected from, and when it was taken. It is serialized to/from the
// dump's metadata.json file.
type Metadata struct {
	RunID       string    `json:"run_id"`
	ClusterName string    `json:"cluster"`
	DumpTime    time.Time `json:"dump_time"`
}

const (
	// DumpResultClusterNameRegex matches a cluster name: letters, digits, dots, dashes and underscores.
	DumpResultClusterNameRegex = `([A-Za-z0-9\.\-_]+)`
	// DumpResultRunIDRegex matches a run ID: exactly 26 lowercase alphanumeric characters.
	DumpResultRunIDRegex = `([a-z0-9]{26})`
	// DumpResultExtensionRegex optionally matches a trailing file extension (e.g. "tar.gz").
	DumpResultExtensionRegex = `\.?([a-z0-9\.]+)?`
	// DumpResultPrefix is prepended to every dump filename.
	DumpResultPrefix = "kubehound_"
	// DumpResultFilenameRegex matches kubehound_<clusterName>_<runID>[.<ext>].
	DumpResultFilenameRegex = DumpResultPrefix + DumpResultClusterNameRegex + "_" + DumpResultRunIDRegex + DumpResultExtensionRegex
	// DumpResultPathRegex matches <clusterName>/<filename>.
	DumpResultPathRegex = DumpResultClusterNameRegex + "/" + DumpResultFilenameRegex

	// DumpResultTarWriterExtension is the extension used for compressed dumps.
	DumpResultTarWriterExtension = "tar.gz"
)

func NewDumpResult(clusterName, runID string, isCompressed bool) (*DumpResult, error) {
dumpResult := &DumpResult{
clusterName: clusterName,
RunID: runID,
isDir: true,
Metadata: Metadata{
ClusterName: clusterName,
RunID: runID,
},
isDir: true,
}
if isCompressed {
dumpResult.Compressed()
Expand All @@ -44,18 +50,18 @@ func NewDumpResult(clusterName, runID string, isCompressed bool) (*DumpResult, e

func (i *DumpResult) Validate() error {
re := regexp.MustCompile(DumpResultClusterNameRegex)
if !re.MatchString(i.clusterName) {
return fmt.Errorf("Invalid clustername: %q", i.clusterName)
if !re.MatchString(i.Metadata.ClusterName) {
return fmt.Errorf("Invalid clustername: %q", i.Metadata.ClusterName)
}

matches := re.FindStringSubmatch(i.clusterName)
if len(matches) == 2 && matches[1] != i.clusterName {
return fmt.Errorf("Invalid clustername: %q", i.clusterName)
matches := re.FindStringSubmatch(i.Metadata.ClusterName)
if len(matches) == 2 && matches[1] != i.Metadata.ClusterName {
return fmt.Errorf("Invalid clustername: %q", i.Metadata.ClusterName)
}

re = regexp.MustCompile(DumpResultRunIDRegex)
if !re.MatchString(i.RunID) {
return fmt.Errorf("Invalid runID: %q", i.RunID)
if !re.MatchString(i.Metadata.RunID) {
return fmt.Errorf("Invalid runID: %q", i.Metadata.RunID)
}

return nil
Expand All @@ -70,44 +76,14 @@ func (i *DumpResult) Compressed() {
// GetFullPath returns the dump location relative to the output root,
// in the form <clusterName>/<filename>.
func (i *DumpResult) GetFullPath() string {
	filename := i.GetFilename()

	return path.Join(i.Metadata.ClusterName, filename)
}

// GetFilename builds the dump's base name, kubehound_<cluster>_<runID>,
// appending the archive extension when the dump is not a directory.
func (i *DumpResult) GetFilename() string {
	filename := fmt.Sprintf("%s%s_%s", DumpResultPrefix, i.Metadata.ClusterName, i.Metadata.RunID)
	if i.isDir {
		return filename
	}

	return fmt.Sprintf("%s.%s", filename, i.extension)
}

// ParsePath extracts the cluster name, run ID and compression state from a
// dump path of the form:
//
//	<clusterName>/kubehound_<clusterName>_<run_id>[.tar.gz]
//
// It returns an error when the path does not match the expected layout or
// when the directory and filename disagree on the cluster name.
func ParsePath(path string) (*DumpResult, error) {
	re := regexp.MustCompile(DumpResultPathRegex)
	if !re.MatchString(path) {
		return nil, fmt.Errorf("Invalid path provided: %q", path)
	}

	matches := re.FindStringSubmatch(path)
	// The cluster name must match between the parent dir and the filename.
	if matches[1] != matches[2] {
		return nil, fmt.Errorf("Cluster name does not match in the path provided: %q", path)
	}

	clusterName := matches[1]
	runID := matches[3]
	extension := matches[4]

	// A captured extension means the dump is an archive, not a directory.
	isCompressed := extension != ""

	result, err := NewDumpResult(clusterName, runID, isCompressed)
	if err != nil {
		return nil, err
	}

	return result, nil
}
107 changes: 17 additions & 90 deletions pkg/dump/result_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,85 +16,6 @@ const (
nonValidRunID = "01j2qs8TH6yarr5hkafysekn0j"
)

// TestParsePath exercises ParsePath with valid paths (compressed and
// uncompressed) and with malformed inputs: a missing filename prefix, a
// cluster-name mismatch between directory and filename, and an upper-case
// (hence invalid) run ID.
func TestParsePath(t *testing.T) {
	t.Parallel()
	type args struct {
		path string
	}
	tests := []struct {
		name    string
		args    args
		want    *DumpResult
		wantErr bool
	}{
		{
			name: "valid path with no compression",
			args: args{
				path: "/tmp/cluster1.k8s.local/kubehound_cluster1.k8s.local_01j2qs8th6yarr5hkafysekn0j",
			},
			want: &DumpResult{
				clusterName: validClusterName,
				RunID:       validRunID,
				isDir:       true,
				extension:   "",
			},
			wantErr: false,
		},
		{
			name: "valid path with compressed data",
			args: args{
				path: "/tmp/cluster1.k8s.local/kubehound_cluster1.k8s.local_01j2qs8th6yarr5hkafysekn0j.tar.gz",
			},
			want: &DumpResult{
				clusterName: validClusterName,
				RunID:       validRunID,
				isDir:       false,
				extension:   "tar.gz",
			},
			wantErr: false,
		},
		{
			// Filename lacks the "kubehound_" prefix, so the path regex rejects it.
			name: "invalid path",
			args: args{
				path: "/tmp/cluster1.k8s.local/cluster1.k8s.local_01j2qs8th6yarr5hkafysekn0j",
			},
			want:    nil,
			wantErr: true,
		},
		{
			// Directory says cluster1 but filename says cluster2.
			name: "not matching clustername ",
			args: args{
				path: "/tmp/cluster1.k8s.local/kubehound_cluster2.k8s.local_01j2qs8th6yarr5hkafysekn0j",
			},
			want:    nil,
			wantErr: true,
		},
		{
			// Run ID contains upper-case characters, rejected by the run-ID regex.
			name: "invalid runID",
			args: args{
				path: "/tmp/cluster1.k8s.local/kubehound_cluster1.k8s.local_01j2qs8TH6yarr5hkafysekn0j",
			},
			want:    nil,
			wantErr: true,
		},
	}
	for _, tt := range tests {
		tt := tt
		t.Run(tt.name, func(t *testing.T) {
			t.Parallel()
			got, err := ParsePath(tt.args.path)
			if (err != nil) != tt.wantErr {
				t.Errorf("ParsePath() error = %v, wantErr %v", err, tt.wantErr)

				return
			}
			if !reflect.DeepEqual(got, tt.want) {
				t.Errorf("ParsePath() = %v, want %v", got, tt.want)
			}
		})
	}
}

func TestDumpResult_GetFilename(t *testing.T) {
t.Parallel()

Expand Down Expand Up @@ -135,10 +56,12 @@ func TestDumpResult_GetFilename(t *testing.T) {
t.Run(tt.name, func(t *testing.T) {
t.Parallel()
i := &DumpResult{
clusterName: tt.fields.ClusterName,
RunID: tt.fields.RunID,
isDir: tt.fields.IsDir,
extension: tt.fields.Extension,
Metadata: Metadata{
ClusterName: tt.fields.ClusterName,
RunID: tt.fields.RunID,
},
isDir: tt.fields.IsDir,
extension: tt.fields.Extension,
}
if got := i.GetFilename(); got != tt.want {
t.Errorf("DumpResult.GetFilename() = %v, want %v", got, tt.want)
Expand Down Expand Up @@ -187,10 +110,12 @@ func TestDumpResult_GetFullPath(t *testing.T) {
t.Run(tt.name, func(t *testing.T) {
t.Parallel()
i := &DumpResult{
clusterName: tt.fields.ClusterName,
RunID: tt.fields.RunID,
isDir: tt.fields.IsDir,
extension: tt.fields.Extension,
Metadata: Metadata{
ClusterName: tt.fields.ClusterName,
RunID: tt.fields.RunID,
},
isDir: tt.fields.IsDir,
extension: tt.fields.Extension,
}
if got := i.GetFullPath(); got != tt.want {
t.Errorf("DumpResult.GetFullPath() = %v, want %v", got, tt.want)
Expand Down Expand Up @@ -221,9 +146,11 @@ func TestNewDumpResult(t *testing.T) {
isCompressed: false,
},
want: &DumpResult{
clusterName: validClusterName,
RunID: validRunID,
isDir: true,
Metadata: Metadata{
ClusterName: validClusterName,
RunID: validRunID,
},
isDir: true,
},
wantErr: false,
},
Expand Down
4 changes: 4 additions & 0 deletions pkg/dump/writer/file_writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,10 @@ func (f *FileWriter) WorkerNumber() int {
return FileWriterWorkerNumber
}

// WriteMetadata satisfies the writer interface; the FileWriter currently
// persists no metadata and always returns nil (no-op).
func (f *FileWriter) WriteMetadata(ctx context.Context) error {
	return nil
}

// Write function writes the Kubernetes object to a buffer
// All buffer are stored in a map which is flushed at the end of every type processed
func (f *FileWriter) Write(ctx context.Context, k8sObj []byte, pathObj string) error {
Expand Down
Loading

0 comments on commit 6a15bac

Please sign in to comment.