diff --git a/artifact/image/layerscanning/image/image.go b/artifact/image/layerscanning/image/image.go index 6b0dfc33..8f11e8e0 100644 --- a/artifact/image/layerscanning/image/image.go +++ b/artifact/image/layerscanning/image/image.go @@ -243,7 +243,7 @@ func FromV1Image(v1Image v1.Image, config *Config) (*Image, error) { defer layerReader.Close() tarReader := tar.NewReader(layerReader) - requiredTargets, err = fillChainLayerWithFilesFromTar(&outputImage, tarReader, originLayerID, dirPath, chainLayersToFill, config.Requirer, requiredTargets) + requiredTargets, err = fillChainLayersWithFilesFromTar(&outputImage, tarReader, originLayerID, dirPath, chainLayersToFill, config.Requirer, requiredTargets) if err != nil { return fmt.Errorf("failed to fill chain layer with v1 layer tar: %w", err) } @@ -304,6 +304,7 @@ func initializeChainLayers(v1Layers []v1.Layer, configFile *v1.ConfigFile, maxSy latestLayer: &Layer{ buildCommand: entry.CreatedBy, isEmpty: true, + fileNodeTree: pathtree.NewNode[fileNode](), }, maxSymlinkDepth: maxSymlinkDepth, }) @@ -353,10 +354,10 @@ func initializeChainLayers(v1Layers []v1.Layer, configFile *v1.ConfigFile, maxSy return chainLayers, nil } -// fillChainLayerWithFilesFromTar fills the chain layers with the files found in the tar. The +// fillChainLayersWithFilesFromTar fills the chain layers with the files found in the tar. The // chainLayersToFill are the chain layers that will be filled with the files via the virtual // filesystem. 
-func fillChainLayerWithFilesFromTar(img *Image, tarReader *tar.Reader, originLayerID string, dirPath string, chainLayersToFill []*chainLayer, requirer require.FileRequirer, requiredTargets map[string]bool) (map[string]bool, error) { +func fillChainLayersWithFilesFromTar(img *Image, tarReader *tar.Reader, originLayerID string, dirPath string, chainLayersToFill []*chainLayer, requirer require.FileRequirer, requiredTargets map[string]bool) (map[string]bool, error) { currentChainLayer := chainLayersToFill[0] for { @@ -477,6 +478,10 @@ func fillChainLayerWithFilesFromTar(img *Image, tarReader *tar.Reader, originLay // outer loop is looping backwards (latest layer first), we ignore any files that are already in // each chainLayer, as they would have been overwritten. fillChainLayersWithFileNode(chainLayersToFill, newNode) + + // Add the fileNode to the node tree of the underlying layer. + layer := currentChainLayer.latestLayer.(*Layer) + layer.fileNodeTree.Insert(virtualPath, newNode) } return requiredTargets, nil } diff --git a/artifact/image/layerscanning/image/image_test.go b/artifact/image/layerscanning/image/image_test.go index 9cc6d0ee..22368e26 100644 --- a/artifact/image/layerscanning/image/image_test.go +++ b/artifact/image/layerscanning/image/image_test.go @@ -936,7 +936,7 @@ func TestInitializeChainLayers(t *testing.T) { t.Fatalf("initializeChainLayers(%v, %v, %v) returned an unexpected error: %v", tc.v1Layers, tc.configFile, tc.maxSymlinkDepth, err) } - if diff := cmp.Diff(tc.want, gotChainLayers, cmp.AllowUnexported(chainLayer{}, Layer{}, fakev1layer.FakeV1Layer{}), cmpopts.IgnoreFields(chainLayer{}, "fileNodeTree")); diff != "" { + if diff := cmp.Diff(tc.want, gotChainLayers, cmp.AllowUnexported(chainLayer{}, Layer{}, fakev1layer.FakeV1Layer{}), cmpopts.IgnoreFields(chainLayer{}, "fileNodeTree"), cmpopts.IgnoreFields(Layer{}, "fileNodeTree")); diff != "" { t.Fatalf("initializeChainLayers(%v, %v, %v) returned an unexpected diff (-want +got): %v", 
tc.v1Layers, tc.configFile, tc.maxSymlinkDepth, diff) } }) diff --git a/artifact/image/layerscanning/image/layer.go b/artifact/image/layerscanning/image/layer.go index aa93994d..93ec7597 100644 --- a/artifact/image/layerscanning/image/layer.go +++ b/artifact/image/layerscanning/image/layer.go @@ -50,11 +50,14 @@ type Layer struct { diffID digest.Digest buildCommand string isEmpty bool + fileNodeTree *pathtree.Node[fileNode] } // FS returns a scalibr compliant file system. func (layer *Layer) FS() scalibrfs.FS { - return nil + return &FS{ + tree: layer.fileNodeTree, + } } // IsEmpty returns whether the layer is empty. @@ -96,6 +99,7 @@ func convertV1Layer(v1Layer v1.Layer, command string, isEmpty bool) (*Layer, err diffID: digest.Digest(diffID.String()), buildCommand: command, isEmpty: isEmpty, + fileNodeTree: pathtree.NewNode[fileNode](), }, nil } diff --git a/artifact/image/layerscanning/image/layer_test.go b/artifact/image/layerscanning/image/layer_test.go index eb7b87fd..3e6f7c84 100644 --- a/artifact/image/layerscanning/image/layer_test.go +++ b/artifact/image/layerscanning/image/layer_test.go @@ -52,6 +52,7 @@ func TestConvertV1Layer(t *testing.T) { diffID: "sha256:abc123", buildCommand: "ADD file", isEmpty: false, + fileNodeTree: pathtree.NewNode[fileNode](), }, }, { @@ -77,7 +78,7 @@ func TestConvertV1Layer(t *testing.T) { if tc.wantError != nil && gotError == tc.wantError { t.Errorf("convertV1Layer(%v, %v, %v) returned error: %v, want error: %v", tc.v1Layer, tc.command, tc.isEmpty, gotError, tc.wantError) } - if diff := cmp.Diff(gotLayer, tc.wantLayer, cmp.AllowUnexported(Layer{}, fakev1layer.FakeV1Layer{})); tc.wantLayer != nil && diff != "" { + if diff := cmp.Diff(gotLayer, tc.wantLayer, cmp.AllowUnexported(Layer{}, fakev1layer.FakeV1Layer{}, pathtree.Node[fileNode]{})); tc.wantLayer != nil && diff != "" { t.Errorf("convertV1Layer(%v, %v, %v) returned layer: %v, want layer: %v", tc.v1Layer, tc.command, tc.isEmpty, gotLayer, tc.wantLayer) } }) diff --git 
a/artifact/image/layerscanning/testing/fakelayer/fake_layer.go b/artifact/image/layerscanning/testing/fakelayer/fake_layer.go index a36dafea..c703979c 100644 --- a/artifact/image/layerscanning/testing/fakelayer/fake_layer.go +++ b/artifact/image/layerscanning/testing/fakelayer/fake_layer.go @@ -19,6 +19,10 @@ package fakelayer import ( "fmt" "io" + "io/fs" + "os" + "path" + "path/filepath" scalibrfs "github.com/google/osv-scalibr/fs" "github.com/opencontainers/go-digest" @@ -26,21 +30,37 @@ import ( // FakeLayer is a fake implementation of the image.Layer interface for testing purposes. type FakeLayer struct { + testDir string diffID digest.Digest buildCommand string + files map[string]string } // New creates a new FakeLayer. -func New(diffID digest.Digest, buildCommand string) *FakeLayer { +func New(testDir string, diffID digest.Digest, buildCommand string, files map[string]string, filesAlreadyExist bool) (*FakeLayer, error) { + if !filesAlreadyExist { + for name, contents := range files { + filename := filepath.Join(testDir, name) + if err := os.MkdirAll(filepath.Dir(filename), 0700); err != nil { + return nil, err + } + + if err := os.WriteFile(filename, []byte(contents), 0600); err != nil { + return nil, err + } + } + } return &FakeLayer{ + testDir: testDir, diffID: diffID, buildCommand: buildCommand, - } + files: files, + }, nil } -// FS is not currently used for the purposes of layer scanning, thus a nil value is returned. +// FS returns the fake layer itself, which serves as the layer's scalibrfs.FS. func (fakeLayer *FakeLayer) FS() scalibrfs.FS { - return nil + return fakeLayer } // DiffID returns the diffID of the layer. 
@@ -62,3 +82,30 @@ func (fakeLayer *FakeLayer) IsEmpty() bool { func (fakeLayer *FakeLayer) Uncompressed() (io.ReadCloser, error) { return nil, fmt.Errorf("not implemented") } + +// ------------------------------------------------------------------------------------------------- +// scalibrfs.FS implementation +// ------------------------------------------------------------------------------------------------- + +// Open returns a file if it exists in the files map. +func (fakeLayer *FakeLayer) Open(name string) (fs.File, error) { + if _, ok := fakeLayer.files[name]; ok { + filename := filepath.Join(fakeLayer.testDir, name) + return os.Open(filename) + } + return nil, os.ErrNotExist +} + +// Stat returns the file info of a file if it exists in the files map. +func (fakeLayer *FakeLayer) Stat(name string) (fs.FileInfo, error) { + if _, ok := fakeLayer.files[name]; ok { + return os.Stat(path.Join(fakeLayer.testDir, name)) + } + return nil, os.ErrNotExist +} + +// ReadDir is not used in the trace package since individual files are opened instead of +// directories. +func (fakeLayer *FakeLayer) ReadDir(name string) ([]fs.DirEntry, error) { + return nil, fmt.Errorf("not implemented") +} diff --git a/artifact/image/layerscanning/trace/trace.go b/artifact/image/layerscanning/trace/trace.go index 1948f2f1..b04ade46 100644 --- a/artifact/image/layerscanning/trace/trace.go +++ b/artifact/image/layerscanning/trace/trace.go @@ -18,6 +18,7 @@ package trace import ( "context" "errors" + "fmt" "io/fs" "slices" "sort" @@ -54,7 +55,7 @@ type locationAndIndex struct { // Note that a precondition of this algorithm is that the chain layers are ordered by order of // creation. 
func PopulateLayerDetails(ctx context.Context, inventory []*extractor.Inventory, chainLayers []scalibrImage.ChainLayer, config *filesystem.Config) { - chainLayerDetailsList := []*extractor.LayerDetails{} + layerDetailsList := []*extractor.LayerDetails{} // Create list of layer details struct to be referenced by inventory. for i, chainLayer := range chainLayers { @@ -65,7 +66,7 @@ func PopulateLayerDetails(ctx context.Context, inventory []*extractor.Inventory, diffID = chainLayer.Layer().DiffID().Encoded() } - chainLayerDetailsList = append(chainLayerDetailsList, &extractor.LayerDetails{ + layerDetailsList = append(layerDetailsList, &extractor.LayerDetails{ Index: i, DiffID: diffID, Command: chainLayer.Layer().Command(), @@ -90,7 +91,7 @@ func PopulateLayerDetails(ctx context.Context, inventory []*extractor.Inventory, lastLayerIndex := len(chainLayers) - 1 for _, inv := range inventory { - layerDetails := chainLayerDetailsList[lastLayerIndex] + layerDetails := layerDetailsList[lastLayerIndex] invExtractor, isFilesystemExtractor := inv.Extractor.(filesystem.Extractor) // Only filesystem extractors are supported for layer scanning. Also, if the inventory has no @@ -101,33 +102,39 @@ func PopulateLayerDetails(ctx context.Context, inventory []*extractor.Inventory, } var foundOrigin bool + fileLocation := inv.Locations[0] // Go backwards through the chain layers and find the first layer where the inventory is not // present. Such layer is the layer in which the inventory was introduced. If the inventory is // present in all layers, then it means it was introduced in the first layer. - // TODO: b/381249869 - Optimization: Skip layers if file not found. 
for i := len(chainLayers) - 2; i >= 0; i-- { oldChainLayer := chainLayers[i] invLocationAndIndex := locationAndIndex{ - location: inv.Locations[0], + location: fileLocation, index: i, } + // If the file of interest is not present in the underlying layer, then there will be no + // difference in the extracted inventory from oldChainLayer, so extraction can be skipped in + // the chain layer. This is an optimization to avoid extracting the same inventory multiple + // times. + if !isFileInLayerDiff(oldChainLayer, fileLocation) { + continue + } + var oldInventory []*extractor.Inventory if cachedInventory, ok := locationIndexToInventory[invLocationAndIndex]; ok { oldInventory = cachedInventory } else { // Check if file still exist in this layer, if not skip extraction. // This is both an optimization, and avoids polluting the log output with false file not found errors. - if _, err := oldChainLayer.FS().Stat(inv.Locations[0]); errors.Is(err, fs.ErrNotExist) { - oldInventory = []*extractor.Inventory{} - } else { + if _, err := oldChainLayer.FS().Stat(fileLocation); err == nil { // Update the extractor config to use the files from the current layer. // We only take extract the first location because other locations are derived from the initial // extraction location. If other locations can no longer be determined from the first location // they should not be included here, and the trace for those packages stops here. - updateExtractorConfig([]string{inv.Locations[0]}, invExtractor, oldChainLayer.FS()) + updateExtractorConfig([]string{fileLocation}, invExtractor, oldChainLayer.FS()) var err error oldInventory, _, err = filesystem.Run(ctx, config) @@ -150,7 +157,7 @@ func PopulateLayerDetails(ctx context.Context, inventory []*extractor.Inventory, // If the inventory is not present in the old layer, then it was introduced in layer i+1. 
if !foundPackage { - layerDetails = chainLayerDetailsList[i+1] + layerDetails = layerDetailsList[i+1] foundOrigin = true break } @@ -159,7 +166,7 @@ func PopulateLayerDetails(ctx context.Context, inventory []*extractor.Inventory, // If the inventory is present in every layer, then it means it was introduced in the first // layer. if !foundOrigin { - layerDetails = chainLayerDetailsList[0] + layerDetails = layerDetailsList[0] } inv.LayerDetails = layerDetails } @@ -192,3 +199,30 @@ func areInventoriesEqual(inv1 *extractor.Inventory, inv2 *extractor.Inventory) b } return true } + +// getLayerFSFromChainLayer returns the filesystem of the layer underlying the chain layer, or an error if the chain layer has no layer or that layer has no filesystem. +func getLayerFSFromChainLayer(chainLayer scalibrImage.ChainLayer) (scalibrfs.FS, error) { + layer := chainLayer.Layer() + if layer == nil { + return nil, fmt.Errorf("chain layer has no layer") + } + + fs := layer.FS() + if fs == nil { + return nil, fmt.Errorf("layer has no filesystem") + } + + return fs, nil +} + +// isFileInLayerDiff checks if a file is present in the underlying layer of a chain layer. 
+func isFileInLayerDiff(chainLayer scalibrImage.ChainLayer, fileLocation string) bool { + layerFS, err := getLayerFSFromChainLayer(chainLayer) + if err != nil { + return false // no layer filesystem to inspect, so the file is reported as absent from the diff + } + if _, err := layerFS.Stat(fileLocation); errors.Is(err, fs.ErrNotExist) { + return false // the layer's own filesystem does not contain the file + } + return true // Stat did not report ErrNotExist, so the layer itself touches the file +} diff --git a/artifact/image/layerscanning/trace/trace_test.go b/artifact/image/layerscanning/trace/trace_test.go index 1a31d429..1f1d7bd9 100644 --- a/artifact/image/layerscanning/trace/trace_test.go +++ b/artifact/image/layerscanning/trace/trace_test.go @@ -30,13 +30,17 @@ import ( "github.com/opencontainers/go-digest" ) -func setupFakeChainLayer(t *testing.T, testDir string, index int, diffID digest.Digest, command string, fileContents map[string]string) *fakechainlayer.FakeChainLayer { +func setupFakeChainLayer(t *testing.T, testDir string, index int, diffID digest.Digest, command string, layerContents map[string]string, chainLayerContents map[string]string) *fakechainlayer.FakeChainLayer { t.Helper() - layer := fakelayer.New(diffID, command) - chainLayer, err := fakechainlayer.New(testDir, index, diffID, command, layer, fileContents, false) + layer, err := fakelayer.New(testDir, diffID, command, layerContents, false) if err != nil { - t.Fatalf("fakechainlayer.New(%d, %q, %q, %v, %v) failed: %v", index, diffID, command, layer, fileContents, err) + t.Fatalf("fakelayer.New(%q, %q, %q, %v, %v) failed: %v", testDir, diffID, command, layerContents, false, err) + } + + chainLayer, err := fakechainlayer.New(testDir, index, diffID, command, layer, chainLayerContents, false) + if err != nil { + t.Fatalf("fakechainlayer.New(%d, %q, %q, %v, %v) failed: %v", index, diffID, command, layer, chainLayerContents, err) } return chainLayer } @@ -57,11 +61,16 @@ func TestPopulateLayerDetails(t *testing.T) { // Chain Layer 1: Start with foo and bar packages. 
// - foo.txt // - bar.txt - digest1 := digest.NewDigestFromEncoded(digest.SHA256, "diff-id-1") - fakeChainLayer1 := setupFakeChainLayer(t, t.TempDir(), 0, digest1, "command-1", map[string]string{ + layerContents1 := map[string]string{ fooFile: fooPackage, barFile: barPackage, - }) + } + chainLayerContents1 := map[string]string{ + fooFile: fooPackage, + barFile: barPackage, + } + digest1 := digest.NewDigestFromEncoded(digest.SHA256, "diff-id-1") + fakeChainLayer1 := setupFakeChainLayer(t, t.TempDir(), 0, digest1, "command-1", layerContents1, chainLayerContents1) fakeExtractor1 := fakeextractor.New("fake-extractor-1", 1, []string{fooFile, barFile}, map[string]fakeextractor.NamesErr{ fooFile: fakeextractor.NamesErr{ Names: []string{fooPackage}, @@ -71,12 +80,14 @@ func TestPopulateLayerDetails(t *testing.T) { }, }) + layerContents2 := map[string]string{} + chainLayerContents2 := map[string]string{ + fooFile: fooPackage, + } // Chain Layer 2: Deletes bar package. // - foo.txt digest2 := digest.NewDigestFromEncoded(digest.SHA256, "diff-id-2") - fakeChainLayer2 := setupFakeChainLayer(t, t.TempDir(), 1, digest2, "command-2", map[string]string{ - fooFile: fooPackage, - }) + fakeChainLayer2 := setupFakeChainLayer(t, t.TempDir(), 1, digest2, "command-2", layerContents2, chainLayerContents2) fakeExtractor2 := fakeextractor.New("fake-extractor-2", 1, []string{fooFile}, map[string]fakeextractor.NamesErr{ fooFile: fakeextractor.NamesErr{ Names: []string{fooPackage}, @@ -86,11 +97,15 @@ func TestPopulateLayerDetails(t *testing.T) { // Chain Layer 3: Adds baz package. 
// - foo.txt // - baz.txt - digest3 := digest.NewDigestFromEncoded(digest.SHA256, "diff-id-3") - fakeChainLayer3 := setupFakeChainLayer(t, t.TempDir(), 2, digest3, "command-3", map[string]string{ + layerContents3 := map[string]string{ + bazFile: bazPackage, + } + chainLayerContents3 := map[string]string{ fooFile: fooPackage, bazFile: bazPackage, - }) + } + digest3 := digest.NewDigestFromEncoded(digest.SHA256, "diff-id-3") + fakeChainLayer3 := setupFakeChainLayer(t, t.TempDir(), 2, digest3, "command-3", layerContents3, chainLayerContents3) fakeExtractor3 := fakeextractor.New("fake-extractor-3", 1, []string{fooFile, bazFile}, map[string]fakeextractor.NamesErr{ fooFile: fakeextractor.NamesErr{ Names: []string{fooPackage}, @@ -104,12 +119,16 @@ func TestPopulateLayerDetails(t *testing.T) { // - foo.txt // - bar.txt // - baz.txt - digest4 := digest.NewDigestFromEncoded(digest.SHA256, "diff-id-4") - fakeChainLayer4 := setupFakeChainLayer(t, t.TempDir(), 3, digest4, "command-4", map[string]string{ + layerContents4 := map[string]string{ + barFile: barPackage, + } + chainLayerContents4 := map[string]string{ fooFile: fooPackage, barFile: barPackage, bazFile: bazPackage, - }) + } + digest4 := digest.NewDigestFromEncoded(digest.SHA256, "diff-id-4") + fakeChainLayer4 := setupFakeChainLayer(t, t.TempDir(), 3, digest4, "command-4", layerContents4, chainLayerContents4) fakeExtractor4 := fakeextractor.New("fake-extractor-4", 1, []string{fooFile, barFile, bazFile}, map[string]fakeextractor.NamesErr{ fooFile: fakeextractor.NamesErr{ Names: []string{fooPackage},