Skip to content

Commit cb80c41

Browse files
Mario Leyvacopybara-github
Mario Leyva
authored andcommitted
Small refactor in the package tracing logic.
PiperOrigin-RevId: 731565937
1 parent 30e8a92 commit cb80c41

File tree

7 files changed

+146
-36
lines changed

7 files changed

+146
-36
lines changed

artifact/image/layerscanning/image/image.go

+8-3
Original file line numberDiff line numberDiff line change
@@ -243,7 +243,7 @@ func FromV1Image(v1Image v1.Image, config *Config) (*Image, error) {
243243
defer layerReader.Close()
244244

245245
tarReader := tar.NewReader(layerReader)
246-
requiredTargets, err = fillChainLayerWithFilesFromTar(&outputImage, tarReader, originLayerID, dirPath, chainLayersToFill, config.Requirer, requiredTargets)
246+
requiredTargets, err = fillChainLayersWithFilesFromTar(&outputImage, tarReader, originLayerID, dirPath, chainLayersToFill, config.Requirer, requiredTargets)
247247
if err != nil {
248248
return fmt.Errorf("failed to fill chain layer with v1 layer tar: %w", err)
249249
}
@@ -304,6 +304,7 @@ func initializeChainLayers(v1Layers []v1.Layer, configFile *v1.ConfigFile, maxSy
304304
latestLayer: &Layer{
305305
buildCommand: entry.CreatedBy,
306306
isEmpty: true,
307+
fileNodeTree: pathtree.NewNode[fileNode](),
307308
},
308309
maxSymlinkDepth: maxSymlinkDepth,
309310
})
@@ -353,10 +354,10 @@ func initializeChainLayers(v1Layers []v1.Layer, configFile *v1.ConfigFile, maxSy
353354
return chainLayers, nil
354355
}
355356

356-
// fillChainLayerWithFilesFromTar fills the chain layers with the files found in the tar. The
357+
// fillChainLayersWithFilesFromTar fills the chain layers with the files found in the tar. The
357358
// chainLayersToFill are the chain layers that will be filled with the files via the virtual
358359
// filesystem.
359-
func fillChainLayerWithFilesFromTar(img *Image, tarReader *tar.Reader, originLayerID string, dirPath string, chainLayersToFill []*chainLayer, requirer require.FileRequirer, requiredTargets map[string]bool) (map[string]bool, error) {
360+
func fillChainLayersWithFilesFromTar(img *Image, tarReader *tar.Reader, originLayerID string, dirPath string, chainLayersToFill []*chainLayer, requirer require.FileRequirer, requiredTargets map[string]bool) (map[string]bool, error) {
360361
currentChainLayer := chainLayersToFill[0]
361362

362363
for {
@@ -477,6 +478,10 @@ func fillChainLayerWithFilesFromTar(img *Image, tarReader *tar.Reader, originLay
477478
// outer loop is looping backwards (latest layer first), we ignore any files that are already in
478479
// each chainLayer, as they would have been overwritten.
479480
fillChainLayersWithFileNode(chainLayersToFill, newNode)
481+
482+
// Add the fileNode to the node tree of the underlying layer.
483+
layer := currentChainLayer.latestLayer.(*Layer)
484+
layer.fileNodeTree.Insert(virtualPath, newNode)
480485
}
481486
return requiredTargets, nil
482487
}

artifact/image/layerscanning/image/image_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -936,7 +936,7 @@ func TestInitializeChainLayers(t *testing.T) {
936936
t.Fatalf("initializeChainLayers(%v, %v, %v) returned an unexpected error: %v", tc.v1Layers, tc.configFile, tc.maxSymlinkDepth, err)
937937
}
938938

939-
if diff := cmp.Diff(tc.want, gotChainLayers, cmp.AllowUnexported(chainLayer{}, Layer{}, fakev1layer.FakeV1Layer{}), cmpopts.IgnoreFields(chainLayer{}, "fileNodeTree")); diff != "" {
939+
if diff := cmp.Diff(tc.want, gotChainLayers, cmp.AllowUnexported(chainLayer{}, Layer{}, fakev1layer.FakeV1Layer{}), cmpopts.IgnoreFields(chainLayer{}, "fileNodeTree"), cmpopts.IgnoreFields(Layer{}, "fileNodeTree")); diff != "" {
940940
t.Fatalf("initializeChainLayers(%v, %v, %v) returned an unexpected diff (-want +got): %v", tc.v1Layers, tc.configFile, tc.maxSymlinkDepth, diff)
941941
}
942942
})

artifact/image/layerscanning/image/layer.go

+5-1
Original file line numberDiff line numberDiff line change
@@ -50,11 +50,14 @@ type Layer struct {
5050
diffID digest.Digest
5151
buildCommand string
5252
isEmpty bool
53+
fileNodeTree *pathtree.Node[fileNode]
5354
}
5455

5556
// FS returns a scalibr compliant file system.
5657
func (layer *Layer) FS() scalibrfs.FS {
57-
return nil
58+
return &FS{
59+
tree: layer.fileNodeTree,
60+
}
5861
}
5962

6063
// IsEmpty returns whether the layer is empty.
@@ -96,6 +99,7 @@ func convertV1Layer(v1Layer v1.Layer, command string, isEmpty bool) (*Layer, err
9699
diffID: digest.Digest(diffID.String()),
97100
buildCommand: command,
98101
isEmpty: isEmpty,
102+
fileNodeTree: pathtree.NewNode[fileNode](),
99103
}, nil
100104
}
101105

artifact/image/layerscanning/image/layer_test.go

+2-1
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ func TestConvertV1Layer(t *testing.T) {
5252
diffID: "sha256:abc123",
5353
buildCommand: "ADD file",
5454
isEmpty: false,
55+
fileNodeTree: pathtree.NewNode[fileNode](),
5556
},
5657
},
5758
{
@@ -77,7 +78,7 @@ func TestConvertV1Layer(t *testing.T) {
7778
if tc.wantError != nil && gotError == tc.wantError {
7879
t.Errorf("convertV1Layer(%v, %v, %v) returned error: %v, want error: %v", tc.v1Layer, tc.command, tc.isEmpty, gotError, tc.wantError)
7980
}
80-
if diff := cmp.Diff(gotLayer, tc.wantLayer, cmp.AllowUnexported(Layer{}, fakev1layer.FakeV1Layer{})); tc.wantLayer != nil && diff != "" {
81+
if diff := cmp.Diff(gotLayer, tc.wantLayer, cmp.AllowUnexported(Layer{}, fakev1layer.FakeV1Layer{}, pathtree.Node[fileNode]{})); tc.wantLayer != nil && diff != "" {
8182
t.Errorf("convertV1Layer(%v, %v, %v) returned layer: %v, want layer: %v", tc.v1Layer, tc.command, tc.isEmpty, gotLayer, tc.wantLayer)
8283
}
8384
})

artifact/image/layerscanning/testing/fakelayer/fake_layer.go

+50-3
Original file line numberDiff line numberDiff line change
@@ -19,28 +19,48 @@ package fakelayer
1919
import (
2020
"fmt"
2121
"io"
22+
"io/fs"
23+
"os"
24+
"path"
25+
"path/filepath"
2226

2327
scalibrfs "github.com/google/osv-scalibr/fs"
2428
"github.com/opencontainers/go-digest"
2529
)
2630

2731
// FakeLayer is a fake implementation of the image.Layer interface for testing purposes.
2832
type FakeLayer struct {
33+
testDir string
2934
diffID digest.Digest
3035
buildCommand string
36+
files map[string]string
3137
}
3238

3339
// New creates a new FakeLayer.
34-
func New(diffID digest.Digest, buildCommand string) *FakeLayer {
40+
func New(testDir string, diffID digest.Digest, buildCommand string, files map[string]string, filesAlreadyExist bool) (*FakeLayer, error) {
41+
if !filesAlreadyExist {
42+
for name, contents := range files {
43+
filename := filepath.Join(testDir, name)
44+
if err := os.MkdirAll(filepath.Dir(filename), 0700); err != nil {
45+
return nil, err
46+
}
47+
48+
if err := os.WriteFile(filename, []byte(contents), 0600); err != nil {
49+
return nil, err
50+
}
51+
}
52+
}
3553
return &FakeLayer{
54+
testDir: testDir,
3655
diffID: diffID,
3756
buildCommand: buildCommand,
38-
}
57+
files: files,
58+
}, nil
3959
}
4060

4161
// FS is not currently used for the purposes of layer scanning, thus a nil value is returned.
4262
func (fakeLayer *FakeLayer) FS() scalibrfs.FS {
43-
return nil
63+
return fakeLayer
4464
}
4565

4666
// DiffID returns the diffID of the layer.
@@ -62,3 +82,30 @@ func (fakeLayer *FakeLayer) IsEmpty() bool {
6282
func (fakeLayer *FakeLayer) Uncompressed() (io.ReadCloser, error) {
6383
return nil, fmt.Errorf("not implemented")
6484
}
85+
86+
// -------------------------------------------------------------------------------------------------
87+
// scalibrfs.FS implementation
88+
// -------------------------------------------------------------------------------------------------
89+
90+
// Open returns a file if it exists in the files map.
91+
func (fakeLayer *FakeLayer) Open(name string) (fs.File, error) {
92+
if _, ok := fakeLayer.files[name]; ok {
93+
filename := filepath.Join(fakeLayer.testDir, name)
94+
return os.Open(filename)
95+
}
96+
return nil, os.ErrNotExist
97+
}
98+
99+
// Stat returns the file info of a file if it exists in the files map.
100+
func (fakeLayer *FakeLayer) Stat(name string) (fs.FileInfo, error) {
101+
if _, ok := fakeLayer.files[name]; ok {
102+
return os.Stat(path.Join(fakeLayer.testDir, name))
103+
}
104+
return nil, os.ErrNotExist
105+
}
106+
107+
// ReadDir is not used in the trace package since individual files are opened instead of
108+
// directories.
109+
func (fakeLayer *FakeLayer) ReadDir(name string) ([]fs.DirEntry, error) {
110+
return nil, fmt.Errorf("not implemented")
111+
}

artifact/image/layerscanning/trace/trace.go

+45-11
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ package trace
1818
import (
1919
"context"
2020
"errors"
21+
"fmt"
2122
"io/fs"
2223
"slices"
2324
"sort"
@@ -54,7 +55,7 @@ type locationAndIndex struct {
5455
// Note that a precondition of this algorithm is that the chain layers are ordered by order of
5556
// creation.
5657
func PopulateLayerDetails(ctx context.Context, inventory []*extractor.Inventory, chainLayers []scalibrImage.ChainLayer, config *filesystem.Config) {
57-
chainLayerDetailsList := []*extractor.LayerDetails{}
58+
layerDetailsList := []*extractor.LayerDetails{}
5859

5960
// Create list of layer details struct to be referenced by inventory.
6061
for i, chainLayer := range chainLayers {
@@ -65,7 +66,7 @@ func PopulateLayerDetails(ctx context.Context, inventory []*extractor.Inventory,
6566
diffID = chainLayer.Layer().DiffID().Encoded()
6667
}
6768

68-
chainLayerDetailsList = append(chainLayerDetailsList, &extractor.LayerDetails{
69+
layerDetailsList = append(layerDetailsList, &extractor.LayerDetails{
6970
Index: i,
7071
DiffID: diffID,
7172
Command: chainLayer.Layer().Command(),
@@ -90,7 +91,7 @@ func PopulateLayerDetails(ctx context.Context, inventory []*extractor.Inventory,
9091
lastLayerIndex := len(chainLayers) - 1
9192

9293
for _, inv := range inventory {
93-
layerDetails := chainLayerDetailsList[lastLayerIndex]
94+
layerDetails := layerDetailsList[lastLayerIndex]
9495
invExtractor, isFilesystemExtractor := inv.Extractor.(filesystem.Extractor)
9596

9697
// Only filesystem extractors are supported for layer scanning. Also, if the inventory has no
@@ -101,33 +102,39 @@ func PopulateLayerDetails(ctx context.Context, inventory []*extractor.Inventory,
101102
}
102103

103104
var foundOrigin bool
105+
fileLocation := inv.Locations[0]
104106

105107
// Go backwards through the chain layers and find the first layer where the inventory is not
106108
// present. Such layer is the layer in which the inventory was introduced. If the inventory is
107109
// present in all layers, then it means it was introduced in the first layer.
108-
// TODO: b/381249869 - Optimization: Skip layers if file not found.
109110
for i := len(chainLayers) - 2; i >= 0; i-- {
110111
oldChainLayer := chainLayers[i]
111112

112113
invLocationAndIndex := locationAndIndex{
113-
location: inv.Locations[0],
114+
location: fileLocation,
114115
index: i,
115116
}
116117

118+
// If the file of interest is not present in the underlying layer, then there will be no
119+
// difference in the extracted inventory from oldChainLayer, so extraction can be skipped in
120+
// the chain layer. This is an optimization to avoid extracting the same inventory multiple
121+
// times.
122+
if !isFileInLayerDiff(oldChainLayer, fileLocation) {
123+
continue
124+
}
125+
117126
var oldInventory []*extractor.Inventory
118127
if cachedInventory, ok := locationIndexToInventory[invLocationAndIndex]; ok {
119128
oldInventory = cachedInventory
120129
} else {
121130
// Check if file still exist in this layer, if not skip extraction.
122131
// This is both an optimization, and avoids polluting the log output with false file not found errors.
123-
if _, err := oldChainLayer.FS().Stat(inv.Locations[0]); errors.Is(err, fs.ErrNotExist) {
124-
oldInventory = []*extractor.Inventory{}
125-
} else {
132+
if _, err := oldChainLayer.FS().Stat(fileLocation); err == nil {
126133
// Update the extractor config to use the files from the current layer.
127134
// We only take extract the first location because other locations are derived from the initial
128135
// extraction location. If other locations can no longer be determined from the first location
129136
// they should not be included here, and the trace for those packages stops here.
130-
updateExtractorConfig([]string{inv.Locations[0]}, invExtractor, oldChainLayer.FS())
137+
updateExtractorConfig([]string{fileLocation}, invExtractor, oldChainLayer.FS())
131138

132139
var err error
133140
oldInventory, _, err = filesystem.Run(ctx, config)
@@ -150,7 +157,7 @@ func PopulateLayerDetails(ctx context.Context, inventory []*extractor.Inventory,
150157

151158
// If the inventory is not present in the old layer, then it was introduced in layer i+1.
152159
if !foundPackage {
153-
layerDetails = chainLayerDetailsList[i+1]
160+
layerDetails = layerDetailsList[i+1]
154161
foundOrigin = true
155162
break
156163
}
@@ -159,7 +166,7 @@ func PopulateLayerDetails(ctx context.Context, inventory []*extractor.Inventory,
159166
// If the inventory is present in every layer, then it means it was introduced in the first
160167
// layer.
161168
if !foundOrigin {
162-
layerDetails = chainLayerDetailsList[0]
169+
layerDetails = layerDetailsList[0]
163170
}
164171
inv.LayerDetails = layerDetails
165172
}
@@ -192,3 +199,30 @@ func areInventoriesEqual(inv1 *extractor.Inventory, inv2 *extractor.Inventory) b
192199
}
193200
return true
194201
}
202+
203+
// getSingleLayerFSFromChainLayer returns the filesystem of the underlying layer in the chain layer.
204+
func getLayerFSFromChainLayer(chainLayer scalibrImage.ChainLayer) (scalibrfs.FS, error) {
205+
layer := chainLayer.Layer()
206+
if layer == nil {
207+
return nil, fmt.Errorf("chain layer has no layer")
208+
}
209+
210+
fs := layer.FS()
211+
if fs == nil {
212+
return nil, fmt.Errorf("layer has no filesystem")
213+
}
214+
215+
return fs, nil
216+
}
217+
218+
// isFileInLayerDiff checks if a file is present in the underlying layer of a chain layer.
219+
func isFileInLayerDiff(chainLayer scalibrImage.ChainLayer, fileLocation string) bool {
220+
layerFS, err := getLayerFSFromChainLayer(chainLayer)
221+
if err != nil {
222+
return false
223+
}
224+
if _, err := layerFS.Stat(fileLocation); errors.Is(err, fs.ErrNotExist) {
225+
return true
226+
}
227+
return false
228+
}

0 commit comments

Comments
 (0)