-
Notifications
You must be signed in to change notification settings - Fork 31
/
Copy pathtrace.go
232 lines (200 loc) · 7.78 KB
/
trace.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
// Copyright 2025 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package trace provides functionality to trace the origin of a package in a container image.
package trace
import (
"context"
"errors"
"fmt"
"io/fs"
"slices"
"sort"
"github.com/google/osv-scalibr/extractor"
"github.com/google/osv-scalibr/extractor/filesystem"
scalibrImage "github.com/google/osv-scalibr/artifact/image"
scalibrfs "github.com/google/osv-scalibr/fs"
)
// locationAndIndex is a struct to represent a location and the index of the layer it was found in.
// It is used as the key of the package cache built in PopulateLayerDetails.
type locationAndIndex struct {
// location is the file path the cached packages were extracted from.
location string
// index is the position of the chain layer within the chain layers slice.
index int
}
// PopulateLayerDetails populates the LayerDetails field of the package with the origin details
// obtained by tracing the package in the image.
//
// It does this by looking at each consecutive pair (n, n+1) of chain layers in reverse order and
// checking if a package is present in layer n+1, but not layer n. For example, consider the chain
// layers, each with a different set of packages:
//
//	Chain Layer 0: Packages A, B
//	Chain Layer 1: Packages A
//	Chain Layer 2: Packages A, B, C
//
// Then the origin of package C is layer 2, because it is not present in layer 1, but it is in
// layer 2. Even though package B is present in layer 0, it is attributed to layer 2 because it
// exists in layer 2, but not in layer 1. Package A is attributed to layer 0 because it is present
// in all layers.
//
// Packages whose extractor is not a filesystem.Extractor, or that have no locations, cannot be
// traced and are left untouched. If chainLayers is empty there is no layer to attribute packages
// to, and the function returns without modifying any package.
//
// config is modified in place: its Extractors, FilesToExtract and ScanRoots fields are overwritten
// before every extraction run.
//
// Note that a precondition of this algorithm is that the chain layers are ordered by order of
// creation.
func PopulateLayerDetails(ctx context.Context, pkgs []*extractor.Package, chainLayers []scalibrImage.ChainLayer, config *filesystem.Config) {
	// Without chain layers there is nothing to trace against, and indexing the layer details list
	// below would panic.
	if len(chainLayers) == 0 {
		return
	}

	// Create list of layer details struct to be referenced by package.
	chainLayerDetailsList := make([]*extractor.LayerDetails, 0, len(chainLayers))
	for i, chainLayer := range chainLayers {
		// Empty layers keep an empty diff ID.
		var diffID string
		if !chainLayer.Layer().IsEmpty() {
			diffID = chainLayer.Layer().DiffID().Encoded()
		}

		chainLayerDetailsList = append(chainLayerDetailsList, &extractor.LayerDetails{
			Index:       i,
			DiffID:      diffID,
			Command:     chainLayer.Layer().Command(),
			InBaseImage: false,
		})
	}

	// Helper function to update the extractor config so that the next run extracts only the given
	// files, with the given extractor, from the given filesystem.
	updateExtractorConfig := func(filesToExtract []string, fsExtractor filesystem.Extractor, chainFS scalibrfs.FS) {
		config.Extractors = []filesystem.Extractor{fsExtractor}
		config.FilesToExtract = filesToExtract
		config.ScanRoots = []*scalibrfs.ScanRoot{
			{
				FS: chainFS,
			},
		}
	}

	// locationIndexToPackage is used as a package cache to avoid re-extracting the same
	// package from a file multiple times.
	locationIndexToPackage := map[locationAndIndex][]*extractor.Package{}

	for _, pkg := range pkgs {
		pkgExtractor, isFilesystemExtractor := pkg.Extractor.(filesystem.Extractor)

		// Only filesystem extractors are supported for layer scanning. Also, if the package has no
		// locations, it cannot be traced.
		if !isFilesystemExtractor || len(pkg.Locations) == 0 {
			continue
		}

		// If the package is present in every layer (or the trace is abandoned), it is attributed to
		// the first layer.
		originIndex := 0
		fileLocation := pkg.Locations[0]

		// Go backwards through the chain layers and find the first layer where the package is not
		// present. Such layer is the layer in which the package was introduced. If the package is
		// present in all layers, then it means it was introduced in the first layer.
		for i := len(chainLayers) - 2; i >= 0; i-- {
			oldChainLayer := chainLayers[i]
			cacheKey := locationAndIndex{
				location: fileLocation,
				index:    i,
			}

			var oldPackages []*extractor.Package
			if cachedPackages, ok := locationIndexToPackage[cacheKey]; ok {
				oldPackages = cachedPackages
			} else if _, err := oldChainLayer.FS().Stat(fileLocation); errors.Is(err, fs.ErrNotExist) {
				// Check if the file still exists in this layer; if not, skip extraction.
				// This is both an optimization, and avoids polluting the log output with false file
				// not found errors.
				oldPackages = []*extractor.Package{}
			} else if filesExistInLayer(oldChainLayer, pkg.Locations) {
				// Update the extractor config to use the files from the current layer.
				// We only extract the first location because other locations are derived from the
				// initial extraction location. If other locations can no longer be determined from
				// the first location they should not be included here, and the trace for those
				// packages stops here.
				updateExtractorConfig([]string{fileLocation}, pkgExtractor, oldChainLayer.FS())

				var err error
				// Runs SCALIBR extraction on the file of interest in oldChainLayer.
				oldPackages, _, err = filesystem.Run(ctx, config)
				if err != nil {
					// The trace for this package is abandoned. No origin was found, so the package
					// ends up attributed to the first layer.
					break
				}
			} else {
				// If none of the files from the packages are present in the underlying layer, then
				// there will be no difference in the extracted packages from oldChainLayer, so
				// extraction can be skipped in the chain layer. This is an optimization to avoid
				// extracting the same package multiple times.
				continue
			}

			// Cache the packages for future use.
			locationIndexToPackage[cacheKey] = oldPackages

			foundPackage := false
			for _, oldPkg := range oldPackages {
				if arePackagesEqual(pkg, oldPkg) {
					foundPackage = true
					break
				}
			}

			// If the package is not present in the old layer, then it was introduced in layer i+1.
			if !foundPackage {
				originIndex = i + 1
				break
			}
		}

		pkg.LayerDetails = chainLayerDetailsList[originIndex]
	}
}
// arePackagesEqual reports whether two packages are equal. It does this by comparing the PURLs
// and the locations of the packages; the order of the locations is ignored. Packages without an
// extractor are never considered equal. Neither package is modified.
func arePackagesEqual(pkg1 *extractor.Package, pkg2 *extractor.Package) bool {
	if pkg1.Extractor == nil || pkg2.Extractor == nil {
		return false
	}

	// Check if the PURLs are equal.
	purl1 := pkg1.Extractor.ToPURL(pkg1)
	purl2 := pkg2.Extractor.ToPURL(pkg2)
	if purl1.String() != purl2.String() {
		return false
	}

	// Check if the locations are equal. Sort clones rather than the packages' own slices: a
	// full slice expression such as Locations[:] shares the backing array, so sorting it would
	// silently reorder the caller's Locations.
	locations1 := slices.Clone(pkg1.Locations)
	sort.Strings(locations1)
	locations2 := slices.Clone(pkg2.Locations)
	sort.Strings(locations2)

	return slices.Equal(locations1, locations2)
}
// getLayerFSFromChainLayer returns the filesystem of the underlying layer in the chain layer.
// It returns an error if the chain layer has no layer, or if that layer has no filesystem.
func getLayerFSFromChainLayer(chainLayer scalibrImage.ChainLayer) (scalibrfs.FS, error) {
	layer := chainLayer.Layer()
	if layer == nil {
		return nil, fmt.Errorf("chain layer has no layer")
	}

	// Named layerFS rather than fs so that the imported io/fs package is not shadowed.
	layerFS := layer.FS()
	if layerFS == nil {
		return nil, fmt.Errorf("layer has no filesystem")
	}

	return layerFS, nil
}
// filesExistInLayer reports whether at least one of fileLocations can be stat'ed in the
// filesystem of the layer underlying chainLayer. It reports false when that filesystem cannot
// be obtained.
func filesExistInLayer(chainLayer scalibrImage.ChainLayer, fileLocations []string) bool {
	underlyingFS, err := getLayerFSFromChainLayer(chainLayer)
	if err != nil {
		return false
	}

	// A single successful Stat is enough; remaining locations are not checked.
	return slices.ContainsFunc(fileLocations, func(path string) bool {
		_, statErr := underlyingFS.Stat(path)
		return statErr == nil
	})
}