Skip to content

Move inventories + findings under a top-level struct inside ScanResults. #523

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 11 additions & 10 deletions artifact/image/layerscanning/testing/fakelayerbuilder/extractor.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (

"github.com/google/osv-scalibr/extractor"
"github.com/google/osv-scalibr/extractor/filesystem"
"github.com/google/osv-scalibr/inventory"
"github.com/google/osv-scalibr/plugin"
"github.com/google/osv-scalibr/purl"
)
Expand All @@ -46,43 +47,43 @@ func (e FakeTestLayersExtractor) FileRequired(_ filesystem.FileAPI) bool {
}

// Extract extracts packages from the file passed through the scan input.
// The input is read line by line; each line is split at the first "@" into a
// package name and a version, and input.Path is recorded as the package's only
// location. A scanner error is returned as-is.
func (e FakeTestLayersExtractor) Extract(_ context.Context, input *filesystem.ScanInput) ([]*extractor.Inventory, error) {
func (e FakeTestLayersExtractor) Extract(_ context.Context, input *filesystem.ScanInput) (inventory.Inventory, error) {
scanner := bufio.NewScanner(input.Reader)
invs := []*extractor.Inventory{}
pkgs := []*extractor.Package{}

for scanner.Scan() {
pkgline := scanner.Text()
// If the line contains no "@", the whole line is the name and the version is "".
pkg, version, _ := strings.Cut(pkgline, "@")

invs = append(invs, &extractor.Inventory{
pkgs = append(pkgs, &extractor.Package{
Name: pkg,
Version: version,
Locations: []string{input.Path},
})
}

if err := scanner.Err(); err != nil {
return nil, err
return inventory.Inventory{}, err
}

return invs, nil
return inventory.Inventory{Packages: pkgs}, nil
}

// ToPURL returns a generic-type PURL built from the package's name and version.
func (e FakeTestLayersExtractor) ToPURL(i *extractor.Inventory) *purl.PackageURL {
func (e FakeTestLayersExtractor) ToPURL(p *extractor.Package) *purl.PackageURL {
return &purl.PackageURL{
Type: purl.TypeGeneric,
Name: i.Name,
Version: i.Version,
Name: p.Name,
Version: p.Version,
}
}

// ToCPEs is not applicable as this extractor does not infer CPEs from the package.
// It always returns an empty slice.
func (e FakeTestLayersExtractor) ToCPEs(_ *extractor.Inventory) []string { return []string{} }
func (e FakeTestLayersExtractor) ToCPEs(_ *extractor.Package) []string { return []string{} }

// Ecosystem returns no ecosystem, as this is a mock for testing.
// The result is always the empty string, regardless of the package passed in.
func (e FakeTestLayersExtractor) Ecosystem(i *extractor.Inventory) string {
func (e FakeTestLayersExtractor) Ecosystem(p *extractor.Package) string {
return ""
}

Expand Down
79 changes: 40 additions & 39 deletions artifact/image/layerscanning/trace/trace.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (

"github.com/google/osv-scalibr/extractor"
"github.com/google/osv-scalibr/extractor/filesystem"
"github.com/google/osv-scalibr/inventory"
"github.com/google/osv-scalibr/log"

scalibrImage "github.com/google/osv-scalibr/artifact/image"
Expand Down Expand Up @@ -53,7 +54,7 @@ type locationAndIndex struct {
//
// Note that a precondition of this algorithm is that the chain layers are ordered by order of
// creation.
func PopulateLayerDetails(ctx context.Context, inventory []*extractor.Inventory, chainLayers []scalibrImage.ChainLayer, config *filesystem.Config) {
func PopulateLayerDetails(ctx context.Context, inventory inventory.Inventory, chainLayers []scalibrImage.ChainLayer, config *filesystem.Config) {
// If there are no chain layers, then there is nothing to trace. This should not happen, but we
// should handle it gracefully.
if len(chainLayers) == 0 {
Expand Down Expand Up @@ -94,94 +95,94 @@ func PopulateLayerDetails(ctx context.Context, inventory []*extractor.Inventory,
}
}

// locationIndexToInventory is used as an inventory cache to avoid re-extracting the same
// inventory from a file multiple times.
locationIndexToInventory := map[locationAndIndex][]*extractor.Inventory{}
// locationIndexToPackages is used as a package cache to avoid re-extracting the same
// package from a file multiple times.
locationIndexToPackages := map[locationAndIndex][]*extractor.Package{}
lastLayerIndex := len(chainLayers) - 1

for _, inv := range inventory {
for _, pkg := range inventory.Packages {
layerDetails := chainLayerDetailsList[lastLayerIndex]
invExtractor, isFilesystemExtractor := inv.Extractor.(filesystem.Extractor)
pkgExtractor, isFilesystemExtractor := pkg.Extractor.(filesystem.Extractor)

// Only filesystem extractors are supported for layer scanning. Also, if the inventory has no
// Only filesystem extractors are supported for layer scanning. Also, if the package has no
// locations, it cannot be traced.
isInventoryTraceable := isFilesystemExtractor && len(inv.Locations) > 0
if !isInventoryTraceable {
isPackageTraceable := isFilesystemExtractor && len(pkg.Locations) > 0
if !isPackageTraceable {
continue
}

var invPURL string
if inv.Extractor != nil {
invPURL = inv.Extractor.ToPURL(inv).String()
var pkgPURL string
if pkg.Extractor != nil {
pkgPURL = pkg.Extractor.ToPURL(pkg).String()
}

var foundOrigin bool
fileLocation := inv.Locations[0]
fileLocation := pkg.Locations[0]
lastScannedLayerIndex := len(chainLayers) - 1

// Go backwards through the chain layers and find the first layer where the inventory is not
// present. Such layer is the layer in which the inventory was introduced. If the inventory is
// Go backwards through the chain layers and find the first layer where the package is not
// present. Such layer is the layer in which the package was introduced. If the package is
// present in all layers, then it means it was introduced in the first layer.
for i := len(chainLayers) - 2; i >= 0; i-- {
oldChainLayer := chainLayers[i]

invLocationAndIndex := locationAndIndex{
pkgLocationAndIndex := locationAndIndex{
location: fileLocation,
index: i,
}

var oldInventory []*extractor.Inventory
if cachedInventory, ok := locationIndexToInventory[invLocationAndIndex]; ok {
oldInventory = cachedInventory
var oldPackages []*extractor.Package
if cachedPackages, ok := locationIndexToPackages[pkgLocationAndIndex]; ok {
oldPackages = cachedPackages
} else if _, err := oldChainLayer.FS().Stat(fileLocation); errors.Is(err, fs.ErrNotExist) {
// Check if the file still exists in this layer; if not, skip extraction.
// This is both an optimization and a way to avoid polluting the log output with false file-not-found errors.
oldInventory = []*extractor.Inventory{}
} else if filesExistInLayer(oldChainLayer, inv.Locations) {
oldPackages = []*extractor.Package{}
} else if filesExistInLayer(oldChainLayer, pkg.Locations) {
// Update the extractor config to use the files from the current layer.
// We only extract the first location because other locations are derived from the initial
// extraction location. If other locations can no longer be determined from the first location
// they should not be included here, and the trace for those packages stops here.
updateExtractorConfig([]string{fileLocation}, invExtractor, oldChainLayer.FS())
updateExtractorConfig([]string{fileLocation}, pkgExtractor, oldChainLayer.FS())

var err error
// Runs SCALIBR extraction on the file of interest in oldChainLayer.
oldInventory, _, err = filesystem.Run(ctx, config)
oldInv, _, err := filesystem.Run(ctx, config)
oldPackages = oldInv.Packages
if err != nil {
break
}
} else {
// If none of the files from the inventory are present in the underlying layer, then there
// will be no difference in the extracted inventory from oldChainLayer, so extraction can be
// skipped in the chain layer. This is an optimization to avoid extracting the same inventory
// If none of the files from the packages are present in the underlying layer, then there
// will be no difference in the extracted packages from oldChainLayer, so extraction can be
// skipped in the chain layer. This is an optimization to avoid extracting the same package
// multiple times.
continue
}

// Cache the inventory for future use.
locationIndexToInventory[invLocationAndIndex] = oldInventory
// Cache the packages for future use.
locationIndexToPackages[pkgLocationAndIndex] = oldPackages

foundPackage := false
for _, oldInv := range oldInventory {
if oldInv.Extractor == nil {
for _, oldPKG := range oldPackages {
if oldPKG.Extractor == nil {
continue
}

// PURLs are being used as a package key, so if they are different, skip this inventory.
oldInvPURL := oldInv.Extractor.ToPURL(oldInv).String()
if oldInvPURL != invPURL {
// PURLs are being used as a package key, so if they are different, skip this package.
oldPKGPURL := oldPKG.Extractor.ToPURL(oldPKG).String()
if oldPKGPURL != pkgPURL {
continue
}

if !areLocationsEqual(oldInv.Locations, inv.Locations) {
if !areLocationsEqual(oldPKG.Locations, pkg.Locations) {
continue
}

foundPackage = true
break
}

// If the inventory is not present in the old layer, then it was introduced in the previous layer we actually scanned
// If the package is not present in the old layer, then it was introduced in the previous layer we actually scanned
if !foundPackage {
layerDetails = chainLayerDetailsList[lastScannedLayerIndex]
foundOrigin = true
Expand All @@ -192,16 +193,16 @@ func PopulateLayerDetails(ctx context.Context, inventory []*extractor.Inventory,
lastScannedLayerIndex = i
}

// If the inventory is present in every layer, then it means it was introduced in the first
// If the package is present in every layer, then it means it was introduced in the first
// layer.
if !foundOrigin {
layerDetails = chainLayerDetailsList[0]
}
inv.LayerDetails = layerDetails
pkg.LayerDetails = layerDetails
}
}

// areLocationsEqual checks if the inventory location strings are equal.
// areLocationsEqual checks if the package location strings are equal.
func areLocationsEqual(fileLocations []string, otherFileLocations []string) bool {
if len(fileLocations) == 0 || len(otherFileLocations) == 0 {
log.Warnf("Empty file locations found. This should not happen.")
Expand Down
Loading
Loading