Skip to content

Commit 3ee976e

Browse files
authored
feat: Add artifact extractors (google#1531)
- Adds the Python wheel/egg and Java JAR archive extractors. - Renames the artifacts source from lockfile to artifact, so output will actually be "artifact:<path to binary>". - Re-enables some of the image scanning tests accidentally removed in a previous PR; the unrelated snapshot changes come from that. This PR is not ready to be merged yet; it is waiting on an osv-scalibr change to be merged first. (google/osv-scalibr#407)
1 parent 9d55ac1 commit 3ee976e

File tree

17 files changed

+729
-4
lines changed

17 files changed

+729
-4
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
.idea/
44
/dist/
55
/osv-scanner
6+
/temp
67
/coverage.out
78
/coverage.html
89
*.tar

cmd/osv-scanner/__snapshots__/main_test.snap

+447
Large diffs are not rendered by default.

cmd/osv-scanner/main_test.go

+86
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package main
33

44
import (
55
"bytes"
6+
"errors"
67
"os"
78
"path/filepath"
89
"reflect"
@@ -817,6 +818,91 @@ func TestRun_Docker(t *testing.T) {
817818
}
818819
}
819820

821+
func TestRun_OCIImage(t *testing.T) {
822+
t.Parallel()
823+
824+
testutility.SkipIfNotAcceptanceTesting(t, "Not consistent on MacOS/Windows")
825+
826+
tests := []cliTestCase{
827+
{
828+
name: "Invalid path",
829+
args: []string{"", "scan", "image", "--archive", "./fixtures/oci-image/no-file-here.tar"},
830+
exit: 127,
831+
},
832+
{
833+
name: "Alpine 3.10 image tar with 3.18 version file",
834+
args: []string{"", "scan", "image", "--archive", "../../internal/image/fixtures/test-alpine.tar"},
835+
exit: 1,
836+
},
837+
{
838+
name: "Scanning python image with some packages",
839+
args: []string{"", "scan", "image", "--archive", "../../internal/image/fixtures/test-python-full.tar"},
840+
exit: 1,
841+
},
842+
{
843+
name: "Scanning python image with no packages",
844+
args: []string{"", "scan", "image", "--archive", "../../internal/image/fixtures/test-python-empty.tar"},
845+
exit: 1,
846+
},
847+
{
848+
name: "Scanning java image with some packages",
849+
args: []string{"", "scan", "image", "--archive", "../../internal/image/fixtures/test-java-full.tar"},
850+
exit: 1,
851+
},
852+
{
853+
name: "scanning node_modules using npm with no packages",
854+
args: []string{"", "scan", "image", "--archive", "../../internal/image/fixtures/test-node_modules-npm-empty.tar"},
855+
exit: 1,
856+
},
857+
{
858+
name: "scanning node_modules using npm with some packages",
859+
args: []string{"", "scan", "image", "--archive", "../../internal/image/fixtures/test-node_modules-npm-full.tar"},
860+
exit: 1,
861+
},
862+
{
863+
name: "scanning node_modules using yarn with no packages",
864+
args: []string{"", "scan", "image", "--archive", "../../internal/image/fixtures/test-node_modules-yarn-empty.tar"},
865+
exit: 1,
866+
},
867+
{
868+
name: "scanning node_modules using yarn with some packages",
869+
args: []string{"", "scan", "image", "--archive", "../../internal/image/fixtures/test-node_modules-yarn-full.tar"},
870+
exit: 1,
871+
},
872+
{
873+
name: "scanning node_modules using pnpm with no packages",
874+
args: []string{"", "scan", "image", "--archive", "../../internal/image/fixtures/test-node_modules-pnpm-empty.tar"},
875+
exit: 1,
876+
},
877+
{
878+
name: "scanning node_modules using pnpm with some packages",
879+
args: []string{"", "scan", "image", "--archive", "../../internal/image/fixtures/test-node_modules-pnpm-full.tar"},
880+
exit: 1,
881+
},
882+
{
883+
name: "scanning image with go binary",
884+
args: []string{"", "scan", "image", "--archive", "../../internal/image/fixtures/test-package-tracing.tar"},
885+
exit: 1,
886+
},
887+
}
888+
for _, tt := range tests {
889+
t.Run(tt.name, func(t *testing.T) {
890+
t.Parallel()
891+
892+
// point out that we need the images to be built and saved separately
893+
for _, arg := range tt.args {
894+
if strings.HasPrefix(arg, "../../internal/image/fixtures/") && strings.HasSuffix(arg, ".tar") {
895+
if _, err := os.Stat(arg); errors.Is(err, os.ErrNotExist) {
896+
t.Fatalf("%s does not exist - have you run scripts/build_test_images.sh?", arg)
897+
}
898+
}
899+
}
900+
901+
testCli(t, tt)
902+
})
903+
}
904+
}
905+
820906
// Tests all subcommands here.
821907
func TestRun_SubCommands(t *testing.T) {
822908
t.Parallel()

go.mod

+2-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ require (
1717
github.com/go-git/go-billy/v5 v5.6.2
1818
github.com/go-git/go-git/v5 v5.13.1
1919
github.com/google/go-cmp v0.6.0
20-
github.com/google/osv-scalibr v0.1.6-0.20250123155336-85f39dea4c05
20+
github.com/google/osv-scalibr v0.1.6-0.20250128013153-34aef7c77adf
2121
github.com/ianlancetaylor/demangle v0.0.0-20240912202439-0a2b6291aafd
2222
github.com/jedib0t/go-pretty/v6 v6.6.5
2323
github.com/muesli/reflow v0.3.0
@@ -104,6 +104,7 @@ require (
104104
github.com/mattn/go-runewidth v0.0.16 // indirect
105105
github.com/mattn/go-sqlite3 v1.14.22 // indirect
106106
github.com/microcosm-cc/bluemonday v1.0.27 // indirect
107+
github.com/microsoft/go-rustaudit v0.0.0-20240820110456-0e2abec02f8b // indirect
107108
github.com/mitchellh/go-homedir v1.1.0 // indirect
108109
github.com/moby/locker v1.0.1 // indirect
109110
github.com/moby/sys/mountinfo v0.6.2 // indirect

go.sum

+4
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,8 @@ github.com/google/go-containerregistry v0.20.2 h1:B1wPJ1SN/S7pB+ZAimcciVD+r+yV/l
184184
github.com/google/go-containerregistry v0.20.2/go.mod h1:z38EKdKh4h7IP2gSfUUqEvalZBqs6AoLeWfUy34nQC8=
185185
github.com/google/osv-scalibr v0.1.6-0.20250123155336-85f39dea4c05 h1:47dObbqXVFPmg39yLeRWfKZYw2xR6O2BJVLmgC6Zygw=
186186
github.com/google/osv-scalibr v0.1.6-0.20250123155336-85f39dea4c05/go.mod h1:nikSO3CqGGRQY05sGgzsgf4+84p5xCmPWOiaSomkuAU=
187+
github.com/google/osv-scalibr v0.1.6-0.20250128013153-34aef7c77adf h1:s6PZEjcMocRehGjuHIFN7Chy8VlMw4XheLgLaWRx21U=
188+
github.com/google/osv-scalibr v0.1.6-0.20250128013153-34aef7c77adf/go.mod h1:jKAptk1dYWBO91ODkI5XYKDDvZEbLKQH9DSXcTtUDSw=
187189
github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
188190
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
189191
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
@@ -233,6 +235,8 @@ github.com/mattn/go-sqlite3 v1.14.22 h1:2gZY6PC6kBnID23Tichd1K+Z0oS6nE/XwU+Vz/5o
233235
github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
234236
github.com/microcosm-cc/bluemonday v1.0.27 h1:MpEUotklkwCSLeH+Qdx1VJgNqLlpY2KXwXFM08ygZfk=
235237
github.com/microcosm-cc/bluemonday v1.0.27/go.mod h1:jFi9vgW+H7c3V0lb6nR74Ib/DIB5OBs92Dimizgw2cA=
238+
github.com/microsoft/go-rustaudit v0.0.0-20240820110456-0e2abec02f8b h1:84JbAJpjZ8p1ttV6dpIqfe8IehWMf0i8DPSgmE9aZuA=
239+
github.com/microsoft/go-rustaudit v0.0.0-20240820110456-0e2abec02f8b/go.mod h1:vYT9HE7WCvL64iVeZylKmCsWKfE+JZ8105iuh2Trk8g=
236240
github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
237241
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
238242
github.com/moby/locker v1.0.1 h1:fOXqR41zeveg4fFODix+1Ch4mj/gT0NE1XJbp/epuBg=
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
3+
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4+
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5+
<modelVersion>4.0.0</modelVersion>
6+
7+
<groupId>com.mycompany.app</groupId>
8+
<artifactId>my-app</artifactId>
9+
<version>1.0-SNAPSHOT</version>
10+
11+
<name>my-app</name>
12+
<url>https://osv.dev</url>
13+
14+
<dependencies>
15+
<dependency>
16+
<groupId>org.apache.hadoop</groupId>
17+
<artifactId>hadoop-client</artifactId>
18+
<version>3.4.0</version>
19+
</dependency>
20+
<dependency>
21+
<groupId>org.apache.commons</groupId>
22+
<artifactId>commons-compress</artifactId>
23+
<version>1.21</version>
24+
</dependency>
25+
</dependencies>
26+
<build>
27+
<plugins>
28+
<plugin>
29+
<artifactId>maven-assembly-plugin</artifactId>
30+
<configuration>
31+
<archive>
32+
<manifest>
33+
<mainClass>com.mycompany.app.App</mainClass>
34+
</manifest>
35+
</archive>
36+
<descriptorRefs>
37+
<descriptorRef>jar-with-dependencies</descriptorRef>
38+
</descriptorRefs>
39+
</configuration>
40+
<executions>
41+
<execution>
42+
<id>make-assembly</id>
43+
<phase>package</phase>
44+
<goals>
45+
<goal>single</goal>
46+
</goals>
47+
</execution>
48+
</executions>
49+
</plugin>
50+
</plugins>
51+
</build>
52+
53+
<properties>
54+
<maven.compiler.source>1.8</maven.compiler.source>
55+
<maven.compiler.target>1.8</maven.compiler.target>
56+
</properties>
57+
</project>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
package com.mycompany.app;
2+
3+
/**
4+
* Hello world!
5+
*
6+
*/
7+
public class App
8+
{
9+
public static void main( String[] args )
10+
{
11+
System.out.println( "Hello World!" );
12+
}
13+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
def main():
2+
return 'Hello, World!'
3+
4+
if __name__ == '__main__':
5+
main()
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
flask==0.12.2 # Vulnerable to CVE-2019-1010083
2+
django==1.11.29 # Vulnerable to CVE-2021-35042
3+
requests==2.20.0 # Vulnerable to CVE-2018-18074
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# Use the official OpenJDK image as the base image
2+
# TODO: This has been deprecated and we might want to switch to another image
3+
FROM openjdk:25-jdk-slim@sha256:34f10f3a1a5b638184ebd1c5c1b4aa4c49616ae3e5c1e845f0ac18c5332b5c6f
4+
5+
RUN apt update && apt install -y maven
6+
7+
# Set the working directory inside the container
8+
WORKDIR /app
9+
10+
# Copy the project files into the container
11+
COPY ./java-fixture/app .
12+
13+
# Build the application JAR (bundled with its dependencies) with Maven
14+
RUN mvn clean package
15+
16+
FROM alpine:3.21@sha256:56fa17d2a7e7f168a043a2712e63aed1f8543aeafdcee47c58dcffe38ed51099
17+
18+
RUN apk update && apk add openjdk21-jre
19+
20+
WORKDIR /app
21+
22+
COPY --from=0 /app/target/my-app-1.0-SNAPSHOT-jar-with-dependencies.jar target.jar
23+
24+
# Set the entry point to run the JAR file
25+
ENTRYPOINT ["java", "-jar", "target.jar"]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# Use the official Python image (Debian buster slim variant) as the base
2+
FROM python:3.9-slim-buster@sha256:320a7a4250aba4249f458872adecf92eea88dc6abd2d76dc5c0f01cac9b53990
3+
4+
# Set the working directory in the container
5+
WORKDIR /app
6+
7+
# Copy the application code into the container
8+
COPY python-fixture/main.py main.py
9+
10+
# Specify the command to run when the container starts
11+
CMD ["python", "main.py"]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Use the official Python image (Debian buster slim variant) as the base
2+
FROM python:3.9-slim-buster@sha256:320a7a4250aba4249f458872adecf92eea88dc6abd2d76dc5c0f01cac9b53990
3+
4+
# Set the working directory in the container
5+
WORKDIR /app
6+
7+
# Copy the requirements file into the container
8+
COPY ./python-fixture/requirements.txt .
9+
10+
# Install the Python dependencies
11+
RUN pip install --no-cache-dir -r requirements.txt
12+
13+
# Copy the rest of the application code into the container
14+
COPY python-fixture/main.py main.py
15+
16+
# Specify the command to run when the container starts
17+
CMD ["python", "main.py"]

internal/imodels/imodels.go

+35
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,20 @@ package imodels
22

33
import (
44
"log"
5+
"strings"
56

67
"github.com/google/osv-scalibr/extractor"
8+
"github.com/google/osv-scalibr/extractor/filesystem/language/golang/gobinary"
9+
"github.com/google/osv-scalibr/extractor/filesystem/language/java/archive"
10+
"github.com/google/osv-scalibr/extractor/filesystem/language/python/wheelegg"
711
"github.com/google/osv-scalibr/extractor/filesystem/os/apk"
812
"github.com/google/osv-scalibr/extractor/filesystem/os/dpkg"
913
"github.com/google/osv-scalibr/extractor/filesystem/os/rpm"
1014
"github.com/google/osv-scalibr/extractor/filesystem/sbom/cdx"
1115
"github.com/google/osv-scalibr/extractor/filesystem/sbom/spdx"
16+
"github.com/google/osv-scanner/internal/cachedregexp"
1217
"github.com/google/osv-scanner/internal/imodels/ecosystem"
18+
"github.com/google/osv-scanner/internal/scalibrextract/language/javascript/nodemodules"
1319
"github.com/google/osv-scanner/internal/scalibrextract/vcs/gitrepo"
1420
"github.com/google/osv-scanner/pkg/models"
1521
"github.com/ossf/osv-schema/bindings/go/osvschema"
@@ -32,6 +38,13 @@ var osExtractors = map[string]struct{}{
3238
rpm.Extractor{}.Name(): {},
3339
}
3440

41+
var artifactExtractors = map[string]struct{}{
42+
nodemodules.Extractor{}.Name(): {},
43+
gobinary.Extractor{}.Name(): {},
44+
archive.Extractor{}.Name(): {},
45+
wheelegg.Extractor{}.Name(): {},
46+
}
47+
3548
// PackageInfo provides getter functions for commonly used fields of inventory
3649
// and applies transformations when required for use in osv-scanner
3750
type PackageInfo struct {
@@ -47,10 +60,29 @@ func (pkg *PackageInfo) Name() string {
4760
return pkg.purlCache.Name
4861
}
4962

63+
// --- Make specific patches to names as necessary ---
64+
// Patch Go package to stdlib
5065
if pkg.Ecosystem().Ecosystem == osvschema.EcosystemGo && pkg.Inventory.Name == "go" {
5166
return "stdlib"
5267
}
5368

69+
// TODO: Move the normalization to another place, where the matching logic happens.
70+
// Patch python package names to be normalized
71+
if pkg.Ecosystem().Ecosystem == osvschema.EcosystemPyPI {
72+
// per https://peps.python.org/pep-0503/#normalized-names
73+
return strings.ToLower(cachedregexp.MustCompile(`[-_.]+`).ReplaceAllLiteralString(pkg.Inventory.Name, "-"))
74+
}
75+
76+
// Patch Maven archive extractor package names
77+
if metadata, ok := pkg.Inventory.Metadata.(*archive.Metadata); ok {
78+
// Debian uses source name on osv.dev
79+
// (fallback to using the normal name if source name is empty)
80+
if metadata.ArtifactID != "" && metadata.GroupID != "" {
81+
return metadata.GroupID + ":" + metadata.ArtifactID
82+
}
83+
}
84+
85+
// --- OS metadata ---
5486
if metadata, ok := pkg.Inventory.Metadata.(*dpkg.Metadata); ok {
5587
// Debian uses source name on osv.dev
5688
// (fallback to using the normal name if source name is empty)
@@ -124,6 +156,8 @@ func (pkg *PackageInfo) SourceType() SourceType {
124156
return SourceTypeSBOM
125157
} else if _, ok := gitExtractors[extractorName]; ok {
126158
return SourceTypeGit
159+
} else if _, ok := artifactExtractors[extractorName]; ok {
160+
return SourceTypeArtifact
127161
}
128162

129163
return SourceTypeProjectPackage
@@ -187,6 +221,7 @@ const (
187221
SourceTypeUnknown SourceType = iota
188222
SourceTypeOSPackage
189223
SourceTypeProjectPackage
224+
SourceTypeArtifact
190225
SourceTypeSBOM
191226
SourceTypeGit
192227
)

pkg/osvscanner/filter.go

+2
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"github.com/google/osv-scanner/internal/imodels/results"
99
"github.com/google/osv-scanner/pkg/models"
1010
"github.com/google/osv-scanner/pkg/reporter"
11+
"github.com/ossf/osv-schema/bindings/go/osvschema"
1112
)
1213

1314
// filterUnscannablePackages removes packages that don't have enough information to be scanned
@@ -21,6 +22,7 @@ func filterUnscannablePackages(r reporter.Reporter, scanResults *results.ScanRes
2122
// If none of the cases match, skip this package since it's not scannable
2223
case !p.Ecosystem().IsEmpty() && p.Name() != "" && p.Version() != "":
2324
case p.Commit() != "":
25+
case p.Ecosystem().Ecosystem == osvschema.EcosystemMaven && p.Name() == "unknown":
2426
default:
2527
continue
2628
}

0 commit comments

Comments
 (0)