Create units package to make size values more readable and less error prone

vpasdf · copybara-github · commit d3a439d29437 · 2024-04-12T07:06:27.000-07:00
PiperOrigin-RevId: 624166373
diff --git a/extractor/internal/units/units.go b/extractor/internal/units/units.go
@@ -0,0 +1,31 @@
+// Copyright 2024 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package units provides constants for common units.
+package units
+
+const (
+	// KiB is a Kibibyte
+	KiB = int64(1024)
+	// MiB is a Mebibyte
+	MiB = 1024 * KiB
+	// GiB is a Gibibyte
+	GiB = 1024 * MiB
+	// TiB is a Tebibyte
+	TiB = 1024 * GiB
+	// PiB is a Pebibyte
+	PiB = 1024 * TiB
+	// EiB is an Exbibyte
+	EiB = 1024 * PiB
+)
diff --git a/extractor/language/java/archive/extractor.go b/extractor/language/java/archive/extractor.go
@@ -30,6 +30,7 @@ import (
 
 	"go.uber.org/multierr"
 	"github.com/google/osv-scalibr/extractor"
+	"github.com/google/osv-scalibr/extractor/internal/units"
 	"github.com/google/osv-scalibr/log"
 	"github.com/google/osv-scalibr/purl"
 )
@@ -43,7 +44,7 @@ const (
 	defaultMaxZipDepth = 16
 	// defaultMaxZipBytes is the maximum number of bytes recursively read from an archive file.
 	// If this limit is reached, the default extractor is halted and results so far are returned.
-	defaultMaxZipBytes = 4 << 30 // 4GiB
+	defaultMaxZipBytes = 4 * units.GiB
 	// defaultMinZipBytes is slightly larger than an empty zip file which is 22 bytes.
 	// https://en.wikipedia.org/wiki/ZIP_(file_format)#:~:text=Viewed%20as%20an%20ASCII%20string,file%20are%20usually%20%22PK%22.
 	defaultMinZipBytes = 30
@@ -60,7 +61,7 @@ type Config struct {
 	MaxZipDepth int
 	// MaxOpenedBytes is the maximum number of bytes recursively read from an archive file.
 	// If this limit is reached, extraction is halted and results so far are returned.
-	MaxOpenedBytes int
+	MaxOpenedBytes int64
 	// MinZipBytes is used to ignore empty zip files during extraction.
 	// Zip files smaller than minZipBytes are ignored.
 	MinZipBytes int
@@ -73,7 +74,7 @@ type Config struct {
 // Extractor extracts Java packages from archive files.
 type Extractor struct {
 	maxZipDepth         int
-	maxOpenedBytes      int
+	maxOpenedBytes      int64
 	minZipBytes         int
 	extractFromFilename bool
 	hashJars            bool
@@ -128,12 +129,12 @@ func (e Extractor) Extract(ctx context.Context, input *extractor.ScanInput) ([]*
 //
 // It returns early with an error if max depth or max opened bytes is reached.
 // Extracted packages are returned even if an error has occurred.
-func (e Extractor) extractWithMax(ctx context.Context, input *extractor.ScanInput, depth, openedBytes int) ([]*extractor.Inventory, error) {
+func (e Extractor) extractWithMax(ctx context.Context, input *extractor.ScanInput, depth int, openedBytes int64) ([]*extractor.Inventory, error) {
 	// Return early if any max/min thresholds are hit.
 	if depth > e.maxZipDepth {
 		return nil, fmt.Errorf("%s reached max zip depth %d at %q", e.Name(), depth, input.Path)
 	}
-	if oBytes := openedBytes + int(input.Info.Size()); oBytes > e.maxOpenedBytes {
+	if oBytes := openedBytes + input.Info.Size(); oBytes > e.maxOpenedBytes {
 		return nil, fmt.Errorf("%s reached max opened bytes of %d at %q", e.Name(), oBytes, input.Path)
 	}
 	if int(input.Info.Size()) < e.minZipBytes {
@@ -151,7 +152,7 @@ func (e Extractor) extractWithMax(ctx context.Context, input *extractor.ScanInpu
 		if err != nil {
 			return nil, fmt.Errorf("%s failed to read file at %q: %w", e.Name(), input.Path, err)
 		}
-		openedBytes += len(b)
+		openedBytes += int64(len(b))
 		// Check size again in case input.Info.Size() was not accurate. Return early if hit max.
 		if openedBytes > e.maxOpenedBytes {
 			return nil, fmt.Errorf("%s reached max opened bytes of %d at %q", e.Name(), openedBytes, input.Path)
diff --git a/extractor/language/javascript/packagejson/extractor.go b/extractor/language/javascript/packagejson/extractor.go
@@ -25,6 +25,7 @@ import (
 	"strings"
 
 	"github.com/google/osv-scalibr/extractor"
+	"github.com/google/osv-scalibr/extractor/internal/units"
 	"github.com/google/osv-scalibr/log"
 	"github.com/google/osv-scalibr/purl"
 )
@@ -35,7 +36,7 @@ const (
 
 	// defaultMaxJSONSize is the maximum file size an extractor will unmarshal.
 	// If Extract gets a bigger file, it will return an error.
-	defaultMaxJSONSize = int64(100) << 20 // 100MiB
+	defaultMaxJSONSize = 100 * units.MiB
 )
 
 type packageJSON struct {
diff --git a/extractor/language/python/wheelegg/extractor.go b/extractor/language/python/wheelegg/extractor.go
@@ -29,6 +29,7 @@ import (
 	"strings"
 
 	"github.com/google/osv-scalibr/extractor"
+	"github.com/google/osv-scalibr/extractor/internal/units"
 	"github.com/google/osv-scalibr/purl"
 )
 
@@ -38,7 +39,7 @@ const (
 
 	// defaultMaxFileSize is the maximum file size an extractor will unmarshal.
 	// If Extract gets a bigger file, it will return an error.
-	defaultMaxFileSize = int64(100) << 20 // 100MiB
+	defaultMaxFileSize = 100 * units.MiB
 )
 
 // Extractor extracts python packages from wheel/egg files.
diff --git a/extractor/os/dpkg/extractor.go b/extractor/os/dpkg/extractor.go
@@ -27,6 +27,7 @@ import (
 	"strings"
 
 	"github.com/google/osv-scalibr/extractor"
+	"github.com/google/osv-scalibr/extractor/internal/units"
 	"github.com/google/osv-scalibr/extractor/os/osrelease"
 	"github.com/google/osv-scalibr/log"
 	"github.com/google/osv-scalibr/purl"
@@ -38,7 +39,7 @@ const (
 
 	// defaultMaxFileSize is the maximum file size an extractor will unmarshal.
 	// If Extract gets a bigger file, it will return an error.
-	defaultMaxFileSize = int64(100) << 20 // 100MiB
+	defaultMaxFileSize = 100 * units.MiB
 )
 
 // Config is the configuration for the Extractor.

Original file line number	Diff line number	Diff line change
`@@ -25,6 +25,7 @@ import (`
`25`	`25`	`"strings"`
`26`	`26`
`27`	`27`	`"github.com/google/osv-scalibr/extractor"`
	`28`	`+ "github.com/google/osv-scalibr/extractor/internal/units"`
`28`	`29`	`"github.com/google/osv-scalibr/log"`
`29`	`30`	`"github.com/google/osv-scalibr/purl"`
`30`	`31`	`)`
`@@ -35,7 +36,7 @@ const (`
`35`	`36`
`36`	`37`	`// defaultMaxJSONSize is the maximum file size an extractor will unmarshal.`
`37`	`38`	`// If Extract gets a bigger file, it will return an error.`
`38`		`- defaultMaxJSONSize = int64(100) << 20 // 100MiB`
	`39`	`+ defaultMaxJSONSize = 100 * units.MiB`
`39`	`40`	`)`
`40`	`41`
`41`	`42`	`type packageJSON struct {`
Original file line number	Diff line number	Diff line change
`@@ -29,6 +29,7 @@ import (`
`29`	`29`	`"strings"`
`30`	`30`
`31`	`31`	`"github.com/google/osv-scalibr/extractor"`
	`32`	`+ "github.com/google/osv-scalibr/extractor/internal/units"`
`32`	`33`	`"github.com/google/osv-scalibr/purl"`
`33`	`34`	`)`
`34`	`35`
`@@ -38,7 +39,7 @@ const (`
`38`	`39`
`39`	`40`	`// defaultMaxFileSize is the maximum file size an extractor will unmarshal.`
`40`	`41`	`// If Extract gets a bigger file, it will return an error.`
`41`		`- defaultMaxFileSize = int64(100) << 20 // 100MiB`
	`42`	`+ defaultMaxFileSize = 100 * units.MiB`
`42`	`43`	`)`
`43`	`44`
`44`	`45`	`// Extractor extracts python packages from wheel/egg files.`