@@ -30,6 +30,7 @@ import (
3030
3131 "go.uber.org/multierr"
3232 "github.com/google/osv-scalibr/extractor"
33+ "github.com/google/osv-scalibr/extractor/internal/units"
3334 "github.com/google/osv-scalibr/log"
3435 "github.com/google/osv-scalibr/purl"
3536)
@@ -43,7 +44,7 @@ const (
4344 defaultMaxZipDepth = 16
4445 // defaultMaxZipBytes is the maximum number of bytes recursively read from an archive file.
4546 // If this limit is reached, the default extractor is halted and results so far are returned.
46- defaultMaxZipBytes = 4 << 30 // 4GiB
47+ defaultMaxZipBytes = 4 * units . GiB
4748 // defaultMinZipBytes is slightly larger than an empty zip file which is 22 bytes.
4849 // https://en.wikipedia.org/wiki/ZIP_(file_format)#:~:text=Viewed%20as%20an%20ASCII%20string,file%20are%20usually%20%22PK%22.
4950 defaultMinZipBytes = 30
@@ -60,7 +61,7 @@ type Config struct {
6061 MaxZipDepth int
6162 // MaxOpenedBytes is the maximum number of bytes recursively read from an archive file.
6263 // If this limit is reached, extraction is halted and results so far are returned.
63- MaxOpenedBytes int
64+ MaxOpenedBytes int64
6465 // MinZipBytes is used to ignore empty zip files during extraction.
6566 // Zip files smaller than minZipBytes are ignored.
6667 MinZipBytes int
@@ -73,7 +74,7 @@ type Config struct {
7374// Extractor extracts Java packages from archive files.
7475type Extractor struct {
7576 maxZipDepth int
76- maxOpenedBytes int
77+ maxOpenedBytes int64
7778 minZipBytes int
7879 extractFromFilename bool
7980 hashJars bool
@@ -128,12 +129,12 @@ func (e Extractor) Extract(ctx context.Context, input *extractor.ScanInput) ([]*
128129//
129130// It returns early with an error if max depth or max opened bytes is reached.
130131// Extracted packages are returned even if an error has occurred.
131- func (e Extractor ) extractWithMax (ctx context.Context , input * extractor.ScanInput , depth , openedBytes int ) ([]* extractor.Inventory , error ) {
132+ func (e Extractor ) extractWithMax (ctx context.Context , input * extractor.ScanInput , depth int , openedBytes int64 ) ([]* extractor.Inventory , error ) {
132133 // Return early if any max/min thresholds are hit.
133134 if depth > e .maxZipDepth {
134135 return nil , fmt .Errorf ("%s reached max zip depth %d at %q" , e .Name (), depth , input .Path )
135136 }
136- if oBytes := openedBytes + int ( input .Info .Size () ); oBytes > e .maxOpenedBytes {
137+ if oBytes := openedBytes + input .Info .Size (); oBytes > e .maxOpenedBytes {
137138 return nil , fmt .Errorf ("%s reached max opened bytes of %d at %q" , e .Name (), oBytes , input .Path )
138139 }
139140 if int (input .Info .Size ()) < e .minZipBytes {
@@ -151,7 +152,7 @@ func (e Extractor) extractWithMax(ctx context.Context, input *extractor.ScanInpu
151152 if err != nil {
152153 return nil , fmt .Errorf ("%s failed to read file at %q: %w" , e .Name (), input .Path , err )
153154 }
154- openedBytes += len (b )
155+ openedBytes += int64 ( len (b ) )
155156 // Check size again in case input.Info.Size() was not accurate. Return early if hit max.
156157 if openedBytes > e .maxOpenedBytes {
157158 return nil , fmt .Errorf ("%s reached max opened bytes of %d at %q" , e .Name (), openedBytes , input .Path )
0 commit comments