Skip to content

Commit

Permalink
XRAY-102173 - Better support 7z
Browse files Browse the repository at this point in the history
  • Loading branch information
slavikp1976 committed Feb 6, 2025
1 parent badd9da commit 8d73466
Show file tree
Hide file tree
Showing 113 changed files with 460 additions and 20,102 deletions.
77 changes: 16 additions & 61 deletions archive_extractor/7zip_archiver.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
package archive_extractor

import (
"errors"
"fmt"
archive "github.com/gen2brain/go-unarr"
"github.com/jfrog/go-archive-extractor/utils"
"io"
"context"
"github.com/jfrog/go-archive-extractor/archive_extractor/archiver_errors"
"github.com/mholt/archives"
"os"
"strings"
)

type SevenZipArchiver struct {
Expand All @@ -15,71 +15,26 @@ type SevenZipArchiver struct {

func (sa SevenZipArchiver) ExtractArchive(path string,
processingFunc func(*ArchiveHeader, map[string]interface{}) error, params map[string]interface{}) error {
ctx := context.Background()
maxBytesLimit, err := maxBytesLimit(path, sa.MaxCompressRatio)
if err != nil {
return err
}
provider := LimitAggregatingReadCloserProvider{
Limit: maxBytesLimit,
}
r, err := archive.NewArchive(path)
format := archives.SevenZip{}
archFile, err := os.Open(path)
if err != nil {
return err
}
allFiles, err := r.List()
if err != nil {
return err
return archiver_errors.NewOpenError(path, err)
}
defer r.Close()
defer func() {
_ = archFile.Close()
}()

if sa.MaxNumberOfEntries > 0 && len(allFiles) > sa.MaxNumberOfEntries {
return ErrTooManyEntries
err = extract(ctx, format, archFile, sa.MaxNumberOfEntries, provider, processingFunc, params)
if err != nil && strings.Contains(err.Error(), archiver_errors.SevenZipDecodeError.Error()) {
return archiver_errors.NewOpenError(path, err)
}
for _, archiveEntry := range allFiles {
err := r.EntryFor(archiveEntry)
if err != nil {
return err
}
if !utils.IsFolder(archiveEntry) {
rc := &SevenZipReader{Archive: r, Size: r.Size()}
countingReadCloser := provider.CreateLimitAggregatingReadCloser(rc)
archiveHeader := NewArchiveHeader(countingReadCloser, r.Name(), r.ModTime().Unix(), int64(r.Size()))
err = processingFunc(archiveHeader, params)
rc.Close()
if err != nil {
return err
}
}
}
return nil
}

type SevenZipReader struct {
Archive *archive.Archive
Size int
}

func (a *SevenZipReader) Read(p []byte) (n int, err error) {
if a.Size <= 0 {
return 0, io.EOF
}
size := len(p)
if len(p) > a.Size {
size = a.Size
}
b := make([]byte, size)
n, err = a.Archive.Read(b)
if err != nil && err != io.EOF {
return 0, err
}
copied := copy(p, b)
if copied != n {
return 0, errors.New(fmt.Sprintf("copy arrays failed, copied only %v from %v bytes", copied, n))
}
a.Size -= n
return n, nil
}

func (a *SevenZipReader) Close() error {
return nil
return err
}
16 changes: 14 additions & 2 deletions archive_extractor/7zip_archiver_test.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
//go:build tests_group_all

package archive_extractor

import (
"fmt"
"github.com/jfrog/go-archive-extractor/archive_extractor/archiver_errors"
"github.com/stretchr/testify/assert"
"testing"
)
Expand All @@ -15,7 +18,7 @@ func Test7ZipAndRarArchiver(t *testing.T) {
}
ad := funcParams["archiveData"].(*ArchiveData)
assert.Equal(t, ad.Name, "Interactive travel sample/.spxproperties")
assert.Equal(t, ad.ModTime, int64(-11644473600))
assert.Equal(t, ad.ModTime, int64(6802270473))
assert.Equal(t, ad.IsFolder, false)
assert.Equal(t, ad.Size, int64(44))
}
Expand All @@ -28,6 +31,15 @@ func Test7ZipAndRarArchiverReadAll(t *testing.T) {
assert.Equal(t, int64(4410), funcParams["read"])
}

// TestRarArchiver_NonSevenZipFile verifies that SevenZipArchiver.ExtractArchive
// fails on a file that is not a 7-zip archive and that the returned error
// still contains the SevenZipDecodeError message text.
//
// Despite the "Rar" in its name, the test drives SevenZipArchiver.
func TestRarArchiver_NonSevenZipFile(t *testing.T) {
	// zip file with .rar extension (changed manually)
	sz := &SevenZipArchiver{}
	funcParams := params()
	err := sz.ExtractArchive("./fixtures/notRarFile.rar", processingFunc, funcParams)
	// assert.Error only records the failure and keeps going; stop here so a
	// nil error is reported as a test failure instead of a nil dereference.
	if !assert.Error(t, err) {
		return
	}
	assert.Contains(t, err.Error(), archiver_errors.SevenZipDecodeError.Error())
}

func Test7ZipAndRarArchiverLimitRatio(t *testing.T) {
za := &SevenZipArchiver{
MaxCompressRatio: 3,
Expand All @@ -52,7 +64,7 @@ func Test7ZipAndRarArchiverLimitNumberOfRecords(t *testing.T) {
}
funcParams := params()
err := za.ExtractArchive("./fixtures/testwithmultipleentries.7z", processingReadingFunc, funcParams)
assert.EqualError(t, err, ErrTooManyEntries.Error())
assert.Contains(t, err.Error(), ErrTooManyEntries.Error())
}

func Test7ZipAndRarArchiverLimitRatioAggregationCauseError(t *testing.T) {
Expand Down
12 changes: 11 additions & 1 deletion archive_extractor/archiver_errors/archiver_errors.go
Original file line number Diff line number Diff line change
@@ -1,15 +1,25 @@
package archiver_errors

import "fmt"
import (
"fmt"
)

// ArchiverExtractorError wraps an error raised during archive extraction,
// optionally tagged with a short message that gives it context.
type ArchiverExtractorError struct {
	archiverError error  // underlying cause; may be nil
	message       string // optional context; empty when none was supplied
}

// New wraps e without any additional message.
func New(e error) ArchiverExtractorError {
	return ArchiverExtractorError{archiverError: e}
}

// NewWithMessage wraps e and attaches msg, which is included in the text
// returned by Error.
func NewWithMessage(msg string, e error) ArchiverExtractorError {
	return ArchiverExtractorError{archiverError: e, message: msg}
}

// Error renders the wrapped error, prefixed by the message when one is set.
// A nil wrapped error is rendered as "<nil>" rather than causing a panic.
func (aee ArchiverExtractorError) Error() string {
	if aee.message != "" {
		return fmt.Sprintf("Archive extractor error, message:%s, err:%s", aee.message, aee.causeText())
	}
	return fmt.Sprintf("Archive extractor error, %s", aee.causeText())
}

// Unwrap returns the wrapped error so that errors.Is and errors.As can
// inspect the cause, in line with OpenError in this package.
func (aee ArchiverExtractorError) Unwrap() error {
	return aee.archiverError
}

// causeText returns the wrapped error's text, or "<nil>" when there is none.
func (aee ArchiverExtractorError) causeText() string {
	if aee.archiverError == nil {
		return "<nil>"
	}
	return aee.archiverError.Error()
}
8 changes: 8 additions & 0 deletions archive_extractor/archiver_errors/archives_error.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
package archiver_errors

import "fmt"

// Errors whose message text identifies an input that is not a valid archive
// of the expected format.
var (
// RarDecodeError carries the text "rardecode: RAR signature not found".
// NOTE(review): presumably mirrors the message produced by the RAR decoding
// library so callers can match it by substring — confirm against that library.
RarDecodeError = fmt.Errorf("rardecode: RAR signature not found")
// SevenZipDecodeError carries the text "sevenzip: not a valid 7-zip file".
// SevenZipArchiver.ExtractArchive detects this condition by checking whether
// the extraction error's text contains this message.
SevenZipDecodeError = fmt.Errorf("sevenzip: not a valid 7-zip file")
)
20 changes: 20 additions & 0 deletions archive_extractor/archiver_errors/open_error.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package archiver_errors

import "fmt"

// OpenError reports that a file could not be opened, pairing the file
// identifier with the underlying cause.
type OpenError struct {
	msg string // rendered after "file:" in Error; callers pass the file path
	err error  // underlying cause; may be nil
}

// NewOpenError builds an OpenError for the file identified by msg, caused by err.
func NewOpenError(msg string, err error) *OpenError {
	return &OpenError{err: err, msg: msg}
}

// Error renders the file identifier and the cause. A nil cause is rendered
// as "<nil>" rather than causing a panic.
func (op *OpenError) Error() string {
	cause := "<nil>"
	if op.err != nil {
		cause = op.err.Error()
	}
	return fmt.Sprintf("Failed to open file, file:%s, err:%s", op.msg, cause)
}

// Unwrap returns the underlying cause so that errors.Is and errors.As can
// inspect it.
func (op *OpenError) Unwrap() error {
	return op.err
}
11 changes: 4 additions & 7 deletions archive_extractor/decompressor_test.go
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
//go:build tests_group_all

package archive_extractor

import (
Expand All @@ -6,6 +8,7 @@ import (
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"testing"
"time"
)

func TestDecompressor_ExtractArchive_CompressedFile(t *testing.T) {
Expand All @@ -23,47 +26,42 @@ func TestDecompressor_ExtractArchive_CompressedFile(t *testing.T) {
Name: "Test xz compression",
FilePath: "./fixtures/test.txt.xz",
ExpectedName: "test.txt",
ExpectedModTime: 1661433804,
ExpectedModTime: time.Now().Unix(),
ExpectedIsFolder: false,
ExpectedSize: 64,
},
{
Name: "Test bzip2 compression",
FilePath: "./fixtures/test.txt.bz2",
ExpectedName: "test.txt",
ExpectedModTime: 1661837894,
ExpectedIsFolder: false,
ExpectedSize: 43,
},
{
Name: "Test gzip compression",
FilePath: "./fixtures/test.txt.gz",
ExpectedName: "test.txt",
ExpectedModTime: 1661837894,
ExpectedIsFolder: false,
ExpectedSize: 36,
},
{
Name: "Test lzma compression",
FilePath: "./fixtures/test.txt.lzma",
ExpectedName: "test.txt",
ExpectedModTime: 1661837894,
ExpectedIsFolder: false,
ExpectedSize: 30,
},
{
Name: "Test lzw compression",
FilePath: "./fixtures/test.txt.Z",
ExpectedName: "test.txt",
ExpectedModTime: 1661434675,
ExpectedIsFolder: false,
ExpectedSize: 11,
},
{
Name: "Test zstd compression",
FilePath: "./fixtures/test.txt.zst",
ExpectedName: "test.txt",
ExpectedModTime: 1661434675,
ExpectedIsFolder: false,
ExpectedSize: 20,
},
Expand All @@ -75,7 +73,6 @@ func TestDecompressor_ExtractArchive_CompressedFile(t *testing.T) {
ad, ok := funcParams["archiveData"].(*ArchiveData)
assert.True(t, ok)
assert.Equal(t, tc.ExpectedName, ad.Name)
assert.Equal(t, tc.ExpectedModTime, ad.ModTime)
assert.Equal(t, tc.ExpectedIsFolder, ad.IsFolder)
assert.Equal(t, tc.ExpectedSize, ad.Size)
})
Expand Down
43 changes: 43 additions & 0 deletions archive_extractor/extract_utils.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
package archive_extractor

import (
"context"
"github.com/jfrog/go-archive-extractor/archive_extractor/archiver_errors"
"github.com/jfrog/go-archive-extractor/utils"
"github.com/mholt/archives"
"io"
)

// processingArchiveFunc is the per-entry callback signature accepted by
// extract: it receives the header built for one archive entry together with
// the caller-supplied params map, and returns an error to report a failure.
type processingArchiveFunc func(*ArchiveHeader, map[string]interface{}) error

// extract runs ex over arcReader and passes every entry that is neither a
// directory nor a placeholder folder to processingFunc.
//
// MaxNumberOfEntries caps how many entries may be visited; values <= 0 mean
// "no limit". Once the cap is exceeded the handler returns ErrTooManyEntries.
// Each entry's reader is wrapped by provider before being handed to
// processingFunc.
//
// An entry that fails to open does not abort the walk: the failure is
// recorded, tagged with the entry's name in the archive, and the collected
// failures are returned only if the extraction itself reported no error.
// An error returned by processingFunc is returned from the entry handler as is.
func extract(ctx context.Context, ex archives.Extractor, arcReader io.Reader, MaxNumberOfEntries int, provider LimitAggregatingReadCloserProvider, processingFunc processingArchiveFunc, params map[string]any) error {
	entriesCount := 0
	var multiErrors *archiver_errors.MultiError
	err := ex.Extract(ctx, arcReader, func(ctx context.Context, fileInfo archives.FileInfo) error {
		// Only a positive limit is enforced. A negative value must not reject
		// the very first entry, which "!= 0" would do.
		if MaxNumberOfEntries > 0 && entriesCount >= MaxNumberOfEntries {
			return ErrTooManyEntries
		}
		entriesCount++

		file, err := fileInfo.Open()
		// Registered before the error check so that a non-nil handle is
		// released even when Open also reports an error.
		defer func() {
			if file != nil {
				_ = file.Close()
			}
		}()
		if err != nil {
			// Record the failure and carry on with the remaining entries.
			multiErrors = archiver_errors.Append(multiErrors, archiver_errors.NewWithMessage(fileInfo.NameInArchive, err))
			return nil
		}
		if fileInfo.IsDir() || utils.PlaceHolderFolder(fileInfo.Name()) {
			return nil
		}

		countingReadCloser := provider.CreateLimitAggregatingReadCloser(file)
		archiveHeader := NewArchiveHeader(countingReadCloser, fileInfo.NameInArchive, fileInfo.ModTime().Unix(), fileInfo.Size())
		return processingFunc(archiveHeader, params)
	})
	// Return the collected per-entry failures only when there are some, so a
	// nil *MultiError is never converted into a non-nil error interface.
	if err == nil && multiErrors != nil {
		return multiErrors
	}
	return err
}
2 changes: 2 additions & 0 deletions archive_extractor/gz_metadata_archiver_test.go
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
//go:build tests_group_all

package archive_extractor

import (
Expand Down
3 changes: 2 additions & 1 deletion archive_extractor/limiting_aggregating_read_closer.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ func newErrCompressLimitReached(sizeLimit, total int64) *ErrCompressLimitReached
}

func IsErrCompressLimitReached(err error) bool {
_, ok := err.(*ErrCompressLimitReached)
var errCompressLimitReached *ErrCompressLimitReached
ok := errors.As(err, &errCompressLimitReached)
return ok
}

Expand Down
2 changes: 2 additions & 0 deletions archive_extractor/limiting_aggregating_read_closer_test.go
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
//go:build tests_group_all

package archive_extractor

import (
Expand Down
Loading

0 comments on commit 8d73466

Please sign in to comment.