diff --git a/pkg/archive/archive.go b/pkg/archive/archive.go index c4bd936b..14c91450 100644 --- a/pkg/archive/archive.go +++ b/pkg/archive/archive.go @@ -32,12 +32,15 @@ func extractNestedArchive( if err != nil { return fmt.Errorf("failed to determine file type: %w", err) } - if ft != nil && ft.MIME == "application/zlib" { + switch { + case ft != nil && ft.MIME == "application/x-upx": isArchive = true - } - if _, ok := programkind.ArchiveMap[programkind.GetExt(f)]; ok { + case ft != nil && ft.MIME == "application/zlib": + isArchive = true + case programkind.ArchiveMap[programkind.GetExt(f)]: isArchive = true } + //nolint:nestif // ignore complexity of 8 if isArchive { // Ensure the file was extracted and exists @@ -52,11 +55,15 @@ func extractNestedArchive( if err != nil { return fmt.Errorf("failed to determine file type: %w", err) } - if ft != nil && ft.MIME == "application/zlib" { + switch { + case ft != nil && ft.MIME == "application/x-upx": + extract = ExtractUPX + case ft != nil && ft.MIME == "application/zlib": extract = ExtractZlib - } else { + default: extract = ExtractionMethod(programkind.GetExt(fullPath)) } + err = extract(ctx, d, fullPath) if err != nil { return fmt.Errorf("extract nested archive: %w", err) @@ -103,11 +110,16 @@ func ExtractArchiveToTempDir(ctx context.Context, path string) (string, error) { if err != nil { return "", fmt.Errorf("failed to determine file type: %w", err) } - if ft != nil && ft.MIME == "application/zlib" { + + switch { + case ft != nil && ft.MIME == "application/zlib": extract = ExtractZlib - } else { + case ft != nil && ft.MIME == "application/x-upx": + extract = ExtractUPX + default: extract = ExtractionMethod(programkind.GetExt(path)) } + if extract == nil { return "", fmt.Errorf("unsupported archive type: %s", path) } diff --git a/pkg/archive/upx.go b/pkg/archive/upx.go new file mode 100644 index 00000000..f4f7995d --- /dev/null +++ b/pkg/archive/upx.go @@ -0,0 +1,68 @@ +package archive + +import ( + "context" + "errors" + "fmt" + "os" + "os/exec" + "path/filepath" + + "github.com/chainguard-dev/clog" +) + +var ErrUPXNotFound = errors.New("UPX executable not found in PATH") + +func upxInstalled() error { + _, err := exec.LookPath("upx") + if err != nil { + if errors.Is(err, exec.ErrNotFound) { + return ErrUPXNotFound + } + return fmt.Errorf("failed to check for UPX executable: %w", err) + } + return nil +} + +func ExtractUPX(ctx context.Context, d, f string) error { + // Check if UPX is installed + if err := upxInstalled(); err != nil { + return err + } + + logger := clog.FromContext(ctx).With("dir", d, "file", f) + logger.Debug("extracting upx") + + // Check if the file is valid + _, err := os.Stat(f) + if err != nil { + return fmt.Errorf("failed to stat file: %w", err) + } + + gf, err := os.Open(f) + if err != nil { + return fmt.Errorf("failed to open file: %w", err) + } + defer gf.Close() + + base := filepath.Base(f) + target := filepath.Join(d, base[:len(base)-len(filepath.Ext(base))]) + + // copy the file to the temporary directory before decompressing + tf, err := os.ReadFile(f) + if err != nil { + return err + } + + err = os.WriteFile(target, tf, 0o600) + if err != nil { + return err + } + + cmd := exec.Command("upx", "-d", target) + if _, err := cmd.CombinedOutput(); err != nil { + return fmt.Errorf("failed to decompress upx file: %w", err) + } + + return nil +} diff --git a/pkg/programkind/programkind.go b/pkg/programkind/programkind.go index f0d700a7..4e547af5 100644 --- a/pkg/programkind/programkind.go +++ b/pkg/programkind/programkind.go @@ -4,6 +4,7 @@ package programkind import ( + "bytes" "errors" "fmt" "io" @@ -30,6 +31,7 @@ var ArchiveMap = map[string]bool{ ".tar.gz": true, ".tar.xz": true, ".tgz": true, + ".upx": true, ".whl": true, ".xz": true, ".zip": true, @@ -86,6 +88,7 @@ var supportedKind = map[string]string{ "sh": "application/x-sh", "so": "application/x-sharedlib", "ts": "application/typescript", + "upx": "application/x-upx", "whl": "application/x-wheel+zip", "yaml": "", "yara": "", @@ -99,8 +102,17 @@ type FileType struct { } // IsSupportedArchive returns whether a path can be processed by our archive extractor. +// UPX files are an edge case since they may or may not even have an extension that can be referenced. func IsSupportedArchive(path string) bool { - return ArchiveMap[GetExt(path)] + if _, isValidArchive := ArchiveMap[GetExt(path)]; isValidArchive { + return true + } + if ft, err := File(path); err == nil && ft != nil { + if ft.MIME == "application/x-upx" { + return true + } + } + return false } // getExt returns the extension of a file path @@ -206,6 +218,10 @@ func File(path string) (*FileType, error) { // final strategy: DIY matching where mimetype is too strict. s := string(hdr[:]) switch { + // Check for UPX files before we do the ELF check + // We're looking for UPX! throughout the header since it may not be in the first 2-4 bytes + case bytes.Contains(hdr[:], []byte{'\x55', '\x50', '\x58', '\x21'}): + return Path(".upx"), nil case hdr[0] == '\x7f' && hdr[1] == 'E' || hdr[2] == 'L' || hdr[3] == 'F': return Path(".elf"), nil case strings.Contains(s, "