From 1191e248654f7a0add1ae2b8aee474e623a5c719 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E8=83=A1=E7=8E=AE=E6=96=87?=
Date: Fri, 18 Oct 2024 18:05:53 +0800
Subject: [PATCH] disk: call sfdisk directly to expand partition

growpart is a shell script, and we don't have shell now in our image.

Fixes: af7a885cb5d0c5be15e22618738cf187aea1d9a7
---
Changes from review:
- nodeserver.go: log "Expand filesystem start" through the contextual
  logger; klog.V(2).Info is print-style and would concatenate the
  key/value arguments into the message.
- sfdisk/expand.go: wrap errors with %w, add the disk path to the open
  error, document ExpandPartition.
- sfdisk/expand_test.go: close the file returned by os.Create.
- utils/os/exec.go: add ExitErrorWithStderr.Unwrap so errors.As/Is can
  still reach the *exec.ExitError; add doc comments.
- blob ids on the index lines of the edited files are those of the
  original revision.

 build/multi/Dockerfile.multi     |  2 +-
 build/multi/Dockerfile.multi.asi |  2 +-
 pkg/disk/nodeserver.go           | 22 ++++--------
 pkg/disk/sfdisk/expand.go        | 59 ++++++++++++++++++++++++++++++++
 pkg/disk/sfdisk/expand_test.go   | 42 +++++++++++++++++++++++
 pkg/utils/os/exec.go             | 35 +++++++++++++++++++
 pkg/utils/os/exec_test.go        | 19 ++++++++++
 7 files changed, 164 insertions(+), 17 deletions(-)
 create mode 100644 pkg/disk/sfdisk/expand.go
 create mode 100644 pkg/disk/sfdisk/expand_test.go
 create mode 100644 pkg/utils/os/exec.go
 create mode 100644 pkg/utils/os/exec_test.go

diff --git a/build/multi/Dockerfile.multi b/build/multi/Dockerfile.multi
index 6882b9641..f796fc9f1 100644
--- a/build/multi/Dockerfile.multi
+++ b/build/multi/Dockerfile.multi
@@ -32,7 +32,7 @@ LABEL maintainers="Alibaba Cloud Authors" description="Alibaba Cloud CSI Plugin"
 
 ARG TARGETARCH
 RUN --mount=type=cache,target=/var/cache/dnf,sharing=locked,id=dnf-cache-$TARGETARCH \
-    dnf install -y ca-certificates file tzdata nfs-utils xfsprogs e4fsprogs pciutils iputils strace cloud-utils-growpart gdisk nc telnet tar cpio lsof && \
+    dnf install -y ca-certificates file tzdata nfs-utils xfsprogs e4fsprogs pciutils iputils strace util-linux nc telnet tar cpio lsof && \
     ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime && echo 'Asia/Shanghai' >/etc/timezone
 
 FROM base as build-util-linux
diff --git a/build/multi/Dockerfile.multi.asi b/build/multi/Dockerfile.multi.asi
index 39bc30c3b..611865f1a 100644
--- a/build/multi/Dockerfile.multi.asi
+++ b/build/multi/Dockerfile.multi.asi
@@ -27,7 +27,7 @@ RUN --mount=type=cache,target=/root/.cache/go-build \
 
 FROM registry.eu-west-1.aliyuncs.com/acs/alinux:3-update as base
 LABEL maintainers="Alibaba Cloud Authors" description="Alibaba Cloud CSI Plugin"
-RUN yum install -y ca-certificates file tzdata nfs-utils xfsprogs e4fsprogs pciutils iputils strace cloud-utils-growpart gdisk nc telnet tar cpio lsof && \
+RUN yum install -y ca-certificates file tzdata nfs-utils xfsprogs e4fsprogs pciutils iputils strace util-linux nc telnet tar cpio lsof && \
     yum clean all
 
 RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime && echo 'Asia/Shanghai' >/etc/timezone
diff --git a/pkg/disk/nodeserver.go b/pkg/disk/nodeserver.go
index 388daf14a..97815b373 100644
--- a/pkg/disk/nodeserver.go
+++ b/pkg/disk/nodeserver.go
@@ -17,14 +17,12 @@ limitations under the License.
 package disk
 
 import (
-	"bytes"
 	"context"
 	"crypto/sha256"
 	"errors"
 	"fmt"
 	"io/fs"
 	"os"
-	"os/exec"
 	"path/filepath"
 	"regexp"
 	"strconv"
@@ -34,6 +32,7 @@ import (
 	"github.com/container-storage-interface/spec/lib/go/csi"
 	"github.com/kubernetes-sigs/alibaba-cloud-csi-driver/pkg/cloud/metadata"
 	"github.com/kubernetes-sigs/alibaba-cloud-csi-driver/pkg/common"
+	"github.com/kubernetes-sigs/alibaba-cloud-csi-driver/pkg/disk/sfdisk"
 	"github.com/kubernetes-sigs/alibaba-cloud-csi-driver/pkg/features"
 	"github.com/kubernetes-sigs/alibaba-cloud-csi-driver/pkg/utils"
 	utilsio "github.com/kubernetes-sigs/alibaba-cloud-csi-driver/pkg/utils/io"
@@ -974,29 +973,22 @@ func localExpandVolume(ctx context.Context, req *csi.NodeExpandVolumeRequest) (*
 		}
 		return nil, status.Errorf(codes.Internal, "NodeExpandVolume: VolumeId: %s, get device name error: %s", req.VolumeId, err.Error())
 	}
+	logger := klog.FromContext(ctx).WithValues("device", devicePath)
+	ctx = klog.NewContext(ctx, logger)
 
-	klog.Infof("NodeExpandVolume:: volumeId: %s, devicePath: %s, volumePath: %s", diskID, devicePath, volumePath)
 	rootPath, index, err := DefaultDeviceManager.GetDeviceRootAndPartitionIndex(devicePath)
 	if err != nil {
 		return nil, status.Errorf(codes.Internal, "GetDeviceRootAndIndex(%s) failed: %v", diskID, err)
 	}
 	if index != "" {
-		output, err := exec.Command("growpart", rootPath, index).CombinedOutput()
+		err := sfdisk.ExpandPartition(ctx, rootPath, index)
 		if err != nil {
-			if bytes.Contains(output, []byte("NOCHANGE")) {
-				if bytes.Contains(output, []byte("it cannot be grown")) || bytes.Contains(output, []byte("could only be grown by")) {
-					deviceCapacity := getBlockDeviceCapacity(devicePath)
-					rootCapacity := getBlockDeviceCapacity(rootPath)
-					klog.Infof("NodeExpandVolume: Volume %s with Device Partition %s no need to grown, with request: %v, root: %v, partition: %v",
-						diskID, devicePath, DiskSize{requestBytes}, DiskSize{rootCapacity}, DiskSize{deviceCapacity})
-					return &csi.NodeExpandVolumeResponse{}, nil
-				}
-			}
-			return nil, status.Errorf(codes.InvalidArgument, "NodeExpandVolume: expand volume %s at %s %s failed: %s, with output %s", diskID, rootPath, index, err.Error(), string(output))
+			return nil, status.Error(codes.Internal, err.Error())
 		}
-		klog.Infof("NodeExpandVolume: Successful expand partition for volume: %s device: %s partition: %s", diskID, rootPath, index)
+		logger.V(2).Info("Successful expand partition", "root", rootPath, "partition", index)
 	}
 
+	logger.V(2).Info("Expand filesystem start", "volumePath", volumePath)
 	// use resizer to expand volume filesystem
 	r := k8smount.NewResizeFs(utilexec.New())
 	ok, err := r.Resize(devicePath, volumePath)
diff --git a/pkg/disk/sfdisk/expand.go b/pkg/disk/sfdisk/expand.go
new file mode 100644
index 000000000..d39441a5d
--- /dev/null
+++ b/pkg/disk/sfdisk/expand.go
@@ -0,0 +1,59 @@
+package sfdisk
+
+import (
+	"bytes"
+	"context"
+	"fmt"
+	"os/exec"
+
+	utilsos "github.com/kubernetes-sigs/alibaba-cloud-csi-driver/pkg/utils/os"
+
+	"golang.org/x/sys/unix"
+	"k8s.io/klog/v2"
+)
+
+// ExpandPartition grows the given partition on disk as much as possible by
+// running "sfdisk <disk> -N <partition>" with ",+" on its standard input.
+//
+// disk is the path of the whole disk (a block device or an image file) and
+// partition is the partition number handed to sfdisk. An exclusive flock(2)
+// is held on disk for the duration of the call.
+func ExpandPartition(ctx context.Context, disk, partition string) error {
+	logger := klog.FromContext(ctx)
+	fd, err := unix.Open(disk, unix.O_RDONLY, 0)
+	if err != nil {
+		return fmt.Errorf("failed to open %s: %w", disk, err)
+	}
+	defer func() {
+		if err := unix.Close(fd); err != nil {
+			logger.Error(err, "failed to close", "fd", fd)
+		}
+	}()
+
+	err = unix.Flock(fd, unix.LOCK_EX) // as suggested in the man sfdisk(8)
+	if err != nil {
+		return fmt.Errorf("failed to lock %s exclusively: %w", disk, err)
+	}
+	defer func() {
+		if err := unix.Flock(fd, unix.LOCK_UN); err != nil {
+			logger.Error(err, "failed to unlock", "fd", fd)
+		}
+	}()
+
+	dump, err := exec.CommandContext(ctx, "sfdisk", "--dump", disk).Output()
+	if err != nil {
+		return fmt.Errorf("failed to dump current partition table of %s: %w", disk, utilsos.ErrWithStderr(err))
+	}
+	dumpStr := string(dump)
+	logger.V(4).Info("sfdisk dump before expansion", "dump", dumpStr)
+
+	// Don't cancel this, we don't want to corrupt the partition table
+	cmd := exec.Command("sfdisk", disk, "-N", partition)
+	cmd.Stdin = bytes.NewReader([]byte(",+")) // enlarge the partition as much as possible
+	result, err := cmd.Output()
+	if err != nil {
+		return fmt.Errorf("failed to expand partition %s on %s: %w\noriginal table looked like:\n%s", partition, disk, utilsos.ErrWithStderr(err), dumpStr)
+	}
+	logger.V(3).Info("sfdisk success", "output", string(result))
+	return nil
+}
diff --git a/pkg/disk/sfdisk/expand_test.go b/pkg/disk/sfdisk/expand_test.go
new file mode 100644
index 000000000..8ddbb86ee
--- /dev/null
+++ b/pkg/disk/sfdisk/expand_test.go
@@ -0,0 +1,42 @@
+package sfdisk
+
+import (
+	"bytes"
+	"context"
+	"errors"
+	"os"
+	"os/exec"
+	"strings"
+	"testing"
+
+	utilsos "github.com/kubernetes-sigs/alibaba-cloud-csi-driver/pkg/utils/os"
+	"github.com/stretchr/testify/assert"
+)
+
+func TestExpandPartition(t *testing.T) {
+	path, err := exec.LookPath("sfdisk")
+	if errors.Is(err, exec.ErrNotFound) {
+		t.Skip("sfdisk not found")
+	}
+	assert.NoError(t, err)
+	t.Logf("sfdisk found at: %s", path)
+
+	testImage := t.TempDir() + "/test.img"
+	f, err := os.Create(testImage)
+	assert.NoError(t, err)
+	assert.NoError(t, f.Close()) // only the path is needed from here on
+	assert.NoError(t, os.Truncate(testImage, 1<<23)) // 8MB
+
+	cmd := exec.Command("sfdisk", testImage)
+	cmd.Stdin = bytes.NewReader([]byte("label: gpt\n,\n")) // create a single partition
+	result, err := cmd.Output()
+	assert.NoError(t, utilsos.ErrWithStderr(err))
+	t.Logf("create partition success: %s", string(result))
+
+	assert.NoError(t, os.Truncate(testImage, 1<<24)) // expand to 16MB
+	assert.NoError(t, ExpandPartition(context.Background(), testImage, "1"))
+
+	dump, err := exec.Command("sfdisk", "--dump", testImage).Output()
+	assert.NoError(t, utilsos.ErrWithStderr(err))
+	assert.Contains(t, strings.ReplaceAll(string(dump), " ", ""), "test.img1:start=2048,size=30687")
+}
diff --git a/pkg/utils/os/exec.go b/pkg/utils/os/exec.go
new file mode 100644
index 000000000..4bfb6b484
--- /dev/null
+++ b/pkg/utils/os/exec.go
@@ -0,0 +1,35 @@
+package os
+
+import (
+	"bytes"
+	"errors"
+	"os/exec"
+)
+
+// ExitErrorWithStderr wraps an *exec.ExitError so that its error message also
+// contains the stderr output captured from the command.
+type ExitErrorWithStderr struct {
+	*exec.ExitError
+}
+
+// Error returns the exit error message followed by the trimmed stderr.
+func (err ExitErrorWithStderr) Error() string {
+	return err.ExitError.Error() + ", with stderr: " + string(bytes.TrimSpace(err.Stderr))
+}
+
+// Unwrap returns the wrapped *exec.ExitError, so that errors.As and errors.Is
+// keep working on errors returned by ErrWithStderr.
+func (err ExitErrorWithStderr) Unwrap() error {
+	return err.ExitError
+}
+
+// ErrWithStderr returns an ExitErrorWithStderr if err is (or wraps) an
+// *exec.ExitError carrying non-empty Stderr, e.g. one returned by
+// (*exec.Cmd).Output. Any other error, including nil, is returned unchanged.
+func ErrWithStderr(err error) error {
+	var exitErr *exec.ExitError
+	if errors.As(err, &exitErr) && len(exitErr.Stderr) > 0 {
+		return ExitErrorWithStderr{exitErr}
+	}
+	return err
+}
diff --git a/pkg/utils/os/exec_test.go b/pkg/utils/os/exec_test.go
new file mode 100644
index 000000000..9a0bbe7a7
--- /dev/null
+++ b/pkg/utils/os/exec_test.go
@@ -0,0 +1,19 @@
+package os
+
+import (
+	"os/exec"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestErrWithStderr(t *testing.T) {
+	_, err := exec.Command("sh", "-c", "echo 'test error' 1>&2; exit 2").Output()
+	assert.Error(t, err)
+	assert.EqualError(t, ErrWithStderr(err), "exit status 2, with stderr: test error")
+}
+
+func TestErrWithStderrNoChange(t *testing.T) {
+	err := exec.ErrNotFound
+	assert.Equal(t, ErrWithStderr(err), err)
+}