Skip to content

Commit

Permalink
[bug] In Esti check that mtimes match on lakeFS and underlying store
Browse files Browse the repository at this point in the history
Azure and GCS do _not_ use the time of create-MPU.  The important thing is
that the two share the same time.

Anything else confuses not only people but also programs.  For instance
DataBricks with presigned URLs can get confused by getting one time from
lakeFS and another from underlying storage.
  • Loading branch information
arielshaqed committed Oct 29, 2024
1 parent fc7c766 commit 43f63e0
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 16 deletions.
41 changes: 25 additions & 16 deletions esti/multipart_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,12 @@ const (
)

func TestMultipartUpload(t *testing.T) {
// timeSlippage is a bound on the time difference between the local server and the S3
// server. It is used to verify that the "Last-Modified" time is actually close to the
// CreateMultipartUpload time. BUT S3 provides this time only at a 1-second resolution.
// So a 1-second difference is possible.
const timeSlippage = time.Second
// timeResolution is a duration greater than the timestamp resolution of the backing
// store. Multipart object on S3 is the time of create-MPU, waiting before completion
// ensures that lakeFS did not use the current time. For other blockstores MPU
// completion time is used, meaning it will be hard to detect if the underlying and
// lakeFS objects share the same time.
const timeResolution = time.Second

ctx, logger, repo := setupTest(t)
defer tearDownTest(repo)
Expand All @@ -43,8 +44,6 @@ func TestMultipartUpload(t *testing.T) {
require.NoError(t, err, "failed to create multipart upload")
logger.Info("Created multipart upload request")

uploadTime := time.Now()

parts := make([][]byte, multipartNumberOfParts)
var partsConcat []byte
for i := 0; i < multipartNumberOfParts; i++ {
Expand All @@ -54,9 +53,9 @@ func TestMultipartUpload(t *testing.T) {

completedParts := uploadMultipartParts(t, ctx, logger, resp, parts, 0)

// Object should have Last-Modified time at around time of MPU creation. Server times
// after this Sleep will be more than timeSlippage away from uploadTime.
time.Sleep(2 * timeSlippage)
// On S3 the object should have Last-Modified time at around time of MPU creation.
// Ensure lakeFS fails the test if it fakes it by using the current time.
time.Sleep(2 * timeResolution)

completeResponse, err := uploadMultipartComplete(ctx, svc, resp, completedParts)
require.NoError(t, err, "failed to complete multipart upload")
Expand All @@ -70,13 +69,23 @@ func TestMultipartUpload(t *testing.T) {
t.Fatalf("uploaded object did not match")
}

statResp, err := client.StatObjectWithResponse(ctx, repo, mainBranch, &apigen.StatObjectParams{Path: file})
statResp, err := client.StatObjectWithResponse(ctx, repo, mainBranch, &apigen.StatObjectParams{Path: file, Presign: aws.Bool(true)})
require.NoError(t, err, "failed to get object")
require.Equal(t, http.StatusOK, getResp.StatusCode())
lastModified := time.Unix(statResp.JSON200.Mtime, 0)
require.Less(t, lastModified.Sub(uploadTime).Abs(), timeSlippage,
"(remote) last modified time %s too far away from (local) upload time %s",
lastModified, uploadTime)
require.Equal(t, http.StatusOK, getResp.StatusCode(), getResp.Status())

// Get last-modified from the underlying store.

presignedGetURL := statResp.JSON200.PhysicalAddress
res, err := http.Get(presignedGetURL)
require.NoError(t, err, "GET underlying")
// The presigned URL is usable only for GET, but we don't actually care about its body.
_ = res.Body.Close()
require.Equal(t, http.StatusOK, res.StatusCode, "%s: %s", presignedURL, res.Status)

Check failure on line 83 in esti/multipart_test.go

View workflow job for this annotation

GitHub Actions / Run Go tests

undefined: presignedURL

Check failure on line 83 in esti/multipart_test.go

View workflow job for this annotation

GitHub Actions / Test unified gc

undefined: presignedURL
lastModifiedString := res.Header.Get("Last-Modified")
underlyingLastModified, err := time.Parse(time.RFC1123, lastModifiedString)
require.NoError(t, err, "Last-Modified %s", lastModifiedString)
lakeFSMTime := time.Unix(statResp.JSON200.Mtime, 0)
require.Equal(t, lakeFSMTime, underlyingLastModified, "lakeFS mtime %s should be same as on underlying object %s", lakeFSMTime, underlyingLastModified)
}

func TestMultipartUploadAbort(t *testing.T) {
Expand Down
2 changes: 2 additions & 0 deletions pkg/block/s3/adapter.go
Original file line number Diff line number Diff line change
Expand Up @@ -776,6 +776,8 @@ func (a *Adapter) CompleteMultiPartUpload(ctx context.Context, obj block.ObjectP
return nil, err
}

lg.WithField("s3-stat", fmt.Sprintf("%+v", *headResp)).Info("[DEBUG] head-object")

etag := strings.Trim(aws.ToString(resp.ETag), `"`)
return &block.CompleteMultiPartUploadResponse{
ETag: etag,
Expand Down

0 comments on commit 43f63e0

Please sign in to comment.