Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

api: fix bucket assignment on new publish #1950

Merged
merged 7 commits into from
Sep 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@
Changes
=======

Version 2.1.4 (2024-09-13)

- deposit: fix publish on the first time by removing syncing of buckets and only rely on
snapshot of the bucket

Version 2.1.3 (2024-08-14)

- flows: fix task revoke call
Expand Down
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ include scripts/bootstrap
include scripts/celery
include scripts/server
include scripts/setup
include scripts/setup-tests
recursive-include .github *
recursive-include benchmarks *.py
recursive-include cds *.md
Expand Down
21 changes: 9 additions & 12 deletions cds/modules/deposit/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,25 +304,22 @@ def _process_files(self, record_id, data):

# create a copy of the deposit bucket for the record
snapshot = self.files.bucket.snapshot()
snapshot.locked = False
self._fix_tags_refs_to_master(bucket=snapshot)
# dump after fixing references
self.files.bucket.locked = False
snapshot.sync(bucket=self.files.bucket, delete_extras=True)
self.files.bucket.locked = True
data["_files"] = self.files.dumps()

snapshot.locked = False
data["_files"] = self.files.dumps(bucket=snapshot)
# during the first publish the smil file is generated only the published
# bucket i.e the snapshot
data = self._generate_smil_file(record_id, data, snapshot)
snapshot.locked = True
# dump after smil generation
self.files.bucket.locked = False
snapshot.sync(bucket=self.files.bucket, delete_extras=True)
data["_files"] = self.files.dumps()
self.files.bucket.locked = True
data["_files"] = self.files.dumps(bucket=snapshot)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And here, without passing bucket=, we were serializing the bucket files of the deposit.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes that's correct

# dump the snapshot id to the record bucket
# we need this to avoid creatng a new bucket on `Record.create(...)`
snapshot.locked = False
data["_buckets"]["record"] = str(snapshot.id)
# dump record bucket also on deposit
self["_buckets"]["record"] = str(snapshot.id)

# lock snapshot bucket
snapshot.locked = True

yield data
Expand Down
5 changes: 5 additions & 0 deletions cds/modules/records/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,11 @@ def pid(self):
pid = self.record_fetcher(self.id, self)
return PersistentIdentifier.get(pid.pid_type, pid.pid_value)

@property
def ref(self):
"""Get video url (for the record if it's published)."""
return "https://cds.cern.ch/api/record/{0}".format(str(self["recid"]))

@property
def depid(self):
"""Return depid of the record."""
Expand Down
2 changes: 1 addition & 1 deletion cds/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,4 @@

"""CDS version."""

__version__ = "2.1.3"
__version__ = "2.1.4"
32 changes: 32 additions & 0 deletions scripts/setup-tests
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/usr/bin/env bash
# -*- coding: utf-8 -*-
#
# This file is part of CERN Document Server.
# Copyright (C) 2024 CERN.
#
# CERN Document Server is free software; you can redistribute it
# and/or modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# CERN Document Server is distributed in the hope that it will be
# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with CERN Document Server; if not, write to the
# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
# MA 02111-1307, USA.
#
# In applying this license, CERN does not
# waive the privileges and immunities granted to it by virtue of its status
# as an Intergovernmental Organization or submit itself to any jurisdiction.

set -e

pip install -e .[tests]

# build assets for the tests that require a built project
script_path=$(dirname "$0")
./"$script_path"/build-assets
16 changes: 15 additions & 1 deletion tests/unit/test_project_rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,11 @@ def check_connection(videos, project):
assert all({"$ref": video.ref} in project["videos"] for video in videos)
assert len(videos) == len(project["videos"])

def assert_bucket_for_video(bucket_id, video):
"""Check that the video files have the expected bucket_id."""
for f in video["_files"]:
assert f["bucket_id"] == bucket_id

project_schema = (
"https://cds.cern.ch/schemas/"
"deposits/records/videos/project/project-v1.0.0.json"
Expand Down Expand Up @@ -286,7 +291,8 @@ def check_connection(videos, project):

def get_video_record(depid):
deposit = deposit_video_resolver(depid)
return Video.get_record(deposit.fetch_published()[1].id)
published_deposit = deposit.fetch_published()[1]
return published_deposit

video_1 = get_video_record(video_1_dict["metadata"]["_deposit"]["id"])
video_2 = get_video_record(video_2_dict["metadata"]["_deposit"]["id"])
Expand All @@ -299,6 +305,14 @@ def get_video_record(depid):
assert project_dict["metadata"]["recid"] == 3
assert project_dict["metadata"]["videos"][0] == record_videos[0]
assert project_dict["metadata"]["videos"][1] == record_videos[1]

# Assert published videos have the correct bucket assigned
assert_bucket_for_video(
record_videos[0]["_buckets"]["record"], record_videos[0]
)
assert_bucket_for_video(
record_videos[1]["_buckets"]["record"], record_videos[1]
)
# check database: connection project <---> videos
check_connection(
record_videos,
Expand Down
Loading