CERNDocumentServer · zzacharo · Sep 13, 2024 · Sep 13, 2024 · Sep 13, 2024 · Sep 13, 2024
diff --git a/CHANGES.rst b/CHANGES.rst
@@ -25,6 +25,11 @@
 Changes
 =======
 
+Version 2.1.4 (2024-09-13)
+
+- deposit: fix first-time publish by removing the syncing of buckets and relying
+  only on the snapshot of the bucket
+
 Version 2.1.3 (2024-08-14)
 
 - flows: fix task revoke call

diff --git a/MANIFEST.in b/MANIFEST.in
@@ -43,6 +43,7 @@ include scripts/bootstrap
 include scripts/celery
 include scripts/server
 include scripts/setup
+include scripts/setup-tests
 recursive-include .github *
 recursive-include benchmarks *.py
 recursive-include cds *.md

diff --git a/cds/modules/deposit/api.py b/cds/modules/deposit/api.py
@@ -304,25 +304,22 @@ def _process_files(self, record_id, data):
 
             # create a copy of the deposit bucket for the record
             snapshot = self.files.bucket.snapshot()
+            snapshot.locked = False
             self._fix_tags_refs_to_master(bucket=snapshot)
             # dump after fixing references
-            self.files.bucket.locked = False
-            snapshot.sync(bucket=self.files.bucket, delete_extras=True)
-            self.files.bucket.locked = True
-            data["_files"] = self.files.dumps()
-
-            snapshot.locked = False
+            data["_files"] = self.files.dumps(bucket=snapshot)
+            # during the first publish the smil file is generated only in the
+            # published bucket, i.e. the snapshot
             data = self._generate_smil_file(record_id, data, snapshot)
-            snapshot.locked = True
             # dump after smil generation
-            self.files.bucket.locked = False
-            snapshot.sync(bucket=self.files.bucket, delete_extras=True)
-            data["_files"] = self.files.dumps()
-            self.files.bucket.locked = True
+            data["_files"] = self.files.dumps(bucket=snapshot)
             # dump the snapshot id to the record bucket
             # we need this to avoid creatng a new bucket on `Record.create(...)`
-            snapshot.locked = False
             data["_buckets"]["record"] = str(snapshot.id)
+            # dump record bucket also on deposit
+            self["_buckets"]["record"] = str(snapshot.id)
+
+            # lock snapshot bucket
             snapshot.locked = True
 
             yield data

diff --git a/cds/modules/records/api.py b/cds/modules/records/api.py
@@ -209,6 +209,11 @@ def pid(self):
         pid = self.record_fetcher(self.id, self)
         return PersistentIdentifier.get(pid.pid_type, pid.pid_value)
 
+    @property
+    def ref(self):
+        """Return the API URL of the (published) record, built from its recid."""
+        return "https://cds.cern.ch/api/record/{0}".format(str(self["recid"]))
+
     @property
     def depid(self):
         """Return depid of the record."""

diff --git a/cds/version.py b/cds/version.py
@@ -24,4 +24,4 @@
 
 """CDS version."""
 
-__version__ = "2.1.3"
+__version__ = "2.1.4"
diff --git a/scripts/setup-tests b/scripts/setup-tests
@@ -0,0 +1,32 @@
+#!/usr/bin/env bash
+# -*- coding: utf-8 -*-
+#
+# This file is part of CERN Document Server.
+# Copyright (C) 2024 CERN.
+#
+# CERN Document Server is free software; you can redistribute it
+# and/or modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2 of the
+# License, or (at your option) any later version.
+#
+# CERN Document Server is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with CERN Document Server; if not, write to the
+# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+# MA 02111-1307, USA.
+#
+# In applying this license, CERN does not
+# waive the privileges and immunities granted to it by virtue of its status
+# as an Intergovernmental Organization or submit itself to any jurisdiction.
+
+set -e
+
+pip install -e ".[tests]"
+
+# build assets (located next to this script) for tests that need a built project
+script_path=$(dirname "$0")
+"$script_path"/build-assets
diff --git a/tests/unit/test_project_rest.py b/tests/unit/test_project_rest.py
@@ -73,6 +73,11 @@ def check_connection(videos, project):
         assert all({"$ref": video.ref} in project["videos"] for video in videos)
         assert len(videos) == len(project["videos"])
 
+    def assert_bucket_for_video(bucket_id, video):
+        """Check that the video files have the expected bucket_id."""
+        for f in video["_files"]:
+            assert f["bucket_id"] == bucket_id
+
     project_schema = (
         "https://cds.cern.ch/schemas/"
         "deposits/records/videos/project/project-v1.0.0.json"
@@ -286,7 +291,8 @@ def check_connection(videos, project):
 
         def get_video_record(depid):
             deposit = deposit_video_resolver(depid)
-            return Video.get_record(deposit.fetch_published()[1].id)
+            published_deposit = deposit.fetch_published()[1]
+            return published_deposit
 
         video_1 = get_video_record(video_1_dict["metadata"]["_deposit"]["id"])
         video_2 = get_video_record(video_2_dict["metadata"]["_deposit"]["id"])
@@ -299,6 +305,14 @@ def get_video_record(depid):
         assert project_dict["metadata"]["recid"] == 3
         assert project_dict["metadata"]["videos"][0] == record_videos[0]
         assert project_dict["metadata"]["videos"][1] == record_videos[1]
+
+        # Assert published videos have the correct bucket assigned
+        assert_bucket_for_video(
+            record_videos[0]["_buckets"]["record"], record_videos[0]
+        )
+        assert_bucket_for_video(
+            record_videos[1]["_buckets"]["record"], record_videos[1]
+        )
         # check database: connection project <---> videos
         check_connection(
             record_videos,