Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix migration race condition #1995

Merged
merged 1 commit into from
Nov 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ gem "sprockets-rails", require: "sprockets/railtie"
gem "state_machines-activerecord"
gem "stimulus-rails"
gem "whenever"
gem "with_advisory_lock"

gem "net-imap", "~> 0.5.1", require: false
gem "net-pop", require: false
Expand Down
4 changes: 4 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -676,6 +676,9 @@ GEM
websocket-extensions (0.1.5)
whenever (1.0.0)
chronic (>= 0.6.3)
with_advisory_lock (5.1.0)
activerecord (>= 6.1)
zeitwerk (>= 2.6)
with_model (2.1.7)
activerecord (>= 6.0)
xpath (3.2.0)
Expand Down Expand Up @@ -778,6 +781,7 @@ DEPENDENCIES
web-console (>= 3.3.0)
webmock (~> 3.24)
whenever
with_advisory_lock
with_model (~> 2.1, >= 2.1.7)

RUBY VERSION
Expand Down
8 changes: 5 additions & 3 deletions app/services/migration/coordinator.rb
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,13 @@ def check_environment!
end

def run_migration
next_runnable_migrator = self.class.migrators.select(&:runnable?).first
Delayed::Job.with_advisory_lock("queue_next_migrators") do
next_runnable_migrators = self.class.migrators.select(&:runnable?)

return unless next_runnable_migrator
return unless next_runnable_migrators.any?

next_runnable_migrator.queue
next_runnable_migrators.each(&:queue)
end
end
end
end
15 changes: 12 additions & 3 deletions app/services/migration/migrators/base.rb
Original file line number Diff line number Diff line change
Expand Up @@ -243,16 +243,25 @@ def log_info(message)
end

def finalise_migration!
last_worker_to_finish = Migration::DataMigration.incomplete.where(model: self.class.model).one?
last_worker_to_finish = false

run_once_post_migration if last_worker_to_finish
Delayed::Job.with_advisory_lock("last_worker_#{self.class.model}") do
last_worker_to_finish = !other_incomplete_migrators.exists?
data_migration.update!(completed_at: 1.second.from_now)
end

data_migration.update!(completed_at: 1.second.from_now)
log_info("Migration completed")

# Queue a follow up migration to migrate any
# dependent models.
MigrationJob.perform_later if last_worker_to_finish

# Run any post-migration tasks.
run_once_post_migration if last_worker_to_finish
end

def other_incomplete_migrators
Migration::DataMigration.incomplete.where(model: self.class.model).where.not(id: data_migration.id)
end
end
end
40 changes: 24 additions & 16 deletions spec/features/migration_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,22 @@
RSpec.feature "Migration", :ecf_api_disabled, :in_memory_rails_cache, :rack_test_driver, type: :feature do
include Helpers::AdminLogin

# As jobs are ran sequentially in the test environment and the coordinator/migrators
# now use locking while re-queueing jobs we need to simulate a bit of async job running
# here to avoid issues.
def run_migration_and_wait_for_jobs_to_complete
perform_enqueued_jobs(only: MigrationJob) do
click_button "Run migration"
end

loop do
perform_enqueued_jobs
break if enqueued_jobs.empty?
end

visit current_path
end

context "when not authenticated" do
scenario "viewing the migrations page" do
visit npq_separation_migration_migrations_path
Expand Down Expand Up @@ -39,9 +55,9 @@
scenario "viewing the completed migration" do
visit npq_separation_migration_migrations_path

perform_enqueued_jobs do
click_button "Run migration"
end
run_migration_and_wait_for_jobs_to_complete

visit npq_separation_migration_migrations_path

expect(page).not_to have_content("A migration is currently in-progress")
expect(page).not_to have_content("It was started less than a minute ago.")
Expand Down Expand Up @@ -77,11 +93,9 @@
scenario "viewing the completed migration" do
visit npq_separation_migration_migrations_path

perform_enqueued_jobs do
click_button "Run migration"
end
run_migration_and_wait_for_jobs_to_complete

within ".data-migration-lead_provider" do
within first(".data-migration-lead_provider") do
expect(page).to have_css(".total-count", text: 3)
expect(page).to have_css(".failure-count", text: 1)
expect(page).to have_css(".percentage-successfully-migrated", text: "66%")
Expand Down Expand Up @@ -117,9 +131,7 @@
scenario "viewing the completed migration" do
visit npq_separation_migration_migrations_path

perform_enqueued_jobs do
click_button "Run migration"
end
run_migration_and_wait_for_jobs_to_complete

within ".data-migration-cohort" do
expect(page).to have_css(".total-count", text: 3)
Expand Down Expand Up @@ -163,9 +175,7 @@
scenario "viewing the completed migration" do
visit npq_separation_migration_migrations_path

perform_enqueued_jobs do
click_button "Run migration"
end
run_migration_and_wait_for_jobs_to_complete

within ".data-migration-statement" do
expect(page).to have_css(".total-count", text: 3)
Expand Down Expand Up @@ -206,9 +216,7 @@
scenario "viewing the completed migration" do
visit npq_separation_migration_migrations_path

perform_enqueued_jobs do
click_button "Run migration"
end
run_migration_and_wait_for_jobs_to_complete

within ".data-migration-user" do
expect(page).to have_css(".total-count", text: 3)
Expand Down
5 changes: 4 additions & 1 deletion spec/services/migration/coordinator_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,14 @@
describe "#migrate!" do
subject(:migrate) { instance.migrate! }

it "runs the next runnable migrator" do
it "queues the next runnable migrators" do
allow(described_class.migrators.first).to receive(:runnable?).and_return(false)
allow(described_class.migrators.second).to receive(:runnable?).and_return(true)
allow(described_class.migrators.last).to receive(:runnable?).and_return(true)

expect(described_class.migrators.first).not_to receive(:queue)
expect(described_class.migrators.second).to receive(:queue)
expect(described_class.migrators.last).to receive(:queue)

migrate
end
Expand Down
Loading