Skip to content

Commit

Permalink
Reduce number of queries needed to calculate batch ranges for backgro…
Browse files Browse the repository at this point in the history
…und migrations
  • Loading branch information
fatkodima committed Jan 21, 2024
1 parent 1248fe9 commit 78a9b85
Show file tree
Hide file tree
Showing 6 changed files with 36 additions and 26 deletions.
3 changes: 3 additions & 0 deletions .rubocop.yml
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,9 @@ Bundler/OrderedGems:
Metrics:
Enabled: false

Minitest/AssertOperator:
Enabled: false

Minitest/MultipleAssertions:
Enabled: false

Expand Down
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
## master (unreleased)

- Reduce number of queries needed to calculate batch ranges for background migrations
- Fix `finalize_column_type_change` to not recreate already existing indexes on the temporary column
- Remove potentially heavy queries used to get the ranges of a background migration

Expand Down
8 changes: 5 additions & 3 deletions lib/online_migrations/background_migrations/migration.rb
Original file line number Diff line number Diff line change
Expand Up @@ -197,9 +197,11 @@ def next_batch_range

on_shard do
# rubocop:disable Lint/UnreachableLoop
iterator.each_batch(of: batch_size, column: batch_column_name, start: next_min_value) do |relation|
arel_column = relation.arel_table[batch_column_name]
batch_range = relation.pick(arel_column.minimum, arel_column.maximum)
iterator.each_batch(of: batch_size, column: batch_column_name, start: next_min_value) do |relation, min_value, max_value|
if max_value.nil?
max_value = relation.pick(relation.arel_table[batch_column_name].maximum)
end
batch_range = [min_value, max_value]

break
end
Expand Down
39 changes: 24 additions & 15 deletions lib/online_migrations/batch_iterator.rb
Original file line number Diff line number Diff line change
Expand Up @@ -19,37 +19,42 @@ def each_batch(of: 1000, column: relation.primary_key, start: nil, finish: nil,
end

relation = apply_limits(self.relation, column, start, finish, order)
base_relation = relation.reselect(column).reorder(column => order)

base_relation = relation.except(:select)
.select(column)
.reorder(column => order)

start_row = base_relation.uncached { base_relation.first }

return if !start_row
start_id = start || begin
start_row = base_relation.uncached { base_relation.first }
start_row[column] if start_row
end

start_id = start_row[column]
arel_table = relation.arel_table

0.step do |index|
while start_id
if order == :asc
start_cond = arel_table[column].gteq(start_id)
else
start_cond = arel_table[column].lteq(start_id)
end

stop_row = base_relation.uncached do
last_row, stop_row = base_relation.uncached do
base_relation
.where(start_cond)
.offset(of)
.first
.offset(of - 1)
.first(2)
end

if last_row.nil?
# We are at the end of the table.
last_row, stop_row = base_relation.uncached do
base_relation
.where(start_cond)
.last(2)
end
end

batch_relation = relation.where(start_cond)

if stop_row
stop_id = stop_row[column]
start_id = stop_id

if order == :asc
stop_cond = arel_table[column].lt(stop_id)
Expand All @@ -64,10 +69,14 @@ def each_batch(of: 1000, column: relation.primary_key, start: nil, finish: nil,
# efficient UPDATE queries, hence we get rid of it.
batch_relation = batch_relation.except(:order)

last_id = (last_row && last_row[column]) || start_id

# Retaining the results in the query cache would undermine the point of batching.
batch_relation.uncached { yield batch_relation, index }
batch_relation.uncached { yield batch_relation, start_id, last_id }

break if stop_row.nil?

break if !stop_row
start_id = stop_id
end
end

Expand Down
9 changes: 2 additions & 7 deletions test/background_migrations/migration_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -259,11 +259,6 @@ def test_next_batch_range
assert_nil m.next_batch_range
end

def test_next_batch_range_empty_relation
m = create_migration(migration_name: "EmptyRelation")
assert_nil m.next_batch_range
end

def test_next_batch_range_on_edges
_user1, _user2, user3, _user4 = 4.times.map { User.create! }
m = create_migration(max_value: user3.id, batch_size: 2, sub_batch_size: 1)
Expand All @@ -279,8 +274,8 @@ def test_next_batch_range_on_composite_migration
m = create_migration(migration_name: "MakeAllDogsNice")
child1, _child2, child3 = m.children.to_a

assert_equal [100, 100], child1.next_batch_range
assert_equal [1000, 1000], child3.next_batch_range
assert_equal [1, 100], child1.next_batch_range
assert_equal [1, 1000], child3.next_batch_range
end

def test_mark_as_succeeded_when_not_all_jobs_succeeded
Expand Down
2 changes: 1 addition & 1 deletion test/schema_statements/update_column_in_batches_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def test_update_column_in_batches_expression_value
connection.update_column_in_batches(:milestones, :started_at, -> { "CURRENT_TIMESTAMP" })

assert_not_nil m1.reload.started_at
assert_equal m1.started_at, m2.reload.started_at
assert m2.reload.started_at > 1.minute.ago # started_at was updated
end

def test_update_column_in_batches_value_is_subquery
Expand Down

0 comments on commit 78a9b85

Please sign in to comment.