Skip to content

Commit

Permalink
[Codegen][GPU] Fix delinearized index order in forall resolution (ire…
Browse files Browse the repository at this point in the history
  • Loading branch information
qedawkins authored Oct 8, 2024
1 parent 4636257 commit 7fb28e0
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -130,13 +130,12 @@ LogicalResult resolveGPUMappedForallOp(RewriterBase &rewriter,
{forLoop.getInductionVar(), flatId})
: forLoop.getInductionVar();

- // We require a descending relative mapping, so delinearize in reverse order.
+ // We require a descending relative mapping, so we can reuse the upper bound
+ // sizes directly.
  auto delinearize = rewriter.create<affine::AffineDelinearizeIndexOp>(
- loc, newFlatProducerId, llvm::to_vector(llvm::reverse(delinSizes)));
+ loc, newFlatProducerId, delinSizes);

- SmallVector<Value> newBlockArgs =
- llvm::map_to_vector(llvm::reverse(delinearize.getResults()),
- [](OpResult r) -> Value { return r; });
+ SmallVector<Value> newBlockArgs = delinearize.getResults();

// Step 4. Inline the region of the forall op.
Operation *forallTerminator = forallOp.getBody()->getTerminator();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,8 @@ func.func @distribute_thread_forall_multi_dim(%out : memref<?x?x?xi32>)
// CHECK: %[[LINID:.+]] = affine.apply
// CHECK-SAME: affine_map<(d0)[s0, s1, s2] -> (d0 + s0 + s1 * 64 + s2 * 128)>(%[[I]])
// CHECK-SAME: [%[[TX]], %[[TY]], %[[TZ]]]
- // CHECK: %[[DELIN:.+]]:3 = affine.delinearize_index %[[LINID]] into (%c4, %c8, %c16) : index
- // CHECK: memref.store {{.*}}[%[[DELIN]]#2, %[[DELIN]]#1, %[[DELIN]]#0]
+ // CHECK: %[[DELIN:.+]]:3 = affine.delinearize_index %[[LINID]] into (%c16, %c8, %c4) : index
+ // CHECK: memref.store {{.*}}[%[[DELIN]]#0, %[[DELIN]]#1, %[[DELIN]]#2]


// -----
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -627,8 +627,8 @@ hal.executable public @main {
// CHECK-DAG: %[[IDY:.+]] = gpu.thread_id y
// CHECK-DAG: %[[IDZ:.+]] = gpu.thread_id z
// CHECK: %[[LINID0:.+]] = affine.apply #[[$MAP]]()[%[[IDX]], %[[IDY]], %[[IDZ]]]
- // CHECK: %[[IDS:.+]]:2 = affine.delinearize_index %[[LINID0:.+]] into (%c8, %c4) : index, index
- // CHECK: %[[LINID1:.+]] = affine.apply #[[$MAP1]]()[%[[IDS]]#1, %[[IDS]]#0]
+ // CHECK: %[[IDS:.+]]:2 = affine.delinearize_index %[[LINID0:.+]] into (%c4, %c8) : index, index
+ // CHECK: %[[LINID1:.+]] = affine.apply #[[$MAP1]]()[%[[IDS]]#0, %[[IDS]]#1]
// CHECK: scf.for %{{.*}} = %c0 to %c256 step %c4 {{.*}} -> (vector<1x4xf32>)
// CHECK: scf.for %{{.*}} = %[[LINID1]] to %c4 step %c32
// CHECK: %[[READ:.+]] = vector.transfer_read {{.*}} : memref<128x256xf32, {{.*}}storage_buffer>>, vector<4xf32>
Expand Down

0 comments on commit 7fb28e0

Please sign in to comment.