Skip to content

Commit

Permalink
[GPU][DT] Add support for materializing tensor.empty and linalg.fill …
Browse files Browse the repository at this point in the history
…ops (iree-org#18563)

The revision moves the materialization patterns for tensor.empty and
linalg.fill to the "populateShapeIndependentMaterializeEncodingPatterns"
set and updates the comments. This set of patterns lowers ops with
encodings to the same op with materialized types.

It adds the tile swizzle shape inference to the tensor.empty pattern and
moves the utility to the "Utility methods" section without changes.

This is a step towards iree-org#18554

---------

Signed-off-by: hanhanW <hanhan0912@gmail.com>
  • Loading branch information
hanhanW authored Sep 23, 2024
1 parent 9d7eb9f commit 0d9c5a8
Show file tree
Hide file tree
Showing 6 changed files with 67 additions and 38 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -466,7 +466,7 @@ materializeFuncOpEncodings(FunctionOpInterface funcOp,
auto materializeEncodingValueFn = getMaterializeEncodingValueFn(targetAttr);
populateMaterializeEncodingIntoPackUnPackPatterns(
materializeEncodingPattern, typeConverter, materializeEncodingValueFn);
populateIREEMaterializeEncodingIntoPackUnPackPatterns(
populateShapeIndependentMaterializeEncodingPatterns(
materializeEncodingPattern, target, typeConverter,
materializeEncodingValueFn);

Expand Down
7 changes: 4 additions & 3 deletions compiler/src/iree/compiler/Codegen/Common/EncodingUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,9 +129,10 @@ void populateMaterializeEncodingIntoPackUnPackPatterns(
MaterializeEncodingTypeConverter &typeConverter,
MaterializeEncodingValueFn materializeEncodingValueFn);

/// Populates the set of patterns that lowers IREE dialect (e.g., Flow, HAL,
/// etc.) ops with encoding types to pack/unpack ops.
void populateIREEMaterializeEncodingIntoPackUnPackPatterns(
/// Populates the set of patterns that lowers shape-like operations (e.g., Flow
/// ops, HAL ops, tensor.empty, linalg.fill, etc.) with encoding types to the
/// same op with materialized shapes.
void populateShapeIndependentMaterializeEncodingPatterns(
RewritePatternSet &patterns, MaterializeEncodingConversionTarget &target,
MaterializeEncodingTypeConverter &typeConverter,
MaterializeEncodingValueFn materializeEncodingValueFn);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -489,7 +489,7 @@ void GPUMaterializeDeviceEncodingPass::runOnOperation() {
MaterializeEncodingValueFn materializeEncodingValueFn =
[](RankedTensorType, OpBuilder,
Location) -> FailureOr<MaterializeEncodingValueInfo> { return {}; };
populateIREEMaterializeEncodingIntoPackUnPackPatterns(
populateShapeIndependentMaterializeEncodingPatterns(
patterns, target, typeConverter, materializeEncodingValueFn);

patterns.insert<GPUSetEncodingOpLoweringConversion,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,28 @@
// 1. MFMA_F32_16x16x4_F32
//-----------------------------------------------------------------------------

// Checks that tensor.empty and linalg.fill on an encoded tensor<255x513xf32>
// are materialized to the same ops on the packed type
// (tensor<2x33x8x4x16x4xf32>, per the CHECK lines below) instead of staying on
// the encoded type.
#encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], original_type = tensor<255x513xf32>,
user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>],
round_dims_to = array<i64: 16, 16, 16>>
#pipeline_layout = #hal.pipeline.layout<bindings = [
#hal.pipeline.binding<storage_buffer>,
#hal.pipeline.binding<storage_buffer>
]>
func.func @empty_fill_encoding_unroll8x8x4_MFMA_F32_16x16x4_F32() {
%c0 = arith.constant 0 : index
%0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<255x513xf32, #encoding>>
%cst = arith.constant 0.0 : f32
%1 = tensor.empty() : tensor<255x513xf32, #encoding>
%2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<255x513xf32, #encoding>) -> tensor<255x513xf32, #encoding>
flow.dispatch.tensor.store %2, %0, offsets = [0, 0], sizes = [255, 513], strides = [1, 1] : tensor<255x513xf32, #encoding> -> !flow.dispatch.tensor<writeonly:tensor<255x513xf32, #encoding>>
return
}
// Only the materialized shapes are checked; the fill semantics are unchanged.
// CHECK-LABEL: func.func @empty_fill_encoding_unroll8x8x4_MFMA_F32_16x16x4_F32
// CHECK:       %[[EMPTY:.+]] = tensor.empty() : tensor<2x33x8x4x16x4xf32>
// CHECK:       %{{.+}} = linalg.fill ins({{.+}}) outs(%[[EMPTY]]

// -----

#encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], original_type = tensor<255x513xf32>,
user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>],
round_dims_to = array<i64: 16, 16, 16>>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ struct MaterializeEncodingIntoNopPass final
MaterializeEncodingConversionTarget target(*context);
populateMaterializeEncodingIntoPackUnPackPatterns(
materializeEncodingPattern, typeConverter, materializeEncodingValueFn);
populateIREEMaterializeEncodingIntoPackUnPackPatterns(
populateShapeIndependentMaterializeEncodingPatterns(
materializeEncodingPattern, target, typeConverter,
materializeEncodingValueFn);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,33 @@ namespace mlir::iree_compiler {
// Utility methods
//===---------------------------------------------------------------------===//

// Utility to apply a tile-swizzling to a packed shape.
//
// The leading (source) dimensions of `packedShape` are kept as-is; the
// trailing inner-tile dimensions are replaced by the swizzle's expanded tile
// shape and then permuted by the swizzle permutation (shifted past the source
// dims). If there is no swizzle (or the shape is empty), the shape is returned
// unchanged.
static SmallVector<OpFoldResult>
getSwizzledShape(ArrayRef<OpFoldResult> packedShape,
                 MaterializeEncodingInfo encodingInfo) {
  if (!encodingInfo.swizzle || packedShape.empty()) {
    return SmallVector<OpFoldResult>(packedShape);
  }

  // Rank of the non-tile (source) dims preceding the inner tiles.
  int64_t outerRank = packedShape.size() - encodingInfo.innerTileSizes.size();

  // Permutation: identity over the outer dims, then the swizzle permutation
  // offset past them.
  SmallVector<int64_t> permutation =
      llvm::to_vector(llvm::seq<int64_t>(0, outerRank));
  for (int64_t dim : encodingInfo.swizzle->permutation) {
    permutation.push_back(dim + outerRank);
  }

  // Keep the outer dims and append the expanded tile dims as static index
  // attributes, then permute the whole vector.
  Builder builder(packedShape.front().getContext());
  SmallVector<OpFoldResult> swizzledShape(packedShape.take_front(outerRank));
  for (int64_t tileDim :
       getExpandedTileShape(encodingInfo.swizzle->expandShape)) {
    swizzledShape.push_back(builder.getIndexAttr(tileDim));
  }
  applyPermutationToVector(swizzledShape, permutation);
  return swizzledShape;
}

static Operation *dropEncodingAndCloneOp(OpBuilder &builder, Operation *op,
ValueRange convertedInputOperands,
ValueRange convertedOutputOperands) {
Expand Down Expand Up @@ -368,6 +395,7 @@ lowerOpWithEncoding(RewriterBase &rewriter, tensor::EmptyOp emptyOp,
SmallVector<OpFoldResult> newShape = tensor::PackOp::getResultShape(
rewriter, loc, sourceDims, *innerTileSizesOfr, encodingInfo->innerDimsPos,
encodingInfo->outerDimsPerm);
newShape = getSwizzledShape(newShape, *encodingInfo);
Operation *newEmptyOp = rewriter.create<tensor::EmptyOp>(
loc, newShape, emptyType.getElementType());
return newEmptyOp;
Expand Down Expand Up @@ -507,33 +535,6 @@ lowerOpWithEncoding(RewriterBase &rewriter, linalg::LinalgOp linalgOp,
.Default([](Operation *op) { return failure(); });
}

// Utility to apply a tile-swizzling to a packed shape.
//
// Keeps the leading (source) dims of `packedShape` in place, substitutes the
// trailing inner-tile dims with the swizzle's expanded tile shape, and permutes
// the result by the swizzle permutation (shifted past the source dims).
// Returns the shape unchanged when no swizzle is present.
static SmallVector<OpFoldResult>
getSwizzledShape(ArrayRef<OpFoldResult> packedShape,
MaterializeEncodingInfo encodingInfo) {
if (packedShape.empty() || !encodingInfo.swizzle) {
return SmallVector<OpFoldResult>(packedShape);
}

// Rank of the non-tile (source) dims that precede the inner tiles.
int64_t srcRank = packedShape.size() - encodingInfo.innerTileSizes.size();
// Identity over the source dims, then the swizzle permutation offset past
// them.
SmallVector<int64_t> perm = llvm::to_vector(llvm::seq<int64_t>(0, srcRank));
for (auto i : encodingInfo.swizzle->permutation) {
perm.push_back(i + srcRank);
}

// Source dims followed by the expanded tile dims as static index attrs.
SmallVector<OpFoldResult> newShape(packedShape.take_front(srcRank));
SmallVector<int64_t> expandedTileShape =
getExpandedTileShape(encodingInfo.swizzle->expandShape);
MLIRContext *ctx = packedShape[0].getContext();
Builder b(ctx);
for (int64_t d : expandedTileShape) {
newShape.push_back(b.getIndexAttr(d));
}
applyPermutationToVector(newShape, perm);

return newShape;
}

/// For `dispatchTensorType` that bind a `RankedTensorType` with encoding,
/// returns the materialized shape of the `dispatchTensorType`. The
/// dynamic dimensions of the `dispatchTensorType` are provided in
Expand Down Expand Up @@ -818,6 +819,11 @@ struct UnsetEncodingOpToUnPackOpConversion
};

/// Generic pattern to convert operations that are in destination-passing style.
/// TODO(hanchung): Implement a different pattern for non-elementwise
/// operations, because those should implement their own backend-specific
/// patterns. In the data-tiling concept, elementwise operations behave like
/// shape-like ops: they perform the same computation, just on differently
/// shaped operands.
template <typename OpTy>
struct MaterializeDPSOperation : public OpMaterializeEncodingPattern<OpTy> {
using OpMaterializeEncodingPattern<OpTy>::OpMaterializeEncodingPattern;
Expand Down Expand Up @@ -914,16 +920,14 @@ void populateMaterializeEncodingIntoPackUnPackPatterns(
MaterializeEncodingTypeConverter &typeConverter,
MaterializeEncodingValueFn materializeEncodingValueFn) {
MLIRContext *context = patterns.getContext();
patterns.insert<MaterializeDPSOperation<linalg::FillOp>,
MaterializeDPSOperation<linalg::GenericOp>,
MaterializeOperation<tensor::EmptyOp>,
patterns.insert<MaterializeDPSOperation<linalg::GenericOp>,
MaterializeContractionOp, SetEncodingOpToPackOpConversion,
UnsetEncodingOpToUnPackOpConversion>(
context, typeConverter, materializeEncodingValueFn);
memref::populateResolveRankedShapedTypeResultDimsPatterns(patterns);
}

void populateIREEMaterializeEncodingIntoPackUnPackPatterns(
void populateShapeIndependentMaterializeEncodingPatterns(
RewritePatternSet &patterns, MaterializeEncodingConversionTarget &target,
MaterializeEncodingTypeConverter &typeConverter,
MaterializeEncodingValueFn materializeEncodingValueFn) {
Expand All @@ -949,7 +953,9 @@ void populateIREEMaterializeEncodingIntoPackUnPackPatterns(
return resultType == typeConverter.convertType(resultType);
});

patterns.insert<MaterializeFlowDispatchTensorLoadOp,
patterns.insert<MaterializeDPSOperation<linalg::FillOp>,
MaterializeOperation<tensor::EmptyOp>,
MaterializeFlowDispatchTensorLoadOp,
MaterializeFlowDispatchTensorStoreOp,
MaterializeInterfaceBindingEncoding>(
context, typeConverter, materializeEncodingValueFn);
Expand Down

0 comments on commit 0d9c5a8

Please sign in to comment.