From 3a6820fa65410661c52cfe5a3133d7537c21e9da Mon Sep 17 00:00:00 2001
From: Scott Todd
Date: Tue, 15 Oct 2024 13:07:29 -0700
Subject: [PATCH] Refactor matmul test suite. (#22)

Progress on https://github.com/iree-org/iree-test-suites/issues/2. See also the long [Discord thread here](https://discord.com/channels/689900678990135345/1270451599231156266).

## Summaries of changes

### Further decoupled test suites from the core CMake project

* Forked `iree_native_test.cmake` to `iree_test_suites_native_test.cmake`
  * Dropped support (temporarily?) for testing on Android, RISC-V, and ARM with SME
* Forked `iree_e2e_generated_runner_test.cmake` to `iree_test_suites_runner_test.cmake`
  * Dropped support (temporarily?) for filtering, within the build system, which tests are defined and which .vmfb files are compiled
* Now we can set `-DIREE_BUILD_TESTS=OFF` and avoid pulling in IREE's other tests
* Added a new hand-authored `linalg_ops/matmul/CMakeLists.txt` that runs tests on each backend using default flags

### Simplified the test generator

* Dropped unused functions
* Folded GPU-specific shapes into the generic "small" and "large" shape test suites

### Ran the `generate_e2e_matmul_tests.py` script offline and checked in the generated files

* Currently 56 files totaling 1.90MB on disk (~27000 lines of code according to GitHub)
* Now we can inspect the test cases without needing to run the generator locally, and I fixed a few formatting issues along the way
* I think this makes test suite management easier, and having the generated files in this test suites repository doesn't cost the main repository much (just extra `git checkout` time), but I could see a case for more tightly coupling the generator with the test runner

## What is left to do?

* I want to iterate some more on the `linalg_ops/matmul/CMakeLists.txt` file or move to a different test runner somehow. I mainly want to support XFAIL in some way for both compiling and running.
* We should add back tests using CPU features like AVX512, GPU features like Vulkan float16 extensions, and other non-default flags somehow. Either infer what the compiler can support from the host / target, or add test suites explicitly.
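
For reference, a single test case in the new hand-authored file looks roughly like this. This is a minimal sketch; the target name and paths are illustrative rather than copied verbatim from `linalg_ops/matmul/CMakeLists.txt`:

```cmake
# Minimal sketch of one test case declaration (illustrative name and paths):
# compiles the checked-in test + calls MLIR files for llvm-cpu and runs them
# through the matmul test runner on the local-task driver.
iree_test_suites_runner_test(
  NAME
    matmul_llvm-cpu_local-task_f32_into_f32_small
  TESTS_SRC
    "generated/f32_into_f32/matmul_f32_into_f32_small.mlir"
  CALLS_SRC
    "generated/f32_into_f32/matmul_f32_into_f32_small_calls.mlir"
  TEST_RUNNER
    iree-test-suites_iree-e2e-matmul-test
  TARGET_BACKEND
    "llvm-cpu"
  DRIVER
    "local-task"
)
```

Each such call compiles the tests and calls `.mlir` files to `.vmfb` modules with `--iree-hal-target-backends=<backend>`, then registers a CTest case that invokes the runner with `--device=<driver>` and both `--module=` flags. The checked-in `.mlir` files can be refreshed by re-running `linalg_ops/matmul/generate_test_mlir_files.sh` (added in this PR).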
--- linalg_ops/CMakeLists.txt | 8 +- .../iree_e2e_generated_runner_test.cmake | 502 ---- linalg_ops/iree_test_suites_native_test.cmake | 114 + linalg_ops/iree_test_suites_runner_test.cmake | 100 + linalg_ops/matmul/CMakeLists.txt | 2229 ++--------------- .../matmul/generate_e2e_matmul_tests.py | 92 +- linalg_ops/matmul/generate_test_mlir_files.sh | 89 + .../matmul_bf16_into_bf16_large.mlir | 136 + .../matmul_bf16_into_bf16_large_calls.mlir | 882 +++++++ .../matmul_bf16_into_bf16_small.mlir | 99 + .../matmul_bf16_into_bf16_small_calls.mlir | 906 +++++++ ...tmul_transpose_b_bf16_into_bf16_large.mlir | 136 + ...ranspose_b_bf16_into_bf16_large_calls.mlir | 882 +++++++ ...tmul_transpose_b_bf16_into_bf16_small.mlir | 99 + ...ranspose_b_bf16_into_bf16_small_calls.mlir | 906 +++++++ .../matmul_bf16_into_f32_large.mlir | 136 + .../matmul_bf16_into_f32_large_calls.mlir | 882 +++++++ .../matmul_bf16_into_f32_small.mlir | 99 + .../matmul_bf16_into_f32_small_calls.mlir | 906 +++++++ ...atmul_transpose_b_bf16_into_f32_large.mlir | 136 + ...transpose_b_bf16_into_f32_large_calls.mlir | 882 +++++++ ...atmul_transpose_b_bf16_into_f32_small.mlir | 99 + ...transpose_b_bf16_into_f32_small_calls.mlir | 906 +++++++ .../matmul_f16_into_f16_large.mlir | 136 + .../matmul_f16_into_f16_large_calls.mlir | 882 +++++++ .../matmul_f16_into_f16_small.mlir | 99 + .../matmul_f16_into_f16_small_calls.mlir | 906 +++++++ ...matmul_transpose_b_f16_into_f16_large.mlir | 136 + ..._transpose_b_f16_into_f16_large_calls.mlir | 882 +++++++ ...matmul_transpose_b_f16_into_f16_small.mlir | 99 + ..._transpose_b_f16_into_f16_small_calls.mlir | 906 +++++++ .../matmul_f16_into_f32_large.mlir | 136 + .../matmul_f16_into_f32_large_calls.mlir | 882 +++++++ .../matmul_f16_into_f32_small.mlir | 99 + .../matmul_f16_into_f32_small_calls.mlir | 906 +++++++ ...matmul_transpose_b_f16_into_f32_large.mlir | 136 + ..._transpose_b_f16_into_f32_large_calls.mlir | 882 +++++++ ...matmul_transpose_b_f16_into_f32_small.mlir | 99 + ..._transpose_b_f16_into_f32_small_calls.mlir | 906 +++++++ .../matmul_f32_into_f32_large.mlir | 136 + .../matmul_f32_into_f32_large_calls.mlir | 882 +++++++ .../matmul_f32_into_f32_small.mlir | 99 + .../matmul_f32_into_f32_small_calls.mlir | 906 +++++++ ...matmul_transpose_b_f32_into_f32_large.mlir | 136 + ..._transpose_b_f32_into_f32_large_calls.mlir | 882 +++++++ ...matmul_transpose_b_f32_into_f32_small.mlir | 99 + ..._transpose_b_f32_into_f32_small_calls.mlir | 906 +++++++ .../matmul_f8E4M3FNUZ_into_f32_large.mlir | 172 ++ ...atmul_f8E4M3FNUZ_into_f32_large_calls.mlir | 882 +++++++ .../matmul_f8E4M3FNUZ_into_f32_small.mlir | 131 + ...atmul_f8E4M3FNUZ_into_f32_small_calls.mlir | 906 +++++++ ...transpose_b_f8E4M3FNUZ_into_f32_large.mlir | 172 ++ ...ose_b_f8E4M3FNUZ_into_f32_large_calls.mlir | 882 +++++++ ...transpose_b_f8E4M3FNUZ_into_f32_small.mlir | 131 + ...ose_b_f8E4M3FNUZ_into_f32_small_calls.mlir | 906 +++++++ .../i8_into_i32/matmul_i8_into_i32_large.mlir | 136 + .../matmul_i8_into_i32_large_calls.mlir | 882 +++++++ .../i8_into_i32/matmul_i8_into_i32_small.mlir | 99 + .../matmul_i8_into_i32_small_calls.mlir | 906 +++++++ .../matmul_transpose_b_i8_into_i32_large.mlir | 136 + ...l_transpose_b_i8_into_i32_large_calls.mlir | 882 +++++++ .../matmul_transpose_b_i8_into_i32_small.mlir | 99 + ...l_transpose_b_i8_into_i32_small_calls.mlir | 906 +++++++ 63 files changed, 28985 insertions(+), 2607 deletions(-) delete mode 100644 linalg_ops/iree_e2e_generated_runner_test.cmake create mode 100644 
linalg_ops/iree_test_suites_native_test.cmake create mode 100644 linalg_ops/iree_test_suites_runner_test.cmake create mode 100755 linalg_ops/matmul/generate_test_mlir_files.sh create mode 100644 linalg_ops/matmul/generated/bf16_into_bf16/matmul_bf16_into_bf16_large.mlir create mode 100644 linalg_ops/matmul/generated/bf16_into_bf16/matmul_bf16_into_bf16_large_calls.mlir create mode 100644 linalg_ops/matmul/generated/bf16_into_bf16/matmul_bf16_into_bf16_small.mlir create mode 100644 linalg_ops/matmul/generated/bf16_into_bf16/matmul_bf16_into_bf16_small_calls.mlir create mode 100644 linalg_ops/matmul/generated/bf16_into_bf16/matmul_transpose_b_bf16_into_bf16_large.mlir create mode 100644 linalg_ops/matmul/generated/bf16_into_bf16/matmul_transpose_b_bf16_into_bf16_large_calls.mlir create mode 100644 linalg_ops/matmul/generated/bf16_into_bf16/matmul_transpose_b_bf16_into_bf16_small.mlir create mode 100644 linalg_ops/matmul/generated/bf16_into_bf16/matmul_transpose_b_bf16_into_bf16_small_calls.mlir create mode 100644 linalg_ops/matmul/generated/bf16_into_f32/matmul_bf16_into_f32_large.mlir create mode 100644 linalg_ops/matmul/generated/bf16_into_f32/matmul_bf16_into_f32_large_calls.mlir create mode 100644 linalg_ops/matmul/generated/bf16_into_f32/matmul_bf16_into_f32_small.mlir create mode 100644 linalg_ops/matmul/generated/bf16_into_f32/matmul_bf16_into_f32_small_calls.mlir create mode 100644 linalg_ops/matmul/generated/bf16_into_f32/matmul_transpose_b_bf16_into_f32_large.mlir create mode 100644 linalg_ops/matmul/generated/bf16_into_f32/matmul_transpose_b_bf16_into_f32_large_calls.mlir create mode 100644 linalg_ops/matmul/generated/bf16_into_f32/matmul_transpose_b_bf16_into_f32_small.mlir create mode 100644 linalg_ops/matmul/generated/bf16_into_f32/matmul_transpose_b_bf16_into_f32_small_calls.mlir create mode 100644 linalg_ops/matmul/generated/f16_into_f16/matmul_f16_into_f16_large.mlir create mode 100644 linalg_ops/matmul/generated/f16_into_f16/matmul_f16_into_f16_large_calls.mlir create mode 100644 linalg_ops/matmul/generated/f16_into_f16/matmul_f16_into_f16_small.mlir create mode 100644 linalg_ops/matmul/generated/f16_into_f16/matmul_f16_into_f16_small_calls.mlir create mode 100644 linalg_ops/matmul/generated/f16_into_f16/matmul_transpose_b_f16_into_f16_large.mlir create mode 100644 linalg_ops/matmul/generated/f16_into_f16/matmul_transpose_b_f16_into_f16_large_calls.mlir create mode 100644 linalg_ops/matmul/generated/f16_into_f16/matmul_transpose_b_f16_into_f16_small.mlir create mode 100644 linalg_ops/matmul/generated/f16_into_f16/matmul_transpose_b_f16_into_f16_small_calls.mlir create mode 100644 linalg_ops/matmul/generated/f16_into_f32/matmul_f16_into_f32_large.mlir create mode 100644 linalg_ops/matmul/generated/f16_into_f32/matmul_f16_into_f32_large_calls.mlir create mode 100644 linalg_ops/matmul/generated/f16_into_f32/matmul_f16_into_f32_small.mlir create mode 100644 linalg_ops/matmul/generated/f16_into_f32/matmul_f16_into_f32_small_calls.mlir create mode 100644 linalg_ops/matmul/generated/f16_into_f32/matmul_transpose_b_f16_into_f32_large.mlir create mode 100644 linalg_ops/matmul/generated/f16_into_f32/matmul_transpose_b_f16_into_f32_large_calls.mlir create mode 100644 linalg_ops/matmul/generated/f16_into_f32/matmul_transpose_b_f16_into_f32_small.mlir create mode 100644 linalg_ops/matmul/generated/f16_into_f32/matmul_transpose_b_f16_into_f32_small_calls.mlir create mode 100644 linalg_ops/matmul/generated/f32_into_f32/matmul_f32_into_f32_large.mlir create mode 100644 
linalg_ops/matmul/generated/f32_into_f32/matmul_f32_into_f32_large_calls.mlir create mode 100644 linalg_ops/matmul/generated/f32_into_f32/matmul_f32_into_f32_small.mlir create mode 100644 linalg_ops/matmul/generated/f32_into_f32/matmul_f32_into_f32_small_calls.mlir create mode 100644 linalg_ops/matmul/generated/f32_into_f32/matmul_transpose_b_f32_into_f32_large.mlir create mode 100644 linalg_ops/matmul/generated/f32_into_f32/matmul_transpose_b_f32_into_f32_large_calls.mlir create mode 100644 linalg_ops/matmul/generated/f32_into_f32/matmul_transpose_b_f32_into_f32_small.mlir create mode 100644 linalg_ops/matmul/generated/f32_into_f32/matmul_transpose_b_f32_into_f32_small_calls.mlir create mode 100644 linalg_ops/matmul/generated/f8E4M3FNUZ_into_f32/matmul_f8E4M3FNUZ_into_f32_large.mlir create mode 100644 linalg_ops/matmul/generated/f8E4M3FNUZ_into_f32/matmul_f8E4M3FNUZ_into_f32_large_calls.mlir create mode 100644 linalg_ops/matmul/generated/f8E4M3FNUZ_into_f32/matmul_f8E4M3FNUZ_into_f32_small.mlir create mode 100644 linalg_ops/matmul/generated/f8E4M3FNUZ_into_f32/matmul_f8E4M3FNUZ_into_f32_small_calls.mlir create mode 100644 linalg_ops/matmul/generated/f8E4M3FNUZ_into_f32/matmul_transpose_b_f8E4M3FNUZ_into_f32_large.mlir create mode 100644 linalg_ops/matmul/generated/f8E4M3FNUZ_into_f32/matmul_transpose_b_f8E4M3FNUZ_into_f32_large_calls.mlir create mode 100644 linalg_ops/matmul/generated/f8E4M3FNUZ_into_f32/matmul_transpose_b_f8E4M3FNUZ_into_f32_small.mlir create mode 100644 linalg_ops/matmul/generated/f8E4M3FNUZ_into_f32/matmul_transpose_b_f8E4M3FNUZ_into_f32_small_calls.mlir create mode 100644 linalg_ops/matmul/generated/i8_into_i32/matmul_i8_into_i32_large.mlir create mode 100644 linalg_ops/matmul/generated/i8_into_i32/matmul_i8_into_i32_large_calls.mlir create mode 100644 linalg_ops/matmul/generated/i8_into_i32/matmul_i8_into_i32_small.mlir create mode 100644 linalg_ops/matmul/generated/i8_into_i32/matmul_i8_into_i32_small_calls.mlir create mode 100644 linalg_ops/matmul/generated/i8_into_i32/matmul_transpose_b_i8_into_i32_large.mlir create mode 100644 linalg_ops/matmul/generated/i8_into_i32/matmul_transpose_b_i8_into_i32_large_calls.mlir create mode 100644 linalg_ops/matmul/generated/i8_into_i32/matmul_transpose_b_i8_into_i32_small.mlir create mode 100644 linalg_ops/matmul/generated/i8_into_i32/matmul_transpose_b_i8_into_i32_small_calls.mlir diff --git a/linalg_ops/CMakeLists.txt b/linalg_ops/CMakeLists.txt index 969e9e5..08cf318 100644 --- a/linalg_ops/CMakeLists.txt +++ b/linalg_ops/CMakeLists.txt @@ -29,10 +29,7 @@ set(IREE_PACKAGE_ROOT_DIR "${CMAKE_CURRENT_LIST_DIR}") set(IREE_PACKAGE_ROOT_PREFIX "iree-test-suites") set(IREE_BUILD_COMPILER OFF) set(IREE_BUILD_SAMPLES OFF) -# We should also be able to set -DIREE_BUILD_TESTS=OFF, but this currently -# depends on the core project's CMake functions like iree_native_test and -# iree_bytecode_module. 
-set(IREE_BUILD_TESTS ON) +set(IREE_BUILD_TESTS OFF) if(IREE_USE_LOCAL_REPO) message(STATUS "Using IREE repo at path '${IREE_LOCAL_REPO_PATH}'") @@ -122,6 +119,7 @@ iree_cc_binary( #------------------------------------------------------------------------------- list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_LIST_DIR}) -include(iree_e2e_generated_runner_test) +include(iree_test_suites_native_test) +include(iree_test_suites_runner_test) add_subdirectory(matmul) diff --git a/linalg_ops/iree_e2e_generated_runner_test.cmake b/linalg_ops/iree_e2e_generated_runner_test.cmake deleted file mode 100644 index a99409a..0000000 --- a/linalg_ops/iree_e2e_generated_runner_test.cmake +++ /dev/null @@ -1,502 +0,0 @@ -# Copyright 2021 The IREE Authors -# -# Licensed under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -include(CMakeParseArguments) - -# iree_e2e_runner_test() -# -# Creates a test using a specified test runner program for the specified -# test files. -# -# Parameters: -# NAME: Name of the target -# TEST_TYPE: Type of test (Currently, matmul and conv2d are supported). -# VARIANT_NAME: Variant name to suffix NAME with. -# Will reuse the same TEST_TYPE/calls vmfb files. -# TESTS_SRC: mlir source file with TEST_TYPE to be compiled to an IREE module. -# TESTS_VMFB: specifies the path to use for the generated IREE module. -# CALLS_SRC: mlir source file with calls to be compiled to an IREE module. -# CALLS_VMFB: specifies the path to use for the generated IREE module. -# TARGET_BACKEND: target backend to compile for. -# DRIVER: driver to run the module with. -# COMPILER_FLAGS: additional flags to pass to the compiler. Bytecode output -# format and backend flags are passed automatically. -# RUNNER_ARGS: additional args to pass to the trace-runner program. The driver -# and input file flags are passed automatically. -# LABELS: Additional labels to apply to the test. The package path and -# "driver=${DRIVER}" are added automatically. -# TEST_RUNNER: trace-runner program to run. -# TARGET_CPU_FEATURES: If specified, a string passed as argument to -# --iree-llvmcpu-target-cpu-features. -# TEST_DEFINED: Whether to define a test target. -# TEST_DISABLED: The test target will be skipped and its status will be -# 'Not Run'. -function(iree_e2e_runner_test) - if(NOT IREE_BUILD_TESTS) - return() - endif() - - # See comment in iree_check_test about this condition. 
- if(NOT IREE_BUILD_COMPILER AND NOT IREE_HOST_BIN_DIR) - return() - endif() - - cmake_parse_arguments( - _RULE - "" - "NAME;TEST_TYPE;VARIANT_NAME;TESTS_SRC;TESTS_VMFB;CALLS_SRC;CALLS_VMFB;TRACE;TARGET_BACKEND;DRIVER;TEST_RUNNER;TEST_DEFINED;TEST_DISABLED" - "COMPILER_FLAGS;RUNNER_ARGS;LABELS;TARGET_CPU_FEATURES" - ${ARGN} - ) - - iree_is_bytecode_module_test_excluded_by_labels(_EXCLUDED_BY_LABELS "${_RULE_LABELS}") - if(_EXCLUDED_BY_LABELS) - return() - endif() - - iree_package_name(_PACKAGE_NAME) - set(_NAME "${_PACKAGE_NAME}_${_RULE_NAME}") - - set(_BASE_COMPILER_FLAGS - "--iree-hal-target-backends=${_RULE_TARGET_BACKEND}" - ) - if (_RULE_TARGET_CPU_FEATURES) - list(APPEND _BASE_COMPILER_FLAGS "--iree-llvmcpu-target-cpu-features=${_RULE_TARGET_CPU_FEATURES}") - endif() - - if(NOT TARGET "${_NAME}_${_RULE_TEST_TYPE}_module") - iree_bytecode_module( - NAME - "${_RULE_NAME}_${_RULE_TEST_TYPE}_module" - MODULE_FILE_NAME - "${_RULE_TESTS_VMFB}" - SRC - "${_RULE_TESTS_SRC}" - FLAGS - "${_BASE_COMPILER_FLAGS}" - "${_RULE_COMPILER_FLAGS}" - ) - endif() - - if(NOT TARGET "${_NAME}_calls_module") - iree_bytecode_module( - NAME - "${_RULE_NAME}_calls_module" - MODULE_FILE_NAME - "${_RULE_CALLS_VMFB}" - SRC - "${_RULE_CALLS_SRC}" - FLAGS - "${_BASE_COMPILER_FLAGS}" - "${_RULE_COMPILER_FLAGS}" - ) - endif() - - # A target specifically for the test. We could combine this with the above, - # but we want that one to get pulled into iree_bytecode_module. - add_custom_target("${_NAME}${_RULE_VARIANT_NAME}" ALL) - add_dependencies( - "${_NAME}${_RULE_VARIANT_NAME}" - "${_NAME}_${_RULE_TEST_TYPE}_module" - "${_NAME}_calls_module" - "${_RULE_TEST_RUNNER}" - ) - - add_dependencies(iree-test-suites-linalg-ops-deps "${_NAME}${_RULE_VARIANT_NAME}") - - if(_RULE_TEST_DEFINED) - iree_native_test( - NAME - "${_RULE_NAME}${_RULE_VARIANT_NAME}" - DRIVER - "${_RULE_DRIVER}" - SRC - "${_RULE_TEST_RUNNER}" - DATA - ${_TESTS_VMFB} - ${_CALLS_VMFB} - ARGS - "--module={{${_TESTS_VMFB}}}" - "--module={{${_CALLS_VMFB}}}" - ${_RULE_RUNNER_ARGS} - LABELS - ${_RULE_LABELS} - DISABLED - ${_RULE_TEST_DISABLED} - ) - endif() -endfunction() - -# iree_single_backend_e2e_runner_test() -# -# Parameters: -# NAME: Name of the target -# TEST_TYPE: Type of test (Currently, matmul and conv are supported). -# GENERATOR: Program (at the moment, must be Python3) to run to generate the -# source file (and possibly a trace file and module path). It will be -# invoked with the following standard flags, in addition to GENERATOR_ARGS: -# --output_${TEST_TYPE}_mlir=${CMAKE_CURRENT_BINARY_DIR}/name_${TEST_TYPE}.mlir -# --output_calls_mlir=${CMAKE_CURRENT_BINARY_DIR}/name_calls.mlir -# and if TARGET_CPU_FEATURES is not empty: -# --requirements=${TARGET_CPU_FEATURES} -# GENERATOR_ARGS: additional args to pass to the generator program. -# TARGET_BACKEND: target backend to compile for. -# DRIVER: driver to run the module with. -# COMPILER_FLAGS: additional flags to pass to the compiler. Bytecode output -# format and backend flags are passed automatically. -# RUNNER_ARGS: additional args to pass to the trace-runner program. The driver -# and input file flags are passed automatically. -# LABELS: Additional labels to apply to the test. The package path and -# "driver=${DRIVER}" are added automatically. -# TEST_RUNNER: trace-runner program to run. -# TARGET_CPU_FEATURES: If specified, a string passed as argument to -# --iree-llvmcpu-target-cpu-features. 
-function(iree_single_backend_e2e_runner_test) - if(NOT IREE_BUILD_TESTS) - return() - endif() - - # Copied from iree_check_test. Refer to the comment there. - if(NOT IREE_BUILD_COMPILER AND NOT IREE_HOST_BIN_DIR) - return() - endif() - - cmake_parse_arguments( - _RULE - "" - "NAME;TEST_TYPE;GENERATOR;TARGET_BACKEND;DRIVER;TEST_RUNNER" - "GENERATOR_ARGS;COMPILER_FLAGS;RUNNER_ARGS;LABELS;TARGET_CPU_FEATURES" - ${ARGN} - ) - - # --------------------------------------------------------------------------- - # Bytecode module builds require - # 1. the compiler, either in the same build or provided in IREE_HOST_BIN_DIR - # 2. compiler support for _RULE_INPUT_TYPE - # 3. compiler support for _RULE_TARGET_BACKEND - set(_BYTECODE_MODULE_BUILD_ENABLED TRUE) - - # 1. Check for the compiler. - if(NOT IREE_BUILD_COMPILER AND NOT IREE_HOST_BIN_DIR) - set(_BYTECODE_MODULE_BUILD_ENABLED FALSE) - endif() - - # 2. Check target backend availability. - # Note: we can only reliably check for this when building the compiler host - # tools from source. If the tools are already built, we assume that all target - # backends are enabled. We could query the tools in the binary directory for - # support dynamically if optionality would be useful. - if(NOT IREE_HOST_BIN_DIR) - string(TOUPPER ${_RULE_TARGET_BACKEND} _UPPERCASE_TARGET_BACKEND) - string(REPLACE "-" "_" _NORMALIZED_TARGET_BACKEND ${_UPPERCASE_TARGET_BACKEND}) - # TODO(scotttodd): allow plugins to provide external backends here - if(NOT DEFINED IREE_TARGET_BACKEND_${_NORMALIZED_TARGET_BACKEND}) - message(SEND_ERROR "Unknown backend '${_RULE_TARGET_BACKEND}'. Check IREE_TARGET_BACKEND_* options.") - endif() - if(NOT IREE_TARGET_BACKEND_${_NORMALIZED_TARGET_BACKEND}) - set(_BYTECODE_MODULE_BUILD_ENABLED FALSE) - endif() - endif() - # --------------------------------------------------------------------------- - - # --------------------------------------------------------------------------- - # Tests are defined if _RULE_DRIVER is defined. - set(_TEST_DEFINED TRUE) - if(NOT DEFINED _RULE_DRIVER) - set(_TEST_DEFINED FALSE) - endif() - - # Test execution requires - # 1. the bytecode module build to be enabled - # 2. _RULE_DRIVER is defined and runtime support is enabled - # 3. no other label exclusions (e.g. 'optonly' test with 'debug' config) - set(_TEST_DISABLED FALSE) - - # 1. Check bytecode module build. - if(NOT _BYTECODE_MODULE_BUILD_ENABLED) - set(_TEST_DISABLED TRUE) - endif() - - # 2. Check driver availability. - if(DEFINED _RULE_DRIVER) - string(TOUPPER ${_RULE_DRIVER} _UPPERCASE_DRIVER) - string(REPLACE "-" "_" _NORMALIZED_DRIVER ${_UPPERCASE_DRIVER}) - if((NOT IREE_HAL_DRIVER_${_NORMALIZED_DRIVER}) AND - (NOT IREE_EXTERNAL_${_NORMALIZED_DRIVER}_HAL_DRIVER_FOUND)) - set(_TEST_DISABLED TRUE) - endif() - endif() - - # 3. Check label exclusions. 
- iree_is_bytecode_module_test_excluded_by_labels(_EXCLUDED_BY_LABELS "${_RULE_LABELS}") - if(_EXCLUDED_BY_LABELS) - set(_TEST_DISABLED TRUE) - endif() - - if((_TEST_DISABLED OR NOT _TEST_DEFINED) AND NOT IREE_BUILD_ALL_CHECK_TEST_MODULES) - set(_BYTECODE_MODULE_BUILD_ENABLED FALSE) - endif() - # --------------------------------------------------------------------------- - - iree_package_name(_PACKAGE_NAME) - set(_NAME "${_PACKAGE_NAME}_${_RULE_NAME}") - - set(_TESTS_SRC "${CMAKE_CURRENT_BINARY_DIR}/${_RULE_NAME}_${_RULE_TEST_TYPE}.mlir") - set(_CALLS_SRC "${CMAKE_CURRENT_BINARY_DIR}/${_RULE_NAME}_calls.mlir") - set(_TESTS_VMFB "${CMAKE_CURRENT_BINARY_DIR}/${_RULE_NAME}_${_RULE_TEST_TYPE}.vmfb") - set(_CALLS_VMFB "${CMAKE_CURRENT_BINARY_DIR}/${_RULE_NAME}_calls.vmfb") - - list(APPEND _GENERATOR_STANDARD_FLAGS "--output_${_RULE_TEST_TYPE}_mlir=${_TESTS_SRC}") - list(APPEND _GENERATOR_STANDARD_FLAGS "--output_calls_mlir=${_CALLS_SRC}") - if(_RULE_TARGET_CPU_FEATURES) - list(APPEND _GENERATOR_STANDARD_FLAGS "--requirements=${_RULE_TARGET_CPU_FEATURES}") - endif() - - if(NOT _BYTECODE_MODULE_BUILD_ENABLED) - return() - endif() - - add_custom_command( - COMMAND - "${Python3_EXECUTABLE}" - "${CMAKE_CURRENT_SOURCE_DIR}/${_RULE_GENERATOR}" - ${_GENERATOR_STANDARD_FLAGS} - ${_RULE_GENERATOR_ARGS} - OUTPUT - ${_TESTS_SRC} - ${_CALLS_SRC} - DEPENDS - ${_RULE_GENERATOR} - ) - - add_custom_target( - "${_NAME}_generated_files" - DEPENDS - ${_TESTS_SRC} - ${_CALLS_SRC} - ) - - # When using the llvm-cpu backend, the runtime build config may need to - # match the compiled executable config using (`--iree-llvmcpu-sanitize=`): - # - # | Runtime type | Compatible with these executable types | - # | -------------------- | -------------------------------------- | - # | Base (no sanitizers) | Base, ASan | - # | ASan | Base, ASan | - # | TSan | TSan (ABI break) | - - # Define the regular test suite, unless the config is llvm-cpu + TSan. - if(NOT _RULE_TARGET_BACKEND STREQUAL "llvm-cpu" OR NOT IREE_ENABLE_TSAN) - iree_e2e_runner_test( - NAME ${_RULE_NAME} - TEST_TYPE ${_RULE_TEST_TYPE} - VARIANT_NAME "" - TESTS_SRC ${_TESTS_SRC} - TESTS_VMFB ${_TESTS_VMFB} - CALLS_SRC ${_CALLS_SRC} - CALLS_VMFB ${_CALLS_VMFB} - TEST_RUNNER ${_RULE_TEST_RUNNER} - TARGET_BACKEND ${_RULE_TARGET_BACKEND} - DRIVER ${_RULE_DRIVER} - COMPILER_FLAGS ${_RULE_COMPILER_FLAGS} - RUNNER_ARGS ${_RULE_RUNNER_ARGS} - LABELS ${_RULE_LABELS} - TARGET_CPU_FEATURES ${_RULE_TARGET_CPU_FEATURES} - TEST_DEFINED ${_TEST_DEFINED} - TEST_DISABLED ${_TEST_DISABLED} - ) - # Note we are relying on the fact that the target created by - # iree_e2e_runner_test is _NAME, even though we passed _RULE_NAME to it, - # i.e. we are relying on the prefixing to be identical. - add_dependencies("${_NAME}" "${_NAME}_generated_files") - endif() - - # Define tests for AddressSanitizer (ASan) and ThreadSanitizer (TSan). - # Normally test suites should do this sort of branching at the leaves rather - # than modify the base CMake function directly, but sanitizers are applied - # at the build system uniformly, so until we decouple the test suites from - # source builds further this felt like a reasonable compromise. 
- if(_RULE_TARGET_BACKEND STREQUAL "llvm-cpu") - if(IREE_ENABLE_ASAN) - set(_ASAN_COMPILER_FLAGS ${_RULE_COMPILER_FLAGS}) - list(APPEND _ASAN_COMPILER_FLAGS "--iree-llvmcpu-link-embedded=false") - list(APPEND _ASAN_COMPILER_FLAGS "--iree-llvmcpu-sanitize=address") - iree_e2e_runner_test( - NAME ${_RULE_NAME} - TEST_TYPE ${_RULE_TEST_TYPE} - VARIANT_NAME "_asan" - TESTS_SRC ${_TESTS_SRC} - TESTS_VMFB ${_TESTS_VMFB} - CALLS_SRC ${_CALLS_SRC} - CALLS_VMFB ${_CALLS_VMFB} - TEST_RUNNER ${_RULE_TEST_RUNNER} - TARGET_BACKEND ${_RULE_TARGET_BACKEND} - DRIVER ${_RULE_DRIVER} - COMPILER_FLAGS ${_ASAN_COMPILER_FLAGS} - RUNNER_ARGS ${_RULE_RUNNER_ARGS} - LABELS ${_RULE_LABELS} - TARGET_CPU_FEATURES ${_RULE_TARGET_CPU_FEATURES} - TEST_DEFINED ${_TEST_DEFINED} - TEST_DISABLED ${_TEST_DISABLED} - ) - # Note we are relying on the fact that the target created by - # iree_e2e_runner_test is _NAME, even though we passed _RULE_NAME to it, - # i.e. we are relying on the prefixing to be identical. - add_dependencies("${_NAME}_asan" "${_NAME}_generated_files") - endif() - - if(IREE_ENABLE_TSAN) - set(_TSAN_COMPILER_FLAGS ${_RULE_COMPILER_FLAGS}) - list(APPEND _TSAN_COMPILER_FLAGS "--iree-llvmcpu-link-embedded=false") - list(APPEND _TSAN_COMPILER_FLAGS "--iree-llvmcpu-sanitize=thread") - iree_e2e_runner_test( - NAME ${_RULE_NAME} - VARIANT_NAME "_tsan" - TESTS_SRC ${_TESTS_SRC} - TESTS_VMFB ${_TESTS_VMFB} - CALLS_SRC ${_CALLS_SRC} - CALLS_VMFB ${_CALLS_VMFB} - TEST_RUNNER ${_RULE_TEST_RUNNER} - TARGET_BACKEND ${_RULE_TARGET_BACKEND} - DRIVER ${_RULE_DRIVER} - COMPILER_FLAGS ${_TSAN_COMPILER_FLAGS} - RUNNER_ARGS ${_RULE_RUNNER_ARGS} - LABELS ${_RULE_LABELS} - TARGET_CPU_FEATURES ${_RULE_TARGET_CPU_FEATURES} - TEST_DEFINED ${_TEST_DEFINED} - TEST_DISABLED ${_TEST_DISABLED} - ) - # Note we are relying on the fact that the target created by - # iree_e2e_runner_test is _NAME, even though we passed _RULE_NAME to it, - # i.e. we are relying on the prefixing to be identical. - add_dependencies("${_NAME}_tsan" "${_NAME}_generated_files") - endif() - endif() -endfunction() - - -# iree_generated_e2e_runner_test() -# -# Creates a set of iree_single_backend_e2e_runner_test's differing -# by target backend and driver. -# -# Mirrors the bzl rule of the same name. -# -# One test is generated per source and backend/driver pair. -# Parameters: -# NAME: Name of the target -# TEST_TYPE: Type of test (Currently, matmul and conv are supported). -# GENERATOR: Program (at the moment, must be Python3) to run to generate the -# source file (and possibly a trace file and module path). It will be -# invoked with the following standard flags, in addition to GENERATOR_ARGS: -# --output_${TEST_TYPE}_mlir=${CMAKE_CURRENT_BINARY_DIR}/name_${TEST_TYPE}.mlir -# --output_calls_mlir=${CMAKE_CURRENT_BINARY_DIR}/name_calls.mlir -# GENERATOR_ARGS: additional args to pass to the generator program. -# TARGET_BACKENDS: backends to compile the module for. These form pairs with -# the DRIVERS argument (due to cmake limitations they are separate list -# arguments). The lengths must exactly match. If no backends or drivers are -# specified, a test will be generated for every supported pair. -# DRIVERS: drivers to run the module with. These form pairs with the -# TARGET_BACKENDS argument (due to cmake limitations they are separate list -# arguments). The lengths must exactly match. If no backends or drivers are -# specified, a test will be generated for every supported pair. -# COMPILER_FLAGS: additional flags to pass to the compiler. 
Bytecode output -# format and backend flags are passed automatically. -# RUNNER_ARGS: additional args to pass to the trace-runner program. The driver -# and input file flags are passed automatically. -# LABELS: Additional labels to apply to the test. The package path and -# "driver=${DRIVER}" are added automatically. -# TEST_RUNNER: trace-runner program to run. -# TARGET_CPU_FEATURES_VARIANTS:list of target cpu features variants. Each -# entry is either "default" for the architecture defaults, or a colon- -# separated triple "arch:name:cpu_features" where "arch" filters -# for a target CPU architecture (in IREE_ARCH format), "name" is a -# short name for the CPU features set (used to generate target names) -# and cpu_features is a comma-separated list of LLVM target attributes -# to enable. Example: -# x86_64:avx2_fma:+avx,+avx2,+fma -function(iree_generated_e2e_runner_test) - if(NOT IREE_BUILD_TESTS) - return() - endif() - - cmake_parse_arguments( - _RULE - "" - "NAME;TEST_TYPE;GENERATOR;TEST_RUNNER" - "TARGET_BACKENDS;DRIVERS;GENERATOR_ARGS;COMPILER_FLAGS;RUNNER_ARGS;LABELS;TARGET_CPU_FEATURES_VARIANTS" - ${ARGN} - ) - - iree_is_bytecode_module_test_excluded_by_labels(_EXCLUDED_BY_LABELS "${_RULE_LABELS}") - if(_EXCLUDED_BY_LABELS) - return() - endif() - - if(_RULE_TARGET_CPU_FEATURES_VARIANTS) - set(_TARGET_CPU_FEATURES_VARIANTS "${_RULE_TARGET_CPU_FEATURES_VARIANTS}") - else() - set(_TARGET_CPU_FEATURES_VARIANTS "default") - endif() - - - if(NOT DEFINED _RULE_TARGET_BACKENDS AND NOT DEFINED _RULE_DRIVERS) - set(_RULE_TARGET_BACKENDS "vmvx" "vulkan-spirv" "llvm-cpu") - set(_RULE_DRIVERS "local-task" "vulkan" "local-task") - endif() - - list(LENGTH _RULE_TARGET_BACKENDS _TARGET_BACKEND_COUNT) - list(LENGTH _RULE_DRIVERS _DRIVER_COUNT) - - if(NOT _TARGET_BACKEND_COUNT EQUAL _DRIVER_COUNT) - message(SEND_ERROR - "TARGET_BACKENDS count ${_TARGET_BACKEND_COUNT} does not match DRIVERS count ${_DRIVER_COUNT}") - endif() - - math(EXPR _MAX_INDEX "${_TARGET_BACKEND_COUNT} - 1") - foreach(_INDEX RANGE "${_MAX_INDEX}") - list(GET _RULE_TARGET_BACKENDS ${_INDEX} _TARGET_BACKEND) - list(GET _RULE_DRIVERS ${_INDEX} _DRIVER) - foreach(_VARIANT_STRING IN LISTS _TARGET_CPU_FEATURES_VARIANTS) - parse_target_cpu_features_variant("${_VARIANT_STRING}" - _ENABLED _TARGET_CPU_FEATURES_NAME _TARGET_CPU_FEATURES) - if(NOT _ENABLED) - # The current entry is disabled on the target CPU architecture. - continue() - endif() - set(_TARGET_CPU_FEATURES_SUFFIX "") - set(_LABELS "${_RULE_LABELS}") - if (_TARGET_CPU_FEATURES_NAME) - set(_TARGET_CPU_FEATURES_SUFFIX "_${_TARGET_CPU_FEATURES_NAME}") - list(APPEND _LABELS "cpu_features=${_TARGET_CPU_FEATURES_NAME}") - endif() - iree_single_backend_e2e_runner_test( - NAME - "${_RULE_NAME}_${_TARGET_BACKEND}_${_DRIVER}${_TARGET_CPU_FEATURES_SUFFIX}" - TEST_TYPE - ${_RULE_TEST_TYPE} - GENERATOR - ${_RULE_GENERATOR} - GENERATOR_ARGS - ${_RULE_GENERATOR_ARGS} - TEST_RUNNER - ${_RULE_TEST_RUNNER} - TARGET_BACKEND - ${_TARGET_BACKEND} - DRIVER - ${_DRIVER} - COMPILER_FLAGS - ${_RULE_COMPILER_FLAGS} - RUNNER_ARGS - ${_RULE_RUNNER_ARGS} - LABELS - ${_LABELS} - TARGET_CPU_FEATURES - ${_TARGET_CPU_FEATURES} - ) - endforeach() - endforeach() -endfunction() diff --git a/linalg_ops/iree_test_suites_native_test.cmake b/linalg_ops/iree_test_suites_native_test.cmake new file mode 100644 index 0000000..a2c3a8f --- /dev/null +++ b/linalg_ops/iree_test_suites_native_test.cmake @@ -0,0 +1,114 @@ +# Copyright 2024 The IREE Authors +# +# Licensed under the Apache License v2.0 with LLVM Exceptions. 
+# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +include(CMakeParseArguments) + +# iree_test_suites_native_test() +# +# Creates a test that runs the specified binary with the specified arguments. +# +# Parameters: +# NAME: name of target +# DRIVER: If specified, will pass --device=DRIVER to the test binary and adds +# a driver label to the test. +# TODO(scotttodd): Remove automatic args/labels, push those up a level +# DATA: Additional input files needed by the test binary. +# ARGS: additional arguments passed to the test binary. +# --device=DRIVER is automatically added if specified. +# File-related arguments can be passed with `{{}}` locator, +# e.g., --input=@{{foo.npy}}. The locator is used to portably +# pass the file arguments to tests and add the file to DATA. +# SRC: binary target to run as the test. +# WILL_FAIL: The target will run, but its pass/fail status will be inverted. +# DISABLED: The target will be skipped and its status will be 'Not Run'. +# LABELS: Additional labels to apply to the test. The package path is added +# automatically. +# TIMEOUT: Test target timeout in seconds. +# +# Note: the DATA argument is not actually adding dependencies because CMake +# doesn't have a good way to specify a data dependency for a test. +# +# Usage: +# iree_cc_binary( +# NAME +# requires_args_to_run +# ... +# ) +# iree_test_suites_native_test( +# NAME +# requires_args_to_run_test +# ARGS +# --do-the-right-thing +# SRC +# ::requires_args_to_run +# ) + +function(iree_test_suites_native_test) + cmake_parse_arguments( + _RULE + "" + "NAME;SRC;DRIVER;WILL_FAIL;DISABLED" + "ARGS;LABELS;DATA;TIMEOUT" + ${ARGN} + ) + + # Prefix the test with the package name, so we get: iree_package_name + iree_package_name(_PACKAGE_NAME) + set(_NAME "${_PACKAGE_NAME}_${_RULE_NAME}") + iree_package_ns(_PACKAGE_NS) + iree_package_path(_PACKAGE_PATH) + set(_TEST_NAME "${_PACKAGE_PATH}/${_RULE_NAME}") + + # If driver was specified, add the corresponding test arg and label. + if(DEFINED _RULE_DRIVER) + list(APPEND _RULE_ARGS "--device=${_RULE_DRIVER}") + list(APPEND _RULE_LABELS "driver=${_RULE_DRIVER}") + endif() + + # Detect file location with `{{}}` and handle its portability for all entries + # in `_RULE_ARGS`. + foreach(_ARG ${_RULE_ARGS}) + string(REGEX MATCH ".*{{(.+)}}" _FILE_ARG "${_ARG}") + if(_FILE_ARG) + set(_FILE_PATH ${CMAKE_MATCH_1}) + list(APPEND _RULE_DATA "${_FILE_PATH}") + # remove the `{{}}` from `_ARG` and append it to `_TEST_ARGS`. + string(REGEX REPLACE "{{.+}}" "" _FILE_FLAG_PREFIX "${_ARG}") + list(APPEND _TEST_ARGS "${_FILE_FLAG_PREFIX}${_FILE_PATH}") + else() # naive append + list(APPEND _TEST_ARGS "${_ARG}") + endif(_FILE_ARG) + endforeach(_ARG) + + # Replace binary passed by relative ::name with iree::package::name + string(REGEX REPLACE "^::" "${_PACKAGE_NS}::" _SRC_TARGET ${_RULE_SRC}) + + add_test( + NAME + ${_TEST_NAME} + COMMAND + "$" + ${_TEST_ARGS} + ) + + # File extension cmake uses for the target platform. 
+ set_property(TEST ${TEST_NAME} APPEND PROPERTY ENVIRONMENT "IREE_DYLIB_EXT=${CMAKE_SHARED_LIBRARY_SUFFIX}") + + if (NOT DEFINED _RULE_TIMEOUT) + set(_RULE_TIMEOUT 60) + endif() + + list(APPEND _RULE_LABELS "${_PACKAGE_PATH}") + set_property(TEST ${_TEST_NAME} PROPERTY LABELS "${_RULE_LABELS}") + set_property(TEST "${_TEST_NAME}" PROPERTY REQUIRED_FILES "${_RULE_DATA}") + set_property(TEST ${_TEST_NAME} PROPERTY TIMEOUT ${_RULE_TIMEOUT}) + if(_RULE_WILL_FAIL) + set_property(TEST ${_TEST_NAME} PROPERTY WILL_FAIL ${_RULE_WILL_FAIL}) + endif() + if(_RULE_DISABLED) + set_property(TEST ${_TEST_NAME} PROPERTY DISABLED ${_RULE_DISABLED}) + endif() +endfunction() diff --git a/linalg_ops/iree_test_suites_runner_test.cmake b/linalg_ops/iree_test_suites_runner_test.cmake new file mode 100644 index 0000000..fc8ccfc --- /dev/null +++ b/linalg_ops/iree_test_suites_runner_test.cmake @@ -0,0 +1,100 @@ +# Copyright 2024 The IREE Authors +# +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +include(CMakeParseArguments) + +# iree_test_suites_runner_test() +# +# Creates a test using a specified test runner program for the specified +# test files. +# +# Parameters: +# NAME: Name of the target +# TESTS_SRC: MLIR source file to be compiled to an IREE module. +# CALLS_SRC: MLIR source file with calls to be compiled to an IREE module. +# TEST_RUNNER: Test runner program. +# TARGET_BACKEND: Target backend to compile for. +# DRIVER: Driver to run the module with. +# COMPILER_ARGS: additional args to pass to the compiler. +# Target backend flags are passed automatically. +# RUNNER_ARGS: Additional args to pass to the runner program. +# The device and input file flags are passed automatically. +# LABELS: Additional labels to apply to the test. +# "driver=${DRIVER}" is added automatically. +function(iree_test_suites_runner_test) + cmake_parse_arguments( + _RULE + "" + "NAME;TESTS_SRC;CALLS_SRC;TEST_RUNNER;TARGET_BACKEND;DRIVER" + "COMPILER_ARGS;RUNNER_ARGS;LABELS" + ${ARGN} + ) + + iree_package_name(_PACKAGE_NAME) + set(_NAME "${_PACKAGE_NAME}_${_RULE_NAME}") + + set(_BASE_COMPILER_FLAGS + "--iree-hal-target-backends=${_RULE_TARGET_BACKEND}" + ) + + set(_TESTS_VMFB "${CMAKE_CURRENT_BINARY_DIR}/${_RULE_NAME}.vmfb") + set(_CALLS_VMFB "${CMAKE_CURRENT_BINARY_DIR}/${_RULE_NAME}_calls.vmfb") + + iree_bytecode_module( + NAME + "${_RULE_NAME}_module" + MODULE_FILE_NAME + "${_TESTS_VMFB}" + SRC + "${_RULE_TESTS_SRC}" + FLAGS + "${_BASE_COMPILER_FLAGS}" + "${_RULE_COMPILER_FLAGS}" + ) + iree_bytecode_module( + NAME + "${_RULE_NAME}_calls_module" + MODULE_FILE_NAME + "${_CALLS_VMFB}" + SRC + "${_RULE_CALLS_SRC}" + FLAGS + "${_BASE_COMPILER_FLAGS}" + "${_RULE_COMPILER_FLAGS}" + ) + + # A target specifically for the test. We could combine this with the above, + # but we want that one to get pulled into iree_bytecode_module. 
+ add_custom_target("${_NAME}" ALL) + add_dependencies( + "${_NAME}" + "${_NAME}_module" + "${_NAME}_calls_module" + "${_RULE_TEST_RUNNER}" + ) + + add_dependencies(iree-test-suites-linalg-ops-deps "${_NAME}") + + iree_test_suites_native_test( + NAME + "${_RULE_NAME}${_RULE_VARIANT_NAME}" + DRIVER + "${_RULE_DRIVER}" + SRC + "${_RULE_TEST_RUNNER}" + DATA + ${_TESTS_VMFB} + ${_CALLS_VMFB} + ARGS + "--module={{${_TESTS_VMFB}}}" + "--module={{${_CALLS_VMFB}}}" + ${_RULE_RUNNER_ARGS} + LABELS + ${_RULE_LABELS} + DISABLED + ${_RULE_TEST_DISABLED} + ) +endfunction() diff --git a/linalg_ops/matmul/CMakeLists.txt b/linalg_ops/matmul/CMakeLists.txt index 346e251..b3d6a70 100644 --- a/linalg_ops/matmul/CMakeLists.txt +++ b/linalg_ops/matmul/CMakeLists.txt @@ -1,2036 +1,193 @@ -################################################################################ -# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from # -# tests/e2e/matmul/BUILD.bazel # -# # -# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary # -# CMake-only content. # -# # -# To disable autogeneration for this file entirely, delete this header. # -################################################################################ - -iree_add_all_subdirs() - -iree_generated_e2e_runner_test( - NAME - matmul_cpu_arm_sme_nondt_f32_small_transpose_lhs_peel - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f32" - "--acc_type=f32" - "--shapes=small" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling=false" - "--iree-llvmcpu-enable-scalable-vectorization" - "--iree-llvmcpu-target-triple=aarch64-unknown-unknown" - "--iree-preprocessing-pass-pipeline=builtin.module\(util.func\(iree-preprocessing-transpose-matmul-pass{input=lhs}\)\)" - "--iree-llvmcpu-vector-pproc-strategy=peel" - LABELS - "requires-arm-sme" - TARGET_CPU_FEATURES_VARIANTS - "arm_64:sme:+sve,+sme" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cpu_arm_sme_nondt_f32_small_transpose_lhs - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f32" - "--acc_type=f32" - "--shapes=small" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling=false" - "--iree-llvmcpu-enable-scalable-vectorization" - "--iree-llvmcpu-target-triple=aarch64-unknown-unknown" - "--iree-preprocessing-pass-pipeline=builtin.module\(util.func\(iree-preprocessing-transpose-matmul-pass{input=lhs}\)\)" - LABELS - "requires-arm-sme" - TARGET_CPU_FEATURES_VARIANTS - "arm_64:sme:+sve,+sme" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cpu_arm_sme_nondt_f32_small_peel - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f32" - "--acc_type=f32" - "--shapes=small" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling=false" - "--iree-llvmcpu-enable-scalable-vectorization" - "--iree-llvmcpu-target-triple=aarch64-unknown-unknown" - "--iree-llvmcpu-vector-pproc-strategy=peel" - LABELS - "requires-arm-sme" - TARGET_CPU_FEATURES_VARIANTS - "arm_64:sme:+sve,+sme" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cpu_arm_sme_nondt_f32_small - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - 
"--lhs_rhs_type=f32" - "--acc_type=f32" - "--shapes=small" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling=false" - "--iree-llvmcpu-enable-scalable-vectorization" - "--iree-llvmcpu-target-triple=aarch64-unknown-unknown" - LABELS - "requires-arm-sme" - TARGET_CPU_FEATURES_VARIANTS - "arm_64:sme:+sve,+sme" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cpu_arm_sme_nondt_f32_large_transpose_lhs_peel - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f32" - "--acc_type=f32" - "--shapes=large" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling=false" - "--iree-llvmcpu-enable-scalable-vectorization" - "--iree-llvmcpu-target-triple=aarch64-unknown-unknown" - "--iree-preprocessing-pass-pipeline=builtin.module\(util.func\(iree-preprocessing-transpose-matmul-pass{input=lhs}\)\)" - "--iree-llvmcpu-vector-pproc-strategy=peel" - LABELS - "requires-arm-sme" - TARGET_CPU_FEATURES_VARIANTS - "arm_64:sme:+sve,+sme" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cpu_arm_sme_nondt_f32_large_transpose_lhs - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f32" - "--acc_type=f32" - "--shapes=large" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling=false" - "--iree-llvmcpu-enable-scalable-vectorization" - "--iree-llvmcpu-target-triple=aarch64-unknown-unknown" - "--iree-preprocessing-pass-pipeline=builtin.module\(util.func\(iree-preprocessing-transpose-matmul-pass{input=lhs}\)\)" - LABELS - "requires-arm-sme" - TARGET_CPU_FEATURES_VARIANTS - "arm_64:sme:+sve,+sme" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cpu_arm_sme_nondt_f32_large_peel - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f32" - "--acc_type=f32" - "--shapes=large" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling=false" - "--iree-llvmcpu-enable-scalable-vectorization" - "--iree-llvmcpu-target-triple=aarch64-unknown-unknown" - "--iree-llvmcpu-vector-pproc-strategy=peel" - LABELS - "requires-arm-sme" - TARGET_CPU_FEATURES_VARIANTS - "arm_64:sme:+sve,+sme" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cpu_arm_sme_nondt_f32_large - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f32" - "--acc_type=f32" - "--shapes=large" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling=false" - "--iree-llvmcpu-enable-scalable-vectorization" - "--iree-llvmcpu-target-triple=aarch64-unknown-unknown" - LABELS - "requires-arm-sme" - TARGET_CPU_FEATURES_VARIANTS - "arm_64:sme:+sve,+sme" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cpu_dt_i8_i32_small - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=i8" - "--acc_type=i32" - "--shapes=small" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-llvmcpu-enable-ukernels=none" - LABELS - - TARGET_CPU_FEATURES_VARIANTS - 
"default" - "arm_64:dotprod:+dotprod" - "arm_64:i8mm:+i8mm" - "x86_64:avx512vnni:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512vnni" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cpu_dt_i8_i32_large - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=i8" - "--acc_type=i32" - "--shapes=large" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-llvmcpu-enable-ukernels=none" - LABELS - "noasan" - "notsan" - TARGET_CPU_FEATURES_VARIANTS - "default" - "arm_64:dotprod:+dotprod" - "arm_64:i8mm:+i8mm" - "x86_64:avx512vnni:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512vnni" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cpu_dt_f32_f32_small - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f32" - "--acc_type=f32" - "--shapes=small" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-llvmcpu-enable-ukernels=none" - LABELS - - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cpu_dt_f32_f32_large - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f32" - "--acc_type=f32" - "--shapes=large" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-llvmcpu-enable-ukernels=none" - LABELS - "noasan" - "notsan" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cpu_dt_f16_f16_small - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f16" - "--acc_type=f16" - "--shapes=small" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-llvmcpu-enable-ukernels=none" - LABELS - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "arm_64:fullfp16:+fullfp16" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cpu_dt_f16_f16_large - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f16" - "--acc_type=f16" - "--shapes=large" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-llvmcpu-enable-ukernels=none" - LABELS - "noasan" - "notsan" - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "arm_64:fullfp16:+fullfp16" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cpu_dt_f16_f32_small - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f16" - "--acc_type=f32" - "--shapes=small" - 
TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-llvmcpu-enable-ukernels=none" - LABELS - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "arm_64:fp16fml:+fp16fml" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cpu_dt_f16_f32_large - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f16" - "--acc_type=f32" - "--shapes=large" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-llvmcpu-enable-ukernels=none" - LABELS - "noasan" - "notsan" - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "arm_64:fp16fml:+fp16fml" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cpu_dt_bf16_bf16_small - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=bf16" - "--acc_type=bf16" - "--shapes=small" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-llvmcpu-enable-ukernels=none" - LABELS - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "x86_64:avx512bf16:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512bf16" - "arm_64:bf16:+bf16" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cpu_dt_bf16_bf16_large - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=bf16" - "--acc_type=bf16" - "--shapes=large" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-llvmcpu-enable-ukernels=none" - LABELS - "noasan" - "notsan" - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "x86_64:avx512bf16:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512bf16" - "arm_64:bf16:+bf16" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cpu_dt_bf16_f32_small - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=bf16" - "--acc_type=f32" - "--shapes=small" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-llvmcpu-enable-ukernels=none" - LABELS - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "x86_64:avx512bf16:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512bf16" - "arm_64:bf16:+bf16" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cpu_dt_bf16_f32_large - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=bf16" - "--acc_type=f32" - 
"--shapes=large" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-llvmcpu-enable-ukernels=none" - LABELS - "noasan" - "notsan" - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "x86_64:avx512bf16:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512bf16" - "arm_64:bf16:+bf16" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cpu_dt_uk_i8_i32_small - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=i8" - "--acc_type=i32" - "--shapes=small" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-llvmcpu-enable-ukernels=all" - LABELS - - TARGET_CPU_FEATURES_VARIANTS - "default" - "arm_64:dotprod:+dotprod" - "arm_64:i8mm:+i8mm" - "x86_64:avx512vnni:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512vnni" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cpu_dt_uk_i8_i32_large - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=i8" - "--acc_type=i32" - "--shapes=large" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-llvmcpu-enable-ukernels=all" - LABELS - "noasan" - "notsan" - TARGET_CPU_FEATURES_VARIANTS - "default" - "arm_64:dotprod:+dotprod" - "arm_64:i8mm:+i8mm" - "x86_64:avx512vnni:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512vnni" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cpu_dt_uk_f32_f32_small - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f32" - "--acc_type=f32" - "--shapes=small" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-llvmcpu-enable-ukernels=all" - LABELS - - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cpu_dt_uk_f32_f32_large - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f32" - "--acc_type=f32" - "--shapes=large" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-llvmcpu-enable-ukernels=all" - LABELS - "noasan" - "notsan" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cpu_dt_uk_f16_f16_small - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f16" - "--acc_type=f16" - "--shapes=small" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-llvmcpu-enable-ukernels=all" - LABELS - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - 
"x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "arm_64:fullfp16:+fullfp16" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cpu_dt_uk_f16_f16_large - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f16" - "--acc_type=f16" - "--shapes=large" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-llvmcpu-enable-ukernels=all" - LABELS - "noasan" - "notsan" - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "arm_64:fullfp16:+fullfp16" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cpu_dt_uk_f16_f32_small - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f16" - "--acc_type=f32" - "--shapes=small" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-llvmcpu-enable-ukernels=all" - LABELS - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "arm_64:fp16fml:+fp16fml" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cpu_dt_uk_f16_f32_large - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f16" - "--acc_type=f32" - "--shapes=large" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-llvmcpu-enable-ukernels=all" - LABELS - "noasan" - "notsan" - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "arm_64:fp16fml:+fp16fml" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cpu_dt_uk_bf16_bf16_small - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=bf16" - "--acc_type=bf16" - "--shapes=small" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-llvmcpu-enable-ukernels=all" - LABELS - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "x86_64:avx512bf16:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512bf16" - "arm_64:bf16:+bf16" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cpu_dt_uk_bf16_bf16_large - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=bf16" - "--acc_type=bf16" - "--shapes=large" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-llvmcpu-enable-ukernels=all" - LABELS - "noasan" - "notsan" - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - 
"x86_64:avx512bf16:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512bf16" - "arm_64:bf16:+bf16" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cpu_dt_uk_bf16_f32_small - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=bf16" - "--acc_type=f32" - "--shapes=small" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-llvmcpu-enable-ukernels=all" - LABELS - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "x86_64:avx512bf16:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512bf16" - "arm_64:bf16:+bf16" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cpu_dt_uk_bf16_f32_large - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=bf16" - "--acc_type=f32" - "--shapes=large" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-llvmcpu-enable-ukernels=all" - LABELS - "noasan" - "notsan" - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "x86_64:avx512bf16:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512bf16" - "arm_64:bf16:+bf16" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cpu_experimental_dt_i8_i32_small - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=i8" - "--acc_type=i32" - "--shapes=small" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - "--iree-llvmcpu-enable-ukernels=none" - LABELS - - TARGET_CPU_FEATURES_VARIANTS - "default" - "arm_64:dotprod:+dotprod" - "arm_64:i8mm:+i8mm" - "x86_64:avx512vnni:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512vnni" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cpu_experimental_dt_i8_i32_large - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=i8" - "--acc_type=i32" - "--shapes=large" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - "--iree-llvmcpu-enable-ukernels=none" - LABELS - "noasan" - "notsan" - TARGET_CPU_FEATURES_VARIANTS - "default" - "arm_64:dotprod:+dotprod" - "arm_64:i8mm:+i8mm" - "x86_64:avx512vnni:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512vnni" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cpu_experimental_dt_f32_f32_small - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f32" - "--acc_type=f32" - "--shapes=small" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - "--iree-llvmcpu-enable-ukernels=none" - LABELS - - 
TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cpu_experimental_dt_f32_f32_large - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f32" - "--acc_type=f32" - "--shapes=large" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - "--iree-llvmcpu-enable-ukernels=none" - LABELS - "noasan" - "notsan" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cpu_experimental_dt_f16_f16_small - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f16" - "--acc_type=f16" - "--shapes=small" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - "--iree-llvmcpu-enable-ukernels=none" - LABELS - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "arm_64:fullfp16:+fullfp16" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cpu_experimental_dt_f16_f16_large - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f16" - "--acc_type=f16" - "--shapes=large" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - "--iree-llvmcpu-enable-ukernels=none" - LABELS - "noasan" - "notsan" - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "arm_64:fullfp16:+fullfp16" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cpu_experimental_dt_f16_f32_small - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f16" - "--acc_type=f32" - "--shapes=small" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - "--iree-llvmcpu-enable-ukernels=none" - LABELS - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "arm_64:fp16fml:+fp16fml" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cpu_experimental_dt_f16_f32_large - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f16" - "--acc_type=f32" - "--shapes=large" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - "--iree-llvmcpu-enable-ukernels=none" - LABELS - "noasan" - "notsan" - "noriscv" - "nowasm" 
- TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "arm_64:fp16fml:+fp16fml" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cpu_experimental_dt_bf16_bf16_small - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=bf16" - "--acc_type=bf16" - "--shapes=small" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - "--iree-llvmcpu-enable-ukernels=none" - LABELS - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "x86_64:avx512bf16:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512bf16" - "arm_64:bf16:+bf16" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cpu_experimental_dt_bf16_bf16_large - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=bf16" - "--acc_type=bf16" - "--shapes=large" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - "--iree-llvmcpu-enable-ukernels=none" - LABELS - "noasan" - "notsan" - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "x86_64:avx512bf16:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512bf16" - "arm_64:bf16:+bf16" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cpu_experimental_dt_bf16_f32_small - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=bf16" - "--acc_type=f32" - "--shapes=small" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - "--iree-llvmcpu-enable-ukernels=none" - LABELS - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "x86_64:avx512bf16:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512bf16" - "arm_64:bf16:+bf16" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cpu_experimental_dt_bf16_f32_large - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=bf16" - "--acc_type=f32" - "--shapes=large" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - "--iree-llvmcpu-enable-ukernels=none" - LABELS - "noasan" - "notsan" - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "x86_64:avx512bf16:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512bf16" - "arm_64:bf16:+bf16" -) - -iree_generated_e2e_runner_test( - NAME - 
matmul_cpu_experimental_dt_uk_i8_i32_small - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=i8" - "--acc_type=i32" - "--shapes=small" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - "--iree-llvmcpu-enable-ukernels=all" - LABELS - - TARGET_CPU_FEATURES_VARIANTS - "default" - "arm_64:dotprod:+dotprod" - "arm_64:i8mm:+i8mm" - "x86_64:avx512vnni:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512vnni" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cpu_experimental_dt_uk_i8_i32_large - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=i8" - "--acc_type=i32" - "--shapes=large" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - "--iree-llvmcpu-enable-ukernels=all" - LABELS - "noasan" - "notsan" - TARGET_CPU_FEATURES_VARIANTS - "default" - "arm_64:dotprod:+dotprod" - "arm_64:i8mm:+i8mm" - "x86_64:avx512vnni:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512vnni" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cpu_experimental_dt_uk_f32_f32_small - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f32" - "--acc_type=f32" - "--shapes=small" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - "--iree-llvmcpu-enable-ukernels=all" - LABELS - - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cpu_experimental_dt_uk_f32_f32_large - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f32" - "--acc_type=f32" - "--shapes=large" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - "--iree-llvmcpu-enable-ukernels=all" - LABELS - "noasan" - "notsan" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cpu_experimental_dt_uk_f16_f16_small - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f16" - "--acc_type=f16" - "--shapes=small" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - "--iree-llvmcpu-enable-ukernels=all" - LABELS - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "arm_64:fullfp16:+fullfp16" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cpu_experimental_dt_uk_f16_f16_large - TEST_TYPE - matmul - GENERATOR - 
"generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f16" - "--acc_type=f16" - "--shapes=large" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - "--iree-llvmcpu-enable-ukernels=all" - LABELS - "noasan" - "notsan" - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "arm_64:fullfp16:+fullfp16" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cpu_experimental_dt_uk_f16_f32_small - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f16" - "--acc_type=f32" - "--shapes=small" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - "--iree-llvmcpu-enable-ukernels=all" - LABELS - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "arm_64:fp16fml:+fp16fml" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cpu_experimental_dt_uk_f16_f32_large - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f16" - "--acc_type=f32" - "--shapes=large" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - "--iree-llvmcpu-enable-ukernels=all" - LABELS - "noasan" - "notsan" - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "arm_64:fp16fml:+fp16fml" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cpu_experimental_dt_uk_bf16_bf16_small - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=bf16" - "--acc_type=bf16" - "--shapes=small" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - "--iree-llvmcpu-enable-ukernels=all" - LABELS - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "x86_64:avx512bf16:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512bf16" - "arm_64:bf16:+bf16" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cpu_experimental_dt_uk_bf16_bf16_large - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=bf16" - "--acc_type=bf16" - "--shapes=large" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - "--iree-llvmcpu-enable-ukernels=all" - LABELS - "noasan" - "notsan" - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - 
"x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "x86_64:avx512bf16:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512bf16" - "arm_64:bf16:+bf16" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cpu_experimental_dt_uk_bf16_f32_small - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=bf16" - "--acc_type=f32" - "--shapes=small" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - "--iree-llvmcpu-enable-ukernels=all" - LABELS - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "x86_64:avx512bf16:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512bf16" - "arm_64:bf16:+bf16" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cpu_experimental_dt_uk_bf16_f32_large - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=bf16" - "--acc_type=f32" - "--shapes=large" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - "--iree-llvmcpu-enable-ukernels=all" - LABELS - "noasan" - "notsan" - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "x86_64:avx512bf16:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512bf16" - "arm_64:bf16:+bf16" -) - -iree_generated_e2e_runner_test( - NAME - matmul_vmvx_experimental_dt_f32_f32 - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f32" - "--acc_type=f32" - "--shapes=small" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "vmvx" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - LABELS - -) - -iree_generated_e2e_runner_test( - NAME - matmul_vmvx_experimental_dt_uk_f32_f32 - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f32" - "--acc_type=f32" - "--shapes=small" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "vmvx" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - LABELS - -) - -iree_generated_e2e_runner_test( - NAME - matmul_cuda_experimental_dt_f32_f32 - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f32" - "--acc_type=f32" - "--shapes=small" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "cuda" - DRIVERS - "cuda" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - LABELS - -) - -iree_generated_e2e_runner_test( - NAME - matmul_spirv_experimental_dt_f32_f32 - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f32" - "--acc_type=f32" - "--shapes=small" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "vulkan-spirv" - DRIVERS - "vulkan" - COMPILER_FLAGS 
- "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - LABELS - -) - -iree_generated_e2e_runner_test( - NAME - matmul_vmvx_dt_uk_i8_small - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=i8" - "--acc_type=i32" - "--shapes=small" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "vmvx" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-vmvx-enable-microkernels" - "--iree-opt-data-tiling" -) - -iree_generated_e2e_runner_test( - NAME - matmul_vmvx_dt_uk_f32_small - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f32" - "--acc_type=f32" - "--shapes=small" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "vmvx" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-vmvx-enable-microkernels" - "--iree-opt-data-tiling" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cuda_f32_large_unaligned - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f32" - "--acc_type=f32" - "--shapes=gpu_large" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "cuda" - DRIVERS - "cuda" - COMPILER_FLAGS - "--iree-cuda-target=sm_80" - LABELS - "noasan" - "nomsan" - "notsan" - "noubsan" - "requires-gpu-sm80" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cuda_f16_large_unaligned - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f16" - "--acc_type=f32" - "--shapes=gpu_large" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "cuda" - DRIVERS - "cuda" - COMPILER_FLAGS - "--iree-cuda-target=sm_80" - LABELS - "noasan" - "nomsan" - "notsan" - "noubsan" - "requires-gpu-sm80" -) - -iree_generated_e2e_runner_test( - NAME - matmul_cuda_f32_large_splitk - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f32" - "--acc_type=f32" - "--shapes=large" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "cuda" - DRIVERS - "cuda" - COMPILER_FLAGS - "--iree-dispatch-creation-split-matmul-reduction=4" - LABELS - "noasan" - "nomsan" - "notsan" - "noubsan" - "requires-gpu-nvidia" - "noriscv" -) - -### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ### - -# To distinguish between CDNA(gfx9) and RDNA3(gfx11) -if(IREE_HIP_TEST_TARGET_CHIP MATCHES "^gfx9") - -unset(IREE_HIP_TEST_COMPILER_FLAGS) -list(APPEND IREE_HIP_TEST_COMPILER_FLAGS - "--iree-rocm-target-chip=${IREE_HIP_TEST_TARGET_CHIP}" -) - -if(IREE_HIP_TEST_TARGET_CHIP MATCHES "^gfx94") - -# I8 Intrinsics has different layout on CDNA3/gfx94x, -# and only CDNA3/gfx94x has F8 intrinsics. 
- -iree_generated_e2e_runner_test( - NAME - matmul_cdna_experimental_dt_f32_f32 - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f32" - "--acc_type=f32" - "--shapes=small" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "rocm" - DRIVERS - "hip" - COMPILER_FLAGS - ${IREE_HIP_TEST_COMPILER_FLAGS} - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - LABELS - "noasan" - "nomsan" - "notsan" - "noubsan" - "requires-gpu-cdna3" -) -endif() - -elseif(IREE_HIP_TEST_TARGET_CHIP MATCHES "^gfx11") - -unset(IREE_HIP_TEST_COMPILER_FLAGS) -list(APPEND IREE_HIP_TEST_COMPILER_FLAGS - "--iree-rocm-target-chip=${IREE_HIP_TEST_TARGET_CHIP}" -) - -iree_generated_e2e_runner_test( - NAME - matmul_rdna3_experimental_dt_f32_f32 - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f32" - "--acc_type=f32" - "--shapes=small" - TEST_RUNNER - iree-test-suites_iree-e2e-matmul-test - TARGET_BACKENDS - "rocm" - DRIVERS - "hip" - COMPILER_FLAGS - ${IREE_HIP_TEST_COMPILER_FLAGS} - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - LABELS - "noasan" - "nomsan" - "notsan" - "noubsan" - "requires-gpu-rdna3" -) - -endif() +# Copyright 2024 The IREE Authors +# +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +# TODO(scotttodd): add filtering here, in the helper functions, or in ctest to +# choose which tests to compile and run + +set(_SIZES) +list(APPEND _SIZES "large") +list(APPEND _SIZES "small") + +############################################################################### +# +# CPU - llvm-cpu on local-task, default flags. +# +############################################################################### + +set(_DTYPES) +list(APPEND _DTYPES "i8_into_i32") +list(APPEND _DTYPES "f32_into_f32") +list(APPEND _DTYPES "f16_into_f16") +list(APPEND _DTYPES "f16_into_f32") +list(APPEND _DTYPES "bf16_into_bf16") +list(APPEND _DTYPES "bf16_into_f32") +# list(APPEND _DTYPES "f8E4M3FNUZ_into_f32") # Unsupported data type. +foreach(_DTYPE IN LISTS _DTYPES) + foreach(_SIZE IN LISTS _SIZES) + iree_test_suites_runner_test( + NAME + matmul_cpu_${_DTYPE}_${_SIZE} + TESTS_SRC + "generated/${_DTYPE}/matmul_${_DTYPE}_${_SIZE}.mlir" + CALLS_SRC + "generated/${_DTYPE}/matmul_${_DTYPE}_${_SIZE}_calls.mlir" + TEST_RUNNER + iree-test-suites_iree-e2e-matmul-test + TARGET_BACKEND + "llvm-cpu" + DRIVER + "local-task" + COMPILER_ARGS + RUNNER_ARGS + LABELS + ) + endforeach() +endforeach() + +############################################################################### +# +# CPU - vmvx on local-task, default flags. +# +############################################################################### + +set(_DTYPES) +list(APPEND _DTYPES "i8_into_i32") +list(APPEND _DTYPES "f32_into_f32") +# list(APPEND _DTYPES "f16_into_f16") # Unsupported data type. +# list(APPEND _DTYPES "f16_into_f32") # Unsupported data type. +# list(APPEND _DTYPES "bf16_into_bf16") # Unsupported data type. +# list(APPEND _DTYPES "bf16_into_f32") # Unsupported data type. +# list(APPEND _DTYPES "f8E4M3FNUZ_into_f32") # Unsupported data type. +foreach(_DTYPE IN LISTS _DTYPES) + # Note: not running large tests on vmvx, too slow. 
+ set(_SIZE "small") + iree_test_suites_runner_test( + NAME + matmul_vmvx_${_DTYPE}_${_SIZE} + TESTS_SRC + "generated/${_DTYPE}/matmul_${_DTYPE}_${_SIZE}.mlir" + CALLS_SRC + "generated/${_DTYPE}/matmul_${_DTYPE}_${_SIZE}_calls.mlir" + TEST_RUNNER + iree-test-suites_iree-e2e-matmul-test + TARGET_BACKEND + "vmvx" + DRIVER + "local-task" + COMPILER_ARGS + RUNNER_ARGS + LABELS + ) +endforeach() + +############################################################################### +# +# GPU - Vulkan, default flags. +# +############################################################################### + +set(_DTYPES) +# list(APPEND _DTYPES "i8_into_i32") # Currently failing. +list(APPEND _DTYPES "f32_into_f32") +# list(APPEND _DTYPES "f16_into_f16") # Failing to compile. +# list(APPEND _DTYPES "f16_into_f32") # Failing to compile. +# list(APPEND _DTYPES "bf16_into_bf16") # Failing to compile. +# list(APPEND _DTYPES "bf16_into_f32") # Failing to compile. +# list(APPEND _DTYPES "f8E4M3FNUZ_into_f32") # Unsupported data type. +foreach(_DTYPE IN LISTS _DTYPES) + foreach(_SIZE IN LISTS _SIZES) + iree_test_suites_runner_test( + NAME + matmul_vulkan_${_DTYPE}_${_SIZE} + TESTS_SRC + "generated/${_DTYPE}/matmul_${_DTYPE}_${_SIZE}.mlir" + CALLS_SRC + "generated/${_DTYPE}/matmul_${_DTYPE}_${_SIZE}_calls.mlir" + TEST_RUNNER + iree-test-suites_iree-e2e-matmul-test + TARGET_BACKEND + "vulkan-spirv" + DRIVER + "vulkan" + COMPILER_ARGS + RUNNER_ARGS + LABELS + ) + endforeach() +endforeach() + +############################################################################### +# +# GPU - CUDA, default flags. +# +############################################################################### + +set(_DTYPES) +list(APPEND _DTYPES "i8_into_i32") +list(APPEND _DTYPES "f32_into_f32") +# list(APPEND _DTYPES "f16_into_f16") # Timeout running. +list(APPEND _DTYPES "f16_into_f32") +# list(APPEND _DTYPES "bf16_into_bf16") # Timeout running. +list(APPEND _DTYPES "bf16_into_f32") +# list(APPEND _DTYPES "f8E4M3FNUZ_into_f32") # Unsupported data type. +foreach(_DTYPE IN LISTS _DTYPES) + foreach(_SIZE IN LISTS _SIZES) + iree_test_suites_runner_test( + NAME + matmul_cuda_${_DTYPE}_${_SIZE} + TESTS_SRC + "generated/${_DTYPE}/matmul_${_DTYPE}_${_SIZE}.mlir" + CALLS_SRC + "generated/${_DTYPE}/matmul_${_DTYPE}_${_SIZE}_calls.mlir" + TEST_RUNNER + iree-test-suites_iree-e2e-matmul-test + TARGET_BACKEND + "cuda" + DRIVER + "cuda" + COMPILER_ARGS + RUNNER_ARGS + LABELS + ) + endforeach() +endforeach() + +############################################################################### +# +# GPU - ROCm/HIP, default flags. +# +############################################################################### + +set(_DTYPES) +list(APPEND _DTYPES "i8_into_i32") +list(APPEND _DTYPES "f32_into_f32") +list(APPEND _DTYPES "f16_into_f16") +list(APPEND _DTYPES "f16_into_f32") +list(APPEND _DTYPES "bf16_into_bf16") +list(APPEND _DTYPES "bf16_into_f32") +# list(APPEND _DTYPES "f8E4M3FNUZ_into_f32") # Failing to compile. 
+foreach(_DTYPE IN LISTS _DTYPES) + foreach(_SIZE IN LISTS _SIZES) + iree_test_suites_runner_test( + NAME + matmul_hip_${_DTYPE}_${_SIZE} + TESTS_SRC + "generated/${_DTYPE}/matmul_${_DTYPE}_${_SIZE}.mlir" + CALLS_SRC + "generated/${_DTYPE}/matmul_${_DTYPE}_${_SIZE}_calls.mlir" + TEST_RUNNER + iree-test-suites_iree-e2e-matmul-test + TARGET_BACKEND + "rocm" + DRIVER + "hip" + COMPILER_ARGS + "--iree-hip-target=${IREE_HIP_TEST_TARGET_CHIP}" + RUNNER_ARGS + LABELS + ) + endforeach() +endforeach() diff --git a/linalg_ops/matmul/generate_e2e_matmul_tests.py b/linalg_ops/matmul/generate_e2e_matmul_tests.py index 1565c0b..0f6f8e0 100644 --- a/linalg_ops/matmul/generate_e2e_matmul_tests.py +++ b/linalg_ops/matmul/generate_e2e_matmul_tests.py @@ -33,8 +33,6 @@ class MatrixElemTypeId(enum.Enum): class ShapesId(enum.Enum): SMALL = "small" LARGE = "large" - GPU_LARGE = "gpu_large" - GPU_LARGE_ALIGNED = "gpu_large_aligned" # Enumerates ways to construct MLIR tensor types. @@ -45,13 +43,6 @@ class Dynamicity(enum.Enum): MIXED = "mixed" # Randomly mix '?' and values. Example: tensor. -# Enumerates ways to initialize matrix buffer contents. -@enum.unique -class MatrixGenerator(enum.Enum): - ZERO = "zero" # Fill with zeros - RANDOM = "random" # Fill with (deterministic) pseudorandom values. - - # Describes the shape of a matrix multiplication in the usual convention: # the LHS is {m}x{k}, the RHS is {k}x{n}, the accumulator/result is {m}x{n}. # The extra `accumulate` boolean tells whether the matmul is accumulating into @@ -73,9 +64,8 @@ def get_test_shapes(shapes_id: ShapesId): # build and execution latency of tests. The build latency is nearly the # same for all shapes, while execution latency grows cubicly i.e. # linearly with m*k*n. - # 2. Some shapes are commented out: they used to be tested but have been - # disabled to improve the trade-off between test coverage and build - # latency. + # 2. Some shapes may be commented out to improve the trade-off between test + # coverage and build latency. if shapes_id == ShapesId.SMALL: return [ # square matrices. Start by the simplest case of 1x1x1. @@ -107,57 +97,35 @@ def get_test_shapes(shapes_id: ShapesId): ] if shapes_id == ShapesId.LARGE: return [ - # some random large sizes - TestShape(m=123, k=456, n=789, accumulate=True), - TestShape(m=654, k=321, n=234, accumulate=False), - # shapes involving vectors (i.e. most rectangular cases) - TestShape(m=1, k=1000, n=1000, accumulate=True), # large vector*matrix - TestShape(m=1000, k=1000, n=1, accumulate=True), # large matrix*vector - TestShape(m=1000, k=1000, n=1, accumulate=False), # large matrix*vector - # Be conservative in adding larger shapes. They can result in - # high latency tests. If you have to, consider splitting them - # out in a way that constrains the latency impact, e.g. by - # running on fewer backends/drivers or with fewer generators - # (see get_test_generators). - ] - if shapes_id == ShapesId.GPU_LARGE_ALIGNED: - return [ + # Large aligned sizes. TestShape(m=512, k=128, n=512, accumulate=True), TestShape(m=512, k=128, n=512, accumulate=False), - ] - if shapes_id == ShapesId.GPU_LARGE: - return [ - # unaligned cases. 
- TestShape(m=457, k=330, n=512, accumulate=False), - TestShape(m=457, k=330, n=514, accumulate=False), - TestShape(m=438, k=330, n=514, accumulate=False), - TestShape(m=540, k=332, n=516, accumulate=False), TestShape(m=1000, k=4, n=512, accumulate=False), TestShape(m=4, k=1000, n=512, accumulate=False), TestShape(m=512, k=1000, n=4, accumulate=False), TestShape(m=512, k=128, n=500, accumulate=False), + # Large unaligned sizes. + # TestShape(m=123, k=456, n=789, accumulate=True), # Failing on Vulkan + TestShape(m=457, k=330, n=512, accumulate=False), + TestShape(m=457, k=330, n=514, accumulate=False), + TestShape(m=438, k=330, n=514, accumulate=False), + TestShape(m=540, k=332, n=516, accumulate=False), + TestShape(m=654, k=321, n=234, accumulate=False), TestShape(m=457, k=160, n=512, accumulate=False), TestShape(m=512, k=330, n=512, accumulate=False), + # Shapes involving vectors (i.e. most rectangular cases). + TestShape(m=1, k=1000, n=1000, accumulate=True), # large vector*matrix + TestShape(m=1000, k=1000, n=1, accumulate=True), # large matrix*vector + TestShape(m=1000, k=1000, n=1, accumulate=False), # large matrix*vector + # Be conservative in adding larger shapes. They can result in + # high latency tests. If you have to, consider splitting them + # out in a way that constrains the latency impact, e.g. by + # running on fewer backends/drivers. ] raise ValueError(shapes_id) -# Returns the list of Dynamicity's to use for the collection of shapes -# identified by shapes_id. -def get_dynamicities(shapes_id: ShapesId): - if shapes_id == ShapesId.GPU_LARGE or shapes_id == ShapesId.GPU_LARGE_ALIGNED: - return [ - Dynamicity.STATIC, - ] - else: - return [ - Dynamicity.DYNAMIC, - Dynamicity.STATIC, - ] - raise ValueError(shapes_id) - - # A shape dimension value, i.e. a size value that could appear in a MLIR type # such as 'tensor'. None means a dynamic size, similar to '?' in MLIR. @dataclasses.dataclass @@ -312,14 +280,14 @@ def generate_function( compute = ( f" %lhs_casted = {castback_op} %lhs: {lhs_tensor_type} to {compute_lhs_tensor_type}\n" f" %rhs_casted = {castback_op} %rhs: {rhs_tensor_type} to {compute_rhs_tensor_type}\n" - f" %result = {op_name} ins(%lhs_casted, %rhs_casted: {compute_lhs_tensor_type}, {compute_rhs_tensor_type}) outs(%acc: {acc_tensor_type}) -> {acc_tensor_type}" + f" %result = {op_name} ins(%lhs_casted, %rhs_casted: {compute_lhs_tensor_type}, {compute_rhs_tensor_type}) outs(%acc: {acc_tensor_type}) -> {acc_tensor_type}\n" ) if shape.accumulate: signature = f"({lhs_tensor_type}, {rhs_tensor_type}, {acc_tensor_type}) -> {acc_tensor_type}" import_declaration = f"func.func private @module.{func_name}(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view" func_definition = func_definition + ( f"func.func @{func_name}(%lhs: {lhs_tensor_type}, %rhs: {rhs_tensor_type}, %acc: {acc_tensor_type}) -> {acc_tensor_type} {{\n" - f"{compute}\n" + f"{compute}" f" return %result: {acc_tensor_type}\n" f"}}\n" ) @@ -375,17 +343,6 @@ class TestCall: pseudorandom_generator_seed = 1 -def contents_generator_tag(generator: MatrixGenerator): - if generator == MatrixGenerator.ZERO: - return "" - elif generator == MatrixGenerator.RANDOM: - global pseudorandom_generator_seed - pseudorandom_generator_seed = pseudorandom_generator_seed + 1 - return f"!tag:iree:fully_specified_pseudorandom {pseudorandom_generator_seed}" - else: - raise ValueError(generator) - - # Generate a matrix function argument of the given size as `%name`. 
def generate_random_matrix( name: str, @@ -483,7 +440,7 @@ def generate( calls = [] for shape in get_test_shapes(shapes_id): - for dynamicity in get_dynamicities(shapes_id): + for dynamicity in [Dynamicity.DYNAMIC, Dynamicity.STATIC]: function = generate_function( lhs_rhs_type, acc_type, @@ -558,13 +515,20 @@ def parse_arguments(): def write_code_file(functions, filename): + # TODO(scotttodd): write "GENERATED BY" comment to the top of the file + with open(filename, "w") as file: for function in functions.values(): file.write(function.definition + "\n") def write_calls_file(functions, calls, filename, requirements): + # TODO(scotttodd): write "GENERATED BY" comment to the top of the file + # Module-level reflection information used to control the test tool. + # TODO(scotttodd): drop this and whatever logic in the test tool used it + # multiple backends should be able to use the same input IR, so the + # input IR shouldn't need things like CPU features in it reflection = "" if requirements: reflection = ( diff --git a/linalg_ops/matmul/generate_test_mlir_files.sh b/linalg_ops/matmul/generate_test_mlir_files.sh new file mode 100755 index 0000000..0f797da --- /dev/null +++ b/linalg_ops/matmul/generate_test_mlir_files.sh @@ -0,0 +1,89 @@ +#!/bin/bash + +# Copyright 2024 The IREE Authors +# +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +# This script runs generate_e2e_matmul_tests for all argument combinations that +# we are interested in testing. +# +# The output is a 'generated' folder with contents like this: +# linalg_ops/ +# matmul/ +# generated/ +# f16_into_f16/ +# matmul_f16_into_f16_large_calls.mlir +# matmul_f16_into_f16_large.mlir +# matmul_f16_into_f16_small_calls.mlir +# matmul_f16_into_f16_small.mlir +# ... +# matmul_transpose_b_f16_into_f16_large_calls.mlir +# matmul_transpose_b_f16_into_f16_large.mlir +# matmul_transpose_b_f16_into_f16_small_calls.mlir +# matmul_transpose_b_f16_into_f16_small.mlir +# f16_into_f32/ +# ... +# f32_into_f32 +# ... +# ... +# +# Usage: +# generate_test_mlir_files.sh + +set -euo pipefail + +this_dir="$(cd $(dirname $0) && pwd)" +generated_dir_root="${this_dir}/generated" + +# Reset generated directory. 
+rm -rf ${generated_dir_root?} +mkdir -p ${generated_dir_root?} + +shapes=( + "small" + "large" +) + +# lhs_rhs_type;accumulator_type +type_combinations=( + "i8;i32" + "f32;f32" + "f16;f16" + "f16;f32" + "bf16;bf16" + "bf16;f32" + "f8E4M3FNUZ;f32" +) + +for type_combination in ${type_combinations[@]}; do + IFS=";" read -r -a types <<< "${type_combination}" + lhs_rhs_type="${types[0]}" + acc_type="${types[1]}" + type_name="${lhs_rhs_type}_into_${acc_type}" + + type_combination_dir="${generated_dir_root}/${type_name}" + mkdir -p ${type_combination_dir} + + for shape in ${shapes[@]}; do + echo "Generating matmul test files for ${type_name}_${shape}" + + name="matmul_${type_name}_${shape}" + python ${this_dir}/generate_e2e_matmul_tests.py \ + --output_matmul_mlir=${type_combination_dir}/${name}.mlir \ + --output_calls_mlir=${type_combination_dir}/${name}_calls.mlir \ + --lhs_rhs_type=${lhs_rhs_type} \ + --acc_type=${acc_type} \ + --shapes=${shape} + + name="matmul_transpose_b_${type_name}_${shape}" + python ${this_dir}/generate_e2e_matmul_tests.py \ + --output_matmul_mlir=${type_combination_dir}/${name}.mlir \ + --output_calls_mlir=${type_combination_dir}/${name}_calls.mlir \ + --lhs_rhs_type=${lhs_rhs_type} \ + --acc_type=${acc_type} \ + --shapes=${shape} \ + --transpose_rhs + done +done diff --git a/linalg_ops/matmul/generated/bf16_into_bf16/matmul_bf16_into_bf16_large.mlir b/linalg_ops/matmul/generated/bf16_into_bf16/matmul_bf16_into_bf16_large.mlir new file mode 100644 index 0000000..52e78d6 --- /dev/null +++ b/linalg_ops/matmul/generated/bf16_into_bf16/matmul_bf16_into_bf16_large.mlir @@ -0,0 +1,136 @@ +func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs: tensor, %rhs: tensor, %acc: tensor) -> tensor { + %result = linalg.matmul ins(%lhs, %rhs: tensor, tensor) outs(%acc: tensor) -> tensor + return %result: tensor +} + +func.func @matmul_accumulate_512x128xbf16_times_128x512xbf16_into_512x512xbf16(%lhs: tensor<512x128xbf16>, %rhs: tensor<128x512xbf16>, %acc: tensor<512x512xbf16>) -> tensor<512x512xbf16> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<512x128xbf16>, tensor<128x512xbf16>) outs(%acc: tensor<512x512xbf16>) -> tensor<512x512xbf16> + return %result: tensor<512x512xbf16> +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs: tensor, %rhs: tensor) -> tensor { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %acc_dim0 = tensor.dim %lhs, %c0 : tensor + %acc_dim1 = tensor.dim %rhs, %c1 : tensor + %init_acc = tensor.empty(%acc_dim0, %acc_dim1) : tensor + %c0_acc_type = arith.constant 0.0: bf16 + %acc = linalg.fill ins(%c0_acc_type : bf16) outs(%init_acc : tensor) -> tensor + %result = linalg.matmul ins(%lhs, %rhs: tensor, tensor) outs(%acc: tensor) -> tensor + return %result: tensor +} + +func.func @matmul_512x128xbf16_times_128x512xbf16_into_512x512xbf16(%lhs: tensor<512x128xbf16>, %rhs: tensor<128x512xbf16>) -> tensor<512x512xbf16> { + %init_acc = tensor.empty() : tensor<512x512xbf16> + %c0_acc_type = arith.constant 0.0: bf16 + %acc = linalg.fill ins(%c0_acc_type : bf16) outs(%init_acc : tensor<512x512xbf16>) -> tensor<512x512xbf16> + %result = linalg.matmul ins(%lhs, %rhs: tensor<512x128xbf16>, tensor<128x512xbf16>) outs(%acc: tensor<512x512xbf16>) -> tensor<512x512xbf16> + return %result: tensor<512x512xbf16> +} + +func.func @matmul_1000x4xbf16_times_4x512xbf16_into_1000x512xbf16(%lhs: tensor<1000x4xbf16>, %rhs: tensor<4x512xbf16>) -> tensor<1000x512xbf16> { + %init_acc = tensor.empty() : 
tensor<1000x512xbf16> + %c0_acc_type = arith.constant 0.0: bf16 + %acc = linalg.fill ins(%c0_acc_type : bf16) outs(%init_acc : tensor<1000x512xbf16>) -> tensor<1000x512xbf16> + %result = linalg.matmul ins(%lhs, %rhs: tensor<1000x4xbf16>, tensor<4x512xbf16>) outs(%acc: tensor<1000x512xbf16>) -> tensor<1000x512xbf16> + return %result: tensor<1000x512xbf16> +} + +func.func @matmul_4x1000xbf16_times_1000x512xbf16_into_4x512xbf16(%lhs: tensor<4x1000xbf16>, %rhs: tensor<1000x512xbf16>) -> tensor<4x512xbf16> { + %init_acc = tensor.empty() : tensor<4x512xbf16> + %c0_acc_type = arith.constant 0.0: bf16 + %acc = linalg.fill ins(%c0_acc_type : bf16) outs(%init_acc : tensor<4x512xbf16>) -> tensor<4x512xbf16> + %result = linalg.matmul ins(%lhs, %rhs: tensor<4x1000xbf16>, tensor<1000x512xbf16>) outs(%acc: tensor<4x512xbf16>) -> tensor<4x512xbf16> + return %result: tensor<4x512xbf16> +} + +func.func @matmul_512x1000xbf16_times_1000x4xbf16_into_512x4xbf16(%lhs: tensor<512x1000xbf16>, %rhs: tensor<1000x4xbf16>) -> tensor<512x4xbf16> { + %init_acc = tensor.empty() : tensor<512x4xbf16> + %c0_acc_type = arith.constant 0.0: bf16 + %acc = linalg.fill ins(%c0_acc_type : bf16) outs(%init_acc : tensor<512x4xbf16>) -> tensor<512x4xbf16> + %result = linalg.matmul ins(%lhs, %rhs: tensor<512x1000xbf16>, tensor<1000x4xbf16>) outs(%acc: tensor<512x4xbf16>) -> tensor<512x4xbf16> + return %result: tensor<512x4xbf16> +} + +func.func @matmul_512x128xbf16_times_128x500xbf16_into_512x500xbf16(%lhs: tensor<512x128xbf16>, %rhs: tensor<128x500xbf16>) -> tensor<512x500xbf16> { + %init_acc = tensor.empty() : tensor<512x500xbf16> + %c0_acc_type = arith.constant 0.0: bf16 + %acc = linalg.fill ins(%c0_acc_type : bf16) outs(%init_acc : tensor<512x500xbf16>) -> tensor<512x500xbf16> + %result = linalg.matmul ins(%lhs, %rhs: tensor<512x128xbf16>, tensor<128x500xbf16>) outs(%acc: tensor<512x500xbf16>) -> tensor<512x500xbf16> + return %result: tensor<512x500xbf16> +} + +func.func @matmul_457x330xbf16_times_330x512xbf16_into_457x512xbf16(%lhs: tensor<457x330xbf16>, %rhs: tensor<330x512xbf16>) -> tensor<457x512xbf16> { + %init_acc = tensor.empty() : tensor<457x512xbf16> + %c0_acc_type = arith.constant 0.0: bf16 + %acc = linalg.fill ins(%c0_acc_type : bf16) outs(%init_acc : tensor<457x512xbf16>) -> tensor<457x512xbf16> + %result = linalg.matmul ins(%lhs, %rhs: tensor<457x330xbf16>, tensor<330x512xbf16>) outs(%acc: tensor<457x512xbf16>) -> tensor<457x512xbf16> + return %result: tensor<457x512xbf16> +} + +func.func @matmul_457x330xbf16_times_330x514xbf16_into_457x514xbf16(%lhs: tensor<457x330xbf16>, %rhs: tensor<330x514xbf16>) -> tensor<457x514xbf16> { + %init_acc = tensor.empty() : tensor<457x514xbf16> + %c0_acc_type = arith.constant 0.0: bf16 + %acc = linalg.fill ins(%c0_acc_type : bf16) outs(%init_acc : tensor<457x514xbf16>) -> tensor<457x514xbf16> + %result = linalg.matmul ins(%lhs, %rhs: tensor<457x330xbf16>, tensor<330x514xbf16>) outs(%acc: tensor<457x514xbf16>) -> tensor<457x514xbf16> + return %result: tensor<457x514xbf16> +} + +func.func @matmul_438x330xbf16_times_330x514xbf16_into_438x514xbf16(%lhs: tensor<438x330xbf16>, %rhs: tensor<330x514xbf16>) -> tensor<438x514xbf16> { + %init_acc = tensor.empty() : tensor<438x514xbf16> + %c0_acc_type = arith.constant 0.0: bf16 + %acc = linalg.fill ins(%c0_acc_type : bf16) outs(%init_acc : tensor<438x514xbf16>) -> tensor<438x514xbf16> + %result = linalg.matmul ins(%lhs, %rhs: tensor<438x330xbf16>, tensor<330x514xbf16>) outs(%acc: tensor<438x514xbf16>) -> tensor<438x514xbf16> + return %result: 
tensor<438x514xbf16> +} + +func.func @matmul_540x332xbf16_times_332x516xbf16_into_540x516xbf16(%lhs: tensor<540x332xbf16>, %rhs: tensor<332x516xbf16>) -> tensor<540x516xbf16> { + %init_acc = tensor.empty() : tensor<540x516xbf16> + %c0_acc_type = arith.constant 0.0: bf16 + %acc = linalg.fill ins(%c0_acc_type : bf16) outs(%init_acc : tensor<540x516xbf16>) -> tensor<540x516xbf16> + %result = linalg.matmul ins(%lhs, %rhs: tensor<540x332xbf16>, tensor<332x516xbf16>) outs(%acc: tensor<540x516xbf16>) -> tensor<540x516xbf16> + return %result: tensor<540x516xbf16> +} + +func.func @matmul_654x321xbf16_times_321x234xbf16_into_654x234xbf16(%lhs: tensor<654x321xbf16>, %rhs: tensor<321x234xbf16>) -> tensor<654x234xbf16> { + %init_acc = tensor.empty() : tensor<654x234xbf16> + %c0_acc_type = arith.constant 0.0: bf16 + %acc = linalg.fill ins(%c0_acc_type : bf16) outs(%init_acc : tensor<654x234xbf16>) -> tensor<654x234xbf16> + %result = linalg.matmul ins(%lhs, %rhs: tensor<654x321xbf16>, tensor<321x234xbf16>) outs(%acc: tensor<654x234xbf16>) -> tensor<654x234xbf16> + return %result: tensor<654x234xbf16> +} + +func.func @matmul_457x160xbf16_times_160x512xbf16_into_457x512xbf16(%lhs: tensor<457x160xbf16>, %rhs: tensor<160x512xbf16>) -> tensor<457x512xbf16> { + %init_acc = tensor.empty() : tensor<457x512xbf16> + %c0_acc_type = arith.constant 0.0: bf16 + %acc = linalg.fill ins(%c0_acc_type : bf16) outs(%init_acc : tensor<457x512xbf16>) -> tensor<457x512xbf16> + %result = linalg.matmul ins(%lhs, %rhs: tensor<457x160xbf16>, tensor<160x512xbf16>) outs(%acc: tensor<457x512xbf16>) -> tensor<457x512xbf16> + return %result: tensor<457x512xbf16> +} + +func.func @matmul_512x330xbf16_times_330x512xbf16_into_512x512xbf16(%lhs: tensor<512x330xbf16>, %rhs: tensor<330x512xbf16>) -> tensor<512x512xbf16> { + %init_acc = tensor.empty() : tensor<512x512xbf16> + %c0_acc_type = arith.constant 0.0: bf16 + %acc = linalg.fill ins(%c0_acc_type : bf16) outs(%init_acc : tensor<512x512xbf16>) -> tensor<512x512xbf16> + %result = linalg.matmul ins(%lhs, %rhs: tensor<512x330xbf16>, tensor<330x512xbf16>) outs(%acc: tensor<512x512xbf16>) -> tensor<512x512xbf16> + return %result: tensor<512x512xbf16> +} + +func.func @matmul_accumulate_1x1000xbf16_times_1000x1000xbf16_into_1x1000xbf16(%lhs: tensor<1x1000xbf16>, %rhs: tensor<1000x1000xbf16>, %acc: tensor<1x1000xbf16>) -> tensor<1x1000xbf16> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<1x1000xbf16>, tensor<1000x1000xbf16>) outs(%acc: tensor<1x1000xbf16>) -> tensor<1x1000xbf16> + return %result: tensor<1x1000xbf16> +} + +func.func @matmul_accumulate_1000x1000xbf16_times_1000x1xbf16_into_1000x1xbf16(%lhs: tensor<1000x1000xbf16>, %rhs: tensor<1000x1xbf16>, %acc: tensor<1000x1xbf16>) -> tensor<1000x1xbf16> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<1000x1000xbf16>, tensor<1000x1xbf16>) outs(%acc: tensor<1000x1xbf16>) -> tensor<1000x1xbf16> + return %result: tensor<1000x1xbf16> +} + +func.func @matmul_1000x1000xbf16_times_1000x1xbf16_into_1000x1xbf16(%lhs: tensor<1000x1000xbf16>, %rhs: tensor<1000x1xbf16>) -> tensor<1000x1xbf16> { + %init_acc = tensor.empty() : tensor<1000x1xbf16> + %c0_acc_type = arith.constant 0.0: bf16 + %acc = linalg.fill ins(%c0_acc_type : bf16) outs(%init_acc : tensor<1000x1xbf16>) -> tensor<1000x1xbf16> + %result = linalg.matmul ins(%lhs, %rhs: tensor<1000x1000xbf16>, tensor<1000x1xbf16>) outs(%acc: tensor<1000x1xbf16>) -> tensor<1000x1xbf16> + return %result: tensor<1000x1xbf16> +} + diff --git 
a/linalg_ops/matmul/generated/bf16_into_bf16/matmul_bf16_into_bf16_large_calls.mlir b/linalg_ops/matmul/generated/bf16_into_bf16/matmul_bf16_into_bf16_large_calls.mlir new file mode 100644 index 0000000..c2e72e9 --- /dev/null +++ b/linalg_ops/matmul/generated/bf16_into_bf16/matmul_bf16_into_bf16_large_calls.mlir @@ -0,0 +1,882 @@ +builtin.module @calls attributes { + +} { + +func.func private @matmul_test.generate_random_matrix(%device: !hal.device, %dim0: i64, %dim1: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @matmul_test.check_matmul_results(%device: !hal.device, %m: i64, %k: i64, %n: i64, %transpose_rhs: i32, %lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) + +func.func private @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_512x128xbf16_times_128x512xbf16_into_512x512xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_512x128xbf16_times_128x512xbf16_into_512x512xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_1000x4xbf16_times_4x512xbf16_into_1000x512xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_4x1000xbf16_times_1000x512xbf16_into_4x512xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_512x1000xbf16_times_1000x4xbf16_into_512x4xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_512x128xbf16_times_128x500xbf16_into_512x500xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_457x330xbf16_times_330x512xbf16_into_457x512xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_457x330xbf16_times_330x514xbf16_into_457x514xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_438x330xbf16_times_330x514xbf16_into_438x514xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_540x332xbf16_times_332x516xbf16_into_540x516xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_654x321xbf16_times_321x234xbf16_into_654x234xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_457x160xbf16_times_160x512xbf16_into_457x512xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_512x330xbf16_times_330x512xbf16_into_512x512xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_1x1000xbf16_times_1000x1000xbf16_into_1x1000xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_1000x1000xbf16_times_1000x1xbf16_into_1000x1xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private 
@module.matmul_1000x1000xbf16_times_1000x1xbf16_into_1000x1xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view + +func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_512_128_512_acc_0() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 2 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 128 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 3 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 512 : i64 + %acc_dim1 = arith.constant 512 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 512 : i64 + %acc_copy_dim1 = arith.constant 512 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_512x128xbf16_times_128x512xbf16_into_512x512xbf16_512_128_512_acc_1() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 5 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 128 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 6 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 512 : i64 + %acc_dim1 = arith.constant 512 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) 
: (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 512 : i64 + %acc_copy_dim1 = arith.constant 512 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_512x128xbf16_times_128x512xbf16_into_512x512xbf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_512_128_512_2() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 8 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 128 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 9 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_512x128xbf16_times_128x512xbf16_into_512x512xbf16_512_128_512_3() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 10 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 128 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 11 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_512x128xbf16_times_128x512xbf16_into_512x512xbf16(%lhs, %rhs) : 
(!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_1000_4_512_4() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x4x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 4 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 12 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 13 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_1000x4xbf16_times_4x512xbf16_into_1000x512xbf16_1000_4_512_5() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x4x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 4 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 14 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 15 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_1000x4xbf16_times_4x512xbf16_into_1000x512xbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_4_1000_512_6() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 
4x1000x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 4 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 16 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 17 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_4x1000xbf16_times_1000x512xbf16_into_4x512xbf16_4_1000_512_7() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 4x1000x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 4 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 18 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 19 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_4x1000xbf16_times_1000x512xbf16_into_4x512xbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_512_1000_4_8() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x1000x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 20 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed 
= arith.constant 21 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_512x1000xbf16_times_1000x4xbf16_into_512x4xbf16_512_1000_4_9() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x1000x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 22 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 23 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_512x1000xbf16_times_1000x4xbf16_into_512x4xbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_512_128_500_10() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x500"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 24 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 128 : i64 + %rhs_dim1 = arith.constant 500 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 25 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 500 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, 
%k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_512x128xbf16_times_128x500xbf16_into_512x500xbf16_512_128_500_11() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x500"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 26 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 128 : i64 + %rhs_dim1 = arith.constant 500 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 27 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_512x128xbf16_times_128x500xbf16_into_512x500xbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 500 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_457_330_512_12() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x330x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 28 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 29 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_457x330xbf16_times_330x512xbf16_into_457x512xbf16_457_330_512_13() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x330x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = 
hal.element_type : i32 + %lhs_seed = arith.constant 30 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 31 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_457x330xbf16_times_330x512xbf16_into_457x512xbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_457_330_514_14() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x330x514"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 32 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 514 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 33 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 514 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_457x330xbf16_times_330x514xbf16_into_457x514xbf16_457_330_514_15() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x330x514"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 34 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 514 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 35 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = 
util.null : !hal.buffer_view + %result = call @module.matmul_457x330xbf16_times_330x514xbf16_into_457x514xbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 514 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_438_330_514_16() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 438x330x514"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 438 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 36 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 514 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 37 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 438 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 514 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_438x330xbf16_times_330x514xbf16_into_438x514xbf16_438_330_514_17() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 438x330x514"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 438 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 38 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 514 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 39 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_438x330xbf16_times_330x514xbf16_into_438x514xbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 438 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 514 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func 
@matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_540_332_516_18() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 540x332x516"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 540 : i64 + %lhs_dim1 = arith.constant 332 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 40 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 332 : i64 + %rhs_dim1 = arith.constant 516 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 41 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 540 : i64 + %k = arith.constant 332 : i64 + %n = arith.constant 516 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_540x332xbf16_times_332x516xbf16_into_540x516xbf16_540_332_516_19() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 540x332x516"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 540 : i64 + %lhs_dim1 = arith.constant 332 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 42 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 332 : i64 + %rhs_dim1 = arith.constant 516 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 43 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_540x332xbf16_times_332x516xbf16_into_540x516xbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 540 : i64 + %k = arith.constant 332 : i64 + %n = arith.constant 516 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_654_321_234_20() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 654x321x234"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 654 : i64 + %lhs_dim1 = arith.constant 321 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 44 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, 
i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 321 : i64 + %rhs_dim1 = arith.constant 234 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 45 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 654 : i64 + %k = arith.constant 321 : i64 + %n = arith.constant 234 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_654x321xbf16_times_321x234xbf16_into_654x234xbf16_654_321_234_21() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 654x321x234"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 654 : i64 + %lhs_dim1 = arith.constant 321 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 46 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 321 : i64 + %rhs_dim1 = arith.constant 234 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 47 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_654x321xbf16_times_321x234xbf16_into_654x234xbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 654 : i64 + %k = arith.constant 321 : i64 + %n = arith.constant 234 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_457_160_512_22() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x160x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 160 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 48 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 160 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 49 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 
457 : i64 + %k = arith.constant 160 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_457x160xbf16_times_160x512xbf16_into_457x512xbf16_457_160_512_23() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x160x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 160 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 50 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 160 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 51 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_457x160xbf16_times_160x512xbf16_into_457x512xbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 160 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_512_330_512_24() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x330x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 52 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 53 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_512x330xbf16_times_330x512xbf16_into_512x512xbf16_512_330_512_25() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x330x512"} +} { + %device_index = arith.constant 0 : index 
+ %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 54 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 55 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_512x330xbf16_times_330x512xbf16_into_512x512xbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_1_1000_1000_acc_26() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1000x1000"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 56 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 57 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1000 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 58 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1000 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 58 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1000 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func 
@matmul_accumulate_1x1000xbf16_times_1000x1000xbf16_into_1x1000xbf16_1_1000_1000_acc_27() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1000x1000"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 59 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 60 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1000 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 61 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1000 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 61 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_1x1000xbf16_times_1000x1000xbf16_into_1x1000xbf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1000 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_1000_1000_1_acc_28() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x1000x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 62 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 63 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1000 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 64 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1000 : i64 + %acc_copy_dim1 = arith.constant 1 
: i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 64 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_1000x1000xbf16_times_1000x1xbf16_into_1000x1xbf16_1000_1000_1_acc_29() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x1000x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 65 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 66 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1000 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 67 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1000 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 67 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_1000x1000xbf16_times_1000x1xbf16_into_1000x1xbf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_1000_1000_1_30() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x1000x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 68 : i32 + %lhs = call 
@matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+  %rhs_dim0 = arith.constant 1000 : i64
+  %rhs_dim1 = arith.constant 1 : i64
+  %rhs_element_type = hal.element_type<bf16> : i32
+  %rhs_seed = arith.constant 69 : i32
+  %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+  %acc = util.null : !hal.buffer_view
+  %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view
+  %m = arith.constant 1000 : i64
+  %k = arith.constant 1000 : i64
+  %n = arith.constant 1 : i64
+  %transpose_rhs = arith.constant 0 : i32
+  call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()
+  return
+}
+
+func.func @matmul_1000x1000xbf16_times_1000x1xbf16_into_1000x1xbf16_1000_1000_1_31() attributes {
+  iree.reflection = {description = "Matmul shape (MxKxN): 1000x1000x1"}
+} {
+  %device_index = arith.constant 0 : index
+  %device = hal.devices.get %device_index : !hal.device
+  %lhs_dim0 = arith.constant 1000 : i64
+  %lhs_dim1 = arith.constant 1000 : i64
+  %lhs_element_type = hal.element_type<bf16> : i32
+  %lhs_seed = arith.constant 70 : i32
+  %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+  %rhs_dim0 = arith.constant 1000 : i64
+  %rhs_dim1 = arith.constant 1 : i64
+  %rhs_element_type = hal.element_type<bf16> : i32
+  %rhs_seed = arith.constant 71 : i32
+  %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+  %acc = util.null : !hal.buffer_view
+  %result = call @module.matmul_1000x1000xbf16_times_1000x1xbf16_into_1000x1xbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view
+  %m = arith.constant 1000 : i64
+  %k = arith.constant 1000 : i64
+  %n = arith.constant 1 : i64
+  %transpose_rhs = arith.constant 0 : i32
+  call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()
+  return
+}
+
+
+}
diff --git a/linalg_ops/matmul/generated/bf16_into_bf16/matmul_bf16_into_bf16_small.mlir b/linalg_ops/matmul/generated/bf16_into_bf16/matmul_bf16_into_bf16_small.mlir
new file mode 100644
index 0000000..4537ce7
--- /dev/null
+++ b/linalg_ops/matmul/generated/bf16_into_bf16/matmul_bf16_into_bf16_small.mlir
@@ -0,0 +1,99 @@
+func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs: tensor<?x?xbf16>, %rhs: tensor<?x?xbf16>, %acc: tensor<?x?xbf16>) -> tensor<?x?xbf16> {
+  %result = linalg.matmul ins(%lhs, %rhs: tensor<?x?xbf16>, tensor<?x?xbf16>) outs(%acc: tensor<?x?xbf16>) -> tensor<?x?xbf16>
+  return %result: tensor<?x?xbf16>
+}
+
+func.func @matmul_accumulate_1x1xbf16_times_1x1xbf16_into_1x1xbf16(%lhs: tensor<1x1xbf16>, %rhs: tensor<1x1xbf16>, %acc: tensor<1x1xbf16>) -> tensor<1x1xbf16> {
+  %result = linalg.matmul ins(%lhs, %rhs: tensor<1x1xbf16>, tensor<1x1xbf16>) outs(%acc: tensor<1x1xbf16>) -> tensor<1x1xbf16>
+  return %result: tensor<1x1xbf16>
+}
+
+func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs: tensor<?x?xbf16>, %rhs: tensor<?x?xbf16>) -> tensor<?x?xbf16> {
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+  %acc_dim0 = tensor.dim %lhs, %c0 : tensor<?x?xbf16>
+  %acc_dim1 = tensor.dim %rhs, %c1 : tensor<?x?xbf16>
+  %init_acc = tensor.empty(%acc_dim0, %acc_dim1) : tensor<?x?xbf16>
+  %c0_acc_type = arith.constant 0.0: bf16
+  %acc = linalg.fill ins(%c0_acc_type : bf16) outs(%init_acc : tensor<?x?xbf16>) -> tensor<?x?xbf16>
+  %result = linalg.matmul ins(%lhs, %rhs: tensor<?x?xbf16>, tensor<?x?xbf16>) outs(%acc: tensor<?x?xbf16>) -> tensor<?x?xbf16>
+  return %result: tensor<?x?xbf16>
+}
+
+func.func @matmul_1x1xbf16_times_1x1xbf16_into_1x1xbf16(%lhs: tensor<1x1xbf16>, %rhs: tensor<1x1xbf16>) -> tensor<1x1xbf16> {
+  %init_acc = tensor.empty() : tensor<1x1xbf16>
+  %c0_acc_type = arith.constant 0.0: bf16
+  %acc = linalg.fill ins(%c0_acc_type : bf16) outs(%init_acc : tensor<1x1xbf16>) -> tensor<1x1xbf16>
+  %result = linalg.matmul ins(%lhs, %rhs: tensor<1x1xbf16>, tensor<1x1xbf16>) outs(%acc: tensor<1x1xbf16>) -> tensor<1x1xbf16>
+  return %result: tensor<1x1xbf16>
+}
+
+func.func @matmul_accumulate_2x2xbf16_times_2x2xbf16_into_2x2xbf16(%lhs: tensor<2x2xbf16>, %rhs: tensor<2x2xbf16>, %acc: tensor<2x2xbf16>) -> tensor<2x2xbf16> {
+  %result = linalg.matmul ins(%lhs, %rhs: tensor<2x2xbf16>, tensor<2x2xbf16>) outs(%acc: tensor<2x2xbf16>) -> tensor<2x2xbf16>
+  return %result: tensor<2x2xbf16>
+}
+
+func.func @matmul_accumulate_4x4xbf16_times_4x4xbf16_into_4x4xbf16(%lhs: tensor<4x4xbf16>, %rhs: tensor<4x4xbf16>, %acc: tensor<4x4xbf16>) -> tensor<4x4xbf16> {
+  %result = linalg.matmul ins(%lhs, %rhs: tensor<4x4xbf16>, tensor<4x4xbf16>) outs(%acc: tensor<4x4xbf16>) -> tensor<4x4xbf16>
+  return %result: tensor<4x4xbf16>
+}
+
+func.func @matmul_accumulate_8x8xbf16_times_8x8xbf16_into_8x8xbf16(%lhs: tensor<8x8xbf16>, %rhs: tensor<8x8xbf16>, %acc: tensor<8x8xbf16>) -> tensor<8x8xbf16> {
+  %result = linalg.matmul ins(%lhs, %rhs: tensor<8x8xbf16>, tensor<8x8xbf16>) outs(%acc: tensor<8x8xbf16>) -> tensor<8x8xbf16>
+  return %result: tensor<8x8xbf16>
+}
+
+func.func @matmul_accumulate_9x9xbf16_times_9x9xbf16_into_9x9xbf16(%lhs: tensor<9x9xbf16>, %rhs: tensor<9x9xbf16>, %acc: tensor<9x9xbf16>) -> tensor<9x9xbf16> {
+  %result = linalg.matmul ins(%lhs, %rhs: tensor<9x9xbf16>, tensor<9x9xbf16>) outs(%acc: tensor<9x9xbf16>) -> tensor<9x9xbf16>
+  return %result: tensor<9x9xbf16>
+}
+
+func.func @matmul_accumulate_6x13xbf16_times_13x3xbf16_into_6x3xbf16(%lhs: tensor<6x13xbf16>, %rhs: tensor<13x3xbf16>, %acc: tensor<6x3xbf16>) -> tensor<6x3xbf16> {
+  %result = linalg.matmul ins(%lhs, %rhs: tensor<6x13xbf16>, tensor<13x3xbf16>) outs(%acc: tensor<6x3xbf16>) -> tensor<6x3xbf16>
+  return %result: tensor<6x3xbf16>
+}
+
+func.func @matmul_15x37xbf16_times_37x7xbf16_into_15x7xbf16(%lhs: tensor<15x37xbf16>, %rhs: tensor<37x7xbf16>) -> tensor<15x7xbf16> {
+  %init_acc = tensor.empty() : tensor<15x7xbf16>
+  %c0_acc_type = arith.constant 0.0: bf16
+  %acc = linalg.fill ins(%c0_acc_type : bf16) outs(%init_acc : tensor<15x7xbf16>) -> tensor<15x7xbf16>
+  %result = linalg.matmul ins(%lhs, %rhs: tensor<15x37xbf16>, tensor<37x7xbf16>) outs(%acc: tensor<15x7xbf16>) -> tensor<15x7xbf16>
+  return %result: tensor<15x7xbf16>
+}
+
+func.func @matmul_accumulate_81x19xbf16_times_19x41xbf16_into_81x41xbf16(%lhs: tensor<81x19xbf16>, %rhs: tensor<19x41xbf16>, %acc: tensor<81x41xbf16>) -> tensor<81x41xbf16> {
+  %result = linalg.matmul ins(%lhs, %rhs: tensor<81x19xbf16>, tensor<19x41xbf16>) outs(%acc: tensor<81x41xbf16>) -> tensor<81x41xbf16>
+  return %result: tensor<81x41xbf16>
+}
+
+func.func @matmul_accumulate_1x10xbf16_times_10x10xbf16_into_1x10xbf16(%lhs: tensor<1x10xbf16>, %rhs: tensor<10x10xbf16>, %acc: tensor<1x10xbf16>) -> tensor<1x10xbf16> {
+  %result = linalg.matmul ins(%lhs, %rhs: tensor<1x10xbf16>, tensor<10x10xbf16>) outs(%acc: tensor<1x10xbf16>) -> tensor<1x10xbf16>
+  return %result: tensor<1x10xbf16>
+}
+
+func.func @matmul_1x10xbf16_times_10x10xbf16_into_1x10xbf16(%lhs: tensor<1x10xbf16>, %rhs: tensor<10x10xbf16>) -> tensor<1x10xbf16> {
+  %init_acc = tensor.empty() : tensor<1x10xbf16>
+  %c0_acc_type = arith.constant 0.0: bf16
+  %acc = linalg.fill ins(%c0_acc_type : bf16) outs(%init_acc : tensor<1x10xbf16>) -> tensor<1x10xbf16>
+  %result = linalg.matmul ins(%lhs, %rhs: tensor<1x10xbf16>, tensor<10x10xbf16>) outs(%acc: tensor<1x10xbf16>) -> tensor<1x10xbf16>
+  return %result: tensor<1x10xbf16>
+}
+
+func.func @matmul_accumulate_10x1xbf16_times_1x10xbf16_into_10x10xbf16(%lhs: tensor<10x1xbf16>, %rhs: tensor<1x10xbf16>, %acc: tensor<10x10xbf16>) -> tensor<10x10xbf16> {
+  %result = linalg.matmul ins(%lhs, %rhs: tensor<10x1xbf16>, tensor<1x10xbf16>) outs(%acc: tensor<10x10xbf16>) -> tensor<10x10xbf16>
+  return %result: tensor<10x10xbf16>
+}
+
+func.func @matmul_accumulate_10x10xbf16_times_10x1xbf16_into_10x1xbf16(%lhs: tensor<10x10xbf16>, %rhs: tensor<10x1xbf16>, %acc: tensor<10x1xbf16>) -> tensor<10x1xbf16> {
+  %result = linalg.matmul ins(%lhs, %rhs: tensor<10x10xbf16>, tensor<10x1xbf16>) outs(%acc: tensor<10x1xbf16>) -> tensor<10x1xbf16>
+  return %result: tensor<10x1xbf16>
+}
+
+func.func @matmul_10x10xbf16_times_10x1xbf16_into_10x1xbf16(%lhs: tensor<10x10xbf16>, %rhs: tensor<10x1xbf16>) -> tensor<10x1xbf16> {
+  %init_acc = tensor.empty() : tensor<10x1xbf16>
+  %c0_acc_type = arith.constant 0.0: bf16
+  %acc = linalg.fill ins(%c0_acc_type : bf16) outs(%init_acc : tensor<10x1xbf16>) -> tensor<10x1xbf16>
+  %result = linalg.matmul ins(%lhs, %rhs: tensor<10x10xbf16>, tensor<10x1xbf16>) outs(%acc: tensor<10x1xbf16>) -> tensor<10x1xbf16>
+  return %result: tensor<10x1xbf16>
+}
+
diff --git a/linalg_ops/matmul/generated/bf16_into_bf16/matmul_bf16_into_bf16_small_calls.mlir b/linalg_ops/matmul/generated/bf16_into_bf16/matmul_bf16_into_bf16_small_calls.mlir
new file mode 100644
index 0000000..e577a7c
--- /dev/null
+++ b/linalg_ops/matmul/generated/bf16_into_bf16/matmul_bf16_into_bf16_small_calls.mlir
@@ -0,0 +1,906 @@
+builtin.module @calls attributes {
+
+} {
+
+func.func private @matmul_test.generate_random_matrix(%device: !hal.device, %dim0: i64, %dim1: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view
+func.func private @matmul_test.check_matmul_results(%device: !hal.device, %m: i64, %k: i64, %n: i64, %transpose_rhs: i32, %lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view)
+
+func.func private @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_accumulate_1x1xbf16_times_1x1xbf16_into_1x1xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_1x1xbf16_times_1x1xbf16_into_1x1xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_accumulate_2x2xbf16_times_2x2xbf16_into_2x2xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_accumulate_4x4xbf16_times_4x4xbf16_into_4x4xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_accumulate_8x8xbf16_times_8x8xbf16_into_8x8xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_accumulate_9x9xbf16_times_9x9xbf16_into_9x9xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_accumulate_6x13xbf16_times_13x3xbf16_into_6x3xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_15x37xbf16_times_37x7xbf16_into_15x7xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_accumulate_81x19xbf16_times_19x41xbf16_into_81x41xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_accumulate_1x10xbf16_times_10x10xbf16_into_1x10xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_1x10xbf16_times_10x10xbf16_into_1x10xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_accumulate_10x1xbf16_times_1x10xbf16_into_10x10xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_accumulate_10x10xbf16_times_10x1xbf16_into_10x1xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_10x10xbf16_times_10x1xbf16_into_10x1xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+
+func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_1_1_1_acc_0() attributes {
+  iree.reflection = {description = "Matmul shape (MxKxN): 1x1x1"}
+} {
+  %device_index = arith.constant 0 : index
+  %device = hal.devices.get %device_index : !hal.device
+  %lhs_dim0 = arith.constant 1 : i64
+  %lhs_dim1 = arith.constant 1 : i64
+  %lhs_element_type = hal.element_type<bf16> : i32
+  %lhs_seed = arith.constant 2 : i32
+  %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+  %rhs_dim0 = arith.constant 1 : i64
+  %rhs_dim1 = arith.constant 1 : i64
+  %rhs_element_type = hal.element_type<bf16> : i32
+  %rhs_seed = arith.constant 3 : i32
+  %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+  %acc_dim0 = arith.constant 1 : i64
+  %acc_dim1 = arith.constant 1 : i64
+  %acc_element_type = hal.element_type<bf16> : i32
+  %acc_seed = arith.constant 4 : i32
+  %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+  %acc_copy_dim0 = arith.constant 1 : i64
+  %acc_copy_dim1 = arith.constant 1 : i64
+  %acc_copy_element_type = hal.element_type<bf16> : i32
+  %acc_copy_seed = arith.constant 4 : i32
+  %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+  %result = call @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view
@module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_1x1xbf16_times_1x1xbf16_into_1x1xbf16_1_1_1_acc_1() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 5 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 6 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_1x1xbf16_times_1x1xbf16_into_1x1xbf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_1_1_1_2() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 8 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 9 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : 
(!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_1x1xbf16_times_1x1xbf16_into_1x1xbf16_1_1_1_3() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 10 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 11 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_1x1xbf16_times_1x1xbf16_into_1x1xbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_2_2_2_acc_4() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 2x2x2"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 2 : i64 + %lhs_dim1 = arith.constant 2 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 12 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 2 : i64 + %rhs_dim1 = arith.constant 2 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 13 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 2 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 14 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 2 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 14 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, 
%acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 2 : i64 + %k = arith.constant 2 : i64 + %n = arith.constant 2 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_2x2xbf16_times_2x2xbf16_into_2x2xbf16_2_2_2_acc_5() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 2x2x2"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 2 : i64 + %lhs_dim1 = arith.constant 2 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 15 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 2 : i64 + %rhs_dim1 = arith.constant 2 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 16 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 2 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 17 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 2 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 17 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_2x2xbf16_times_2x2xbf16_into_2x2xbf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 2 : i64 + %k = arith.constant 2 : i64 + %n = arith.constant 2 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_4_4_4_acc_6() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 4x4x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 4 : i64 + %lhs_dim1 = arith.constant 4 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 18 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : 
i32 + %rhs_seed = arith.constant 19 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 4 : i64 + %acc_dim1 = arith.constant 4 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 20 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 4 : i64 + %acc_copy_dim1 = arith.constant 4 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 20 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_4x4xbf16_times_4x4xbf16_into_4x4xbf16_4_4_4_acc_7() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 4x4x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 4 : i64 + %lhs_dim1 = arith.constant 4 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 21 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 22 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 4 : i64 + %acc_dim1 = arith.constant 4 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 23 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 4 : i64 + %acc_copy_dim1 = arith.constant 4 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 23 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_4x4xbf16_times_4x4xbf16_into_4x4xbf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, 
!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_8_8_8_acc_8() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 8x8x8"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 8 : i64 + %lhs_dim1 = arith.constant 8 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 24 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 8 : i64 + %rhs_dim1 = arith.constant 8 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 25 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 8 : i64 + %acc_dim1 = arith.constant 8 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 26 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 8 : i64 + %acc_copy_dim1 = arith.constant 8 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 26 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 8 : i64 + %k = arith.constant 8 : i64 + %n = arith.constant 8 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_8x8xbf16_times_8x8xbf16_into_8x8xbf16_8_8_8_acc_9() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 8x8x8"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 8 : i64 + %lhs_dim1 = arith.constant 8 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 27 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 8 : i64 + %rhs_dim1 = arith.constant 8 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 28 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 8 : i64 + %acc_dim1 = arith.constant 8 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 29 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 8 : i64 + %acc_copy_dim1 = 
arith.constant 8 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 29 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_8x8xbf16_times_8x8xbf16_into_8x8xbf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 8 : i64 + %k = arith.constant 8 : i64 + %n = arith.constant 8 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_9_9_9_acc_10() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 9x9x9"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 9 : i64 + %lhs_dim1 = arith.constant 9 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 30 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 9 : i64 + %rhs_dim1 = arith.constant 9 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 31 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 9 : i64 + %acc_dim1 = arith.constant 9 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 32 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 9 : i64 + %acc_copy_dim1 = arith.constant 9 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 32 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 9 : i64 + %k = arith.constant 9 : i64 + %n = arith.constant 9 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_9x9xbf16_times_9x9xbf16_into_9x9xbf16_9_9_9_acc_11() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 9x9x9"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 9 : i64 + %lhs_dim1 = arith.constant 9 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 33 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, 
%lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 9 : i64 + %rhs_dim1 = arith.constant 9 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 34 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 9 : i64 + %acc_dim1 = arith.constant 9 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 35 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 9 : i64 + %acc_copy_dim1 = arith.constant 9 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 35 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_9x9xbf16_times_9x9xbf16_into_9x9xbf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 9 : i64 + %k = arith.constant 9 : i64 + %n = arith.constant 9 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_6_13_3_acc_12() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 6x13x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 6 : i64 + %lhs_dim1 = arith.constant 13 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 36 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 13 : i64 + %rhs_dim1 = arith.constant 3 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 37 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 6 : i64 + %acc_dim1 = arith.constant 3 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 38 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 6 : i64 + %acc_copy_dim1 = arith.constant 3 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 38 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 6 : i64 + %k = arith.constant 13 : i64 + %n = arith.constant 3 : i64 + %transpose_rhs 
= arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_6x13xbf16_times_13x3xbf16_into_6x3xbf16_6_13_3_acc_13() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 6x13x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 6 : i64 + %lhs_dim1 = arith.constant 13 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 39 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 13 : i64 + %rhs_dim1 = arith.constant 3 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 40 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 6 : i64 + %acc_dim1 = arith.constant 3 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 41 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 6 : i64 + %acc_copy_dim1 = arith.constant 3 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 41 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_6x13xbf16_times_13x3xbf16_into_6x3xbf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 6 : i64 + %k = arith.constant 13 : i64 + %n = arith.constant 3 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_15_37_7_14() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 15x37x7"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 15 : i64 + %lhs_dim1 = arith.constant 37 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 42 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 37 : i64 + %rhs_dim1 = arith.constant 7 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 43 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 15 
: i64 + %k = arith.constant 37 : i64 + %n = arith.constant 7 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_15x37xbf16_times_37x7xbf16_into_15x7xbf16_15_37_7_15() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 15x37x7"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 15 : i64 + %lhs_dim1 = arith.constant 37 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 44 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 37 : i64 + %rhs_dim1 = arith.constant 7 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 45 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_15x37xbf16_times_37x7xbf16_into_15x7xbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 15 : i64 + %k = arith.constant 37 : i64 + %n = arith.constant 7 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_81_19_41_acc_16() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 81x19x41"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 81 : i64 + %lhs_dim1 = arith.constant 19 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 46 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 19 : i64 + %rhs_dim1 = arith.constant 41 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 47 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 81 : i64 + %acc_dim1 = arith.constant 41 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 48 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 81 : i64 + %acc_copy_dim1 = arith.constant 41 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 48 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs, %acc_copy) : 
(!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 81 : i64 + %k = arith.constant 19 : i64 + %n = arith.constant 41 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_81x19xbf16_times_19x41xbf16_into_81x41xbf16_81_19_41_acc_17() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 81x19x41"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 81 : i64 + %lhs_dim1 = arith.constant 19 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 49 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 19 : i64 + %rhs_dim1 = arith.constant 41 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 50 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 81 : i64 + %acc_dim1 = arith.constant 41 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 51 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 81 : i64 + %acc_copy_dim1 = arith.constant 41 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 51 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_81x19xbf16_times_19x41xbf16_into_81x41xbf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 81 : i64 + %k = arith.constant 19 : i64 + %n = arith.constant 41 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_1_10_10_acc_18() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 52 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 53 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> 
!hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 10 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 54 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 10 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 54 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_1x10xbf16_times_10x10xbf16_into_1x10xbf16_1_10_10_acc_19() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 55 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 56 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 10 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 57 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 10 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 57 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_1x10xbf16_times_10x10xbf16_into_1x10xbf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_1_10_10_20() attributes { + 
iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 58 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 59 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_1x10xbf16_times_10x10xbf16_into_1x10xbf16_1_10_10_21() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 60 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 61 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_1x10xbf16_times_10x10xbf16_into_1x10xbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_10_1_10_acc_22() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x1x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 62 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = 
hal.element_type : i32 + %rhs_seed = arith.constant 63 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 10 : i64 + %acc_dim1 = arith.constant 10 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 64 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 10 : i64 + %acc_copy_dim1 = arith.constant 10 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 64 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_10x1xbf16_times_1x10xbf16_into_10x10xbf16_10_1_10_acc_23() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x1x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 65 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 66 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 10 : i64 + %acc_dim1 = arith.constant 10 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 67 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 10 : i64 + %acc_copy_dim1 = arith.constant 10 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 67 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_10x1xbf16_times_1x10xbf16_into_10x10xbf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, 
i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_10_10_1_acc_24() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 68 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 69 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 10 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 70 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 10 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 70 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_10x10xbf16_times_10x1xbf16_into_10x1xbf16_10_10_1_acc_25() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 71 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 72 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 10 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 73 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + 
%acc_copy_dim0 = arith.constant 10 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 73 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_10x10xbf16_times_10x1xbf16_into_10x1xbf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_10_10_1_26() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 74 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 75 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_10x10xbf16_times_10x1xbf16_into_10x1xbf16_10_10_1_27() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 76 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 77 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_10x10xbf16_times_10x1xbf16_into_10x1xbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 10 : i64 + 
%n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + + +} diff --git a/linalg_ops/matmul/generated/bf16_into_bf16/matmul_transpose_b_bf16_into_bf16_large.mlir b/linalg_ops/matmul/generated/bf16_into_bf16/matmul_transpose_b_bf16_into_bf16_large.mlir new file mode 100644 index 0000000..47d9f91 --- /dev/null +++ b/linalg_ops/matmul/generated/bf16_into_bf16/matmul_transpose_b_bf16_into_bf16_large.mlir @@ -0,0 +1,136 @@ +func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs: tensor<?x?xbf16>, %rhs: tensor<?x?xbf16>, %acc: tensor<?x?xbf16>) -> tensor<?x?xbf16> { + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<?x?xbf16>, tensor<?x?xbf16>) outs(%acc: tensor<?x?xbf16>) -> tensor<?x?xbf16> + return %result: tensor<?x?xbf16> +} + +func.func @matmul_accumulate_512x128xbf16_times_512x128xbf16_into_512x512xbf16(%lhs: tensor<512x128xbf16>, %rhs: tensor<512x128xbf16>, %acc: tensor<512x512xbf16>) -> tensor<512x512xbf16> { + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<512x128xbf16>, tensor<512x128xbf16>) outs(%acc: tensor<512x512xbf16>) -> tensor<512x512xbf16> + return %result: tensor<512x512xbf16> +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs: tensor<?x?xbf16>, %rhs: tensor<?x?xbf16>) -> tensor<?x?xbf16> { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %acc_dim0 = tensor.dim %lhs, %c0 : tensor<?x?xbf16> + %acc_dim1 = tensor.dim %rhs, %c1 : tensor<?x?xbf16> + %init_acc = tensor.empty(%acc_dim0, %acc_dim1) : tensor<?x?xbf16> + %c0_acc_type = arith.constant 0.0: bf16 + %acc = linalg.fill ins(%c0_acc_type : bf16) outs(%init_acc : tensor<?x?xbf16>) -> tensor<?x?xbf16> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<?x?xbf16>, tensor<?x?xbf16>) outs(%acc: tensor<?x?xbf16>) -> tensor<?x?xbf16> + return %result: tensor<?x?xbf16> +} + +func.func @matmul_512x128xbf16_times_512x128xbf16_into_512x512xbf16(%lhs: tensor<512x128xbf16>, %rhs: tensor<512x128xbf16>) -> tensor<512x512xbf16> { + %init_acc = tensor.empty() : tensor<512x512xbf16> + %c0_acc_type = arith.constant 0.0: bf16 + %acc = linalg.fill ins(%c0_acc_type : bf16) outs(%init_acc : tensor<512x512xbf16>) -> tensor<512x512xbf16> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<512x128xbf16>, tensor<512x128xbf16>) outs(%acc: tensor<512x512xbf16>) -> tensor<512x512xbf16> + return %result: tensor<512x512xbf16> +} + +func.func @matmul_1000x4xbf16_times_512x4xbf16_into_1000x512xbf16(%lhs: tensor<1000x4xbf16>, %rhs: tensor<512x4xbf16>) -> tensor<1000x512xbf16> { + %init_acc = tensor.empty() : tensor<1000x512xbf16> + %c0_acc_type = arith.constant 0.0: bf16 + %acc = linalg.fill ins(%c0_acc_type : bf16) outs(%init_acc : tensor<1000x512xbf16>) -> tensor<1000x512xbf16> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<1000x4xbf16>, tensor<512x4xbf16>) outs(%acc: tensor<1000x512xbf16>) -> tensor<1000x512xbf16> + return %result: tensor<1000x512xbf16> +} + +func.func @matmul_4x1000xbf16_times_512x1000xbf16_into_4x512xbf16(%lhs: tensor<4x1000xbf16>, %rhs: tensor<512x1000xbf16>) -> tensor<4x512xbf16> { + %init_acc = tensor.empty() : tensor<4x512xbf16> + %c0_acc_type = arith.constant 0.0: bf16 + %acc = linalg.fill ins(%c0_acc_type : bf16) outs(%init_acc : tensor<4x512xbf16>) -> tensor<4x512xbf16> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<4x1000xbf16>, tensor<512x1000xbf16>) outs(%acc: tensor<4x512xbf16>) -> tensor<4x512xbf16> + return %result: tensor<4x512xbf16> +} + +func.func 
@matmul_512x1000xbf16_times_4x1000xbf16_into_512x4xbf16(%lhs: tensor<512x1000xbf16>, %rhs: tensor<4x1000xbf16>) -> tensor<512x4xbf16> { + %init_acc = tensor.empty() : tensor<512x4xbf16> + %c0_acc_type = arith.constant 0.0: bf16 + %acc = linalg.fill ins(%c0_acc_type : bf16) outs(%init_acc : tensor<512x4xbf16>) -> tensor<512x4xbf16> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<512x1000xbf16>, tensor<4x1000xbf16>) outs(%acc: tensor<512x4xbf16>) -> tensor<512x4xbf16> + return %result: tensor<512x4xbf16> +} + +func.func @matmul_512x128xbf16_times_500x128xbf16_into_512x500xbf16(%lhs: tensor<512x128xbf16>, %rhs: tensor<500x128xbf16>) -> tensor<512x500xbf16> { + %init_acc = tensor.empty() : tensor<512x500xbf16> + %c0_acc_type = arith.constant 0.0: bf16 + %acc = linalg.fill ins(%c0_acc_type : bf16) outs(%init_acc : tensor<512x500xbf16>) -> tensor<512x500xbf16> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<512x128xbf16>, tensor<500x128xbf16>) outs(%acc: tensor<512x500xbf16>) -> tensor<512x500xbf16> + return %result: tensor<512x500xbf16> +} + +func.func @matmul_457x330xbf16_times_512x330xbf16_into_457x512xbf16(%lhs: tensor<457x330xbf16>, %rhs: tensor<512x330xbf16>) -> tensor<457x512xbf16> { + %init_acc = tensor.empty() : tensor<457x512xbf16> + %c0_acc_type = arith.constant 0.0: bf16 + %acc = linalg.fill ins(%c0_acc_type : bf16) outs(%init_acc : tensor<457x512xbf16>) -> tensor<457x512xbf16> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<457x330xbf16>, tensor<512x330xbf16>) outs(%acc: tensor<457x512xbf16>) -> tensor<457x512xbf16> + return %result: tensor<457x512xbf16> +} + +func.func @matmul_457x330xbf16_times_514x330xbf16_into_457x514xbf16(%lhs: tensor<457x330xbf16>, %rhs: tensor<514x330xbf16>) -> tensor<457x514xbf16> { + %init_acc = tensor.empty() : tensor<457x514xbf16> + %c0_acc_type = arith.constant 0.0: bf16 + %acc = linalg.fill ins(%c0_acc_type : bf16) outs(%init_acc : tensor<457x514xbf16>) -> tensor<457x514xbf16> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<457x330xbf16>, tensor<514x330xbf16>) outs(%acc: tensor<457x514xbf16>) -> tensor<457x514xbf16> + return %result: tensor<457x514xbf16> +} + +func.func @matmul_438x330xbf16_times_514x330xbf16_into_438x514xbf16(%lhs: tensor<438x330xbf16>, %rhs: tensor<514x330xbf16>) -> tensor<438x514xbf16> { + %init_acc = tensor.empty() : tensor<438x514xbf16> + %c0_acc_type = arith.constant 0.0: bf16 + %acc = linalg.fill ins(%c0_acc_type : bf16) outs(%init_acc : tensor<438x514xbf16>) -> tensor<438x514xbf16> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<438x330xbf16>, tensor<514x330xbf16>) outs(%acc: tensor<438x514xbf16>) -> tensor<438x514xbf16> + return %result: tensor<438x514xbf16> +} + +func.func @matmul_540x332xbf16_times_516x332xbf16_into_540x516xbf16(%lhs: tensor<540x332xbf16>, %rhs: tensor<516x332xbf16>) -> tensor<540x516xbf16> { + %init_acc = tensor.empty() : tensor<540x516xbf16> + %c0_acc_type = arith.constant 0.0: bf16 + %acc = linalg.fill ins(%c0_acc_type : bf16) outs(%init_acc : tensor<540x516xbf16>) -> tensor<540x516xbf16> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<540x332xbf16>, tensor<516x332xbf16>) outs(%acc: tensor<540x516xbf16>) -> tensor<540x516xbf16> + return %result: tensor<540x516xbf16> +} + +func.func @matmul_654x321xbf16_times_234x321xbf16_into_654x234xbf16(%lhs: tensor<654x321xbf16>, %rhs: tensor<234x321xbf16>) -> tensor<654x234xbf16> { + %init_acc = tensor.empty() : tensor<654x234xbf16> + %c0_acc_type = arith.constant 0.0: bf16 + %acc = 
linalg.fill ins(%c0_acc_type : bf16) outs(%init_acc : tensor<654x234xbf16>) -> tensor<654x234xbf16> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<654x321xbf16>, tensor<234x321xbf16>) outs(%acc: tensor<654x234xbf16>) -> tensor<654x234xbf16> + return %result: tensor<654x234xbf16> +} + +func.func @matmul_457x160xbf16_times_512x160xbf16_into_457x512xbf16(%lhs: tensor<457x160xbf16>, %rhs: tensor<512x160xbf16>) -> tensor<457x512xbf16> { + %init_acc = tensor.empty() : tensor<457x512xbf16> + %c0_acc_type = arith.constant 0.0: bf16 + %acc = linalg.fill ins(%c0_acc_type : bf16) outs(%init_acc : tensor<457x512xbf16>) -> tensor<457x512xbf16> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<457x160xbf16>, tensor<512x160xbf16>) outs(%acc: tensor<457x512xbf16>) -> tensor<457x512xbf16> + return %result: tensor<457x512xbf16> +} + +func.func @matmul_512x330xbf16_times_512x330xbf16_into_512x512xbf16(%lhs: tensor<512x330xbf16>, %rhs: tensor<512x330xbf16>) -> tensor<512x512xbf16> { + %init_acc = tensor.empty() : tensor<512x512xbf16> + %c0_acc_type = arith.constant 0.0: bf16 + %acc = linalg.fill ins(%c0_acc_type : bf16) outs(%init_acc : tensor<512x512xbf16>) -> tensor<512x512xbf16> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<512x330xbf16>, tensor<512x330xbf16>) outs(%acc: tensor<512x512xbf16>) -> tensor<512x512xbf16> + return %result: tensor<512x512xbf16> +} + +func.func @matmul_accumulate_1x1000xbf16_times_1000x1000xbf16_into_1x1000xbf16(%lhs: tensor<1x1000xbf16>, %rhs: tensor<1000x1000xbf16>, %acc: tensor<1x1000xbf16>) -> tensor<1x1000xbf16> { + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<1x1000xbf16>, tensor<1000x1000xbf16>) outs(%acc: tensor<1x1000xbf16>) -> tensor<1x1000xbf16> + return %result: tensor<1x1000xbf16> +} + +func.func @matmul_accumulate_1000x1000xbf16_times_1x1000xbf16_into_1000x1xbf16(%lhs: tensor<1000x1000xbf16>, %rhs: tensor<1x1000xbf16>, %acc: tensor<1000x1xbf16>) -> tensor<1000x1xbf16> { + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<1000x1000xbf16>, tensor<1x1000xbf16>) outs(%acc: tensor<1000x1xbf16>) -> tensor<1000x1xbf16> + return %result: tensor<1000x1xbf16> +} + +func.func @matmul_1000x1000xbf16_times_1x1000xbf16_into_1000x1xbf16(%lhs: tensor<1000x1000xbf16>, %rhs: tensor<1x1000xbf16>) -> tensor<1000x1xbf16> { + %init_acc = tensor.empty() : tensor<1000x1xbf16> + %c0_acc_type = arith.constant 0.0: bf16 + %acc = linalg.fill ins(%c0_acc_type : bf16) outs(%init_acc : tensor<1000x1xbf16>) -> tensor<1000x1xbf16> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<1000x1000xbf16>, tensor<1x1000xbf16>) outs(%acc: tensor<1000x1xbf16>) -> tensor<1000x1xbf16> + return %result: tensor<1000x1xbf16> +} + diff --git a/linalg_ops/matmul/generated/bf16_into_bf16/matmul_transpose_b_bf16_into_bf16_large_calls.mlir b/linalg_ops/matmul/generated/bf16_into_bf16/matmul_transpose_b_bf16_into_bf16_large_calls.mlir new file mode 100644 index 0000000..28b0f2a --- /dev/null +++ b/linalg_ops/matmul/generated/bf16_into_bf16/matmul_transpose_b_bf16_into_bf16_large_calls.mlir @@ -0,0 +1,882 @@ +builtin.module @calls attributes { + +} { + +func.func private @matmul_test.generate_random_matrix(%device: !hal.device, %dim0: i64, %dim1: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @matmul_test.check_matmul_results(%device: !hal.device, %m: i64, %k: i64, %n: i64, %transpose_rhs: i32, %lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) + +func.func private 
@module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_512x128xbf16_times_512x128xbf16_into_512x512xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_512x128xbf16_times_512x128xbf16_into_512x512xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_1000x4xbf16_times_512x4xbf16_into_1000x512xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_4x1000xbf16_times_512x1000xbf16_into_4x512xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_512x1000xbf16_times_4x1000xbf16_into_512x4xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_512x128xbf16_times_500x128xbf16_into_512x500xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_457x330xbf16_times_512x330xbf16_into_457x512xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_457x330xbf16_times_514x330xbf16_into_457x514xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_438x330xbf16_times_514x330xbf16_into_438x514xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_540x332xbf16_times_516x332xbf16_into_540x516xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_654x321xbf16_times_234x321xbf16_into_654x234xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_457x160xbf16_times_512x160xbf16_into_457x512xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_512x330xbf16_times_512x330xbf16_into_512x512xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_1x1000xbf16_times_1000x1000xbf16_into_1x1000xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_1000x1000xbf16_times_1x1000xbf16_into_1000x1xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_1000x1000xbf16_times_1x1000xbf16_into_1000x1xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view + +func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_512_128_512_acc_0() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 2 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 128 : i64 + 
%rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 3 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 512 : i64 + %acc_dim1 = arith.constant 512 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 512 : i64 + %acc_copy_dim1 = arith.constant 512 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_512x128xbf16_times_512x128xbf16_into_512x512xbf16_512_128_512_acc_1() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 5 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 128 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 6 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 512 : i64 + %acc_dim1 = arith.constant 512 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 512 : i64 + %acc_copy_dim1 = arith.constant 512 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_512x128xbf16_times_512x128xbf16_into_512x512xbf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, 
%transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_512_128_512_2() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 8 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 128 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 9 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_512x128xbf16_times_512x128xbf16_into_512x512xbf16_512_128_512_3() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 10 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 128 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 11 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_512x128xbf16_times_512x128xbf16_into_512x512xbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_1000_4_512_4() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x4x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 4 : i64 + %lhs_element_type = hal.element_type : i32 + 
%lhs_seed = arith.constant 12 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 13 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_1000x4xbf16_times_512x4xbf16_into_1000x512xbf16_1000_4_512_5() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x4x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 4 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 14 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 15 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_1000x4xbf16_times_512x4xbf16_into_1000x512xbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_4_1000_512_6() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 4x1000x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 4 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 16 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 17 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call 
@module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_4x1000xbf16_times_512x1000xbf16_into_4x512xbf16_4_1000_512_7() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 4x1000x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 4 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 18 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 19 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_4x1000xbf16_times_512x1000xbf16_into_4x512xbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_512_1000_4_8() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x1000x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 20 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 21 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func 
@matmul_512x1000xbf16_times_4x1000xbf16_into_512x4xbf16_512_1000_4_9() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x1000x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 22 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 23 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_512x1000xbf16_times_4x1000xbf16_into_512x4xbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_512_128_500_10() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x500"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 24 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 500 : i64 + %rhs_dim1 = arith.constant 128 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 25 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 500 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_512x128xbf16_times_500x128xbf16_into_512x500xbf16_512_128_500_11() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x500"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 26 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> 
!hal.buffer_view + %rhs_dim0 = arith.constant 500 : i64 + %rhs_dim1 = arith.constant 128 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 27 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_512x128xbf16_times_500x128xbf16_into_512x500xbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 500 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_457_330_512_12() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x330x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 28 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 29 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_457x330xbf16_times_512x330xbf16_into_457x512xbf16_457_330_512_13() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x330x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 30 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 31 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_457x330xbf16_times_512x330xbf16_into_457x512xbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 
+ %k = arith.constant 330 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_457_330_514_14() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x330x514"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 32 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 514 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 33 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 514 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_457x330xbf16_times_514x330xbf16_into_457x514xbf16_457_330_514_15() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x330x514"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 34 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 514 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 35 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_457x330xbf16_times_514x330xbf16_into_457x514xbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 514 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_438_330_514_16() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 438x330x514"} +} { + %device_index = arith.constant 0 : index + %device 
= hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 438 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 36 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 514 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 37 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 438 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 514 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_438x330xbf16_times_514x330xbf16_into_438x514xbf16_438_330_514_17() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 438x330x514"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 438 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 38 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 514 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 39 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_438x330xbf16_times_514x330xbf16_into_438x514xbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 438 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 514 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_540_332_516_18() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 540x332x516"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 540 : i64 + %lhs_dim1 = arith.constant 332 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 40 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 516 : i64 + %rhs_dim1 = arith.constant 332 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 41 : i32 + %rhs = call 
@matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 540 : i64 + %k = arith.constant 332 : i64 + %n = arith.constant 516 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_540x332xbf16_times_516x332xbf16_into_540x516xbf16_540_332_516_19() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 540x332x516"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 540 : i64 + %lhs_dim1 = arith.constant 332 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 42 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 516 : i64 + %rhs_dim1 = arith.constant 332 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 43 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_540x332xbf16_times_516x332xbf16_into_540x516xbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 540 : i64 + %k = arith.constant 332 : i64 + %n = arith.constant 516 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_654_321_234_20() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 654x321x234"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 654 : i64 + %lhs_dim1 = arith.constant 321 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 44 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 234 : i64 + %rhs_dim1 = arith.constant 321 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 45 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 654 : i64 + %k = arith.constant 321 : i64 + %n = arith.constant 234 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, 
%rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_654x321xbf16_times_234x321xbf16_into_654x234xbf16_654_321_234_21() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 654x321x234"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 654 : i64 + %lhs_dim1 = arith.constant 321 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 46 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 234 : i64 + %rhs_dim1 = arith.constant 321 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 47 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_654x321xbf16_times_234x321xbf16_into_654x234xbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 654 : i64 + %k = arith.constant 321 : i64 + %n = arith.constant 234 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_457_160_512_22() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x160x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 160 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 48 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 160 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 49 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 160 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_457x160xbf16_times_512x160xbf16_into_457x512xbf16_457_160_512_23() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x160x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 160 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = 
arith.constant 50 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 160 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 51 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_457x160xbf16_times_512x160xbf16_into_457x512xbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 160 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_512_330_512_24() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x330x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 52 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 53 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_512x330xbf16_times_512x330xbf16_into_512x512xbf16_512_330_512_25() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x330x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 54 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 55 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = 
call @module.matmul_512x330xbf16_times_512x330xbf16_into_512x512xbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_1_1000_1000_acc_26() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1000x1000"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 56 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 57 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1000 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 58 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1000 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 58 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1000 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_1x1000xbf16_times_1000x1000xbf16_into_1x1000xbf16_1_1000_1000_acc_27() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1000x1000"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 59 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 60 : i32 + %rhs = call 
@matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1000 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 61 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1000 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 61 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_1x1000xbf16_times_1000x1000xbf16_into_1x1000xbf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1000 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_1000_1000_1_acc_28() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x1000x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 62 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 63 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1000 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 64 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1000 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 64 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, 
!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_1000x1000xbf16_times_1x1000xbf16_into_1000x1xbf16_1000_1000_1_acc_29() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x1000x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 65 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 66 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1000 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 67 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1000 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 67 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_1000x1000xbf16_times_1x1000xbf16_into_1000x1xbf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_1000_1000_1_30() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x1000x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 68 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 69 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call 
@matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()
+  return
+}
+
+func.func @matmul_1000x1000xbf16_times_1x1000xbf16_into_1000x1xbf16_1000_1000_1_31() attributes {
+  iree.reflection = {description = "Matmul shape (MxKxN): 1000x1000x1"}
+} {
+  %device_index = arith.constant 0 : index
+  %device = hal.devices.get %device_index : !hal.device
+  %lhs_dim0 = arith.constant 1000 : i64
+  %lhs_dim1 = arith.constant 1000 : i64
+  %lhs_element_type = hal.element_type<bf16> : i32
+  %lhs_seed = arith.constant 70 : i32
+  %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+  %rhs_dim0 = arith.constant 1 : i64
+  %rhs_dim1 = arith.constant 1000 : i64
+  %rhs_element_type = hal.element_type<bf16> : i32
+  %rhs_seed = arith.constant 71 : i32
+  %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+  %acc = util.null : !hal.buffer_view
+  %result = call @module.matmul_1000x1000xbf16_times_1x1000xbf16_into_1000x1xbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view
+  %m = arith.constant 1000 : i64
+  %k = arith.constant 1000 : i64
+  %n = arith.constant 1 : i64
+  %transpose_rhs = arith.constant 1 : i32
+  call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()
+  return
+}
+
+
+}
diff --git a/linalg_ops/matmul/generated/bf16_into_bf16/matmul_transpose_b_bf16_into_bf16_small.mlir b/linalg_ops/matmul/generated/bf16_into_bf16/matmul_transpose_b_bf16_into_bf16_small.mlir
new file mode 100644
index 0000000..439259c
--- /dev/null
+++ b/linalg_ops/matmul/generated/bf16_into_bf16/matmul_transpose_b_bf16_into_bf16_small.mlir
@@ -0,0 +1,99 @@
+func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs: tensor<?x?xbf16>, %rhs: tensor<?x?xbf16>, %acc: tensor<?x?xbf16>) -> tensor<?x?xbf16> {
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<?x?xbf16>, tensor<?x?xbf16>) outs(%acc: tensor<?x?xbf16>) -> tensor<?x?xbf16>
+  return %result: tensor<?x?xbf16>
+}
+
+func.func @matmul_accumulate_1x1xbf16_times_1x1xbf16_into_1x1xbf16(%lhs: tensor<1x1xbf16>, %rhs: tensor<1x1xbf16>, %acc: tensor<1x1xbf16>) -> tensor<1x1xbf16> {
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<1x1xbf16>, tensor<1x1xbf16>) outs(%acc: tensor<1x1xbf16>) -> tensor<1x1xbf16>
+  return %result: tensor<1x1xbf16>
+}
+
+func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs: tensor<?x?xbf16>, %rhs: tensor<?x?xbf16>) -> tensor<?x?xbf16> {
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+  %acc_dim0 = tensor.dim %lhs, %c0 : tensor<?x?xbf16>
+  %acc_dim1 = tensor.dim %rhs, %c1 : tensor<?x?xbf16>
+  %init_acc = tensor.empty(%acc_dim0, %acc_dim1) : tensor<?x?xbf16>
+  %c0_acc_type = arith.constant 0.0: bf16
+  %acc = linalg.fill ins(%c0_acc_type : bf16) outs(%init_acc : tensor<?x?xbf16>) -> tensor<?x?xbf16>
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<?x?xbf16>, tensor<?x?xbf16>) outs(%acc: tensor<?x?xbf16>) -> tensor<?x?xbf16>
+  return %result: tensor<?x?xbf16>
+}
+
+func.func @matmul_1x1xbf16_times_1x1xbf16_into_1x1xbf16(%lhs: tensor<1x1xbf16>, %rhs: tensor<1x1xbf16>) -> tensor<1x1xbf16> {
+  %init_acc = tensor.empty() : tensor<1x1xbf16>
+  %c0_acc_type = arith.constant 0.0: bf16
+  %acc = linalg.fill ins(%c0_acc_type : bf16) outs(%init_acc :
tensor<1x1xbf16>) -> tensor<1x1xbf16> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<1x1xbf16>, tensor<1x1xbf16>) outs(%acc: tensor<1x1xbf16>) -> tensor<1x1xbf16> + return %result: tensor<1x1xbf16> +} + +func.func @matmul_accumulate_2x2xbf16_times_2x2xbf16_into_2x2xbf16(%lhs: tensor<2x2xbf16>, %rhs: tensor<2x2xbf16>, %acc: tensor<2x2xbf16>) -> tensor<2x2xbf16> { + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<2x2xbf16>, tensor<2x2xbf16>) outs(%acc: tensor<2x2xbf16>) -> tensor<2x2xbf16> + return %result: tensor<2x2xbf16> +} + +func.func @matmul_accumulate_4x4xbf16_times_4x4xbf16_into_4x4xbf16(%lhs: tensor<4x4xbf16>, %rhs: tensor<4x4xbf16>, %acc: tensor<4x4xbf16>) -> tensor<4x4xbf16> { + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<4x4xbf16>, tensor<4x4xbf16>) outs(%acc: tensor<4x4xbf16>) -> tensor<4x4xbf16> + return %result: tensor<4x4xbf16> +} + +func.func @matmul_accumulate_8x8xbf16_times_8x8xbf16_into_8x8xbf16(%lhs: tensor<8x8xbf16>, %rhs: tensor<8x8xbf16>, %acc: tensor<8x8xbf16>) -> tensor<8x8xbf16> { + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<8x8xbf16>, tensor<8x8xbf16>) outs(%acc: tensor<8x8xbf16>) -> tensor<8x8xbf16> + return %result: tensor<8x8xbf16> +} + +func.func @matmul_accumulate_9x9xbf16_times_9x9xbf16_into_9x9xbf16(%lhs: tensor<9x9xbf16>, %rhs: tensor<9x9xbf16>, %acc: tensor<9x9xbf16>) -> tensor<9x9xbf16> { + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<9x9xbf16>, tensor<9x9xbf16>) outs(%acc: tensor<9x9xbf16>) -> tensor<9x9xbf16> + return %result: tensor<9x9xbf16> +} + +func.func @matmul_accumulate_6x13xbf16_times_3x13xbf16_into_6x3xbf16(%lhs: tensor<6x13xbf16>, %rhs: tensor<3x13xbf16>, %acc: tensor<6x3xbf16>) -> tensor<6x3xbf16> { + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<6x13xbf16>, tensor<3x13xbf16>) outs(%acc: tensor<6x3xbf16>) -> tensor<6x3xbf16> + return %result: tensor<6x3xbf16> +} + +func.func @matmul_15x37xbf16_times_7x37xbf16_into_15x7xbf16(%lhs: tensor<15x37xbf16>, %rhs: tensor<7x37xbf16>) -> tensor<15x7xbf16> { + %init_acc = tensor.empty() : tensor<15x7xbf16> + %c0_acc_type = arith.constant 0.0: bf16 + %acc = linalg.fill ins(%c0_acc_type : bf16) outs(%init_acc : tensor<15x7xbf16>) -> tensor<15x7xbf16> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<15x37xbf16>, tensor<7x37xbf16>) outs(%acc: tensor<15x7xbf16>) -> tensor<15x7xbf16> + return %result: tensor<15x7xbf16> +} + +func.func @matmul_accumulate_81x19xbf16_times_41x19xbf16_into_81x41xbf16(%lhs: tensor<81x19xbf16>, %rhs: tensor<41x19xbf16>, %acc: tensor<81x41xbf16>) -> tensor<81x41xbf16> { + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<81x19xbf16>, tensor<41x19xbf16>) outs(%acc: tensor<81x41xbf16>) -> tensor<81x41xbf16> + return %result: tensor<81x41xbf16> +} + +func.func @matmul_accumulate_1x10xbf16_times_10x10xbf16_into_1x10xbf16(%lhs: tensor<1x10xbf16>, %rhs: tensor<10x10xbf16>, %acc: tensor<1x10xbf16>) -> tensor<1x10xbf16> { + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<1x10xbf16>, tensor<10x10xbf16>) outs(%acc: tensor<1x10xbf16>) -> tensor<1x10xbf16> + return %result: tensor<1x10xbf16> +} + +func.func @matmul_1x10xbf16_times_10x10xbf16_into_1x10xbf16(%lhs: tensor<1x10xbf16>, %rhs: tensor<10x10xbf16>) -> tensor<1x10xbf16> { + %init_acc = tensor.empty() : tensor<1x10xbf16> + %c0_acc_type = arith.constant 0.0: bf16 + %acc = linalg.fill ins(%c0_acc_type : bf16) outs(%init_acc : tensor<1x10xbf16>) -> tensor<1x10xbf16> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: 
tensor<1x10xbf16>, tensor<10x10xbf16>) outs(%acc: tensor<1x10xbf16>) -> tensor<1x10xbf16> + return %result: tensor<1x10xbf16> +} + +func.func @matmul_accumulate_10x1xbf16_times_10x1xbf16_into_10x10xbf16(%lhs: tensor<10x1xbf16>, %rhs: tensor<10x1xbf16>, %acc: tensor<10x10xbf16>) -> tensor<10x10xbf16> { + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<10x1xbf16>, tensor<10x1xbf16>) outs(%acc: tensor<10x10xbf16>) -> tensor<10x10xbf16> + return %result: tensor<10x10xbf16> +} + +func.func @matmul_accumulate_10x10xbf16_times_1x10xbf16_into_10x1xbf16(%lhs: tensor<10x10xbf16>, %rhs: tensor<1x10xbf16>, %acc: tensor<10x1xbf16>) -> tensor<10x1xbf16> { + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<10x10xbf16>, tensor<1x10xbf16>) outs(%acc: tensor<10x1xbf16>) -> tensor<10x1xbf16> + return %result: tensor<10x1xbf16> +} + +func.func @matmul_10x10xbf16_times_1x10xbf16_into_10x1xbf16(%lhs: tensor<10x10xbf16>, %rhs: tensor<1x10xbf16>) -> tensor<10x1xbf16> { + %init_acc = tensor.empty() : tensor<10x1xbf16> + %c0_acc_type = arith.constant 0.0: bf16 + %acc = linalg.fill ins(%c0_acc_type : bf16) outs(%init_acc : tensor<10x1xbf16>) -> tensor<10x1xbf16> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<10x10xbf16>, tensor<1x10xbf16>) outs(%acc: tensor<10x1xbf16>) -> tensor<10x1xbf16> + return %result: tensor<10x1xbf16> +} + diff --git a/linalg_ops/matmul/generated/bf16_into_bf16/matmul_transpose_b_bf16_into_bf16_small_calls.mlir b/linalg_ops/matmul/generated/bf16_into_bf16/matmul_transpose_b_bf16_into_bf16_small_calls.mlir new file mode 100644 index 0000000..b06ab1f --- /dev/null +++ b/linalg_ops/matmul/generated/bf16_into_bf16/matmul_transpose_b_bf16_into_bf16_small_calls.mlir @@ -0,0 +1,906 @@ +builtin.module @calls attributes { + +} { + +func.func private @matmul_test.generate_random_matrix(%device: !hal.device, %dim0: i64, %dim1: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @matmul_test.check_matmul_results(%device: !hal.device, %m: i64, %k: i64, %n: i64, %transpose_rhs: i32, %lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) + +func.func private @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_1x1xbf16_times_1x1xbf16_into_1x1xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_1x1xbf16_times_1x1xbf16_into_1x1xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_2x2xbf16_times_2x2xbf16_into_2x2xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_4x4xbf16_times_4x4xbf16_into_4x4xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_8x8xbf16_times_8x8xbf16_into_8x8xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_9x9xbf16_times_9x9xbf16_into_9x9xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private 
@module.matmul_accumulate_6x13xbf16_times_3x13xbf16_into_6x3xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_15x37xbf16_times_7x37xbf16_into_15x7xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_81x19xbf16_times_41x19xbf16_into_81x41xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_1x10xbf16_times_10x10xbf16_into_1x10xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_1x10xbf16_times_10x10xbf16_into_1x10xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_10x1xbf16_times_10x1xbf16_into_10x10xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_10x10xbf16_times_1x10xbf16_into_10x1xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_10x10xbf16_times_1x10xbf16_into_10x1xbf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view + +func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_1_1_1_acc_0() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 2 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 3 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func 
@matmul_accumulate_1x1xbf16_times_1x1xbf16_into_1x1xbf16_1_1_1_acc_1() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 5 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 6 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_1x1xbf16_times_1x1xbf16_into_1x1xbf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_1_1_1_2() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 8 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 9 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, 
!hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_1x1xbf16_times_1x1xbf16_into_1x1xbf16_1_1_1_3() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 10 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 11 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_1x1xbf16_times_1x1xbf16_into_1x1xbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_2_2_2_acc_4() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 2x2x2"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 2 : i64 + %lhs_dim1 = arith.constant 2 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 12 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 2 : i64 + %rhs_dim1 = arith.constant 2 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 13 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 2 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 14 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 2 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 14 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 2 : i64 + %k = arith.constant 2 : i64 + %n = arith.constant 2 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : 
(!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_2x2xbf16_times_2x2xbf16_into_2x2xbf16_2_2_2_acc_5() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 2x2x2"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 2 : i64 + %lhs_dim1 = arith.constant 2 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 15 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 2 : i64 + %rhs_dim1 = arith.constant 2 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 16 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 2 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 17 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 2 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 17 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_2x2xbf16_times_2x2xbf16_into_2x2xbf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 2 : i64 + %k = arith.constant 2 : i64 + %n = arith.constant 2 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_4_4_4_acc_6() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 4x4x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 4 : i64 + %lhs_dim1 = arith.constant 4 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 18 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 19 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 4 : i64 + %acc_dim1 = arith.constant 4 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 20 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = 
arith.constant 4 : i64 + %acc_copy_dim1 = arith.constant 4 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 20 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_4x4xbf16_times_4x4xbf16_into_4x4xbf16_4_4_4_acc_7() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 4x4x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 4 : i64 + %lhs_dim1 = arith.constant 4 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 21 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 22 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 4 : i64 + %acc_dim1 = arith.constant 4 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 23 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 4 : i64 + %acc_copy_dim1 = arith.constant 4 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 23 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_4x4xbf16_times_4x4xbf16_into_4x4xbf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_8_8_8_acc_8() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 8x8x8"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 8 : i64 + %lhs_dim1 = arith.constant 8 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 24 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, 
%lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 8 : i64 + %rhs_dim1 = arith.constant 8 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 25 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 8 : i64 + %acc_dim1 = arith.constant 8 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 26 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 8 : i64 + %acc_copy_dim1 = arith.constant 8 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 26 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 8 : i64 + %k = arith.constant 8 : i64 + %n = arith.constant 8 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_8x8xbf16_times_8x8xbf16_into_8x8xbf16_8_8_8_acc_9() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 8x8x8"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 8 : i64 + %lhs_dim1 = arith.constant 8 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 27 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 8 : i64 + %rhs_dim1 = arith.constant 8 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 28 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 8 : i64 + %acc_dim1 = arith.constant 8 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 29 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 8 : i64 + %acc_copy_dim1 = arith.constant 8 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 29 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_8x8xbf16_times_8x8xbf16_into_8x8xbf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 8 : i64 + %k = arith.constant 8 : i64 + %n = arith.constant 8 
: i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_9_9_9_acc_10() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 9x9x9"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 9 : i64 + %lhs_dim1 = arith.constant 9 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 30 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 9 : i64 + %rhs_dim1 = arith.constant 9 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 31 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 9 : i64 + %acc_dim1 = arith.constant 9 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 32 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 9 : i64 + %acc_copy_dim1 = arith.constant 9 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 32 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 9 : i64 + %k = arith.constant 9 : i64 + %n = arith.constant 9 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_9x9xbf16_times_9x9xbf16_into_9x9xbf16_9_9_9_acc_11() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 9x9x9"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 9 : i64 + %lhs_dim1 = arith.constant 9 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 33 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 9 : i64 + %rhs_dim1 = arith.constant 9 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 34 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 9 : i64 + %acc_dim1 = arith.constant 9 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 35 : i32 + %acc = call 
@matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 9 : i64 + %acc_copy_dim1 = arith.constant 9 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 35 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_9x9xbf16_times_9x9xbf16_into_9x9xbf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 9 : i64 + %k = arith.constant 9 : i64 + %n = arith.constant 9 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_6_13_3_acc_12() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 6x13x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 6 : i64 + %lhs_dim1 = arith.constant 13 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 36 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 3 : i64 + %rhs_dim1 = arith.constant 13 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 37 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 6 : i64 + %acc_dim1 = arith.constant 3 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 38 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 6 : i64 + %acc_copy_dim1 = arith.constant 3 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 38 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 6 : i64 + %k = arith.constant 13 : i64 + %n = arith.constant 3 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_6x13xbf16_times_3x13xbf16_into_6x3xbf16_6_13_3_acc_13() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 6x13x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 6 : 
i64 + %lhs_dim1 = arith.constant 13 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 39 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 3 : i64 + %rhs_dim1 = arith.constant 13 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 40 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 6 : i64 + %acc_dim1 = arith.constant 3 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 41 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 6 : i64 + %acc_copy_dim1 = arith.constant 3 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 41 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_6x13xbf16_times_3x13xbf16_into_6x3xbf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 6 : i64 + %k = arith.constant 13 : i64 + %n = arith.constant 3 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_15_37_7_14() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 15x37x7"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 15 : i64 + %lhs_dim1 = arith.constant 37 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 42 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 7 : i64 + %rhs_dim1 = arith.constant 37 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 43 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 15 : i64 + %k = arith.constant 37 : i64 + %n = arith.constant 7 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_15x37xbf16_times_7x37xbf16_into_15x7xbf16_15_37_7_15() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 15x37x7"} +} { + %device_index = arith.constant 0 : index + %device = 
hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 15 : i64 + %lhs_dim1 = arith.constant 37 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 44 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 7 : i64 + %rhs_dim1 = arith.constant 37 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 45 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_15x37xbf16_times_7x37xbf16_into_15x7xbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 15 : i64 + %k = arith.constant 37 : i64 + %n = arith.constant 7 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_81_19_41_acc_16() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 81x19x41"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 81 : i64 + %lhs_dim1 = arith.constant 19 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 46 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 41 : i64 + %rhs_dim1 = arith.constant 19 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 47 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 81 : i64 + %acc_dim1 = arith.constant 41 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 48 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 81 : i64 + %acc_copy_dim1 = arith.constant 41 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 48 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 81 : i64 + %k = arith.constant 19 : i64 + %n = arith.constant 41 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_81x19xbf16_times_41x19xbf16_into_81x41xbf16_81_19_41_acc_17() attributes { + 
iree.reflection = {description = "Matmul shape (MxKxN): 81x19x41"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 81 : i64 + %lhs_dim1 = arith.constant 19 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 49 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 41 : i64 + %rhs_dim1 = arith.constant 19 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 50 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 81 : i64 + %acc_dim1 = arith.constant 41 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 51 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 81 : i64 + %acc_copy_dim1 = arith.constant 41 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 51 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_81x19xbf16_times_41x19xbf16_into_81x41xbf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 81 : i64 + %k = arith.constant 19 : i64 + %n = arith.constant 41 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_1_10_10_acc_18() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 52 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 53 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 10 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 54 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 10 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 54 : i32 + %acc_copy = call 
@matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_1x10xbf16_times_10x10xbf16_into_1x10xbf16_1_10_10_acc_19() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 55 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 56 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 10 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 57 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 10 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 57 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_1x10xbf16_times_10x10xbf16_into_1x10xbf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_1_10_10_20() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 58 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = 
arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 59 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_1x10xbf16_times_10x10xbf16_into_1x10xbf16_1_10_10_21() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 60 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 61 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_1x10xbf16_times_10x10xbf16_into_1x10xbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_10_1_10_acc_22() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x1x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 62 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 63 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 10 : i64 + %acc_dim1 = arith.constant 10 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 64 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + 
%acc_copy_dim0 = arith.constant 10 : i64 + %acc_copy_dim1 = arith.constant 10 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 64 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_10x1xbf16_times_10x1xbf16_into_10x10xbf16_10_1_10_acc_23() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x1x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 65 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 66 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 10 : i64 + %acc_dim1 = arith.constant 10 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 67 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 10 : i64 + %acc_copy_dim1 = arith.constant 10 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 67 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_10x1xbf16_times_10x1xbf16_into_10x10xbf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_10_10_1_acc_24() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 68 : i32 + %lhs = 
call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 69 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 10 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 70 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 10 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 70 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_10x10xbf16_times_1x10xbf16_into_10x1xbf16_10_10_1_acc_25() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 71 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 72 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 10 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 73 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 10 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 73 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_10x10xbf16_times_1x10xbf16_into_10x1xbf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = 
arith.constant 10 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16_10_10_1_26() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 74 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 75 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_10x10xbf16_times_1x10xbf16_into_10x1xbf16_10_10_1_27() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 76 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 77 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_10x10xbf16_times_1x10xbf16_into_10x1xbf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + + +} diff --git a/linalg_ops/matmul/generated/bf16_into_f32/matmul_bf16_into_f32_large.mlir b/linalg_ops/matmul/generated/bf16_into_f32/matmul_bf16_into_f32_large.mlir new file mode 100644 index 0000000..4753c6a --- /dev/null +++ 
b/linalg_ops/matmul/generated/bf16_into_f32/matmul_bf16_into_f32_large.mlir @@ -0,0 +1,136 @@ +func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs: tensor<?x?xbf16>, %rhs: tensor<?x?xbf16>, %acc: tensor<?x?xf32>) -> tensor<?x?xf32> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<?x?xbf16>, tensor<?x?xbf16>) outs(%acc: tensor<?x?xf32>) -> tensor<?x?xf32> + return %result: tensor<?x?xf32> +} + +func.func @matmul_accumulate_512x128xbf16_times_128x512xbf16_into_512x512xf32(%lhs: tensor<512x128xbf16>, %rhs: tensor<128x512xbf16>, %acc: tensor<512x512xf32>) -> tensor<512x512xf32> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<512x128xbf16>, tensor<128x512xbf16>) outs(%acc: tensor<512x512xf32>) -> tensor<512x512xf32> + return %result: tensor<512x512xf32> +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs: tensor<?x?xbf16>, %rhs: tensor<?x?xbf16>) -> tensor<?x?xf32> { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %acc_dim0 = tensor.dim %lhs, %c0 : tensor<?x?xbf16> + %acc_dim1 = tensor.dim %rhs, %c1 : tensor<?x?xbf16> + %init_acc = tensor.empty(%acc_dim0, %acc_dim1) : tensor<?x?xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<?x?xf32>) -> tensor<?x?xf32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<?x?xbf16>, tensor<?x?xbf16>) outs(%acc: tensor<?x?xf32>) -> tensor<?x?xf32> + return %result: tensor<?x?xf32> +} + +func.func @matmul_512x128xbf16_times_128x512xbf16_into_512x512xf32(%lhs: tensor<512x128xbf16>, %rhs: tensor<128x512xbf16>) -> tensor<512x512xf32> { + %init_acc = tensor.empty() : tensor<512x512xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<512x512xf32>) -> tensor<512x512xf32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<512x128xbf16>, tensor<128x512xbf16>) outs(%acc: tensor<512x512xf32>) -> tensor<512x512xf32> + return %result: tensor<512x512xf32> +} + +func.func @matmul_1000x4xbf16_times_4x512xbf16_into_1000x512xf32(%lhs: tensor<1000x4xbf16>, %rhs: tensor<4x512xbf16>) -> tensor<1000x512xf32> { + %init_acc = tensor.empty() : tensor<1000x512xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<1000x512xf32>) -> tensor<1000x512xf32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<1000x4xbf16>, tensor<4x512xbf16>) outs(%acc: tensor<1000x512xf32>) -> tensor<1000x512xf32> + return %result: tensor<1000x512xf32> +} + +func.func @matmul_4x1000xbf16_times_1000x512xbf16_into_4x512xf32(%lhs: tensor<4x1000xbf16>, %rhs: tensor<1000x512xbf16>) -> tensor<4x512xf32> { + %init_acc = tensor.empty() : tensor<4x512xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<4x512xf32>) -> tensor<4x512xf32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<4x1000xbf16>, tensor<1000x512xbf16>) outs(%acc: tensor<4x512xf32>) -> tensor<4x512xf32> + return %result: tensor<4x512xf32> +} + +func.func @matmul_512x1000xbf16_times_1000x4xbf16_into_512x4xf32(%lhs: tensor<512x1000xbf16>, %rhs: tensor<1000x4xbf16>) -> tensor<512x4xf32> { + %init_acc = tensor.empty() : tensor<512x4xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<512x4xf32>) -> tensor<512x4xf32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<512x1000xbf16>, tensor<1000x4xbf16>) outs(%acc: tensor<512x4xf32>) -> tensor<512x4xf32> + return %result: tensor<512x4xf32> +} + +func.func @matmul_512x128xbf16_times_128x500xbf16_into_512x500xf32(%lhs: tensor<512x128xbf16>, %rhs: tensor<128x500xbf16>) -> tensor<512x500xf32> { + %init_acc = tensor.empty() : 
tensor<512x500xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<512x500xf32>) -> tensor<512x500xf32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<512x128xbf16>, tensor<128x500xbf16>) outs(%acc: tensor<512x500xf32>) -> tensor<512x500xf32> + return %result: tensor<512x500xf32> +} + +func.func @matmul_457x330xbf16_times_330x512xbf16_into_457x512xf32(%lhs: tensor<457x330xbf16>, %rhs: tensor<330x512xbf16>) -> tensor<457x512xf32> { + %init_acc = tensor.empty() : tensor<457x512xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<457x512xf32>) -> tensor<457x512xf32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<457x330xbf16>, tensor<330x512xbf16>) outs(%acc: tensor<457x512xf32>) -> tensor<457x512xf32> + return %result: tensor<457x512xf32> +} + +func.func @matmul_457x330xbf16_times_330x514xbf16_into_457x514xf32(%lhs: tensor<457x330xbf16>, %rhs: tensor<330x514xbf16>) -> tensor<457x514xf32> { + %init_acc = tensor.empty() : tensor<457x514xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<457x514xf32>) -> tensor<457x514xf32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<457x330xbf16>, tensor<330x514xbf16>) outs(%acc: tensor<457x514xf32>) -> tensor<457x514xf32> + return %result: tensor<457x514xf32> +} + +func.func @matmul_438x330xbf16_times_330x514xbf16_into_438x514xf32(%lhs: tensor<438x330xbf16>, %rhs: tensor<330x514xbf16>) -> tensor<438x514xf32> { + %init_acc = tensor.empty() : tensor<438x514xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<438x514xf32>) -> tensor<438x514xf32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<438x330xbf16>, tensor<330x514xbf16>) outs(%acc: tensor<438x514xf32>) -> tensor<438x514xf32> + return %result: tensor<438x514xf32> +} + +func.func @matmul_540x332xbf16_times_332x516xbf16_into_540x516xf32(%lhs: tensor<540x332xbf16>, %rhs: tensor<332x516xbf16>) -> tensor<540x516xf32> { + %init_acc = tensor.empty() : tensor<540x516xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<540x516xf32>) -> tensor<540x516xf32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<540x332xbf16>, tensor<332x516xbf16>) outs(%acc: tensor<540x516xf32>) -> tensor<540x516xf32> + return %result: tensor<540x516xf32> +} + +func.func @matmul_654x321xbf16_times_321x234xbf16_into_654x234xf32(%lhs: tensor<654x321xbf16>, %rhs: tensor<321x234xbf16>) -> tensor<654x234xf32> { + %init_acc = tensor.empty() : tensor<654x234xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<654x234xf32>) -> tensor<654x234xf32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<654x321xbf16>, tensor<321x234xbf16>) outs(%acc: tensor<654x234xf32>) -> tensor<654x234xf32> + return %result: tensor<654x234xf32> +} + +func.func @matmul_457x160xbf16_times_160x512xbf16_into_457x512xf32(%lhs: tensor<457x160xbf16>, %rhs: tensor<160x512xbf16>) -> tensor<457x512xf32> { + %init_acc = tensor.empty() : tensor<457x512xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<457x512xf32>) -> tensor<457x512xf32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<457x160xbf16>, tensor<160x512xbf16>) outs(%acc: tensor<457x512xf32>) -> tensor<457x512xf32> + return %result: tensor<457x512xf32> +} + +func.func 
@matmul_512x330xbf16_times_330x512xbf16_into_512x512xf32(%lhs: tensor<512x330xbf16>, %rhs: tensor<330x512xbf16>) -> tensor<512x512xf32> { + %init_acc = tensor.empty() : tensor<512x512xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<512x512xf32>) -> tensor<512x512xf32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<512x330xbf16>, tensor<330x512xbf16>) outs(%acc: tensor<512x512xf32>) -> tensor<512x512xf32> + return %result: tensor<512x512xf32> +} + +func.func @matmul_accumulate_1x1000xbf16_times_1000x1000xbf16_into_1x1000xf32(%lhs: tensor<1x1000xbf16>, %rhs: tensor<1000x1000xbf16>, %acc: tensor<1x1000xf32>) -> tensor<1x1000xf32> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<1x1000xbf16>, tensor<1000x1000xbf16>) outs(%acc: tensor<1x1000xf32>) -> tensor<1x1000xf32> + return %result: tensor<1x1000xf32> +} + +func.func @matmul_accumulate_1000x1000xbf16_times_1000x1xbf16_into_1000x1xf32(%lhs: tensor<1000x1000xbf16>, %rhs: tensor<1000x1xbf16>, %acc: tensor<1000x1xf32>) -> tensor<1000x1xf32> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<1000x1000xbf16>, tensor<1000x1xbf16>) outs(%acc: tensor<1000x1xf32>) -> tensor<1000x1xf32> + return %result: tensor<1000x1xf32> +} + +func.func @matmul_1000x1000xbf16_times_1000x1xbf16_into_1000x1xf32(%lhs: tensor<1000x1000xbf16>, %rhs: tensor<1000x1xbf16>) -> tensor<1000x1xf32> { + %init_acc = tensor.empty() : tensor<1000x1xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<1000x1xf32>) -> tensor<1000x1xf32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<1000x1000xbf16>, tensor<1000x1xbf16>) outs(%acc: tensor<1000x1xf32>) -> tensor<1000x1xf32> + return %result: tensor<1000x1xf32> +} + diff --git a/linalg_ops/matmul/generated/bf16_into_f32/matmul_bf16_into_f32_large_calls.mlir b/linalg_ops/matmul/generated/bf16_into_f32/matmul_bf16_into_f32_large_calls.mlir new file mode 100644 index 0000000..e21103c --- /dev/null +++ b/linalg_ops/matmul/generated/bf16_into_f32/matmul_bf16_into_f32_large_calls.mlir @@ -0,0 +1,882 @@ +builtin.module @calls attributes { + +} { + +func.func private @matmul_test.generate_random_matrix(%device: !hal.device, %dim0: i64, %dim1: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @matmul_test.check_matmul_results(%device: !hal.device, %m: i64, %k: i64, %n: i64, %transpose_rhs: i32, %lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) + +func.func private @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_512x128xbf16_times_128x512xbf16_into_512x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_512x128xbf16_times_128x512xbf16_into_512x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_1000x4xbf16_times_4x512xbf16_into_1000x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_4x1000xbf16_times_1000x512xbf16_into_4x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private 
@module.matmul_512x1000xbf16_times_1000x4xbf16_into_512x4xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_512x128xbf16_times_128x500xbf16_into_512x500xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_457x330xbf16_times_330x512xbf16_into_457x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_457x330xbf16_times_330x514xbf16_into_457x514xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_438x330xbf16_times_330x514xbf16_into_438x514xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_540x332xbf16_times_332x516xbf16_into_540x516xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_654x321xbf16_times_321x234xbf16_into_654x234xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_457x160xbf16_times_160x512xbf16_into_457x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_512x330xbf16_times_330x512xbf16_into_512x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_1x1000xbf16_times_1000x1000xbf16_into_1x1000xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_1000x1000xbf16_times_1000x1xbf16_into_1000x1xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_1000x1000xbf16_times_1000x1xbf16_into_1000x1xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view + +func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_512_128_512_acc_0() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 2 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 128 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 3 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 512 : i64 + %acc_dim1 = arith.constant 512 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 512 : i64 + %acc_copy_dim1 = arith.constant 512 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call 
@module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_512x128xbf16_times_128x512xbf16_into_512x512xf32_512_128_512_acc_1() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 5 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 128 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 6 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 512 : i64 + %acc_dim1 = arith.constant 512 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 512 : i64 + %acc_copy_dim1 = arith.constant 512 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_512x128xbf16_times_128x512xbf16_into_512x512xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_512_128_512_2() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 8 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 128 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 9 : i32 + %rhs = call 
@matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_512x128xbf16_times_128x512xbf16_into_512x512xf32_512_128_512_3() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 10 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 128 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 11 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_512x128xbf16_times_128x512xbf16_into_512x512xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_1000_4_512_4() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x4x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 4 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 12 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 13 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, 
%result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_1000x4xbf16_times_4x512xbf16_into_1000x512xf32_1000_4_512_5() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x4x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 4 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 14 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 15 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_1000x4xbf16_times_4x512xbf16_into_1000x512xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_4_1000_512_6() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 4x1000x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 4 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 16 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 17 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_4x1000xbf16_times_1000x512xbf16_into_4x512xf32_4_1000_512_7() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 4x1000x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 4 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 18 : i32 + %lhs = call 
@matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 19 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_4x1000xbf16_times_1000x512xbf16_into_4x512xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_512_1000_4_8() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x1000x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 20 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 21 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_512x1000xbf16_times_1000x4xbf16_into_512x4xf32_512_1000_4_9() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x1000x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 22 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 23 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call 
@module.matmul_512x1000xbf16_times_1000x4xbf16_into_512x4xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_512_128_500_10() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x500"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 24 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 128 : i64 + %rhs_dim1 = arith.constant 500 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 25 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 500 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_512x128xbf16_times_128x500xbf16_into_512x500xf32_512_128_500_11() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x500"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 26 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 128 : i64 + %rhs_dim1 = arith.constant 500 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 27 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_512x128xbf16_times_128x500xbf16_into_512x500xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 500 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func 
@matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_457_330_512_12() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x330x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 28 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 29 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_457x330xbf16_times_330x512xbf16_into_457x512xf32_457_330_512_13() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x330x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 30 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 31 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_457x330xbf16_times_330x512xbf16_into_457x512xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_457_330_514_14() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x330x514"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 32 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) 
-> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 514 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 33 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 514 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_457x330xbf16_times_330x514xbf16_into_457x514xf32_457_330_514_15() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x330x514"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 34 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 514 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 35 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_457x330xbf16_times_330x514xbf16_into_457x514xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 514 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_438_330_514_16() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 438x330x514"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 438 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 36 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 514 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 37 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 438 : i64 + 
%k = arith.constant 330 : i64 + %n = arith.constant 514 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_438x330xbf16_times_330x514xbf16_into_438x514xf32_438_330_514_17() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 438x330x514"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 438 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 38 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 514 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 39 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_438x330xbf16_times_330x514xbf16_into_438x514xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 438 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 514 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_540_332_516_18() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 540x332x516"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 540 : i64 + %lhs_dim1 = arith.constant 332 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 40 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 332 : i64 + %rhs_dim1 = arith.constant 516 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 41 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 540 : i64 + %k = arith.constant 332 : i64 + %n = arith.constant 516 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_540x332xbf16_times_332x516xbf16_into_540x516xf32_540_332_516_19() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 540x332x516"} +} { + %device_index = arith.constant 0 : index + %device = 
hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 540 : i64 + %lhs_dim1 = arith.constant 332 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 42 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 332 : i64 + %rhs_dim1 = arith.constant 516 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 43 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_540x332xbf16_times_332x516xbf16_into_540x516xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 540 : i64 + %k = arith.constant 332 : i64 + %n = arith.constant 516 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_654_321_234_20() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 654x321x234"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 654 : i64 + %lhs_dim1 = arith.constant 321 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 44 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 321 : i64 + %rhs_dim1 = arith.constant 234 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 45 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 654 : i64 + %k = arith.constant 321 : i64 + %n = arith.constant 234 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_654x321xbf16_times_321x234xbf16_into_654x234xf32_654_321_234_21() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 654x321x234"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 654 : i64 + %lhs_dim1 = arith.constant 321 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 46 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 321 : i64 + %rhs_dim1 = arith.constant 234 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 47 : i32 + %rhs = call 
@matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_654x321xbf16_times_321x234xbf16_into_654x234xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 654 : i64 + %k = arith.constant 321 : i64 + %n = arith.constant 234 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_457_160_512_22() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x160x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 160 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 48 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 160 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 49 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 160 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_457x160xbf16_times_160x512xbf16_into_457x512xf32_457_160_512_23() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x160x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 160 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 50 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 160 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 51 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_457x160xbf16_times_160x512xbf16_into_457x512xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 160 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, 
%acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_512_330_512_24() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x330x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 52 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 53 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_512x330xbf16_times_330x512xbf16_into_512x512xf32_512_330_512_25() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x330x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 54 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 55 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_512x330xbf16_times_330x512xbf16_into_512x512xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_1_1000_1000_acc_26() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1000x1000"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = 
arith.constant 56 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 57 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1000 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 58 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1000 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 58 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1000 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_1x1000xbf16_times_1000x1000xbf16_into_1x1000xf32_1_1000_1000_acc_27() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1000x1000"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 59 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 60 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1000 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 61 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1000 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 61 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_1x1000xbf16_times_1000x1000xbf16_into_1x1000xf32(%lhs, %rhs, %acc_copy) : 
(!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1000 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_1000_1000_1_acc_28() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x1000x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 62 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 63 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1000 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 64 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1000 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 64 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_1000x1000xbf16_times_1000x1xbf16_into_1000x1xf32_1000_1000_1_acc_29() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x1000x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 65 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 66 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : 
(!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1000 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 67 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1000 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 67 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_1000x1000xbf16_times_1000x1xbf16_into_1000x1xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_1000_1000_1_30() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x1000x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 68 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 69 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_1000x1000xbf16_times_1000x1xbf16_into_1000x1xf32_1000_1000_1_31() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x1000x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 70 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = 
arith.constant 71 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_1000x1000xbf16_times_1000x1xbf16_into_1000x1xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + + +} diff --git a/linalg_ops/matmul/generated/bf16_into_f32/matmul_bf16_into_f32_small.mlir b/linalg_ops/matmul/generated/bf16_into_f32/matmul_bf16_into_f32_small.mlir new file mode 100644 index 0000000..44378a6 --- /dev/null +++ b/linalg_ops/matmul/generated/bf16_into_f32/matmul_bf16_into_f32_small.mlir @@ -0,0 +1,99 @@ +func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs: tensor<?x?xbf16>, %rhs: tensor<?x?xbf16>, %acc: tensor<?x?xf32>) -> tensor<?x?xf32> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<?x?xbf16>, tensor<?x?xbf16>) outs(%acc: tensor<?x?xf32>) -> tensor<?x?xf32> + return %result: tensor<?x?xf32> +} + +func.func @matmul_accumulate_1x1xbf16_times_1x1xbf16_into_1x1xf32(%lhs: tensor<1x1xbf16>, %rhs: tensor<1x1xbf16>, %acc: tensor<1x1xf32>) -> tensor<1x1xf32> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<1x1xbf16>, tensor<1x1xbf16>) outs(%acc: tensor<1x1xf32>) -> tensor<1x1xf32> + return %result: tensor<1x1xf32> +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs: tensor<?x?xbf16>, %rhs: tensor<?x?xbf16>) -> tensor<?x?xf32> { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %acc_dim0 = tensor.dim %lhs, %c0 : tensor<?x?xbf16> + %acc_dim1 = tensor.dim %rhs, %c1 : tensor<?x?xbf16> + %init_acc = tensor.empty(%acc_dim0, %acc_dim1) : tensor<?x?xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<?x?xf32>) -> tensor<?x?xf32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<?x?xbf16>, tensor<?x?xbf16>) outs(%acc: tensor<?x?xf32>) -> tensor<?x?xf32> + return %result: tensor<?x?xf32> +} + +func.func @matmul_1x1xbf16_times_1x1xbf16_into_1x1xf32(%lhs: tensor<1x1xbf16>, %rhs: tensor<1x1xbf16>) -> tensor<1x1xf32> { + %init_acc = tensor.empty() : tensor<1x1xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<1x1xf32>) -> tensor<1x1xf32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<1x1xbf16>, tensor<1x1xbf16>) outs(%acc: tensor<1x1xf32>) -> tensor<1x1xf32> + return %result: tensor<1x1xf32> +} + +func.func @matmul_accumulate_2x2xbf16_times_2x2xbf16_into_2x2xf32(%lhs: tensor<2x2xbf16>, %rhs: tensor<2x2xbf16>, %acc: tensor<2x2xf32>) -> tensor<2x2xf32> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<2x2xbf16>, tensor<2x2xbf16>) outs(%acc: tensor<2x2xf32>) -> tensor<2x2xf32> + return %result: tensor<2x2xf32> +} + +func.func @matmul_accumulate_4x4xbf16_times_4x4xbf16_into_4x4xf32(%lhs: tensor<4x4xbf16>, %rhs: tensor<4x4xbf16>, %acc: tensor<4x4xf32>) -> tensor<4x4xf32> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<4x4xbf16>, tensor<4x4xbf16>) outs(%acc: tensor<4x4xf32>) -> tensor<4x4xf32> + return %result: tensor<4x4xf32> +} + +func.func @matmul_accumulate_8x8xbf16_times_8x8xbf16_into_8x8xf32(%lhs: tensor<8x8xbf16>, %rhs: tensor<8x8xbf16>, %acc: tensor<8x8xf32>) -> tensor<8x8xf32> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<8x8xbf16>,
tensor<8x8xbf16>) outs(%acc: tensor<8x8xf32>) -> tensor<8x8xf32> + return %result: tensor<8x8xf32> +} + +func.func @matmul_accumulate_9x9xbf16_times_9x9xbf16_into_9x9xf32(%lhs: tensor<9x9xbf16>, %rhs: tensor<9x9xbf16>, %acc: tensor<9x9xf32>) -> tensor<9x9xf32> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<9x9xbf16>, tensor<9x9xbf16>) outs(%acc: tensor<9x9xf32>) -> tensor<9x9xf32> + return %result: tensor<9x9xf32> +} + +func.func @matmul_accumulate_6x13xbf16_times_13x3xbf16_into_6x3xf32(%lhs: tensor<6x13xbf16>, %rhs: tensor<13x3xbf16>, %acc: tensor<6x3xf32>) -> tensor<6x3xf32> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<6x13xbf16>, tensor<13x3xbf16>) outs(%acc: tensor<6x3xf32>) -> tensor<6x3xf32> + return %result: tensor<6x3xf32> +} + +func.func @matmul_15x37xbf16_times_37x7xbf16_into_15x7xf32(%lhs: tensor<15x37xbf16>, %rhs: tensor<37x7xbf16>) -> tensor<15x7xf32> { + %init_acc = tensor.empty() : tensor<15x7xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<15x7xf32>) -> tensor<15x7xf32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<15x37xbf16>, tensor<37x7xbf16>) outs(%acc: tensor<15x7xf32>) -> tensor<15x7xf32> + return %result: tensor<15x7xf32> +} + +func.func @matmul_accumulate_81x19xbf16_times_19x41xbf16_into_81x41xf32(%lhs: tensor<81x19xbf16>, %rhs: tensor<19x41xbf16>, %acc: tensor<81x41xf32>) -> tensor<81x41xf32> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<81x19xbf16>, tensor<19x41xbf16>) outs(%acc: tensor<81x41xf32>) -> tensor<81x41xf32> + return %result: tensor<81x41xf32> +} + +func.func @matmul_accumulate_1x10xbf16_times_10x10xbf16_into_1x10xf32(%lhs: tensor<1x10xbf16>, %rhs: tensor<10x10xbf16>, %acc: tensor<1x10xf32>) -> tensor<1x10xf32> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<1x10xbf16>, tensor<10x10xbf16>) outs(%acc: tensor<1x10xf32>) -> tensor<1x10xf32> + return %result: tensor<1x10xf32> +} + +func.func @matmul_1x10xbf16_times_10x10xbf16_into_1x10xf32(%lhs: tensor<1x10xbf16>, %rhs: tensor<10x10xbf16>) -> tensor<1x10xf32> { + %init_acc = tensor.empty() : tensor<1x10xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<1x10xf32>) -> tensor<1x10xf32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<1x10xbf16>, tensor<10x10xbf16>) outs(%acc: tensor<1x10xf32>) -> tensor<1x10xf32> + return %result: tensor<1x10xf32> +} + +func.func @matmul_accumulate_10x1xbf16_times_1x10xbf16_into_10x10xf32(%lhs: tensor<10x1xbf16>, %rhs: tensor<1x10xbf16>, %acc: tensor<10x10xf32>) -> tensor<10x10xf32> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<10x1xbf16>, tensor<1x10xbf16>) outs(%acc: tensor<10x10xf32>) -> tensor<10x10xf32> + return %result: tensor<10x10xf32> +} + +func.func @matmul_accumulate_10x10xbf16_times_10x1xbf16_into_10x1xf32(%lhs: tensor<10x10xbf16>, %rhs: tensor<10x1xbf16>, %acc: tensor<10x1xf32>) -> tensor<10x1xf32> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<10x10xbf16>, tensor<10x1xbf16>) outs(%acc: tensor<10x1xf32>) -> tensor<10x1xf32> + return %result: tensor<10x1xf32> +} + +func.func @matmul_10x10xbf16_times_10x1xbf16_into_10x1xf32(%lhs: tensor<10x10xbf16>, %rhs: tensor<10x1xbf16>) -> tensor<10x1xf32> { + %init_acc = tensor.empty() : tensor<10x1xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<10x1xf32>) -> tensor<10x1xf32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<10x10xbf16>, tensor<10x1xbf16>) outs(%acc: tensor<10x1xf32>) -> 
tensor<10x1xf32> + return %result: tensor<10x1xf32> +} + diff --git a/linalg_ops/matmul/generated/bf16_into_f32/matmul_bf16_into_f32_small_calls.mlir b/linalg_ops/matmul/generated/bf16_into_f32/matmul_bf16_into_f32_small_calls.mlir new file mode 100644 index 0000000..2e6303c --- /dev/null +++ b/linalg_ops/matmul/generated/bf16_into_f32/matmul_bf16_into_f32_small_calls.mlir @@ -0,0 +1,906 @@ +builtin.module @calls attributes { + +} { + +func.func private @matmul_test.generate_random_matrix(%device: !hal.device, %dim0: i64, %dim1: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @matmul_test.check_matmul_results(%device: !hal.device, %m: i64, %k: i64, %n: i64, %transpose_rhs: i32, %lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) + +func.func private @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_1x1xbf16_times_1x1xbf16_into_1x1xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_1x1xbf16_times_1x1xbf16_into_1x1xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_2x2xbf16_times_2x2xbf16_into_2x2xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_4x4xbf16_times_4x4xbf16_into_4x4xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_8x8xbf16_times_8x8xbf16_into_8x8xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_9x9xbf16_times_9x9xbf16_into_9x9xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_6x13xbf16_times_13x3xbf16_into_6x3xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_15x37xbf16_times_37x7xbf16_into_15x7xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_81x19xbf16_times_19x41xbf16_into_81x41xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_1x10xbf16_times_10x10xbf16_into_1x10xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_1x10xbf16_times_10x10xbf16_into_1x10xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_10x1xbf16_times_1x10xbf16_into_10x10xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_10x10xbf16_times_10x1xbf16_into_10x1xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_10x10xbf16_times_10x1xbf16_into_10x1xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view + +func.func 
@matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_1_1_1_acc_0() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 2 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 3 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_1x1xbf16_times_1x1xbf16_into_1x1xf32_1_1_1_acc_1() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 5 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 6 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = 
arith.constant 7 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_1x1xbf16_times_1x1xbf16_into_1x1xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_1_1_1_2() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 8 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 9 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_1x1xbf16_times_1x1xbf16_into_1x1xf32_1_1_1_3() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 10 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 11 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_1x1xbf16_times_1x1xbf16_into_1x1xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, 
i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_2_2_2_acc_4() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 2x2x2"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 2 : i64 + %lhs_dim1 = arith.constant 2 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 12 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 2 : i64 + %rhs_dim1 = arith.constant 2 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 13 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 2 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 14 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 2 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 14 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 2 : i64 + %k = arith.constant 2 : i64 + %n = arith.constant 2 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_2x2xbf16_times_2x2xbf16_into_2x2xf32_2_2_2_acc_5() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 2x2x2"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 2 : i64 + %lhs_dim1 = arith.constant 2 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 15 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 2 : i64 + %rhs_dim1 = arith.constant 2 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 16 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 2 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 17 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : 
i64 + %acc_copy_dim1 = arith.constant 2 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 17 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_2x2xbf16_times_2x2xbf16_into_2x2xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 2 : i64 + %k = arith.constant 2 : i64 + %n = arith.constant 2 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_4_4_4_acc_6() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 4x4x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 4 : i64 + %lhs_dim1 = arith.constant 4 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 18 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 19 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 4 : i64 + %acc_dim1 = arith.constant 4 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 20 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 4 : i64 + %acc_copy_dim1 = arith.constant 4 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 20 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_4x4xbf16_times_4x4xbf16_into_4x4xf32_4_4_4_acc_7() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 4x4x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 4 : i64 + %lhs_dim1 = arith.constant 4 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 21 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, 
%lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 22 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 4 : i64 + %acc_dim1 = arith.constant 4 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 23 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 4 : i64 + %acc_copy_dim1 = arith.constant 4 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 23 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_4x4xbf16_times_4x4xbf16_into_4x4xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_8_8_8_acc_8() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 8x8x8"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 8 : i64 + %lhs_dim1 = arith.constant 8 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 24 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 8 : i64 + %rhs_dim1 = arith.constant 8 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 25 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 8 : i64 + %acc_dim1 = arith.constant 8 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 26 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 8 : i64 + %acc_copy_dim1 = arith.constant 8 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 26 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 8 : i64 + %k = arith.constant 8 : i64 + %n = arith.constant 8 : i64 + 
%transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_8x8xbf16_times_8x8xbf16_into_8x8xf32_8_8_8_acc_9() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 8x8x8"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 8 : i64 + %lhs_dim1 = arith.constant 8 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 27 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 8 : i64 + %rhs_dim1 = arith.constant 8 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 28 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 8 : i64 + %acc_dim1 = arith.constant 8 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 29 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 8 : i64 + %acc_copy_dim1 = arith.constant 8 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 29 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_8x8xbf16_times_8x8xbf16_into_8x8xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 8 : i64 + %k = arith.constant 8 : i64 + %n = arith.constant 8 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_9_9_9_acc_10() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 9x9x9"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 9 : i64 + %lhs_dim1 = arith.constant 9 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 30 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 9 : i64 + %rhs_dim1 = arith.constant 9 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 31 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 9 : i64 + %acc_dim1 = arith.constant 9 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 32 : i32 + %acc = call 
@matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 9 : i64 + %acc_copy_dim1 = arith.constant 9 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 32 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 9 : i64 + %k = arith.constant 9 : i64 + %n = arith.constant 9 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_9x9xbf16_times_9x9xbf16_into_9x9xf32_9_9_9_acc_11() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 9x9x9"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 9 : i64 + %lhs_dim1 = arith.constant 9 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 33 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 9 : i64 + %rhs_dim1 = arith.constant 9 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 34 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 9 : i64 + %acc_dim1 = arith.constant 9 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 35 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 9 : i64 + %acc_copy_dim1 = arith.constant 9 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 35 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_9x9xbf16_times_9x9xbf16_into_9x9xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 9 : i64 + %k = arith.constant 9 : i64 + %n = arith.constant 9 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_6_13_3_acc_12() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 6x13x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 6 : i64 + 
%lhs_dim1 = arith.constant 13 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 36 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 13 : i64 + %rhs_dim1 = arith.constant 3 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 37 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 6 : i64 + %acc_dim1 = arith.constant 3 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 38 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 6 : i64 + %acc_copy_dim1 = arith.constant 3 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 38 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 6 : i64 + %k = arith.constant 13 : i64 + %n = arith.constant 3 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_6x13xbf16_times_13x3xbf16_into_6x3xf32_6_13_3_acc_13() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 6x13x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 6 : i64 + %lhs_dim1 = arith.constant 13 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 39 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 13 : i64 + %rhs_dim1 = arith.constant 3 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 40 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 6 : i64 + %acc_dim1 = arith.constant 3 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 41 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 6 : i64 + %acc_copy_dim1 = arith.constant 3 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 41 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call 
@module.matmul_accumulate_6x13xbf16_times_13x3xbf16_into_6x3xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 6 : i64 + %k = arith.constant 13 : i64 + %n = arith.constant 3 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_15_37_7_14() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 15x37x7"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 15 : i64 + %lhs_dim1 = arith.constant 37 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 42 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 37 : i64 + %rhs_dim1 = arith.constant 7 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 43 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 15 : i64 + %k = arith.constant 37 : i64 + %n = arith.constant 7 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_15x37xbf16_times_37x7xbf16_into_15x7xf32_15_37_7_15() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 15x37x7"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 15 : i64 + %lhs_dim1 = arith.constant 37 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 44 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 37 : i64 + %rhs_dim1 = arith.constant 7 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 45 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_15x37xbf16_times_37x7xbf16_into_15x7xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 15 : i64 + %k = arith.constant 37 : i64 + %n = arith.constant 7 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func 
@matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_81_19_41_acc_16() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 81x19x41"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 81 : i64 + %lhs_dim1 = arith.constant 19 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 46 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 19 : i64 + %rhs_dim1 = arith.constant 41 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 47 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 81 : i64 + %acc_dim1 = arith.constant 41 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 48 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 81 : i64 + %acc_copy_dim1 = arith.constant 41 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 48 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 81 : i64 + %k = arith.constant 19 : i64 + %n = arith.constant 41 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_81x19xbf16_times_19x41xbf16_into_81x41xf32_81_19_41_acc_17() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 81x19x41"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 81 : i64 + %lhs_dim1 = arith.constant 19 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 49 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 19 : i64 + %rhs_dim1 = arith.constant 41 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 50 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 81 : i64 + %acc_dim1 = arith.constant 41 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 51 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 81 : i64 + %acc_copy_dim1 = arith.constant 41 : i64 + %acc_copy_element_type = 
hal.element_type : i32 + %acc_copy_seed = arith.constant 51 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_81x19xbf16_times_19x41xbf16_into_81x41xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 81 : i64 + %k = arith.constant 19 : i64 + %n = arith.constant 41 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_1_10_10_acc_18() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 52 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 53 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 10 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 54 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 10 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 54 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_1x10xbf16_times_10x10xbf16_into_1x10xf32_1_10_10_acc_19() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 55 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : 
(!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 56 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 10 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 57 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 10 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 57 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_1x10xbf16_times_10x10xbf16_into_1x10xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_1_10_10_20() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 58 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 59 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_1x10xbf16_times_10x10xbf16_into_1x10xf32_1_10_10_21() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 60 : i32 + %lhs = call 
@matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 61 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_1x10xbf16_times_10x10xbf16_into_1x10xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_10_1_10_acc_22() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x1x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 62 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 63 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 10 : i64 + %acc_dim1 = arith.constant 10 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 64 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 10 : i64 + %acc_copy_dim1 = arith.constant 10 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 64 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_10x1xbf16_times_1x10xbf16_into_10x10xf32_10_1_10_acc_23() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x1x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 
1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 65 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 66 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 10 : i64 + %acc_dim1 = arith.constant 10 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 67 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 10 : i64 + %acc_copy_dim1 = arith.constant 10 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 67 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_10x1xbf16_times_1x10xbf16_into_10x10xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_10_10_1_acc_24() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 68 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 69 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 10 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 70 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 10 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 70 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call 
@module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_10x10xbf16_times_10x1xbf16_into_10x1xf32_10_10_1_acc_25() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 71 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 72 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 10 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 73 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 10 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 73 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_10x10xbf16_times_10x1xbf16_into_10x1xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_10_10_1_26() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 74 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 75 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, 
%rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+  %acc = util.null : !hal.buffer_view
+  %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view
+  %m = arith.constant 10 : i64
+  %k = arith.constant 10 : i64
+  %n = arith.constant 1 : i64
+  %transpose_rhs = arith.constant 0 : i32
+  call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()
+  return
+}
+
+func.func @matmul_10x10xbf16_times_10x1xbf16_into_10x1xf32_10_10_1_27() attributes {
+  iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"}
+} {
+  %device_index = arith.constant 0 : index
+  %device = hal.devices.get %device_index : !hal.device
+  %lhs_dim0 = arith.constant 10 : i64
+  %lhs_dim1 = arith.constant 10 : i64
+  %lhs_element_type = hal.element_type : i32
+  %lhs_seed = arith.constant 76 : i32
+  %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+  %rhs_dim0 = arith.constant 10 : i64
+  %rhs_dim1 = arith.constant 1 : i64
+  %rhs_element_type = hal.element_type : i32
+  %rhs_seed = arith.constant 77 : i32
+  %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+  %acc = util.null : !hal.buffer_view
+  %result = call @module.matmul_10x10xbf16_times_10x1xbf16_into_10x1xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view
+  %m = arith.constant 10 : i64
+  %k = arith.constant 10 : i64
+  %n = arith.constant 1 : i64
+  %transpose_rhs = arith.constant 0 : i32
+  call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()
+  return
+}
+
+
+}
diff --git a/linalg_ops/matmul/generated/bf16_into_f32/matmul_transpose_b_bf16_into_f32_large.mlir b/linalg_ops/matmul/generated/bf16_into_f32/matmul_transpose_b_bf16_into_f32_large.mlir
new file mode 100644
index 0000000..980ae69
--- /dev/null
+++ b/linalg_ops/matmul/generated/bf16_into_f32/matmul_transpose_b_bf16_into_f32_large.mlir
@@ -0,0 +1,136 @@
+func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs: tensor<?x?xbf16>, %rhs: tensor<?x?xbf16>, %acc: tensor<?x?xf32>) -> tensor<?x?xf32> {
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<?x?xbf16>, tensor<?x?xbf16>) outs(%acc: tensor<?x?xf32>) -> tensor<?x?xf32>
+  return %result: tensor<?x?xf32>
+}
+
+func.func @matmul_accumulate_512x128xbf16_times_512x128xbf16_into_512x512xf32(%lhs: tensor<512x128xbf16>, %rhs: tensor<512x128xbf16>, %acc: tensor<512x512xf32>) -> tensor<512x512xf32> {
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<512x128xbf16>, tensor<512x128xbf16>) outs(%acc: tensor<512x512xf32>) -> tensor<512x512xf32>
+  return %result: tensor<512x512xf32>
+}
+
+func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs: tensor<?x?xbf16>, %rhs: tensor<?x?xbf16>) -> tensor<?x?xf32> {
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+  %acc_dim0 = tensor.dim %lhs, %c0 : tensor<?x?xbf16>
+  %acc_dim1 = tensor.dim %rhs, %c1 : tensor<?x?xbf16>
+  %init_acc = tensor.empty(%acc_dim0, %acc_dim1) : tensor<?x?xf32>
+  %c0_acc_type = arith.constant 0.0: f32
+  %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<?x?xf32>) -> tensor<?x?xf32>
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<?x?xbf16>, tensor<?x?xbf16>) outs(%acc: tensor<?x?xf32>) -> tensor<?x?xf32>
+  return %result: tensor<?x?xf32>
+}
+
+func.func @matmul_512x128xbf16_times_512x128xbf16_into_512x512xf32(%lhs: tensor<512x128xbf16>, %rhs: tensor<512x128xbf16>) -> tensor<512x512xf32> {
+  %init_acc = tensor.empty() : tensor<512x512xf32>
+  %c0_acc_type = arith.constant 0.0: f32
+  %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<512x512xf32>) -> tensor<512x512xf32>
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<512x128xbf16>, tensor<512x128xbf16>) outs(%acc: tensor<512x512xf32>) -> tensor<512x512xf32>
+  return %result: tensor<512x512xf32>
+}
+
+func.func @matmul_1000x4xbf16_times_512x4xbf16_into_1000x512xf32(%lhs: tensor<1000x4xbf16>, %rhs: tensor<512x4xbf16>) -> tensor<1000x512xf32> {
+  %init_acc = tensor.empty() : tensor<1000x512xf32>
+  %c0_acc_type = arith.constant 0.0: f32
+  %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<1000x512xf32>) -> tensor<1000x512xf32>
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<1000x4xbf16>, tensor<512x4xbf16>) outs(%acc: tensor<1000x512xf32>) -> tensor<1000x512xf32>
+  return %result: tensor<1000x512xf32>
+}
+
+func.func @matmul_4x1000xbf16_times_512x1000xbf16_into_4x512xf32(%lhs: tensor<4x1000xbf16>, %rhs: tensor<512x1000xbf16>) -> tensor<4x512xf32> {
+  %init_acc = tensor.empty() : tensor<4x512xf32>
+  %c0_acc_type = arith.constant 0.0: f32
+  %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<4x512xf32>) -> tensor<4x512xf32>
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<4x1000xbf16>, tensor<512x1000xbf16>) outs(%acc: tensor<4x512xf32>) -> tensor<4x512xf32>
+  return %result: tensor<4x512xf32>
+}
+
+func.func @matmul_512x1000xbf16_times_4x1000xbf16_into_512x4xf32(%lhs: tensor<512x1000xbf16>, %rhs: tensor<4x1000xbf16>) -> tensor<512x4xf32> {
+  %init_acc = tensor.empty() : tensor<512x4xf32>
+  %c0_acc_type = arith.constant 0.0: f32
+  %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<512x4xf32>) -> tensor<512x4xf32>
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<512x1000xbf16>, tensor<4x1000xbf16>) outs(%acc: tensor<512x4xf32>) -> tensor<512x4xf32>
+  return %result: tensor<512x4xf32>
+}
+
+func.func @matmul_512x128xbf16_times_500x128xbf16_into_512x500xf32(%lhs: tensor<512x128xbf16>, %rhs: tensor<500x128xbf16>) -> tensor<512x500xf32> {
+  %init_acc = tensor.empty() : tensor<512x500xf32>
+  %c0_acc_type = arith.constant 0.0: f32
+  %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<512x500xf32>) -> tensor<512x500xf32>
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<512x128xbf16>, tensor<500x128xbf16>) outs(%acc: tensor<512x500xf32>) -> tensor<512x500xf32>
+  return %result: tensor<512x500xf32>
+}
+
+func.func @matmul_457x330xbf16_times_512x330xbf16_into_457x512xf32(%lhs: tensor<457x330xbf16>, %rhs: tensor<512x330xbf16>) -> tensor<457x512xf32> {
+  %init_acc = tensor.empty() : tensor<457x512xf32>
+  %c0_acc_type = arith.constant 0.0: f32
+  %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<457x512xf32>) -> tensor<457x512xf32>
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<457x330xbf16>, tensor<512x330xbf16>) outs(%acc: tensor<457x512xf32>) -> tensor<457x512xf32>
+  return %result: tensor<457x512xf32>
+}
+
+func.func @matmul_457x330xbf16_times_514x330xbf16_into_457x514xf32(%lhs: tensor<457x330xbf16>, %rhs: tensor<514x330xbf16>) -> tensor<457x514xf32> {
+  %init_acc = tensor.empty() : tensor<457x514xf32>
+  %c0_acc_type = arith.constant 0.0: f32
+  %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<457x514xf32>) -> tensor<457x514xf32>
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<457x330xbf16>, tensor<514x330xbf16>) outs(%acc: tensor<457x514xf32>) -> tensor<457x514xf32>
+  return %result: tensor<457x514xf32>
+}
+
+func.func @matmul_438x330xbf16_times_514x330xbf16_into_438x514xf32(%lhs: tensor<438x330xbf16>, %rhs: tensor<514x330xbf16>) -> tensor<438x514xf32> {
+  %init_acc = tensor.empty() : tensor<438x514xf32>
+  %c0_acc_type = arith.constant 0.0: f32
+  %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<438x514xf32>) -> tensor<438x514xf32>
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<438x330xbf16>, tensor<514x330xbf16>) outs(%acc: tensor<438x514xf32>) -> tensor<438x514xf32>
+  return %result: tensor<438x514xf32>
+}
+
+func.func @matmul_540x332xbf16_times_516x332xbf16_into_540x516xf32(%lhs: tensor<540x332xbf16>, %rhs: tensor<516x332xbf16>) -> tensor<540x516xf32> {
+  %init_acc = tensor.empty() : tensor<540x516xf32>
+  %c0_acc_type = arith.constant 0.0: f32
+  %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<540x516xf32>) -> tensor<540x516xf32>
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<540x332xbf16>, tensor<516x332xbf16>) outs(%acc: tensor<540x516xf32>) -> tensor<540x516xf32>
+  return %result: tensor<540x516xf32>
+}
+
+func.func @matmul_654x321xbf16_times_234x321xbf16_into_654x234xf32(%lhs: tensor<654x321xbf16>, %rhs: tensor<234x321xbf16>) -> tensor<654x234xf32> {
+  %init_acc = tensor.empty() : tensor<654x234xf32>
+  %c0_acc_type = arith.constant 0.0: f32
+  %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<654x234xf32>) -> tensor<654x234xf32>
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<654x321xbf16>, tensor<234x321xbf16>) outs(%acc: tensor<654x234xf32>) -> tensor<654x234xf32>
+  return %result: tensor<654x234xf32>
+}
+
+func.func @matmul_457x160xbf16_times_512x160xbf16_into_457x512xf32(%lhs: tensor<457x160xbf16>, %rhs: tensor<512x160xbf16>) -> tensor<457x512xf32> {
+  %init_acc = tensor.empty() : tensor<457x512xf32>
+  %c0_acc_type = arith.constant 0.0: f32
+  %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<457x512xf32>) -> tensor<457x512xf32>
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<457x160xbf16>, tensor<512x160xbf16>) outs(%acc: tensor<457x512xf32>) -> tensor<457x512xf32>
+  return %result: tensor<457x512xf32>
+}
+
+func.func @matmul_512x330xbf16_times_512x330xbf16_into_512x512xf32(%lhs: tensor<512x330xbf16>, %rhs: tensor<512x330xbf16>) -> tensor<512x512xf32> {
+  %init_acc = tensor.empty() : tensor<512x512xf32>
+  %c0_acc_type = arith.constant 0.0: f32
+  %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<512x512xf32>) -> tensor<512x512xf32>
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<512x330xbf16>, tensor<512x330xbf16>) outs(%acc: tensor<512x512xf32>) -> tensor<512x512xf32>
+  return %result: tensor<512x512xf32>
+}
+
+func.func @matmul_accumulate_1x1000xbf16_times_1000x1000xbf16_into_1x1000xf32(%lhs: tensor<1x1000xbf16>, %rhs: tensor<1000x1000xbf16>, %acc: tensor<1x1000xf32>) -> tensor<1x1000xf32> {
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<1x1000xbf16>, tensor<1000x1000xbf16>) outs(%acc: tensor<1x1000xf32>) -> tensor<1x1000xf32>
+  return %result: tensor<1x1000xf32>
+}
+
+func.func @matmul_accumulate_1000x1000xbf16_times_1x1000xbf16_into_1000x1xf32(%lhs: tensor<1000x1000xbf16>, %rhs: tensor<1x1000xbf16>, %acc: tensor<1000x1xf32>) -> tensor<1000x1xf32> {
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<1000x1000xbf16>, tensor<1x1000xbf16>) outs(%acc: tensor<1000x1xf32>) -> tensor<1000x1xf32>
+  return %result: tensor<1000x1xf32>
+}
+
+func.func @matmul_1000x1000xbf16_times_1x1000xbf16_into_1000x1xf32(%lhs: tensor<1000x1000xbf16>, %rhs: tensor<1x1000xbf16>) -> tensor<1000x1xf32> {
+  %init_acc = tensor.empty() : tensor<1000x1xf32>
+  %c0_acc_type = arith.constant 0.0: f32
+  %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<1000x1xf32>) -> tensor<1000x1xf32>
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<1000x1000xbf16>, tensor<1x1000xbf16>) outs(%acc: tensor<1000x1xf32>) -> tensor<1000x1xf32>
+  return %result: tensor<1000x1xf32>
+}
+
diff --git a/linalg_ops/matmul/generated/bf16_into_f32/matmul_transpose_b_bf16_into_f32_large_calls.mlir b/linalg_ops/matmul/generated/bf16_into_f32/matmul_transpose_b_bf16_into_f32_large_calls.mlir
new file mode 100644
index 0000000..0aed8ea
--- /dev/null
+++ b/linalg_ops/matmul/generated/bf16_into_f32/matmul_transpose_b_bf16_into_f32_large_calls.mlir
@@ -0,0 +1,882 @@
+builtin.module @calls attributes {
+
+} {
+
+func.func private @matmul_test.generate_random_matrix(%device: !hal.device, %dim0: i64, %dim1: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view
+func.func private @matmul_test.check_matmul_results(%device: !hal.device, %m: i64, %k: i64, %n: i64, %transpose_rhs: i32, %lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view)
+
+func.func private @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_accumulate_512x128xbf16_times_512x128xbf16_into_512x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_512x128xbf16_times_512x128xbf16_into_512x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_1000x4xbf16_times_512x4xbf16_into_1000x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_4x1000xbf16_times_512x1000xbf16_into_4x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_512x1000xbf16_times_4x1000xbf16_into_512x4xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_512x128xbf16_times_500x128xbf16_into_512x500xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_457x330xbf16_times_512x330xbf16_into_457x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_457x330xbf16_times_514x330xbf16_into_457x514xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_438x330xbf16_times_514x330xbf16_into_438x514xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_540x332xbf16_times_516x332xbf16_into_540x516xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+func.func private
@module.matmul_654x321xbf16_times_234x321xbf16_into_654x234xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_457x160xbf16_times_512x160xbf16_into_457x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_512x330xbf16_times_512x330xbf16_into_512x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_1x1000xbf16_times_1000x1000xbf16_into_1x1000xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_1000x1000xbf16_times_1x1000xbf16_into_1000x1xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_1000x1000xbf16_times_1x1000xbf16_into_1000x1xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view + +func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_512_128_512_acc_0() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 2 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 128 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 3 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 512 : i64 + %acc_dim1 = arith.constant 512 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 512 : i64 + %acc_copy_dim1 = arith.constant 512 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_512x128xbf16_times_512x128xbf16_into_512x512xf32_512_128_512_acc_1() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = 
hal.element_type : i32 + %lhs_seed = arith.constant 5 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 128 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 6 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 512 : i64 + %acc_dim1 = arith.constant 512 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 512 : i64 + %acc_copy_dim1 = arith.constant 512 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_512x128xbf16_times_512x128xbf16_into_512x512xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_512_128_512_2() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 8 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 128 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 9 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_512x128xbf16_times_512x128xbf16_into_512x512xf32_512_128_512_3() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get 
%device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 10 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 128 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 11 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_512x128xbf16_times_512x128xbf16_into_512x512xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_1000_4_512_4() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x4x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 4 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 12 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 13 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_1000x4xbf16_times_512x4xbf16_into_1000x512xf32_1000_4_512_5() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x4x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 4 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 14 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 15 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, 
%rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_1000x4xbf16_times_512x4xbf16_into_1000x512xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_4_1000_512_6() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 4x1000x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 4 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 16 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 17 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_4x1000xbf16_times_512x1000xbf16_into_4x512xf32_4_1000_512_7() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 4x1000x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 4 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 18 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 19 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_4x1000xbf16_times_512x1000xbf16_into_4x512xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, 
!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_512_1000_4_8() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x1000x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 20 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 21 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_512x1000xbf16_times_4x1000xbf16_into_512x4xf32_512_1000_4_9() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x1000x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 22 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 23 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_512x1000xbf16_times_4x1000xbf16_into_512x4xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_512_128_500_10() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x500"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 24 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, 
%lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 500 : i64 + %rhs_dim1 = arith.constant 128 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 25 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 500 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_512x128xbf16_times_500x128xbf16_into_512x500xf32_512_128_500_11() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x500"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 26 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 500 : i64 + %rhs_dim1 = arith.constant 128 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 27 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_512x128xbf16_times_500x128xbf16_into_512x500xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 500 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_457_330_512_12() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x330x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 28 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 29 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs) : 
(!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_457x330xbf16_times_512x330xbf16_into_457x512xf32_457_330_512_13() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x330x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 30 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 31 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_457x330xbf16_times_512x330xbf16_into_457x512xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_457_330_514_14() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x330x514"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 32 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 514 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 33 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 514 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_457x330xbf16_times_514x330xbf16_into_457x514xf32_457_330_514_15() attributes { + iree.reflection = {description = "Matmul 
shape (MxKxN): 457x330x514"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 34 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 514 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 35 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_457x330xbf16_times_514x330xbf16_into_457x514xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 514 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_438_330_514_16() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 438x330x514"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 438 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 36 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 514 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 37 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 438 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 514 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_438x330xbf16_times_514x330xbf16_into_438x514xf32_438_330_514_17() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 438x330x514"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 438 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 38 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 514 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = 
hal.element_type : i32 + %rhs_seed = arith.constant 39 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_438x330xbf16_times_514x330xbf16_into_438x514xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 438 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 514 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_540_332_516_18() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 540x332x516"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 540 : i64 + %lhs_dim1 = arith.constant 332 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 40 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 516 : i64 + %rhs_dim1 = arith.constant 332 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 41 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 540 : i64 + %k = arith.constant 332 : i64 + %n = arith.constant 516 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_540x332xbf16_times_516x332xbf16_into_540x516xf32_540_332_516_19() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 540x332x516"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 540 : i64 + %lhs_dim1 = arith.constant 332 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 42 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 516 : i64 + %rhs_dim1 = arith.constant 332 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 43 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_540x332xbf16_times_516x332xbf16_into_540x516xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 540 : i64 + %k = arith.constant 332 : i64 + %n = arith.constant 516 : i64 + %transpose_rhs = arith.constant 1 : i32 + call 
@matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_654_321_234_20() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 654x321x234"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 654 : i64 + %lhs_dim1 = arith.constant 321 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 44 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 234 : i64 + %rhs_dim1 = arith.constant 321 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 45 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 654 : i64 + %k = arith.constant 321 : i64 + %n = arith.constant 234 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_654x321xbf16_times_234x321xbf16_into_654x234xf32_654_321_234_21() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 654x321x234"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 654 : i64 + %lhs_dim1 = arith.constant 321 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 46 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 234 : i64 + %rhs_dim1 = arith.constant 321 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 47 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_654x321xbf16_times_234x321xbf16_into_654x234xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 654 : i64 + %k = arith.constant 321 : i64 + %n = arith.constant 234 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_457_160_512_22() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x160x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 160 
: i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 48 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 160 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 49 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 160 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_457x160xbf16_times_512x160xbf16_into_457x512xf32_457_160_512_23() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x160x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 160 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 50 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 160 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 51 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_457x160xbf16_times_512x160xbf16_into_457x512xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 160 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_512_330_512_24() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x330x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 52 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 53 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> 
!hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_512x330xbf16_times_512x330xbf16_into_512x512xf32_512_330_512_25() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x330x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 54 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 55 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_512x330xbf16_times_512x330xbf16_into_512x512xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_1_1000_1000_acc_26() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1000x1000"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 56 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 57 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1000 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 58 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1000 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 58 : i32 + %acc_copy = call 
@matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1000 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_1x1000xbf16_times_1000x1000xbf16_into_1x1000xf32_1_1000_1000_acc_27() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1000x1000"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 59 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 60 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1000 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 61 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1000 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 61 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_1x1000xbf16_times_1000x1000xbf16_into_1x1000xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1000 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_1000_1000_1_acc_28() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x1000x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 62 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> 
!hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 63 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1000 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 64 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1000 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 64 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_1000x1000xbf16_times_1x1000xbf16_into_1000x1xf32_1000_1000_1_acc_29() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x1000x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 65 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 66 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1000 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 67 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1000 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 67 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_1000x1000xbf16_times_1x1000xbf16_into_1000x1xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1 : i64 + 
%transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_1000_1000_1_30() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x1000x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 68 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 69 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_1000x1000xbf16_times_1x1000xbf16_into_1000x1xf32_1000_1000_1_31() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x1000x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 70 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 71 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_1000x1000xbf16_times_1x1000xbf16_into_1000x1xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + + +} diff --git a/linalg_ops/matmul/generated/bf16_into_f32/matmul_transpose_b_bf16_into_f32_small.mlir b/linalg_ops/matmul/generated/bf16_into_f32/matmul_transpose_b_bf16_into_f32_small.mlir new file mode 100644 index 0000000..f867453 --- /dev/null +++ 
b/linalg_ops/matmul/generated/bf16_into_f32/matmul_transpose_b_bf16_into_f32_small.mlir
@@ -0,0 +1,99 @@
+func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs: tensor<?x?xbf16>, %rhs: tensor<?x?xbf16>, %acc: tensor<?x?xf32>) -> tensor<?x?xf32> {
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<?x?xbf16>, tensor<?x?xbf16>) outs(%acc: tensor<?x?xf32>) -> tensor<?x?xf32>
+  return %result: tensor<?x?xf32>
+}
+
+func.func @matmul_accumulate_1x1xbf16_times_1x1xbf16_into_1x1xf32(%lhs: tensor<1x1xbf16>, %rhs: tensor<1x1xbf16>, %acc: tensor<1x1xf32>) -> tensor<1x1xf32> {
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<1x1xbf16>, tensor<1x1xbf16>) outs(%acc: tensor<1x1xf32>) -> tensor<1x1xf32>
+  return %result: tensor<1x1xf32>
+}
+
+func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs: tensor<?x?xbf16>, %rhs: tensor<?x?xbf16>) -> tensor<?x?xf32> {
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+  %acc_dim0 = tensor.dim %lhs, %c0 : tensor<?x?xbf16>
+  %acc_dim1 = tensor.dim %rhs, %c1 : tensor<?x?xbf16>
+  %init_acc = tensor.empty(%acc_dim0, %acc_dim1) : tensor<?x?xf32>
+  %c0_acc_type = arith.constant 0.0: f32
+  %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<?x?xf32>) -> tensor<?x?xf32>
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<?x?xbf16>, tensor<?x?xbf16>) outs(%acc: tensor<?x?xf32>) -> tensor<?x?xf32>
+  return %result: tensor<?x?xf32>
+}
+
+func.func @matmul_1x1xbf16_times_1x1xbf16_into_1x1xf32(%lhs: tensor<1x1xbf16>, %rhs: tensor<1x1xbf16>) -> tensor<1x1xf32> {
+  %init_acc = tensor.empty() : tensor<1x1xf32>
+  %c0_acc_type = arith.constant 0.0: f32
+  %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<1x1xf32>) -> tensor<1x1xf32>
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<1x1xbf16>, tensor<1x1xbf16>) outs(%acc: tensor<1x1xf32>) -> tensor<1x1xf32>
+  return %result: tensor<1x1xf32>
+}
+
+func.func @matmul_accumulate_2x2xbf16_times_2x2xbf16_into_2x2xf32(%lhs: tensor<2x2xbf16>, %rhs: tensor<2x2xbf16>, %acc: tensor<2x2xf32>) -> tensor<2x2xf32> {
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<2x2xbf16>, tensor<2x2xbf16>) outs(%acc: tensor<2x2xf32>) -> tensor<2x2xf32>
+  return %result: tensor<2x2xf32>
+}
+
+func.func @matmul_accumulate_4x4xbf16_times_4x4xbf16_into_4x4xf32(%lhs: tensor<4x4xbf16>, %rhs: tensor<4x4xbf16>, %acc: tensor<4x4xf32>) -> tensor<4x4xf32> {
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<4x4xbf16>, tensor<4x4xbf16>) outs(%acc: tensor<4x4xf32>) -> tensor<4x4xf32>
+  return %result: tensor<4x4xf32>
+}
+
+func.func @matmul_accumulate_8x8xbf16_times_8x8xbf16_into_8x8xf32(%lhs: tensor<8x8xbf16>, %rhs: tensor<8x8xbf16>, %acc: tensor<8x8xf32>) -> tensor<8x8xf32> {
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<8x8xbf16>, tensor<8x8xbf16>) outs(%acc: tensor<8x8xf32>) -> tensor<8x8xf32>
+  return %result: tensor<8x8xf32>
+}
+
+func.func @matmul_accumulate_9x9xbf16_times_9x9xbf16_into_9x9xf32(%lhs: tensor<9x9xbf16>, %rhs: tensor<9x9xbf16>, %acc: tensor<9x9xf32>) -> tensor<9x9xf32> {
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<9x9xbf16>, tensor<9x9xbf16>) outs(%acc: tensor<9x9xf32>) -> tensor<9x9xf32>
+  return %result: tensor<9x9xf32>
+}
+
+func.func @matmul_accumulate_6x13xbf16_times_3x13xbf16_into_6x3xf32(%lhs: tensor<6x13xbf16>, %rhs: tensor<3x13xbf16>, %acc: tensor<6x3xf32>) -> tensor<6x3xf32> {
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<6x13xbf16>, tensor<3x13xbf16>) outs(%acc: tensor<6x3xf32>) -> tensor<6x3xf32>
+  return %result: tensor<6x3xf32>
+}
+
+func.func @matmul_15x37xbf16_times_7x37xbf16_into_15x7xf32(%lhs: tensor<15x37xbf16>, %rhs: tensor<7x37xbf16>) -> tensor<15x7xf32> {
+  %init_acc = tensor.empty() : tensor<15x7xf32>
+  %c0_acc_type = arith.constant 0.0: f32
+  %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<15x7xf32>) -> tensor<15x7xf32>
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<15x37xbf16>, tensor<7x37xbf16>) outs(%acc: tensor<15x7xf32>) -> tensor<15x7xf32>
+  return %result: tensor<15x7xf32>
+}
+
+func.func @matmul_accumulate_81x19xbf16_times_41x19xbf16_into_81x41xf32(%lhs: tensor<81x19xbf16>, %rhs: tensor<41x19xbf16>, %acc: tensor<81x41xf32>) -> tensor<81x41xf32> {
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<81x19xbf16>, tensor<41x19xbf16>) outs(%acc: tensor<81x41xf32>) -> tensor<81x41xf32>
+  return %result: tensor<81x41xf32>
+}
+
+func.func @matmul_accumulate_1x10xbf16_times_10x10xbf16_into_1x10xf32(%lhs: tensor<1x10xbf16>, %rhs: tensor<10x10xbf16>, %acc: tensor<1x10xf32>) -> tensor<1x10xf32> {
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<1x10xbf16>, tensor<10x10xbf16>) outs(%acc: tensor<1x10xf32>) -> tensor<1x10xf32>
+  return %result: tensor<1x10xf32>
+}
+
+func.func @matmul_1x10xbf16_times_10x10xbf16_into_1x10xf32(%lhs: tensor<1x10xbf16>, %rhs: tensor<10x10xbf16>) -> tensor<1x10xf32> {
+  %init_acc = tensor.empty() : tensor<1x10xf32>
+  %c0_acc_type = arith.constant 0.0: f32
+  %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<1x10xf32>) -> tensor<1x10xf32>
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<1x10xbf16>, tensor<10x10xbf16>) outs(%acc: tensor<1x10xf32>) -> tensor<1x10xf32>
+  return %result: tensor<1x10xf32>
+}
+
+func.func @matmul_accumulate_10x1xbf16_times_10x1xbf16_into_10x10xf32(%lhs: tensor<10x1xbf16>, %rhs: tensor<10x1xbf16>, %acc: tensor<10x10xf32>) -> tensor<10x10xf32> {
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<10x1xbf16>, tensor<10x1xbf16>) outs(%acc: tensor<10x10xf32>) -> tensor<10x10xf32>
+  return %result: tensor<10x10xf32>
+}
+
+func.func @matmul_accumulate_10x10xbf16_times_1x10xbf16_into_10x1xf32(%lhs: tensor<10x10xbf16>, %rhs: tensor<1x10xbf16>, %acc: tensor<10x1xf32>) -> tensor<10x1xf32> {
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<10x10xbf16>, tensor<1x10xbf16>) outs(%acc: tensor<10x1xf32>) -> tensor<10x1xf32>
+  return %result: tensor<10x1xf32>
+}
+
+func.func @matmul_10x10xbf16_times_1x10xbf16_into_10x1xf32(%lhs: tensor<10x10xbf16>, %rhs: tensor<1x10xbf16>) -> tensor<10x1xf32> {
+  %init_acc = tensor.empty() : tensor<10x1xf32>
+  %c0_acc_type = arith.constant 0.0: f32
+  %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<10x1xf32>) -> tensor<10x1xf32>
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<10x10xbf16>, tensor<1x10xbf16>) outs(%acc: tensor<10x1xf32>) -> tensor<10x1xf32>
+  return %result: tensor<10x1xf32>
+}
+
diff --git a/linalg_ops/matmul/generated/bf16_into_f32/matmul_transpose_b_bf16_into_f32_small_calls.mlir b/linalg_ops/matmul/generated/bf16_into_f32/matmul_transpose_b_bf16_into_f32_small_calls.mlir
new file mode 100644
index 0000000..24b3cae
--- /dev/null
+++ b/linalg_ops/matmul/generated/bf16_into_f32/matmul_transpose_b_bf16_into_f32_small_calls.mlir
@@ -0,0 +1,906 @@
+builtin.module @calls attributes {
+
+} {
+
+func.func private @matmul_test.generate_random_matrix(%device: !hal.device, %dim0: i64, %dim1: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view
+func.func private @matmul_test.check_matmul_results(%device: !hal.device, %m: i64, %k: i64, %n: i64, %transpose_rhs: i32, %lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view)
+
+func.func private @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_accumulate_1x1xbf16_times_1x1xbf16_into_1x1xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_1x1xbf16_times_1x1xbf16_into_1x1xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_accumulate_2x2xbf16_times_2x2xbf16_into_2x2xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_accumulate_4x4xbf16_times_4x4xbf16_into_4x4xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_accumulate_8x8xbf16_times_8x8xbf16_into_8x8xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_accumulate_9x9xbf16_times_9x9xbf16_into_9x9xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_accumulate_6x13xbf16_times_3x13xbf16_into_6x3xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_15x37xbf16_times_7x37xbf16_into_15x7xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_accumulate_81x19xbf16_times_41x19xbf16_into_81x41xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_accumulate_1x10xbf16_times_10x10xbf16_into_1x10xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_1x10xbf16_times_10x10xbf16_into_1x10xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_accumulate_10x1xbf16_times_10x1xbf16_into_10x10xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_accumulate_10x10xbf16_times_1x10xbf16_into_10x1xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_10x10xbf16_times_1x10xbf16_into_10x1xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+
+func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_1_1_1_acc_0() attributes {
+  iree.reflection = {description = "Matmul shape (MxKxN): 1x1x1"}
+} {
+  %device_index = arith.constant 0 : index
+  %device = hal.devices.get %device_index : !hal.device
+  %lhs_dim0 = arith.constant 1 : i64
+  %lhs_dim1 = arith.constant 1 : i64
+  %lhs_element_type = hal.element_type<bf16> : i32
+  %lhs_seed = arith.constant 2 : i32
+  %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+  %rhs_dim0 = arith.constant 1 : i64
+  %rhs_dim1 = arith.constant 1 : i64
+  %rhs_element_type = hal.element_type<bf16> : i32
+  %rhs_seed = arith.constant 3 : i32
+  %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0,
%rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_1x1xbf16_times_1x1xbf16_into_1x1xf32_1_1_1_acc_1() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 5 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 6 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_1x1xbf16_times_1x1xbf16_into_1x1xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func 
@matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_1_1_1_2() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 8 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 9 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_1x1xbf16_times_1x1xbf16_into_1x1xf32_1_1_1_3() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 10 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 11 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_1x1xbf16_times_1x1xbf16_into_1x1xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_2_2_2_acc_4() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 2x2x2"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 2 : i64 + %lhs_dim1 = arith.constant 2 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 12 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 2 : i64 + %rhs_dim1 = 
arith.constant 2 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 13 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 2 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 14 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 2 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 14 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 2 : i64 + %k = arith.constant 2 : i64 + %n = arith.constant 2 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_2x2xbf16_times_2x2xbf16_into_2x2xf32_2_2_2_acc_5() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 2x2x2"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 2 : i64 + %lhs_dim1 = arith.constant 2 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 15 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 2 : i64 + %rhs_dim1 = arith.constant 2 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 16 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 2 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 17 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 2 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 17 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_2x2xbf16_times_2x2xbf16_into_2x2xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 2 : i64 + %k = arith.constant 2 : i64 + %n = arith.constant 2 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : 
(!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_4_4_4_acc_6() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 4x4x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 4 : i64 + %lhs_dim1 = arith.constant 4 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 18 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 19 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 4 : i64 + %acc_dim1 = arith.constant 4 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 20 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 4 : i64 + %acc_copy_dim1 = arith.constant 4 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 20 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_4x4xbf16_times_4x4xbf16_into_4x4xf32_4_4_4_acc_7() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 4x4x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 4 : i64 + %lhs_dim1 = arith.constant 4 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 21 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 22 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 4 : i64 + %acc_dim1 = arith.constant 4 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 23 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = 
arith.constant 4 : i64 + %acc_copy_dim1 = arith.constant 4 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 23 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_4x4xbf16_times_4x4xbf16_into_4x4xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_8_8_8_acc_8() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 8x8x8"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 8 : i64 + %lhs_dim1 = arith.constant 8 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 24 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 8 : i64 + %rhs_dim1 = arith.constant 8 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 25 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 8 : i64 + %acc_dim1 = arith.constant 8 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 26 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 8 : i64 + %acc_copy_dim1 = arith.constant 8 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 26 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 8 : i64 + %k = arith.constant 8 : i64 + %n = arith.constant 8 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_8x8xbf16_times_8x8xbf16_into_8x8xf32_8_8_8_acc_9() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 8x8x8"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 8 : i64 + %lhs_dim1 = arith.constant 8 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 27 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, 
%lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 8 : i64 + %rhs_dim1 = arith.constant 8 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 28 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 8 : i64 + %acc_dim1 = arith.constant 8 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 29 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 8 : i64 + %acc_copy_dim1 = arith.constant 8 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 29 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_8x8xbf16_times_8x8xbf16_into_8x8xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 8 : i64 + %k = arith.constant 8 : i64 + %n = arith.constant 8 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_9_9_9_acc_10() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 9x9x9"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 9 : i64 + %lhs_dim1 = arith.constant 9 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 30 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 9 : i64 + %rhs_dim1 = arith.constant 9 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 31 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 9 : i64 + %acc_dim1 = arith.constant 9 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 32 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 9 : i64 + %acc_copy_dim1 = arith.constant 9 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 32 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 9 : i64 + %k = arith.constant 9 : i64 + %n = 
arith.constant 9 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_9x9xbf16_times_9x9xbf16_into_9x9xf32_9_9_9_acc_11() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 9x9x9"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 9 : i64 + %lhs_dim1 = arith.constant 9 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 33 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 9 : i64 + %rhs_dim1 = arith.constant 9 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 34 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 9 : i64 + %acc_dim1 = arith.constant 9 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 35 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 9 : i64 + %acc_copy_dim1 = arith.constant 9 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 35 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_9x9xbf16_times_9x9xbf16_into_9x9xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 9 : i64 + %k = arith.constant 9 : i64 + %n = arith.constant 9 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_6_13_3_acc_12() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 6x13x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 6 : i64 + %lhs_dim1 = arith.constant 13 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 36 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 3 : i64 + %rhs_dim1 = arith.constant 13 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 37 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 6 : i64 + %acc_dim1 = arith.constant 3 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 38 : i32 + %acc = call 
@matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 6 : i64 + %acc_copy_dim1 = arith.constant 3 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 38 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 6 : i64 + %k = arith.constant 13 : i64 + %n = arith.constant 3 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_6x13xbf16_times_3x13xbf16_into_6x3xf32_6_13_3_acc_13() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 6x13x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 6 : i64 + %lhs_dim1 = arith.constant 13 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 39 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 3 : i64 + %rhs_dim1 = arith.constant 13 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 40 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 6 : i64 + %acc_dim1 = arith.constant 3 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 41 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 6 : i64 + %acc_copy_dim1 = arith.constant 3 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 41 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_6x13xbf16_times_3x13xbf16_into_6x3xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 6 : i64 + %k = arith.constant 13 : i64 + %n = arith.constant 3 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_15_37_7_14() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 15x37x7"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 15 : i64 + 
%lhs_dim1 = arith.constant 37 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 42 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 7 : i64 + %rhs_dim1 = arith.constant 37 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 43 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 15 : i64 + %k = arith.constant 37 : i64 + %n = arith.constant 7 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_15x37xbf16_times_7x37xbf16_into_15x7xf32_15_37_7_15() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 15x37x7"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 15 : i64 + %lhs_dim1 = arith.constant 37 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 44 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 7 : i64 + %rhs_dim1 = arith.constant 37 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 45 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_15x37xbf16_times_7x37xbf16_into_15x7xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 15 : i64 + %k = arith.constant 37 : i64 + %n = arith.constant 7 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_81_19_41_acc_16() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 81x19x41"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 81 : i64 + %lhs_dim1 = arith.constant 19 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 46 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 41 : i64 + %rhs_dim1 = arith.constant 19 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 47 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> 
!hal.buffer_view + %acc_dim0 = arith.constant 81 : i64 + %acc_dim1 = arith.constant 41 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 48 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 81 : i64 + %acc_copy_dim1 = arith.constant 41 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 48 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 81 : i64 + %k = arith.constant 19 : i64 + %n = arith.constant 41 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_81x19xbf16_times_41x19xbf16_into_81x41xf32_81_19_41_acc_17() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 81x19x41"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 81 : i64 + %lhs_dim1 = arith.constant 19 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 49 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 41 : i64 + %rhs_dim1 = arith.constant 19 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 50 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 81 : i64 + %acc_dim1 = arith.constant 41 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 51 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 81 : i64 + %acc_copy_dim1 = arith.constant 41 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 51 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_81x19xbf16_times_41x19xbf16_into_81x41xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 81 : i64 + %k = arith.constant 19 : i64 + %n = arith.constant 41 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_1_10_10_acc_18() 
attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 52 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 53 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 10 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 54 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 10 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 54 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_1x10xbf16_times_10x10xbf16_into_1x10xf32_1_10_10_acc_19() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 55 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 56 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 10 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 57 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 10 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 57 : i32 + %acc_copy = call 
@matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_1x10xbf16_times_10x10xbf16_into_1x10xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_1_10_10_20() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 58 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 59 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_1x10xbf16_times_10x10xbf16_into_1x10xf32_1_10_10_21() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 60 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 61 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_1x10xbf16_times_10x10xbf16_into_1x10xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, 
i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_10_1_10_acc_22() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x1x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 62 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 63 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 10 : i64 + %acc_dim1 = arith.constant 10 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 64 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 10 : i64 + %acc_copy_dim1 = arith.constant 10 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 64 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_10x1xbf16_times_10x1xbf16_into_10x10xf32_10_1_10_acc_23() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x1x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 65 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 66 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 10 : i64 + %acc_dim1 = arith.constant 10 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 67 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 
= arith.constant 10 : i64 + %acc_copy_dim1 = arith.constant 10 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 67 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_10x1xbf16_times_10x1xbf16_into_10x10xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_10_10_1_acc_24() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 68 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 69 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 10 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 70 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 10 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 70 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_10x10xbf16_times_1x10xbf16_into_10x1xf32_10_10_1_acc_25() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 71 : i32 + %lhs = call 
@matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 72 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 10 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 73 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 10 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 73 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_10x10xbf16_times_1x10xbf16_into_10x1xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32_10_10_1_26() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 74 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 75 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxbf16_times_DYNxDYNxbf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_10x10xbf16_times_1x10xbf16_into_10x1xf32_10_10_1_27() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = 
hal.element_type<bf16> : i32
+  %lhs_seed = arith.constant 76 : i32
+  %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+  %rhs_dim0 = arith.constant 1 : i64
+  %rhs_dim1 = arith.constant 10 : i64
+  %rhs_element_type = hal.element_type<bf16> : i32
+  %rhs_seed = arith.constant 77 : i32
+  %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+  %acc = util.null : !hal.buffer_view
+  %result = call @module.matmul_10x10xbf16_times_1x10xbf16_into_10x1xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view
+  %m = arith.constant 10 : i64
+  %k = arith.constant 10 : i64
+  %n = arith.constant 1 : i64
+  %transpose_rhs = arith.constant 1 : i32
+  call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()
+  return
+}
+
+
+}
diff --git a/linalg_ops/matmul/generated/f16_into_f16/matmul_f16_into_f16_large.mlir b/linalg_ops/matmul/generated/f16_into_f16/matmul_f16_into_f16_large.mlir
new file mode 100644
index 0000000..649e4fb
--- /dev/null
+++ b/linalg_ops/matmul/generated/f16_into_f16/matmul_f16_into_f16_large.mlir
@@ -0,0 +1,136 @@
+func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs: tensor<?x?xf16>, %rhs: tensor<?x?xf16>, %acc: tensor<?x?xf16>) -> tensor<?x?xf16> {
+  %result = linalg.matmul ins(%lhs, %rhs: tensor<?x?xf16>, tensor<?x?xf16>) outs(%acc: tensor<?x?xf16>) -> tensor<?x?xf16>
+  return %result: tensor<?x?xf16>
+}
+
+func.func @matmul_accumulate_512x128xf16_times_128x512xf16_into_512x512xf16(%lhs: tensor<512x128xf16>, %rhs: tensor<128x512xf16>, %acc: tensor<512x512xf16>) -> tensor<512x512xf16> {
+  %result = linalg.matmul ins(%lhs, %rhs: tensor<512x128xf16>, tensor<128x512xf16>) outs(%acc: tensor<512x512xf16>) -> tensor<512x512xf16>
+  return %result: tensor<512x512xf16>
+}
+
+func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs: tensor<?x?xf16>, %rhs: tensor<?x?xf16>) -> tensor<?x?xf16> {
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+  %acc_dim0 = tensor.dim %lhs, %c0 : tensor<?x?xf16>
+  %acc_dim1 = tensor.dim %rhs, %c1 : tensor<?x?xf16>
+  %init_acc = tensor.empty(%acc_dim0, %acc_dim1) : tensor<?x?xf16>
+  %c0_acc_type = arith.constant 0.0: f16
+  %acc = linalg.fill ins(%c0_acc_type : f16) outs(%init_acc : tensor<?x?xf16>) -> tensor<?x?xf16>
+  %result = linalg.matmul ins(%lhs, %rhs: tensor<?x?xf16>, tensor<?x?xf16>) outs(%acc: tensor<?x?xf16>) -> tensor<?x?xf16>
+  return %result: tensor<?x?xf16>
+}
+
+func.func @matmul_512x128xf16_times_128x512xf16_into_512x512xf16(%lhs: tensor<512x128xf16>, %rhs: tensor<128x512xf16>) -> tensor<512x512xf16> {
+  %init_acc = tensor.empty() : tensor<512x512xf16>
+  %c0_acc_type = arith.constant 0.0: f16
+  %acc = linalg.fill ins(%c0_acc_type : f16) outs(%init_acc : tensor<512x512xf16>) -> tensor<512x512xf16>
+  %result = linalg.matmul ins(%lhs, %rhs: tensor<512x128xf16>, tensor<128x512xf16>) outs(%acc: tensor<512x512xf16>) -> tensor<512x512xf16>
+  return %result: tensor<512x512xf16>
+}
+
+func.func @matmul_1000x4xf16_times_4x512xf16_into_1000x512xf16(%lhs: tensor<1000x4xf16>, %rhs: tensor<4x512xf16>) -> tensor<1000x512xf16> {
+  %init_acc = tensor.empty() : tensor<1000x512xf16>
+  %c0_acc_type = arith.constant 0.0: f16
+  %acc = linalg.fill ins(%c0_acc_type : f16) outs(%init_acc : tensor<1000x512xf16>) -> tensor<1000x512xf16>
+  %result = linalg.matmul ins(%lhs, %rhs: tensor<1000x4xf16>, tensor<4x512xf16>) outs(%acc: 
tensor<1000x512xf16>) -> tensor<1000x512xf16> + return %result: tensor<1000x512xf16> +} + +func.func @matmul_4x1000xf16_times_1000x512xf16_into_4x512xf16(%lhs: tensor<4x1000xf16>, %rhs: tensor<1000x512xf16>) -> tensor<4x512xf16> { + %init_acc = tensor.empty() : tensor<4x512xf16> + %c0_acc_type = arith.constant 0.0: f16 + %acc = linalg.fill ins(%c0_acc_type : f16) outs(%init_acc : tensor<4x512xf16>) -> tensor<4x512xf16> + %result = linalg.matmul ins(%lhs, %rhs: tensor<4x1000xf16>, tensor<1000x512xf16>) outs(%acc: tensor<4x512xf16>) -> tensor<4x512xf16> + return %result: tensor<4x512xf16> +} + +func.func @matmul_512x1000xf16_times_1000x4xf16_into_512x4xf16(%lhs: tensor<512x1000xf16>, %rhs: tensor<1000x4xf16>) -> tensor<512x4xf16> { + %init_acc = tensor.empty() : tensor<512x4xf16> + %c0_acc_type = arith.constant 0.0: f16 + %acc = linalg.fill ins(%c0_acc_type : f16) outs(%init_acc : tensor<512x4xf16>) -> tensor<512x4xf16> + %result = linalg.matmul ins(%lhs, %rhs: tensor<512x1000xf16>, tensor<1000x4xf16>) outs(%acc: tensor<512x4xf16>) -> tensor<512x4xf16> + return %result: tensor<512x4xf16> +} + +func.func @matmul_512x128xf16_times_128x500xf16_into_512x500xf16(%lhs: tensor<512x128xf16>, %rhs: tensor<128x500xf16>) -> tensor<512x500xf16> { + %init_acc = tensor.empty() : tensor<512x500xf16> + %c0_acc_type = arith.constant 0.0: f16 + %acc = linalg.fill ins(%c0_acc_type : f16) outs(%init_acc : tensor<512x500xf16>) -> tensor<512x500xf16> + %result = linalg.matmul ins(%lhs, %rhs: tensor<512x128xf16>, tensor<128x500xf16>) outs(%acc: tensor<512x500xf16>) -> tensor<512x500xf16> + return %result: tensor<512x500xf16> +} + +func.func @matmul_457x330xf16_times_330x512xf16_into_457x512xf16(%lhs: tensor<457x330xf16>, %rhs: tensor<330x512xf16>) -> tensor<457x512xf16> { + %init_acc = tensor.empty() : tensor<457x512xf16> + %c0_acc_type = arith.constant 0.0: f16 + %acc = linalg.fill ins(%c0_acc_type : f16) outs(%init_acc : tensor<457x512xf16>) -> tensor<457x512xf16> + %result = linalg.matmul ins(%lhs, %rhs: tensor<457x330xf16>, tensor<330x512xf16>) outs(%acc: tensor<457x512xf16>) -> tensor<457x512xf16> + return %result: tensor<457x512xf16> +} + +func.func @matmul_457x330xf16_times_330x514xf16_into_457x514xf16(%lhs: tensor<457x330xf16>, %rhs: tensor<330x514xf16>) -> tensor<457x514xf16> { + %init_acc = tensor.empty() : tensor<457x514xf16> + %c0_acc_type = arith.constant 0.0: f16 + %acc = linalg.fill ins(%c0_acc_type : f16) outs(%init_acc : tensor<457x514xf16>) -> tensor<457x514xf16> + %result = linalg.matmul ins(%lhs, %rhs: tensor<457x330xf16>, tensor<330x514xf16>) outs(%acc: tensor<457x514xf16>) -> tensor<457x514xf16> + return %result: tensor<457x514xf16> +} + +func.func @matmul_438x330xf16_times_330x514xf16_into_438x514xf16(%lhs: tensor<438x330xf16>, %rhs: tensor<330x514xf16>) -> tensor<438x514xf16> { + %init_acc = tensor.empty() : tensor<438x514xf16> + %c0_acc_type = arith.constant 0.0: f16 + %acc = linalg.fill ins(%c0_acc_type : f16) outs(%init_acc : tensor<438x514xf16>) -> tensor<438x514xf16> + %result = linalg.matmul ins(%lhs, %rhs: tensor<438x330xf16>, tensor<330x514xf16>) outs(%acc: tensor<438x514xf16>) -> tensor<438x514xf16> + return %result: tensor<438x514xf16> +} + +func.func @matmul_540x332xf16_times_332x516xf16_into_540x516xf16(%lhs: tensor<540x332xf16>, %rhs: tensor<332x516xf16>) -> tensor<540x516xf16> { + %init_acc = tensor.empty() : tensor<540x516xf16> + %c0_acc_type = arith.constant 0.0: f16 + %acc = linalg.fill ins(%c0_acc_type : f16) outs(%init_acc : tensor<540x516xf16>) -> tensor<540x516xf16> + 
%result = linalg.matmul ins(%lhs, %rhs: tensor<540x332xf16>, tensor<332x516xf16>) outs(%acc: tensor<540x516xf16>) -> tensor<540x516xf16> + return %result: tensor<540x516xf16> +} + +func.func @matmul_654x321xf16_times_321x234xf16_into_654x234xf16(%lhs: tensor<654x321xf16>, %rhs: tensor<321x234xf16>) -> tensor<654x234xf16> { + %init_acc = tensor.empty() : tensor<654x234xf16> + %c0_acc_type = arith.constant 0.0: f16 + %acc = linalg.fill ins(%c0_acc_type : f16) outs(%init_acc : tensor<654x234xf16>) -> tensor<654x234xf16> + %result = linalg.matmul ins(%lhs, %rhs: tensor<654x321xf16>, tensor<321x234xf16>) outs(%acc: tensor<654x234xf16>) -> tensor<654x234xf16> + return %result: tensor<654x234xf16> +} + +func.func @matmul_457x160xf16_times_160x512xf16_into_457x512xf16(%lhs: tensor<457x160xf16>, %rhs: tensor<160x512xf16>) -> tensor<457x512xf16> { + %init_acc = tensor.empty() : tensor<457x512xf16> + %c0_acc_type = arith.constant 0.0: f16 + %acc = linalg.fill ins(%c0_acc_type : f16) outs(%init_acc : tensor<457x512xf16>) -> tensor<457x512xf16> + %result = linalg.matmul ins(%lhs, %rhs: tensor<457x160xf16>, tensor<160x512xf16>) outs(%acc: tensor<457x512xf16>) -> tensor<457x512xf16> + return %result: tensor<457x512xf16> +} + +func.func @matmul_512x330xf16_times_330x512xf16_into_512x512xf16(%lhs: tensor<512x330xf16>, %rhs: tensor<330x512xf16>) -> tensor<512x512xf16> { + %init_acc = tensor.empty() : tensor<512x512xf16> + %c0_acc_type = arith.constant 0.0: f16 + %acc = linalg.fill ins(%c0_acc_type : f16) outs(%init_acc : tensor<512x512xf16>) -> tensor<512x512xf16> + %result = linalg.matmul ins(%lhs, %rhs: tensor<512x330xf16>, tensor<330x512xf16>) outs(%acc: tensor<512x512xf16>) -> tensor<512x512xf16> + return %result: tensor<512x512xf16> +} + +func.func @matmul_accumulate_1x1000xf16_times_1000x1000xf16_into_1x1000xf16(%lhs: tensor<1x1000xf16>, %rhs: tensor<1000x1000xf16>, %acc: tensor<1x1000xf16>) -> tensor<1x1000xf16> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<1x1000xf16>, tensor<1000x1000xf16>) outs(%acc: tensor<1x1000xf16>) -> tensor<1x1000xf16> + return %result: tensor<1x1000xf16> +} + +func.func @matmul_accumulate_1000x1000xf16_times_1000x1xf16_into_1000x1xf16(%lhs: tensor<1000x1000xf16>, %rhs: tensor<1000x1xf16>, %acc: tensor<1000x1xf16>) -> tensor<1000x1xf16> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<1000x1000xf16>, tensor<1000x1xf16>) outs(%acc: tensor<1000x1xf16>) -> tensor<1000x1xf16> + return %result: tensor<1000x1xf16> +} + +func.func @matmul_1000x1000xf16_times_1000x1xf16_into_1000x1xf16(%lhs: tensor<1000x1000xf16>, %rhs: tensor<1000x1xf16>) -> tensor<1000x1xf16> { + %init_acc = tensor.empty() : tensor<1000x1xf16> + %c0_acc_type = arith.constant 0.0: f16 + %acc = linalg.fill ins(%c0_acc_type : f16) outs(%init_acc : tensor<1000x1xf16>) -> tensor<1000x1xf16> + %result = linalg.matmul ins(%lhs, %rhs: tensor<1000x1000xf16>, tensor<1000x1xf16>) outs(%acc: tensor<1000x1xf16>) -> tensor<1000x1xf16> + return %result: tensor<1000x1xf16> +} + diff --git a/linalg_ops/matmul/generated/f16_into_f16/matmul_f16_into_f16_large_calls.mlir b/linalg_ops/matmul/generated/f16_into_f16/matmul_f16_into_f16_large_calls.mlir new file mode 100644 index 0000000..998c031 --- /dev/null +++ b/linalg_ops/matmul/generated/f16_into_f16/matmul_f16_into_f16_large_calls.mlir @@ -0,0 +1,882 @@ +builtin.module @calls attributes { + +} { + +func.func private @matmul_test.generate_random_matrix(%device: !hal.device, %dim0: i64, %dim1: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private 
@matmul_test.check_matmul_results(%device: !hal.device, %m: i64, %k: i64, %n: i64, %transpose_rhs: i32, %lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) + +func.func private @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_512x128xf16_times_128x512xf16_into_512x512xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_512x128xf16_times_128x512xf16_into_512x512xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_1000x4xf16_times_4x512xf16_into_1000x512xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_4x1000xf16_times_1000x512xf16_into_4x512xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_512x1000xf16_times_1000x4xf16_into_512x4xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_512x128xf16_times_128x500xf16_into_512x500xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_457x330xf16_times_330x512xf16_into_457x512xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_457x330xf16_times_330x514xf16_into_457x514xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_438x330xf16_times_330x514xf16_into_438x514xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_540x332xf16_times_332x516xf16_into_540x516xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_654x321xf16_times_321x234xf16_into_654x234xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_457x160xf16_times_160x512xf16_into_457x512xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_512x330xf16_times_330x512xf16_into_512x512xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_1x1000xf16_times_1000x1000xf16_into_1x1000xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_1000x1000xf16_times_1000x1xf16_into_1000x1xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_1000x1000xf16_times_1000x1xf16_into_1000x1xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view + +func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_512_128_512_acc_0() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 2 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, 
%lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 128 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 3 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 512 : i64 + %acc_dim1 = arith.constant 512 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 512 : i64 + %acc_copy_dim1 = arith.constant 512 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_512x128xf16_times_128x512xf16_into_512x512xf16_512_128_512_acc_1() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 5 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 128 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 6 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 512 : i64 + %acc_dim1 = arith.constant 512 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 512 : i64 + %acc_copy_dim1 = arith.constant 512 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_512x128xf16_times_128x512xf16_into_512x512xf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = 
arith.constant 128 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_512_128_512_2() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 8 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 128 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 9 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_512x128xf16_times_128x512xf16_into_512x512xf16_512_128_512_3() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 10 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 128 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 11 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_512x128xf16_times_128x512xf16_into_512x512xf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_1000_4_512_4() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x4x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get 
%device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 4 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 12 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 13 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_1000x4xf16_times_4x512xf16_into_1000x512xf16_1000_4_512_5() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x4x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 4 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 14 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 15 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_1000x4xf16_times_4x512xf16_into_1000x512xf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_4_1000_512_6() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 4x1000x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 4 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 16 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 17 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, 
%rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_4x1000xf16_times_1000x512xf16_into_4x512xf16_4_1000_512_7() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 4x1000x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 4 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 18 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 19 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_4x1000xf16_times_1000x512xf16_into_4x512xf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_512_1000_4_8() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x1000x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 20 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 21 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, 
!hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_512x1000xf16_times_1000x4xf16_into_512x4xf16_512_1000_4_9() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x1000x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 22 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 23 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_512x1000xf16_times_1000x4xf16_into_512x4xf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_512_128_500_10() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x500"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 24 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 128 : i64 + %rhs_dim1 = arith.constant 500 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 25 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 500 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_512x128xf16_times_128x500xf16_into_512x500xf16_512_128_500_11() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x500"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 26 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, 
%lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 128 : i64 + %rhs_dim1 = arith.constant 500 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 27 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_512x128xf16_times_128x500xf16_into_512x500xf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 500 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_457_330_512_12() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x330x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 28 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 29 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_457x330xf16_times_330x512xf16_into_457x512xf16_457_330_512_13() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x330x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 30 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 31 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_457x330xf16_times_330x512xf16_into_457x512xf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> 
!hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_457_330_514_14() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x330x514"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 32 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 514 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 33 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 514 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_457x330xf16_times_330x514xf16_into_457x514xf16_457_330_514_15() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x330x514"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 34 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 514 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 35 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_457x330xf16_times_330x514xf16_into_457x514xf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 514 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_438_330_514_16() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 438x330x514"} +} { + %device_index = 
arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 438 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 36 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 514 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 37 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 438 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 514 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_438x330xf16_times_330x514xf16_into_438x514xf16_438_330_514_17() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 438x330x514"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 438 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 38 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 514 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 39 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_438x330xf16_times_330x514xf16_into_438x514xf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 438 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 514 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_540_332_516_18() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 540x332x516"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 540 : i64 + %lhs_dim1 = arith.constant 332 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 40 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 332 : i64 + %rhs_dim1 = arith.constant 516 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 41 : i32 + %rhs = 
call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 540 : i64 + %k = arith.constant 332 : i64 + %n = arith.constant 516 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_540x332xf16_times_332x516xf16_into_540x516xf16_540_332_516_19() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 540x332x516"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 540 : i64 + %lhs_dim1 = arith.constant 332 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 42 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 332 : i64 + %rhs_dim1 = arith.constant 516 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 43 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_540x332xf16_times_332x516xf16_into_540x516xf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 540 : i64 + %k = arith.constant 332 : i64 + %n = arith.constant 516 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_654_321_234_20() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 654x321x234"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 654 : i64 + %lhs_dim1 = arith.constant 321 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 44 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 321 : i64 + %rhs_dim1 = arith.constant 234 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 45 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 654 : i64 + %k = arith.constant 321 : i64 + %n = arith.constant 234 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, 
%result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_654x321xf16_times_321x234xf16_into_654x234xf16_654_321_234_21() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 654x321x234"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 654 : i64 + %lhs_dim1 = arith.constant 321 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 46 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 321 : i64 + %rhs_dim1 = arith.constant 234 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 47 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_654x321xf16_times_321x234xf16_into_654x234xf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 654 : i64 + %k = arith.constant 321 : i64 + %n = arith.constant 234 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_457_160_512_22() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x160x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 160 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 48 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 160 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 49 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 160 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_457x160xf16_times_160x512xf16_into_457x512xf16_457_160_512_23() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x160x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 160 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 50 : i32 + %lhs = 
call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 160 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 51 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_457x160xf16_times_160x512xf16_into_457x512xf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 160 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_512_330_512_24() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x330x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 52 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 53 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_512x330xf16_times_330x512xf16_into_512x512xf16_512_330_512_25() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x330x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 54 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 55 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call 
@module.matmul_512x330xf16_times_330x512xf16_into_512x512xf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_1_1000_1000_acc_26() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1000x1000"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 56 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 57 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1000 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 58 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1000 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 58 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1000 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_1x1000xf16_times_1000x1000xf16_into_1x1000xf16_1_1000_1000_acc_27() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1000x1000"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 59 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 60 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, 
%rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1000 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 61 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1000 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 61 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_1x1000xf16_times_1000x1000xf16_into_1x1000xf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1000 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_1000_1000_1_acc_28() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x1000x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 62 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 63 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1000 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 64 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1000 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 64 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + 
return +} + +func.func @matmul_accumulate_1000x1000xf16_times_1000x1xf16_into_1000x1xf16_1000_1000_1_acc_29() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x1000x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 65 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 66 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1000 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 67 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1000 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 67 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_1000x1000xf16_times_1000x1xf16_into_1000x1xf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_1000_1000_1_30() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x1000x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 68 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 69 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, 
%rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_1000x1000xf16_times_1000x1xf16_into_1000x1xf16_1000_1000_1_31() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x1000x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type<f16> : i32 + %lhs_seed = arith.constant 70 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type<f16> : i32 + %rhs_seed = arith.constant 71 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_1000x1000xf16_times_1000x1xf16_into_1000x1xf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + + +} diff --git a/linalg_ops/matmul/generated/f16_into_f16/matmul_f16_into_f16_small.mlir b/linalg_ops/matmul/generated/f16_into_f16/matmul_f16_into_f16_small.mlir new file mode 100644 index 0000000..3cfe7cd --- /dev/null +++ b/linalg_ops/matmul/generated/f16_into_f16/matmul_f16_into_f16_small.mlir @@ -0,0 +1,99 @@ +func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs: tensor<?x?xf16>, %rhs: tensor<?x?xf16>, %acc: tensor<?x?xf16>) -> tensor<?x?xf16> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<?x?xf16>, tensor<?x?xf16>) outs(%acc: tensor<?x?xf16>) -> tensor<?x?xf16> + return %result: tensor<?x?xf16> +} + +func.func @matmul_accumulate_1x1xf16_times_1x1xf16_into_1x1xf16(%lhs: tensor<1x1xf16>, %rhs: tensor<1x1xf16>, %acc: tensor<1x1xf16>) -> tensor<1x1xf16> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<1x1xf16>, tensor<1x1xf16>) outs(%acc: tensor<1x1xf16>) -> tensor<1x1xf16> + return %result: tensor<1x1xf16> +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs: tensor<?x?xf16>, %rhs: tensor<?x?xf16>) -> tensor<?x?xf16> { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %acc_dim0 = tensor.dim %lhs, %c0 : tensor<?x?xf16> + %acc_dim1 = tensor.dim %rhs, %c1 : tensor<?x?xf16> + %init_acc = tensor.empty(%acc_dim0, %acc_dim1) : tensor<?x?xf16> + %c0_acc_type = arith.constant 0.0: f16 + %acc = linalg.fill ins(%c0_acc_type : f16) outs(%init_acc : tensor<?x?xf16>) -> tensor<?x?xf16> + %result = linalg.matmul ins(%lhs, %rhs: tensor<?x?xf16>, tensor<?x?xf16>) outs(%acc: tensor<?x?xf16>) -> tensor<?x?xf16> + return %result: tensor<?x?xf16> +} + +func.func @matmul_1x1xf16_times_1x1xf16_into_1x1xf16(%lhs: tensor<1x1xf16>, %rhs: tensor<1x1xf16>) -> tensor<1x1xf16> { + %init_acc = tensor.empty() : tensor<1x1xf16> + %c0_acc_type = arith.constant 0.0: f16 + %acc = linalg.fill ins(%c0_acc_type : f16) outs(%init_acc : tensor<1x1xf16>) -> tensor<1x1xf16> + %result = linalg.matmul ins(%lhs, %rhs: tensor<1x1xf16>, tensor<1x1xf16>) outs(%acc: tensor<1x1xf16>) -> tensor<1x1xf16> + return %result: tensor<1x1xf16> +} + +func.func 
@matmul_accumulate_2x2xf16_times_2x2xf16_into_2x2xf16(%lhs: tensor<2x2xf16>, %rhs: tensor<2x2xf16>, %acc: tensor<2x2xf16>) -> tensor<2x2xf16> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<2x2xf16>, tensor<2x2xf16>) outs(%acc: tensor<2x2xf16>) -> tensor<2x2xf16> + return %result: tensor<2x2xf16> +} + +func.func @matmul_accumulate_4x4xf16_times_4x4xf16_into_4x4xf16(%lhs: tensor<4x4xf16>, %rhs: tensor<4x4xf16>, %acc: tensor<4x4xf16>) -> tensor<4x4xf16> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<4x4xf16>, tensor<4x4xf16>) outs(%acc: tensor<4x4xf16>) -> tensor<4x4xf16> + return %result: tensor<4x4xf16> +} + +func.func @matmul_accumulate_8x8xf16_times_8x8xf16_into_8x8xf16(%lhs: tensor<8x8xf16>, %rhs: tensor<8x8xf16>, %acc: tensor<8x8xf16>) -> tensor<8x8xf16> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<8x8xf16>, tensor<8x8xf16>) outs(%acc: tensor<8x8xf16>) -> tensor<8x8xf16> + return %result: tensor<8x8xf16> +} + +func.func @matmul_accumulate_9x9xf16_times_9x9xf16_into_9x9xf16(%lhs: tensor<9x9xf16>, %rhs: tensor<9x9xf16>, %acc: tensor<9x9xf16>) -> tensor<9x9xf16> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<9x9xf16>, tensor<9x9xf16>) outs(%acc: tensor<9x9xf16>) -> tensor<9x9xf16> + return %result: tensor<9x9xf16> +} + +func.func @matmul_accumulate_6x13xf16_times_13x3xf16_into_6x3xf16(%lhs: tensor<6x13xf16>, %rhs: tensor<13x3xf16>, %acc: tensor<6x3xf16>) -> tensor<6x3xf16> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<6x13xf16>, tensor<13x3xf16>) outs(%acc: tensor<6x3xf16>) -> tensor<6x3xf16> + return %result: tensor<6x3xf16> +} + +func.func @matmul_15x37xf16_times_37x7xf16_into_15x7xf16(%lhs: tensor<15x37xf16>, %rhs: tensor<37x7xf16>) -> tensor<15x7xf16> { + %init_acc = tensor.empty() : tensor<15x7xf16> + %c0_acc_type = arith.constant 0.0: f16 + %acc = linalg.fill ins(%c0_acc_type : f16) outs(%init_acc : tensor<15x7xf16>) -> tensor<15x7xf16> + %result = linalg.matmul ins(%lhs, %rhs: tensor<15x37xf16>, tensor<37x7xf16>) outs(%acc: tensor<15x7xf16>) -> tensor<15x7xf16> + return %result: tensor<15x7xf16> +} + +func.func @matmul_accumulate_81x19xf16_times_19x41xf16_into_81x41xf16(%lhs: tensor<81x19xf16>, %rhs: tensor<19x41xf16>, %acc: tensor<81x41xf16>) -> tensor<81x41xf16> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<81x19xf16>, tensor<19x41xf16>) outs(%acc: tensor<81x41xf16>) -> tensor<81x41xf16> + return %result: tensor<81x41xf16> +} + +func.func @matmul_accumulate_1x10xf16_times_10x10xf16_into_1x10xf16(%lhs: tensor<1x10xf16>, %rhs: tensor<10x10xf16>, %acc: tensor<1x10xf16>) -> tensor<1x10xf16> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<1x10xf16>, tensor<10x10xf16>) outs(%acc: tensor<1x10xf16>) -> tensor<1x10xf16> + return %result: tensor<1x10xf16> +} + +func.func @matmul_1x10xf16_times_10x10xf16_into_1x10xf16(%lhs: tensor<1x10xf16>, %rhs: tensor<10x10xf16>) -> tensor<1x10xf16> { + %init_acc = tensor.empty() : tensor<1x10xf16> + %c0_acc_type = arith.constant 0.0: f16 + %acc = linalg.fill ins(%c0_acc_type : f16) outs(%init_acc : tensor<1x10xf16>) -> tensor<1x10xf16> + %result = linalg.matmul ins(%lhs, %rhs: tensor<1x10xf16>, tensor<10x10xf16>) outs(%acc: tensor<1x10xf16>) -> tensor<1x10xf16> + return %result: tensor<1x10xf16> +} + +func.func @matmul_accumulate_10x1xf16_times_1x10xf16_into_10x10xf16(%lhs: tensor<10x1xf16>, %rhs: tensor<1x10xf16>, %acc: tensor<10x10xf16>) -> tensor<10x10xf16> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<10x1xf16>, tensor<1x10xf16>) outs(%acc: tensor<10x10xf16>) -> tensor<10x10xf16> + return %result: tensor<10x10xf16> 
+} + +func.func @matmul_accumulate_10x10xf16_times_10x1xf16_into_10x1xf16(%lhs: tensor<10x10xf16>, %rhs: tensor<10x1xf16>, %acc: tensor<10x1xf16>) -> tensor<10x1xf16> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<10x10xf16>, tensor<10x1xf16>) outs(%acc: tensor<10x1xf16>) -> tensor<10x1xf16> + return %result: tensor<10x1xf16> +} + +func.func @matmul_10x10xf16_times_10x1xf16_into_10x1xf16(%lhs: tensor<10x10xf16>, %rhs: tensor<10x1xf16>) -> tensor<10x1xf16> { + %init_acc = tensor.empty() : tensor<10x1xf16> + %c0_acc_type = arith.constant 0.0: f16 + %acc = linalg.fill ins(%c0_acc_type : f16) outs(%init_acc : tensor<10x1xf16>) -> tensor<10x1xf16> + %result = linalg.matmul ins(%lhs, %rhs: tensor<10x10xf16>, tensor<10x1xf16>) outs(%acc: tensor<10x1xf16>) -> tensor<10x1xf16> + return %result: tensor<10x1xf16> +} + diff --git a/linalg_ops/matmul/generated/f16_into_f16/matmul_f16_into_f16_small_calls.mlir b/linalg_ops/matmul/generated/f16_into_f16/matmul_f16_into_f16_small_calls.mlir new file mode 100644 index 0000000..fd681dc --- /dev/null +++ b/linalg_ops/matmul/generated/f16_into_f16/matmul_f16_into_f16_small_calls.mlir @@ -0,0 +1,906 @@ +builtin.module @calls attributes { + +} { + +func.func private @matmul_test.generate_random_matrix(%device: !hal.device, %dim0: i64, %dim1: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @matmul_test.check_matmul_results(%device: !hal.device, %m: i64, %k: i64, %n: i64, %transpose_rhs: i32, %lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) + +func.func private @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_1x1xf16_times_1x1xf16_into_1x1xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_1x1xf16_times_1x1xf16_into_1x1xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_2x2xf16_times_2x2xf16_into_2x2xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_4x4xf16_times_4x4xf16_into_4x4xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_8x8xf16_times_8x8xf16_into_8x8xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_9x9xf16_times_9x9xf16_into_9x9xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_6x13xf16_times_13x3xf16_into_6x3xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_15x37xf16_times_37x7xf16_into_15x7xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_81x19xf16_times_19x41xf16_into_81x41xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_1x10xf16_times_10x10xf16_into_1x10xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: 
!hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_1x10xf16_times_10x10xf16_into_1x10xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_10x1xf16_times_1x10xf16_into_10x10xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_10x10xf16_times_10x1xf16_into_10x1xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_10x10xf16_times_10x1xf16_into_10x1xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view + +func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_1_1_1_acc_0() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 2 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 3 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_1x1xf16_times_1x1xf16_into_1x1xf16_1_1_1_acc_1() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 5 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + 
%rhs_seed = arith.constant 6 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_1x1xf16_times_1x1xf16_into_1x1xf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_1_1_1_2() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 8 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 9 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_1x1xf16_times_1x1xf16_into_1x1xf16_1_1_1_3() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 10 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1 : i64 + 
%rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 11 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_1x1xf16_times_1x1xf16_into_1x1xf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_2_2_2_acc_4() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 2x2x2"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 2 : i64 + %lhs_dim1 = arith.constant 2 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 12 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 2 : i64 + %rhs_dim1 = arith.constant 2 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 13 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 2 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 14 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 2 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 14 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 2 : i64 + %k = arith.constant 2 : i64 + %n = arith.constant 2 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_2x2xf16_times_2x2xf16_into_2x2xf16_2_2_2_acc_5() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 2x2x2"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 2 : i64 + %lhs_dim1 = arith.constant 2 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 15 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + 
%rhs_dim0 = arith.constant 2 : i64 + %rhs_dim1 = arith.constant 2 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 16 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 2 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 17 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 2 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 17 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_2x2xf16_times_2x2xf16_into_2x2xf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 2 : i64 + %k = arith.constant 2 : i64 + %n = arith.constant 2 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_4_4_4_acc_6() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 4x4x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 4 : i64 + %lhs_dim1 = arith.constant 4 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 18 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 19 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 4 : i64 + %acc_dim1 = arith.constant 4 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 20 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 4 : i64 + %acc_copy_dim1 = arith.constant 4 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 20 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, 
%n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_4x4xf16_times_4x4xf16_into_4x4xf16_4_4_4_acc_7() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 4x4x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 4 : i64 + %lhs_dim1 = arith.constant 4 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 21 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 22 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 4 : i64 + %acc_dim1 = arith.constant 4 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 23 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 4 : i64 + %acc_copy_dim1 = arith.constant 4 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 23 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_4x4xf16_times_4x4xf16_into_4x4xf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_8_8_8_acc_8() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 8x8x8"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 8 : i64 + %lhs_dim1 = arith.constant 8 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 24 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 8 : i64 + %rhs_dim1 = arith.constant 8 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 25 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 8 : i64 + %acc_dim1 = arith.constant 8 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 26 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> 
!hal.buffer_view + %acc_copy_dim0 = arith.constant 8 : i64 + %acc_copy_dim1 = arith.constant 8 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 26 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 8 : i64 + %k = arith.constant 8 : i64 + %n = arith.constant 8 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_8x8xf16_times_8x8xf16_into_8x8xf16_8_8_8_acc_9() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 8x8x8"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 8 : i64 + %lhs_dim1 = arith.constant 8 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 27 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 8 : i64 + %rhs_dim1 = arith.constant 8 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 28 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 8 : i64 + %acc_dim1 = arith.constant 8 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 29 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 8 : i64 + %acc_copy_dim1 = arith.constant 8 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 29 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_8x8xf16_times_8x8xf16_into_8x8xf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 8 : i64 + %k = arith.constant 8 : i64 + %n = arith.constant 8 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_9_9_9_acc_10() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 9x9x9"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 9 : i64 + %lhs_dim1 = arith.constant 9 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 30 : i32 + %lhs = call 
@matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 9 : i64 + %rhs_dim1 = arith.constant 9 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 31 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 9 : i64 + %acc_dim1 = arith.constant 9 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 32 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 9 : i64 + %acc_copy_dim1 = arith.constant 9 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 32 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 9 : i64 + %k = arith.constant 9 : i64 + %n = arith.constant 9 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_9x9xf16_times_9x9xf16_into_9x9xf16_9_9_9_acc_11() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 9x9x9"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 9 : i64 + %lhs_dim1 = arith.constant 9 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 33 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 9 : i64 + %rhs_dim1 = arith.constant 9 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 34 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 9 : i64 + %acc_dim1 = arith.constant 9 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 35 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 9 : i64 + %acc_copy_dim1 = arith.constant 9 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 35 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_9x9xf16_times_9x9xf16_into_9x9xf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 9 : i64 + %k = 
arith.constant 9 : i64 + %n = arith.constant 9 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_6_13_3_acc_12() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 6x13x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 6 : i64 + %lhs_dim1 = arith.constant 13 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 36 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 13 : i64 + %rhs_dim1 = arith.constant 3 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 37 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 6 : i64 + %acc_dim1 = arith.constant 3 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 38 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 6 : i64 + %acc_copy_dim1 = arith.constant 3 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 38 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 6 : i64 + %k = arith.constant 13 : i64 + %n = arith.constant 3 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_6x13xf16_times_13x3xf16_into_6x3xf16_6_13_3_acc_13() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 6x13x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 6 : i64 + %lhs_dim1 = arith.constant 13 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 39 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 13 : i64 + %rhs_dim1 = arith.constant 3 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 40 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 6 : i64 + %acc_dim1 = arith.constant 3 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = 
arith.constant 41 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 6 : i64 + %acc_copy_dim1 = arith.constant 3 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 41 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_6x13xf16_times_13x3xf16_into_6x3xf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 6 : i64 + %k = arith.constant 13 : i64 + %n = arith.constant 3 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_15_37_7_14() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 15x37x7"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 15 : i64 + %lhs_dim1 = arith.constant 37 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 42 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 37 : i64 + %rhs_dim1 = arith.constant 7 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 43 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 15 : i64 + %k = arith.constant 37 : i64 + %n = arith.constant 7 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_15x37xf16_times_37x7xf16_into_15x7xf16_15_37_7_15() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 15x37x7"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 15 : i64 + %lhs_dim1 = arith.constant 37 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 44 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 37 : i64 + %rhs_dim1 = arith.constant 7 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 45 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call 
@module.matmul_15x37xf16_times_37x7xf16_into_15x7xf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 15 : i64 + %k = arith.constant 37 : i64 + %n = arith.constant 7 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_81_19_41_acc_16() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 81x19x41"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 81 : i64 + %lhs_dim1 = arith.constant 19 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 46 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 19 : i64 + %rhs_dim1 = arith.constant 41 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 47 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 81 : i64 + %acc_dim1 = arith.constant 41 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 48 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 81 : i64 + %acc_copy_dim1 = arith.constant 41 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 48 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 81 : i64 + %k = arith.constant 19 : i64 + %n = arith.constant 41 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_81x19xf16_times_19x41xf16_into_81x41xf16_81_19_41_acc_17() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 81x19x41"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 81 : i64 + %lhs_dim1 = arith.constant 19 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 49 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 19 : i64 + %rhs_dim1 = arith.constant 41 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 50 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : 
(!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 81 : i64 + %acc_dim1 = arith.constant 41 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 51 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 81 : i64 + %acc_copy_dim1 = arith.constant 41 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 51 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_81x19xf16_times_19x41xf16_into_81x41xf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 81 : i64 + %k = arith.constant 19 : i64 + %n = arith.constant 41 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_1_10_10_acc_18() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 52 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 53 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 10 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 54 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 10 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 54 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func 
@matmul_accumulate_1x10xf16_times_10x10xf16_into_1x10xf16_1_10_10_acc_19() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 55 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 56 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 10 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 57 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 10 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 57 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_1x10xf16_times_10x10xf16_into_1x10xf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_1_10_10_20() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 58 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 59 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, 
!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_1x10xf16_times_10x10xf16_into_1x10xf16_1_10_10_21() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 60 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 61 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_1x10xf16_times_10x10xf16_into_1x10xf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_10_1_10_acc_22() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x1x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 62 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 63 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 10 : i64 + %acc_dim1 = arith.constant 10 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 64 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 10 : i64 + %acc_copy_dim1 = arith.constant 10 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 64 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, 
%transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_10x1xf16_times_1x10xf16_into_10x10xf16_10_1_10_acc_23() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x1x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 65 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 66 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 10 : i64 + %acc_dim1 = arith.constant 10 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 67 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 10 : i64 + %acc_copy_dim1 = arith.constant 10 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 67 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_10x1xf16_times_1x10xf16_into_10x10xf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_10_10_1_acc_24() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 68 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 69 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 10 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 70 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, 
i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 10 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 70 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_10x10xf16_times_10x1xf16_into_10x1xf16_10_10_1_acc_25() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 71 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 72 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 10 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 73 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 10 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 73 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_10x10xf16_times_10x1xf16_into_10x1xf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_10_10_1_26() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 74 : 
i32
+ %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+ %rhs_dim0 = arith.constant 10 : i64
+ %rhs_dim1 = arith.constant 1 : i64
+ %rhs_element_type = hal.element_type<f16> : i32
+ %rhs_seed = arith.constant 75 : i32
+ %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+ %acc = util.null : !hal.buffer_view
+ %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view
+ %m = arith.constant 10 : i64
+ %k = arith.constant 10 : i64
+ %n = arith.constant 1 : i64
+ %transpose_rhs = arith.constant 0 : i32
+ call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()
+ return
+}
+
+func.func @matmul_10x10xf16_times_10x1xf16_into_10x1xf16_10_10_1_27() attributes {
+ iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"}
+} {
+ %device_index = arith.constant 0 : index
+ %device = hal.devices.get %device_index : !hal.device
+ %lhs_dim0 = arith.constant 10 : i64
+ %lhs_dim1 = arith.constant 10 : i64
+ %lhs_element_type = hal.element_type<f16> : i32
+ %lhs_seed = arith.constant 76 : i32
+ %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+ %rhs_dim0 = arith.constant 10 : i64
+ %rhs_dim1 = arith.constant 1 : i64
+ %rhs_element_type = hal.element_type<f16> : i32
+ %rhs_seed = arith.constant 77 : i32
+ %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+ %acc = util.null : !hal.buffer_view
+ %result = call @module.matmul_10x10xf16_times_10x1xf16_into_10x1xf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view
+ %m = arith.constant 10 : i64
+ %k = arith.constant 10 : i64
+ %n = arith.constant 1 : i64
+ %transpose_rhs = arith.constant 0 : i32
+ call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()
+ return
+}
+
+
+}
diff --git a/linalg_ops/matmul/generated/f16_into_f16/matmul_transpose_b_f16_into_f16_large.mlir b/linalg_ops/matmul/generated/f16_into_f16/matmul_transpose_b_f16_into_f16_large.mlir
new file mode 100644
index 0000000..6663571
--- /dev/null
+++ b/linalg_ops/matmul/generated/f16_into_f16/matmul_transpose_b_f16_into_f16_large.mlir
@@ -0,0 +1,136 @@
+func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs: tensor<?x?xf16>, %rhs: tensor<?x?xf16>, %acc: tensor<?x?xf16>) -> tensor<?x?xf16> {
+ %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<?x?xf16>, tensor<?x?xf16>) outs(%acc: tensor<?x?xf16>) -> tensor<?x?xf16>
+ return %result: tensor<?x?xf16>
+}
+
+func.func @matmul_accumulate_512x128xf16_times_512x128xf16_into_512x512xf16(%lhs: tensor<512x128xf16>, %rhs: tensor<512x128xf16>, %acc: tensor<512x512xf16>) -> tensor<512x512xf16> {
+ %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<512x128xf16>, tensor<512x128xf16>) outs(%acc: tensor<512x512xf16>) -> tensor<512x512xf16>
+ return %result: tensor<512x512xf16>
+}
+
+func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs: tensor<?x?xf16>, %rhs: tensor<?x?xf16>) -> tensor<?x?xf16> {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %acc_dim0 = tensor.dim %lhs, %c0 : tensor<?x?xf16>
+ %acc_dim1 = tensor.dim %rhs, %c1 : tensor<?x?xf16>
+ %init_acc = tensor.empty(%acc_dim0, %acc_dim1) : tensor<?x?xf16>
+ %c0_acc_type = arith.constant 0.0: f16
+ %acc = linalg.fill ins(%c0_acc_type : f16) outs(%init_acc : tensor<?x?xf16>) -> tensor<?x?xf16>
+ %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<?x?xf16>, tensor<?x?xf16>) outs(%acc: tensor<?x?xf16>) -> tensor<?x?xf16>
+ return %result: tensor<?x?xf16>
+}
+
+func.func @matmul_512x128xf16_times_512x128xf16_into_512x512xf16(%lhs: tensor<512x128xf16>, %rhs: tensor<512x128xf16>) -> tensor<512x512xf16> {
+ %init_acc = tensor.empty() : tensor<512x512xf16>
+ %c0_acc_type = arith.constant 0.0: f16
+ %acc = linalg.fill ins(%c0_acc_type : f16) outs(%init_acc : tensor<512x512xf16>) -> tensor<512x512xf16>
+ %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<512x128xf16>, tensor<512x128xf16>) outs(%acc: tensor<512x512xf16>) -> tensor<512x512xf16>
+ return %result: tensor<512x512xf16>
+}
+
+func.func @matmul_1000x4xf16_times_512x4xf16_into_1000x512xf16(%lhs: tensor<1000x4xf16>, %rhs: tensor<512x4xf16>) -> tensor<1000x512xf16> {
+ %init_acc = tensor.empty() : tensor<1000x512xf16>
+ %c0_acc_type = arith.constant 0.0: f16
+ %acc = linalg.fill ins(%c0_acc_type : f16) outs(%init_acc : tensor<1000x512xf16>) -> tensor<1000x512xf16>
+ %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<1000x4xf16>, tensor<512x4xf16>) outs(%acc: tensor<1000x512xf16>) -> tensor<1000x512xf16>
+ return %result: tensor<1000x512xf16>
+}
+
+func.func @matmul_4x1000xf16_times_512x1000xf16_into_4x512xf16(%lhs: tensor<4x1000xf16>, %rhs: tensor<512x1000xf16>) -> tensor<4x512xf16> {
+ %init_acc = tensor.empty() : tensor<4x512xf16>
+ %c0_acc_type = arith.constant 0.0: f16
+ %acc = linalg.fill ins(%c0_acc_type : f16) outs(%init_acc : tensor<4x512xf16>) -> tensor<4x512xf16>
+ %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<4x1000xf16>, tensor<512x1000xf16>) outs(%acc: tensor<4x512xf16>) -> tensor<4x512xf16>
+ return %result: tensor<4x512xf16>
+}
+
+func.func @matmul_512x1000xf16_times_4x1000xf16_into_512x4xf16(%lhs: tensor<512x1000xf16>, %rhs: tensor<4x1000xf16>) -> tensor<512x4xf16> {
+ %init_acc = tensor.empty() : tensor<512x4xf16>
+ %c0_acc_type = arith.constant 0.0: f16
+ %acc = linalg.fill ins(%c0_acc_type : f16) outs(%init_acc : tensor<512x4xf16>) -> tensor<512x4xf16>
+ %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<512x1000xf16>, tensor<4x1000xf16>) outs(%acc: tensor<512x4xf16>) -> tensor<512x4xf16>
+ return %result: tensor<512x4xf16>
+}
+
+func.func @matmul_512x128xf16_times_500x128xf16_into_512x500xf16(%lhs: tensor<512x128xf16>, %rhs: tensor<500x128xf16>) -> tensor<512x500xf16> {
+ %init_acc = tensor.empty() : tensor<512x500xf16>
+ %c0_acc_type = arith.constant 0.0: f16
+ %acc = linalg.fill ins(%c0_acc_type : f16) outs(%init_acc : tensor<512x500xf16>) -> tensor<512x500xf16>
+ %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<512x128xf16>, tensor<500x128xf16>) outs(%acc: tensor<512x500xf16>) -> tensor<512x500xf16>
+ return %result: tensor<512x500xf16>
+}
+
+func.func @matmul_457x330xf16_times_512x330xf16_into_457x512xf16(%lhs: tensor<457x330xf16>, %rhs: tensor<512x330xf16>) -> tensor<457x512xf16> {
+ %init_acc = tensor.empty() : tensor<457x512xf16>
+ %c0_acc_type = arith.constant 0.0: f16
+ %acc = linalg.fill ins(%c0_acc_type : f16) outs(%init_acc : tensor<457x512xf16>) -> tensor<457x512xf16>
+
%result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<457x330xf16>, tensor<512x330xf16>) outs(%acc: tensor<457x512xf16>) -> tensor<457x512xf16> + return %result: tensor<457x512xf16> +} + +func.func @matmul_457x330xf16_times_514x330xf16_into_457x514xf16(%lhs: tensor<457x330xf16>, %rhs: tensor<514x330xf16>) -> tensor<457x514xf16> { + %init_acc = tensor.empty() : tensor<457x514xf16> + %c0_acc_type = arith.constant 0.0: f16 + %acc = linalg.fill ins(%c0_acc_type : f16) outs(%init_acc : tensor<457x514xf16>) -> tensor<457x514xf16> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<457x330xf16>, tensor<514x330xf16>) outs(%acc: tensor<457x514xf16>) -> tensor<457x514xf16> + return %result: tensor<457x514xf16> +} + +func.func @matmul_438x330xf16_times_514x330xf16_into_438x514xf16(%lhs: tensor<438x330xf16>, %rhs: tensor<514x330xf16>) -> tensor<438x514xf16> { + %init_acc = tensor.empty() : tensor<438x514xf16> + %c0_acc_type = arith.constant 0.0: f16 + %acc = linalg.fill ins(%c0_acc_type : f16) outs(%init_acc : tensor<438x514xf16>) -> tensor<438x514xf16> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<438x330xf16>, tensor<514x330xf16>) outs(%acc: tensor<438x514xf16>) -> tensor<438x514xf16> + return %result: tensor<438x514xf16> +} + +func.func @matmul_540x332xf16_times_516x332xf16_into_540x516xf16(%lhs: tensor<540x332xf16>, %rhs: tensor<516x332xf16>) -> tensor<540x516xf16> { + %init_acc = tensor.empty() : tensor<540x516xf16> + %c0_acc_type = arith.constant 0.0: f16 + %acc = linalg.fill ins(%c0_acc_type : f16) outs(%init_acc : tensor<540x516xf16>) -> tensor<540x516xf16> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<540x332xf16>, tensor<516x332xf16>) outs(%acc: tensor<540x516xf16>) -> tensor<540x516xf16> + return %result: tensor<540x516xf16> +} + +func.func @matmul_654x321xf16_times_234x321xf16_into_654x234xf16(%lhs: tensor<654x321xf16>, %rhs: tensor<234x321xf16>) -> tensor<654x234xf16> { + %init_acc = tensor.empty() : tensor<654x234xf16> + %c0_acc_type = arith.constant 0.0: f16 + %acc = linalg.fill ins(%c0_acc_type : f16) outs(%init_acc : tensor<654x234xf16>) -> tensor<654x234xf16> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<654x321xf16>, tensor<234x321xf16>) outs(%acc: tensor<654x234xf16>) -> tensor<654x234xf16> + return %result: tensor<654x234xf16> +} + +func.func @matmul_457x160xf16_times_512x160xf16_into_457x512xf16(%lhs: tensor<457x160xf16>, %rhs: tensor<512x160xf16>) -> tensor<457x512xf16> { + %init_acc = tensor.empty() : tensor<457x512xf16> + %c0_acc_type = arith.constant 0.0: f16 + %acc = linalg.fill ins(%c0_acc_type : f16) outs(%init_acc : tensor<457x512xf16>) -> tensor<457x512xf16> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<457x160xf16>, tensor<512x160xf16>) outs(%acc: tensor<457x512xf16>) -> tensor<457x512xf16> + return %result: tensor<457x512xf16> +} + +func.func @matmul_512x330xf16_times_512x330xf16_into_512x512xf16(%lhs: tensor<512x330xf16>, %rhs: tensor<512x330xf16>) -> tensor<512x512xf16> { + %init_acc = tensor.empty() : tensor<512x512xf16> + %c0_acc_type = arith.constant 0.0: f16 + %acc = linalg.fill ins(%c0_acc_type : f16) outs(%init_acc : tensor<512x512xf16>) -> tensor<512x512xf16> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<512x330xf16>, tensor<512x330xf16>) outs(%acc: tensor<512x512xf16>) -> tensor<512x512xf16> + return %result: tensor<512x512xf16> +} + +func.func @matmul_accumulate_1x1000xf16_times_1000x1000xf16_into_1x1000xf16(%lhs: tensor<1x1000xf16>, %rhs: tensor<1000x1000xf16>, %acc: 
tensor<1x1000xf16>) -> tensor<1x1000xf16> { + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<1x1000xf16>, tensor<1000x1000xf16>) outs(%acc: tensor<1x1000xf16>) -> tensor<1x1000xf16> + return %result: tensor<1x1000xf16> +} + +func.func @matmul_accumulate_1000x1000xf16_times_1x1000xf16_into_1000x1xf16(%lhs: tensor<1000x1000xf16>, %rhs: tensor<1x1000xf16>, %acc: tensor<1000x1xf16>) -> tensor<1000x1xf16> { + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<1000x1000xf16>, tensor<1x1000xf16>) outs(%acc: tensor<1000x1xf16>) -> tensor<1000x1xf16> + return %result: tensor<1000x1xf16> +} + +func.func @matmul_1000x1000xf16_times_1x1000xf16_into_1000x1xf16(%lhs: tensor<1000x1000xf16>, %rhs: tensor<1x1000xf16>) -> tensor<1000x1xf16> { + %init_acc = tensor.empty() : tensor<1000x1xf16> + %c0_acc_type = arith.constant 0.0: f16 + %acc = linalg.fill ins(%c0_acc_type : f16) outs(%init_acc : tensor<1000x1xf16>) -> tensor<1000x1xf16> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<1000x1000xf16>, tensor<1x1000xf16>) outs(%acc: tensor<1000x1xf16>) -> tensor<1000x1xf16> + return %result: tensor<1000x1xf16> +} + diff --git a/linalg_ops/matmul/generated/f16_into_f16/matmul_transpose_b_f16_into_f16_large_calls.mlir b/linalg_ops/matmul/generated/f16_into_f16/matmul_transpose_b_f16_into_f16_large_calls.mlir new file mode 100644 index 0000000..fad0e2b --- /dev/null +++ b/linalg_ops/matmul/generated/f16_into_f16/matmul_transpose_b_f16_into_f16_large_calls.mlir @@ -0,0 +1,882 @@ +builtin.module @calls attributes { + +} { + +func.func private @matmul_test.generate_random_matrix(%device: !hal.device, %dim0: i64, %dim1: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @matmul_test.check_matmul_results(%device: !hal.device, %m: i64, %k: i64, %n: i64, %transpose_rhs: i32, %lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) + +func.func private @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_512x128xf16_times_512x128xf16_into_512x512xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_512x128xf16_times_512x128xf16_into_512x512xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_1000x4xf16_times_512x4xf16_into_1000x512xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_4x1000xf16_times_512x1000xf16_into_4x512xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_512x1000xf16_times_4x1000xf16_into_512x4xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_512x128xf16_times_500x128xf16_into_512x500xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_457x330xf16_times_512x330xf16_into_457x512xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_457x330xf16_times_514x330xf16_into_457x514xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private 
@module.matmul_438x330xf16_times_514x330xf16_into_438x514xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_540x332xf16_times_516x332xf16_into_540x516xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_654x321xf16_times_234x321xf16_into_654x234xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_457x160xf16_times_512x160xf16_into_457x512xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_512x330xf16_times_512x330xf16_into_512x512xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_1x1000xf16_times_1000x1000xf16_into_1x1000xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_1000x1000xf16_times_1x1000xf16_into_1000x1xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_1000x1000xf16_times_1x1000xf16_into_1000x1xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view + +func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_512_128_512_acc_0() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 2 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 128 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 3 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 512 : i64 + %acc_dim1 = arith.constant 512 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 512 : i64 + %acc_copy_dim1 = arith.constant 512 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_512x128xf16_times_512x128xf16_into_512x512xf16_512_128_512_acc_1() attributes { 
+ iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 5 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 128 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 6 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 512 : i64 + %acc_dim1 = arith.constant 512 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 512 : i64 + %acc_copy_dim1 = arith.constant 512 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_512x128xf16_times_512x128xf16_into_512x512xf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_512_128_512_2() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 8 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 128 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 9 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) 
-> () + return +} + +func.func @matmul_512x128xf16_times_512x128xf16_into_512x512xf16_512_128_512_3() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 10 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 128 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 11 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_512x128xf16_times_512x128xf16_into_512x512xf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_1000_4_512_4() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x4x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 4 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 12 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 13 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_1000x4xf16_times_512x4xf16_into_1000x512xf16_1000_4_512_5() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x4x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 4 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 14 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, 
i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 15 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_1000x4xf16_times_512x4xf16_into_1000x512xf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_4_1000_512_6() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 4x1000x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 4 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 16 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 17 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_4x1000xf16_times_512x1000xf16_into_4x512xf16_4_1000_512_7() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 4x1000x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 4 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 18 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 19 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_4x1000xf16_times_512x1000xf16_into_4x512xf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 
1000 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_512_1000_4_8() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x1000x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 20 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 21 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_512x1000xf16_times_4x1000xf16_into_512x4xf16_512_1000_4_9() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x1000x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 22 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 23 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_512x1000xf16_times_4x1000xf16_into_512x4xf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_512_128_500_10() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x500"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + 
%lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 24 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 500 : i64 + %rhs_dim1 = arith.constant 128 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 25 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 500 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_512x128xf16_times_500x128xf16_into_512x500xf16_512_128_500_11() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x500"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 26 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 500 : i64 + %rhs_dim1 = arith.constant 128 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 27 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_512x128xf16_times_500x128xf16_into_512x500xf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 500 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_457_330_512_12() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x330x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 28 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 29 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, 
%rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_457x330xf16_times_512x330xf16_into_457x512xf16_457_330_512_13() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x330x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 30 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 31 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_457x330xf16_times_512x330xf16_into_457x512xf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_457_330_514_14() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x330x514"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 32 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 514 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 33 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 514 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, 
!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_457x330xf16_times_514x330xf16_into_457x514xf16_457_330_514_15() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x330x514"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 34 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 514 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 35 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_457x330xf16_times_514x330xf16_into_457x514xf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 514 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_438_330_514_16() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 438x330x514"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 438 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 36 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 514 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 37 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 438 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 514 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_438x330xf16_times_514x330xf16_into_438x514xf16_438_330_514_17() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 438x330x514"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 438 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 38 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, 
%lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 514 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 39 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_438x330xf16_times_514x330xf16_into_438x514xf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 438 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 514 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_540_332_516_18() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 540x332x516"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 540 : i64 + %lhs_dim1 = arith.constant 332 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 40 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 516 : i64 + %rhs_dim1 = arith.constant 332 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 41 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 540 : i64 + %k = arith.constant 332 : i64 + %n = arith.constant 516 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_540x332xf16_times_516x332xf16_into_540x516xf16_540_332_516_19() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 540x332x516"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 540 : i64 + %lhs_dim1 = arith.constant 332 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 42 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 516 : i64 + %rhs_dim1 = arith.constant 332 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 43 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_540x332xf16_times_516x332xf16_into_540x516xf16(%lhs, %rhs) : (!hal.buffer_view, 
!hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 540 : i64 + %k = arith.constant 332 : i64 + %n = arith.constant 516 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_654_321_234_20() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 654x321x234"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 654 : i64 + %lhs_dim1 = arith.constant 321 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 44 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 234 : i64 + %rhs_dim1 = arith.constant 321 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 45 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 654 : i64 + %k = arith.constant 321 : i64 + %n = arith.constant 234 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_654x321xf16_times_234x321xf16_into_654x234xf16_654_321_234_21() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 654x321x234"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 654 : i64 + %lhs_dim1 = arith.constant 321 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 46 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 234 : i64 + %rhs_dim1 = arith.constant 321 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 47 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_654x321xf16_times_234x321xf16_into_654x234xf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 654 : i64 + %k = arith.constant 321 : i64 + %n = arith.constant 234 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_457_160_512_22() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x160x512"} +} 
{ + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 160 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 48 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 160 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 49 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 160 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_457x160xf16_times_512x160xf16_into_457x512xf16_457_160_512_23() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x160x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 160 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 50 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 160 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 51 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_457x160xf16_times_512x160xf16_into_457x512xf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 160 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_512_330_512_24() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x330x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 52 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 
53 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_512x330xf16_times_512x330xf16_into_512x512xf16_512_330_512_25() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x330x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 54 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 55 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_512x330xf16_times_512x330xf16_into_512x512xf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_1_1000_1000_acc_26() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1000x1000"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 56 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 57 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1000 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 58 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = 
arith.constant 1000 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 58 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1000 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_1x1000xf16_times_1000x1000xf16_into_1x1000xf16_1_1000_1000_acc_27() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1000x1000"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 59 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 60 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1000 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 61 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1000 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 61 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_1x1000xf16_times_1000x1000xf16_into_1x1000xf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1000 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_1000_1000_1_acc_28() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x1000x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 62 : i32 + %lhs = call 
@matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 63 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1000 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 64 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1000 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 64 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_1000x1000xf16_times_1x1000xf16_into_1000x1xf16_1000_1000_1_acc_29() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x1000x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 65 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 66 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1000 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 67 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1000 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 67 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_1000x1000xf16_times_1x1000xf16_into_1000x1xf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) 
-> !hal.buffer_view
+  %m = arith.constant 1000 : i64
+  %k = arith.constant 1000 : i64
+  %n = arith.constant 1 : i64
+  %transpose_rhs = arith.constant 1 : i32
+  call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()
+  return
+}
+
+func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_1000_1000_1_30() attributes {
+  iree.reflection = {description = "Matmul shape (MxKxN): 1000x1000x1"}
+} {
+  %device_index = arith.constant 0 : index
+  %device = hal.devices.get %device_index : !hal.device
+  %lhs_dim0 = arith.constant 1000 : i64
+  %lhs_dim1 = arith.constant 1000 : i64
+  %lhs_element_type = hal.element_type<f16> : i32
+  %lhs_seed = arith.constant 68 : i32
+  %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+  %rhs_dim0 = arith.constant 1 : i64
+  %rhs_dim1 = arith.constant 1000 : i64
+  %rhs_element_type = hal.element_type<f16> : i32
+  %rhs_seed = arith.constant 69 : i32
+  %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+  %acc = util.null : !hal.buffer_view
+  %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view
+  %m = arith.constant 1000 : i64
+  %k = arith.constant 1000 : i64
+  %n = arith.constant 1 : i64
+  %transpose_rhs = arith.constant 1 : i32
+  call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()
+  return
+}
+
+func.func @matmul_1000x1000xf16_times_1x1000xf16_into_1000x1xf16_1000_1000_1_31() attributes {
+  iree.reflection = {description = "Matmul shape (MxKxN): 1000x1000x1"}
+} {
+  %device_index = arith.constant 0 : index
+  %device = hal.devices.get %device_index : !hal.device
+  %lhs_dim0 = arith.constant 1000 : i64
+  %lhs_dim1 = arith.constant 1000 : i64
+  %lhs_element_type = hal.element_type<f16> : i32
+  %lhs_seed = arith.constant 70 : i32
+  %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+  %rhs_dim0 = arith.constant 1 : i64
+  %rhs_dim1 = arith.constant 1000 : i64
+  %rhs_element_type = hal.element_type<f16> : i32
+  %rhs_seed = arith.constant 71 : i32
+  %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+  %acc = util.null : !hal.buffer_view
+  %result = call @module.matmul_1000x1000xf16_times_1x1000xf16_into_1000x1xf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view
+  %m = arith.constant 1000 : i64
+  %k = arith.constant 1000 : i64
+  %n = arith.constant 1 : i64
+  %transpose_rhs = arith.constant 1 : i32
+  call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()
+  return
+}
+
+
+}
diff --git a/linalg_ops/matmul/generated/f16_into_f16/matmul_transpose_b_f16_into_f16_small.mlir b/linalg_ops/matmul/generated/f16_into_f16/matmul_transpose_b_f16_into_f16_small.mlir
new file mode 100644
index 0000000..fb93898
--- /dev/null
+++ b/linalg_ops/matmul/generated/f16_into_f16/matmul_transpose_b_f16_into_f16_small.mlir
@@ -0,0 +1,99 @@
+func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs: tensor<?x?xf16>, %rhs: tensor<?x?xf16>, %acc: tensor<?x?xf16>) -> tensor<?x?xf16> {
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<?x?xf16>, tensor<?x?xf16>) outs(%acc: tensor<?x?xf16>) -> tensor<?x?xf16>
+  return %result: tensor<?x?xf16>
+}
+
+func.func @matmul_accumulate_1x1xf16_times_1x1xf16_into_1x1xf16(%lhs: tensor<1x1xf16>, %rhs: tensor<1x1xf16>, %acc: tensor<1x1xf16>) -> tensor<1x1xf16> {
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<1x1xf16>, tensor<1x1xf16>) outs(%acc: tensor<1x1xf16>) -> tensor<1x1xf16>
+  return %result: tensor<1x1xf16>
+}
+
+func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs: tensor<?x?xf16>, %rhs: tensor<?x?xf16>) -> tensor<?x?xf16> {
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+  %acc_dim0 = tensor.dim %lhs, %c0 : tensor<?x?xf16>
+  %acc_dim1 = tensor.dim %rhs, %c1 : tensor<?x?xf16>
+  %init_acc = tensor.empty(%acc_dim0, %acc_dim1) : tensor<?x?xf16>
+  %c0_acc_type = arith.constant 0.0: f16
+  %acc = linalg.fill ins(%c0_acc_type : f16) outs(%init_acc : tensor<?x?xf16>) -> tensor<?x?xf16>
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<?x?xf16>, tensor<?x?xf16>) outs(%acc: tensor<?x?xf16>) -> tensor<?x?xf16>
+  return %result: tensor<?x?xf16>
+}
+
+func.func @matmul_1x1xf16_times_1x1xf16_into_1x1xf16(%lhs: tensor<1x1xf16>, %rhs: tensor<1x1xf16>) -> tensor<1x1xf16> {
+  %init_acc = tensor.empty() : tensor<1x1xf16>
+  %c0_acc_type = arith.constant 0.0: f16
+  %acc = linalg.fill ins(%c0_acc_type : f16) outs(%init_acc : tensor<1x1xf16>) -> tensor<1x1xf16>
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<1x1xf16>, tensor<1x1xf16>) outs(%acc: tensor<1x1xf16>) -> tensor<1x1xf16>
+  return %result: tensor<1x1xf16>
+}
+
+func.func @matmul_accumulate_2x2xf16_times_2x2xf16_into_2x2xf16(%lhs: tensor<2x2xf16>, %rhs: tensor<2x2xf16>, %acc: tensor<2x2xf16>) -> tensor<2x2xf16> {
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<2x2xf16>, tensor<2x2xf16>) outs(%acc: tensor<2x2xf16>) -> tensor<2x2xf16>
+  return %result: tensor<2x2xf16>
+}
+
+func.func @matmul_accumulate_4x4xf16_times_4x4xf16_into_4x4xf16(%lhs: tensor<4x4xf16>, %rhs: tensor<4x4xf16>, %acc: tensor<4x4xf16>) -> tensor<4x4xf16> {
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<4x4xf16>, tensor<4x4xf16>) outs(%acc: tensor<4x4xf16>) -> tensor<4x4xf16>
+  return %result: tensor<4x4xf16>
+}
+
+func.func @matmul_accumulate_8x8xf16_times_8x8xf16_into_8x8xf16(%lhs: tensor<8x8xf16>, %rhs: tensor<8x8xf16>, %acc: tensor<8x8xf16>) -> tensor<8x8xf16> {
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<8x8xf16>, tensor<8x8xf16>) outs(%acc: tensor<8x8xf16>) -> tensor<8x8xf16>
+  return %result: tensor<8x8xf16>
+}
+
+func.func @matmul_accumulate_9x9xf16_times_9x9xf16_into_9x9xf16(%lhs: tensor<9x9xf16>, %rhs: tensor<9x9xf16>, %acc: tensor<9x9xf16>) -> tensor<9x9xf16> {
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<9x9xf16>, tensor<9x9xf16>) outs(%acc: tensor<9x9xf16>) -> tensor<9x9xf16>
+  return %result: tensor<9x9xf16>
+}
+
+func.func @matmul_accumulate_6x13xf16_times_3x13xf16_into_6x3xf16(%lhs: tensor<6x13xf16>, %rhs: tensor<3x13xf16>, %acc: tensor<6x3xf16>) -> tensor<6x3xf16> {
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<6x13xf16>, tensor<3x13xf16>) outs(%acc: tensor<6x3xf16>) -> tensor<6x3xf16>
+  return %result: tensor<6x3xf16>
+}
+
+func.func @matmul_15x37xf16_times_7x37xf16_into_15x7xf16(%lhs: tensor<15x37xf16>, %rhs: tensor<7x37xf16>) -> tensor<15x7xf16> {
+  %init_acc = tensor.empty() : tensor<15x7xf16>
+  %c0_acc_type = arith.constant 0.0: f16
+  %acc = linalg.fill ins(%c0_acc_type : f16) outs(%init_acc : tensor<15x7xf16>) -> tensor<15x7xf16>
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<15x37xf16>, tensor<7x37xf16>) outs(%acc: tensor<15x7xf16>) -> tensor<15x7xf16>
+  return %result: tensor<15x7xf16>
+}
+
+func.func @matmul_accumulate_81x19xf16_times_41x19xf16_into_81x41xf16(%lhs: tensor<81x19xf16>, %rhs: tensor<41x19xf16>, %acc: tensor<81x41xf16>) -> tensor<81x41xf16> {
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<81x19xf16>, tensor<41x19xf16>) outs(%acc: tensor<81x41xf16>) -> tensor<81x41xf16>
+  return %result: tensor<81x41xf16>
+}
+
+func.func @matmul_accumulate_1x10xf16_times_10x10xf16_into_1x10xf16(%lhs: tensor<1x10xf16>, %rhs: tensor<10x10xf16>, %acc: tensor<1x10xf16>) -> tensor<1x10xf16> {
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<1x10xf16>, tensor<10x10xf16>) outs(%acc: tensor<1x10xf16>) -> tensor<1x10xf16>
+  return %result: tensor<1x10xf16>
+}
+
+func.func @matmul_1x10xf16_times_10x10xf16_into_1x10xf16(%lhs: tensor<1x10xf16>, %rhs: tensor<10x10xf16>) -> tensor<1x10xf16> {
+  %init_acc = tensor.empty() : tensor<1x10xf16>
+  %c0_acc_type = arith.constant 0.0: f16
+  %acc = linalg.fill ins(%c0_acc_type : f16) outs(%init_acc : tensor<1x10xf16>) -> tensor<1x10xf16>
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<1x10xf16>, tensor<10x10xf16>) outs(%acc: tensor<1x10xf16>) -> tensor<1x10xf16>
+  return %result: tensor<1x10xf16>
+}
+
+func.func @matmul_accumulate_10x1xf16_times_10x1xf16_into_10x10xf16(%lhs: tensor<10x1xf16>, %rhs: tensor<10x1xf16>, %acc: tensor<10x10xf16>) -> tensor<10x10xf16> {
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<10x1xf16>, tensor<10x1xf16>) outs(%acc: tensor<10x10xf16>) -> tensor<10x10xf16>
+  return %result: tensor<10x10xf16>
+}
+
+func.func @matmul_accumulate_10x10xf16_times_1x10xf16_into_10x1xf16(%lhs: tensor<10x10xf16>, %rhs: tensor<1x10xf16>, %acc: tensor<10x1xf16>) -> tensor<10x1xf16> {
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<10x10xf16>, tensor<1x10xf16>) outs(%acc: tensor<10x1xf16>) -> tensor<10x1xf16>
+  return %result: tensor<10x1xf16>
+}
+
+func.func @matmul_10x10xf16_times_1x10xf16_into_10x1xf16(%lhs: tensor<10x10xf16>, %rhs: tensor<1x10xf16>) -> tensor<10x1xf16> {
+  %init_acc = tensor.empty() : tensor<10x1xf16>
+  %c0_acc_type = arith.constant 0.0: f16
+  %acc = linalg.fill ins(%c0_acc_type : f16) outs(%init_acc : tensor<10x1xf16>) -> tensor<10x1xf16>
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<10x10xf16>, tensor<1x10xf16>) outs(%acc: tensor<10x1xf16>) -> tensor<10x1xf16>
+  return %result: tensor<10x1xf16>
+}
+
diff --git a/linalg_ops/matmul/generated/f16_into_f16/matmul_transpose_b_f16_into_f16_small_calls.mlir b/linalg_ops/matmul/generated/f16_into_f16/matmul_transpose_b_f16_into_f16_small_calls.mlir
new file mode 100644
index 0000000..90bdf06
--- /dev/null
+++ b/linalg_ops/matmul/generated/f16_into_f16/matmul_transpose_b_f16_into_f16_small_calls.mlir
@@ -0,0 +1,906 @@
+builtin.module @calls attributes {
+
+} {
+
+func.func private @matmul_test.generate_random_matrix(%device: !hal.device, %dim0: i64, %dim1: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view
+func.func private @matmul_test.check_matmul_results(%device: !hal.device, %m: i64, %k: i64, %n: i64, %transpose_rhs: i32, %lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view)
+
+func.func private @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_accumulate_1x1xf16_times_1x1xf16_into_1x1xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_1x1xf16_times_1x1xf16_into_1x1xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_accumulate_2x2xf16_times_2x2xf16_into_2x2xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_accumulate_4x4xf16_times_4x4xf16_into_4x4xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_accumulate_8x8xf16_times_8x8xf16_into_8x8xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_accumulate_9x9xf16_times_9x9xf16_into_9x9xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_accumulate_6x13xf16_times_3x13xf16_into_6x3xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_15x37xf16_times_7x37xf16_into_15x7xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_accumulate_81x19xf16_times_41x19xf16_into_81x41xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_accumulate_1x10xf16_times_10x10xf16_into_1x10xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_1x10xf16_times_10x10xf16_into_1x10xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_accumulate_10x1xf16_times_10x1xf16_into_10x10xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_accumulate_10x10xf16_times_1x10xf16_into_10x1xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_10x10xf16_times_1x10xf16_into_10x1xf16(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+
+func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_1_1_1_acc_0() attributes {
+  iree.reflection = {description = "Matmul shape (MxKxN): 1x1x1"}
+} {
+  %device_index = arith.constant 0 : index
+  %device = hal.devices.get %device_index : !hal.device
+  %lhs_dim0 = arith.constant 1 : i64
+  %lhs_dim1 = arith.constant 1 : i64
+  %lhs_element_type = hal.element_type<f16> : i32
+  %lhs_seed = arith.constant 2 : i32
+  %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+  %rhs_dim0 = arith.constant 1 : i64
+  %rhs_dim1 = arith.constant 1 : i64
+  %rhs_element_type = hal.element_type<f16> : i32
+  %rhs_seed = arith.constant 3 : i32
+  %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device,
i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_1x1xf16_times_1x1xf16_into_1x1xf16_1_1_1_acc_1() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 5 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 6 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_1x1xf16_times_1x1xf16_into_1x1xf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_1_1_1_2() attributes { + iree.reflection = {description = 
"Matmul shape (MxKxN): 1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 8 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 9 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_1x1xf16_times_1x1xf16_into_1x1xf16_1_1_1_3() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 10 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 11 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_1x1xf16_times_1x1xf16_into_1x1xf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_2_2_2_acc_4() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 2x2x2"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 2 : i64 + %lhs_dim1 = arith.constant 2 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 12 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 2 : i64 + %rhs_dim1 = arith.constant 2 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 13 : i32 + %rhs = call 
@matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 2 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 14 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 2 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 14 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 2 : i64 + %k = arith.constant 2 : i64 + %n = arith.constant 2 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_2x2xf16_times_2x2xf16_into_2x2xf16_2_2_2_acc_5() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 2x2x2"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 2 : i64 + %lhs_dim1 = arith.constant 2 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 15 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 2 : i64 + %rhs_dim1 = arith.constant 2 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 16 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 2 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 17 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 2 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 17 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_2x2xf16_times_2x2xf16_into_2x2xf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 2 : i64 + %k = arith.constant 2 : i64 + %n = arith.constant 2 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + 
+func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_4_4_4_acc_6() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 4x4x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 4 : i64 + %lhs_dim1 = arith.constant 4 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 18 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 19 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 4 : i64 + %acc_dim1 = arith.constant 4 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 20 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 4 : i64 + %acc_copy_dim1 = arith.constant 4 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 20 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_4x4xf16_times_4x4xf16_into_4x4xf16_4_4_4_acc_7() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 4x4x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 4 : i64 + %lhs_dim1 = arith.constant 4 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 21 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 22 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 4 : i64 + %acc_dim1 = arith.constant 4 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 23 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 4 : i64 + %acc_copy_dim1 = arith.constant 4 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed 
= arith.constant 23 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_4x4xf16_times_4x4xf16_into_4x4xf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_8_8_8_acc_8() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 8x8x8"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 8 : i64 + %lhs_dim1 = arith.constant 8 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 24 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 8 : i64 + %rhs_dim1 = arith.constant 8 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 25 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 8 : i64 + %acc_dim1 = arith.constant 8 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 26 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 8 : i64 + %acc_copy_dim1 = arith.constant 8 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 26 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 8 : i64 + %k = arith.constant 8 : i64 + %n = arith.constant 8 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_8x8xf16_times_8x8xf16_into_8x8xf16_8_8_8_acc_9() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 8x8x8"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 8 : i64 + %lhs_dim1 = arith.constant 8 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 27 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 8 : i64 
+ %rhs_dim1 = arith.constant 8 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 28 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 8 : i64 + %acc_dim1 = arith.constant 8 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 29 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 8 : i64 + %acc_copy_dim1 = arith.constant 8 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 29 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_8x8xf16_times_8x8xf16_into_8x8xf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 8 : i64 + %k = arith.constant 8 : i64 + %n = arith.constant 8 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_9_9_9_acc_10() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 9x9x9"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 9 : i64 + %lhs_dim1 = arith.constant 9 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 30 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 9 : i64 + %rhs_dim1 = arith.constant 9 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 31 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 9 : i64 + %acc_dim1 = arith.constant 9 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 32 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 9 : i64 + %acc_copy_dim1 = arith.constant 9 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 32 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 9 : i64 + %k = arith.constant 9 : i64 + %n = arith.constant 9 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, 
%acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_9x9xf16_times_9x9xf16_into_9x9xf16_9_9_9_acc_11() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 9x9x9"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 9 : i64 + %lhs_dim1 = arith.constant 9 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 33 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 9 : i64 + %rhs_dim1 = arith.constant 9 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 34 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 9 : i64 + %acc_dim1 = arith.constant 9 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 35 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 9 : i64 + %acc_copy_dim1 = arith.constant 9 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 35 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_9x9xf16_times_9x9xf16_into_9x9xf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 9 : i64 + %k = arith.constant 9 : i64 + %n = arith.constant 9 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_6_13_3_acc_12() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 6x13x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 6 : i64 + %lhs_dim1 = arith.constant 13 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 36 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 3 : i64 + %rhs_dim1 = arith.constant 13 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 37 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 6 : i64 + %acc_dim1 = arith.constant 3 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 38 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + 
%acc_copy_dim0 = arith.constant 6 : i64 + %acc_copy_dim1 = arith.constant 3 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 38 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 6 : i64 + %k = arith.constant 13 : i64 + %n = arith.constant 3 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_6x13xf16_times_3x13xf16_into_6x3xf16_6_13_3_acc_13() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 6x13x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 6 : i64 + %lhs_dim1 = arith.constant 13 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 39 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 3 : i64 + %rhs_dim1 = arith.constant 13 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 40 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 6 : i64 + %acc_dim1 = arith.constant 3 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 41 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 6 : i64 + %acc_copy_dim1 = arith.constant 3 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 41 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_6x13xf16_times_3x13xf16_into_6x3xf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 6 : i64 + %k = arith.constant 13 : i64 + %n = arith.constant 3 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_15_37_7_14() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 15x37x7"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 15 : i64 + %lhs_dim1 = arith.constant 37 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 42 : i32 + %lhs = call 
@matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 7 : i64 + %rhs_dim1 = arith.constant 37 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 43 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 15 : i64 + %k = arith.constant 37 : i64 + %n = arith.constant 7 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_15x37xf16_times_7x37xf16_into_15x7xf16_15_37_7_15() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 15x37x7"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 15 : i64 + %lhs_dim1 = arith.constant 37 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 44 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 7 : i64 + %rhs_dim1 = arith.constant 37 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 45 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_15x37xf16_times_7x37xf16_into_15x7xf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 15 : i64 + %k = arith.constant 37 : i64 + %n = arith.constant 7 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_81_19_41_acc_16() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 81x19x41"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 81 : i64 + %lhs_dim1 = arith.constant 19 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 46 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 41 : i64 + %rhs_dim1 = arith.constant 19 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 47 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 81 : i64 + %acc_dim1 = arith.constant 41 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = 
arith.constant 48 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 81 : i64 + %acc_copy_dim1 = arith.constant 41 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 48 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 81 : i64 + %k = arith.constant 19 : i64 + %n = arith.constant 41 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_81x19xf16_times_41x19xf16_into_81x41xf16_81_19_41_acc_17() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 81x19x41"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 81 : i64 + %lhs_dim1 = arith.constant 19 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 49 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 41 : i64 + %rhs_dim1 = arith.constant 19 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 50 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 81 : i64 + %acc_dim1 = arith.constant 41 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 51 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 81 : i64 + %acc_copy_dim1 = arith.constant 41 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 51 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_81x19xf16_times_41x19xf16_into_81x41xf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 81 : i64 + %k = arith.constant 19 : i64 + %n = arith.constant 41 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_1_10_10_acc_18() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get 
%device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 52 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 53 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 10 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 54 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 10 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 54 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_1x10xf16_times_10x10xf16_into_1x10xf16_1_10_10_acc_19() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 55 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 56 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 10 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 57 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 10 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 57 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + 
%result = call @module.matmul_accumulate_1x10xf16_times_10x10xf16_into_1x10xf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_1_10_10_20() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 58 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 59 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_1x10xf16_times_10x10xf16_into_1x10xf16_1_10_10_21() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 60 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 61 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_1x10xf16_times_10x10xf16_into_1x10xf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func 
@matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_10_1_10_acc_22() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x1x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 62 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 63 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 10 : i64 + %acc_dim1 = arith.constant 10 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 64 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 10 : i64 + %acc_copy_dim1 = arith.constant 10 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 64 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_10x1xf16_times_10x1xf16_into_10x10xf16_10_1_10_acc_23() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x1x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 65 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 66 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 10 : i64 + %acc_dim1 = arith.constant 10 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 67 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 10 : i64 + %acc_copy_dim1 = arith.constant 10 : i64 + %acc_copy_element_type = hal.element_type : i32 
+ %acc_copy_seed = arith.constant 67 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_10x1xf16_times_10x1xf16_into_10x10xf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_10_10_1_acc_24() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 68 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 69 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 10 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 70 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 10 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 70 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_10x10xf16_times_1x10xf16_into_10x1xf16_10_10_1_acc_25() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 71 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> 
!hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 72 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 10 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 73 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 10 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 73 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_10x10xf16_times_1x10xf16_into_10x1xf16(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16_10_10_1_26() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 74 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 75 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_10x10xf16_times_1x10xf16_into_10x1xf16_10_10_1_27() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 76 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, 
%lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+  %rhs_dim0 = arith.constant 1 : i64
+  %rhs_dim1 = arith.constant 10 : i64
+  %rhs_element_type = hal.element_type<f16> : i32
+  %rhs_seed = arith.constant 77 : i32
+  %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+  %acc = util.null : !hal.buffer_view
+  %result = call @module.matmul_10x10xf16_times_1x10xf16_into_10x1xf16(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view
+  %m = arith.constant 10 : i64
+  %k = arith.constant 10 : i64
+  %n = arith.constant 1 : i64
+  %transpose_rhs = arith.constant 1 : i32
+  call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()
+  return
+}
+
+
+}
diff --git a/linalg_ops/matmul/generated/f16_into_f32/matmul_f16_into_f32_large.mlir b/linalg_ops/matmul/generated/f16_into_f32/matmul_f16_into_f32_large.mlir
new file mode 100644
index 0000000..a5662d7
--- /dev/null
+++ b/linalg_ops/matmul/generated/f16_into_f32/matmul_f16_into_f32_large.mlir
@@ -0,0 +1,136 @@
+func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs: tensor<?x?xf16>, %rhs: tensor<?x?xf16>, %acc: tensor<?x?xf32>) -> tensor<?x?xf32> {
+  %result = linalg.matmul ins(%lhs, %rhs: tensor<?x?xf16>, tensor<?x?xf16>) outs(%acc: tensor<?x?xf32>) -> tensor<?x?xf32>
+  return %result: tensor<?x?xf32>
+}
+
+func.func @matmul_accumulate_512x128xf16_times_128x512xf16_into_512x512xf32(%lhs: tensor<512x128xf16>, %rhs: tensor<128x512xf16>, %acc: tensor<512x512xf32>) -> tensor<512x512xf32> {
+  %result = linalg.matmul ins(%lhs, %rhs: tensor<512x128xf16>, tensor<128x512xf16>) outs(%acc: tensor<512x512xf32>) -> tensor<512x512xf32>
+  return %result: tensor<512x512xf32>
+}
+
+func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs: tensor<?x?xf16>, %rhs: tensor<?x?xf16>) -> tensor<?x?xf32> {
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+  %acc_dim0 = tensor.dim %lhs, %c0 : tensor<?x?xf16>
+  %acc_dim1 = tensor.dim %rhs, %c1 : tensor<?x?xf16>
+  %init_acc = tensor.empty(%acc_dim0, %acc_dim1) : tensor<?x?xf32>
+  %c0_acc_type = arith.constant 0.0: f32
+  %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<?x?xf32>) -> tensor<?x?xf32>
+  %result = linalg.matmul ins(%lhs, %rhs: tensor<?x?xf16>, tensor<?x?xf16>) outs(%acc: tensor<?x?xf32>) -> tensor<?x?xf32>
+  return %result: tensor<?x?xf32>
+}
+
+func.func @matmul_512x128xf16_times_128x512xf16_into_512x512xf32(%lhs: tensor<512x128xf16>, %rhs: tensor<128x512xf16>) -> tensor<512x512xf32> {
+  %init_acc = tensor.empty() : tensor<512x512xf32>
+  %c0_acc_type = arith.constant 0.0: f32
+  %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<512x512xf32>) -> tensor<512x512xf32>
+  %result = linalg.matmul ins(%lhs, %rhs: tensor<512x128xf16>, tensor<128x512xf16>) outs(%acc: tensor<512x512xf32>) -> tensor<512x512xf32>
+  return %result: tensor<512x512xf32>
+}
+
+func.func @matmul_1000x4xf16_times_4x512xf16_into_1000x512xf32(%lhs: tensor<1000x4xf16>, %rhs: tensor<4x512xf16>) -> tensor<1000x512xf32> {
+  %init_acc = tensor.empty() : tensor<1000x512xf32>
+  %c0_acc_type = arith.constant 0.0: f32
+  %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<1000x512xf32>) -> tensor<1000x512xf32>
+  %result = linalg.matmul ins(%lhs, %rhs: tensor<1000x4xf16>, tensor<4x512xf16>) outs(%acc: tensor<1000x512xf32>) -> tensor<1000x512xf32>
+  return %result: tensor<1000x512xf32>
+}
+
+func.func @matmul_4x1000xf16_times_1000x512xf16_into_4x512xf32(%lhs: tensor<4x1000xf16>, %rhs: tensor<1000x512xf16>) -> tensor<4x512xf32> {
+  %init_acc = tensor.empty() : tensor<4x512xf32>
+  %c0_acc_type = arith.constant 0.0: f32
+  %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<4x512xf32>) -> tensor<4x512xf32>
+  %result = linalg.matmul ins(%lhs, %rhs: tensor<4x1000xf16>, tensor<1000x512xf16>) outs(%acc: tensor<4x512xf32>) -> tensor<4x512xf32>
+  return %result: tensor<4x512xf32>
+}
+
+func.func @matmul_512x1000xf16_times_1000x4xf16_into_512x4xf32(%lhs: tensor<512x1000xf16>, %rhs: tensor<1000x4xf16>) -> tensor<512x4xf32> {
+  %init_acc = tensor.empty() : tensor<512x4xf32>
+  %c0_acc_type = arith.constant 0.0: f32
+  %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<512x4xf32>) -> tensor<512x4xf32>
+  %result = linalg.matmul ins(%lhs, %rhs: tensor<512x1000xf16>, tensor<1000x4xf16>) outs(%acc: tensor<512x4xf32>) -> tensor<512x4xf32>
+  return %result: tensor<512x4xf32>
+}
+
+func.func @matmul_512x128xf16_times_128x500xf16_into_512x500xf32(%lhs: tensor<512x128xf16>, %rhs: tensor<128x500xf16>) -> tensor<512x500xf32> {
+  %init_acc = tensor.empty() : tensor<512x500xf32>
+  %c0_acc_type = arith.constant 0.0: f32
+  %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<512x500xf32>) -> tensor<512x500xf32>
+  %result = linalg.matmul ins(%lhs, %rhs: tensor<512x128xf16>, tensor<128x500xf16>) outs(%acc: tensor<512x500xf32>) -> tensor<512x500xf32>
+  return %result: tensor<512x500xf32>
+}
+
+func.func @matmul_457x330xf16_times_330x512xf16_into_457x512xf32(%lhs: tensor<457x330xf16>, %rhs: tensor<330x512xf16>) -> tensor<457x512xf32> {
+  %init_acc = tensor.empty() : tensor<457x512xf32>
+  %c0_acc_type = arith.constant 0.0: f32
+  %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<457x512xf32>) -> tensor<457x512xf32>
+  %result = linalg.matmul ins(%lhs, %rhs: tensor<457x330xf16>, tensor<330x512xf16>) outs(%acc: tensor<457x512xf32>) -> tensor<457x512xf32>
+  return %result: tensor<457x512xf32>
+}
+
+func.func @matmul_457x330xf16_times_330x514xf16_into_457x514xf32(%lhs: tensor<457x330xf16>, %rhs: tensor<330x514xf16>) -> tensor<457x514xf32> {
+  %init_acc = tensor.empty() : tensor<457x514xf32>
+  %c0_acc_type = arith.constant 0.0: f32
+  %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<457x514xf32>) -> tensor<457x514xf32>
+  %result = linalg.matmul ins(%lhs, %rhs: tensor<457x330xf16>, tensor<330x514xf16>) outs(%acc: tensor<457x514xf32>) -> tensor<457x514xf32>
+  return %result: tensor<457x514xf32>
+}
+
+func.func @matmul_438x330xf16_times_330x514xf16_into_438x514xf32(%lhs: tensor<438x330xf16>, %rhs: tensor<330x514xf16>) -> tensor<438x514xf32> {
+  %init_acc = tensor.empty() : tensor<438x514xf32>
+  %c0_acc_type = arith.constant 0.0: f32
+  %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<438x514xf32>) -> tensor<438x514xf32>
+  %result = linalg.matmul ins(%lhs, %rhs: tensor<438x330xf16>, tensor<330x514xf16>) outs(%acc: tensor<438x514xf32>) -> tensor<438x514xf32>
+  return %result: tensor<438x514xf32>
+}
+
+func.func @matmul_540x332xf16_times_332x516xf16_into_540x516xf32(%lhs: tensor<540x332xf16>, %rhs: tensor<332x516xf16>) -> tensor<540x516xf32> {
+  %init_acc = tensor.empty() : tensor<540x516xf32>
+  %c0_acc_type = arith.constant 0.0: f32
+  %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<540x516xf32>) -> tensor<540x516xf32>
+  %result = linalg.matmul ins(%lhs, %rhs: tensor<540x332xf16>, tensor<332x516xf16>) outs(%acc: tensor<540x516xf32>) -> tensor<540x516xf32>
+  return %result: tensor<540x516xf32>
+}
+
+func.func @matmul_654x321xf16_times_321x234xf16_into_654x234xf32(%lhs: tensor<654x321xf16>, %rhs: tensor<321x234xf16>) -> tensor<654x234xf32> {
+  %init_acc = tensor.empty() : tensor<654x234xf32>
+  %c0_acc_type = arith.constant 0.0: f32
+  %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<654x234xf32>) -> tensor<654x234xf32>
+  %result = linalg.matmul ins(%lhs, %rhs: tensor<654x321xf16>, tensor<321x234xf16>) outs(%acc: tensor<654x234xf32>) -> tensor<654x234xf32>
+  return %result: tensor<654x234xf32>
+}
+
+func.func @matmul_457x160xf16_times_160x512xf16_into_457x512xf32(%lhs: tensor<457x160xf16>, %rhs: tensor<160x512xf16>) -> tensor<457x512xf32> {
+  %init_acc = tensor.empty() : tensor<457x512xf32>
+  %c0_acc_type = arith.constant 0.0: f32
+  %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<457x512xf32>) -> tensor<457x512xf32>
+  %result = linalg.matmul ins(%lhs, %rhs: tensor<457x160xf16>, tensor<160x512xf16>) outs(%acc: tensor<457x512xf32>) -> tensor<457x512xf32>
+  return %result: tensor<457x512xf32>
+}
+
+func.func @matmul_512x330xf16_times_330x512xf16_into_512x512xf32(%lhs: tensor<512x330xf16>, %rhs: tensor<330x512xf16>) -> tensor<512x512xf32> {
+  %init_acc = tensor.empty() : tensor<512x512xf32>
+  %c0_acc_type = arith.constant 0.0: f32
+  %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<512x512xf32>) -> tensor<512x512xf32>
+  %result = linalg.matmul ins(%lhs, %rhs: tensor<512x330xf16>, tensor<330x512xf16>) outs(%acc: tensor<512x512xf32>) -> tensor<512x512xf32>
+  return %result: tensor<512x512xf32>
+}
+
+func.func @matmul_accumulate_1x1000xf16_times_1000x1000xf16_into_1x1000xf32(%lhs: tensor<1x1000xf16>, %rhs: tensor<1000x1000xf16>, %acc: tensor<1x1000xf32>) -> tensor<1x1000xf32> {
+  %result = linalg.matmul ins(%lhs, %rhs: tensor<1x1000xf16>, tensor<1000x1000xf16>) outs(%acc: tensor<1x1000xf32>) -> tensor<1x1000xf32>
+  return %result: tensor<1x1000xf32>
+}
+
+func.func @matmul_accumulate_1000x1000xf16_times_1000x1xf16_into_1000x1xf32(%lhs: tensor<1000x1000xf16>, %rhs: tensor<1000x1xf16>, %acc: tensor<1000x1xf32>) -> tensor<1000x1xf32> {
+  %result = linalg.matmul ins(%lhs, %rhs: tensor<1000x1000xf16>, tensor<1000x1xf16>) outs(%acc: tensor<1000x1xf32>) -> tensor<1000x1xf32>
+  return %result: tensor<1000x1xf32>
+}
+
+func.func @matmul_1000x1000xf16_times_1000x1xf16_into_1000x1xf32(%lhs: tensor<1000x1000xf16>, %rhs: tensor<1000x1xf16>) -> tensor<1000x1xf32> {
+  %init_acc = tensor.empty() : tensor<1000x1xf32>
+  %c0_acc_type = arith.constant 0.0: f32
+  %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<1000x1xf32>) -> tensor<1000x1xf32>
+  %result = linalg.matmul ins(%lhs, %rhs: tensor<1000x1000xf16>, tensor<1000x1xf16>) outs(%acc: tensor<1000x1xf32>) -> tensor<1000x1xf32>
+  return %result: tensor<1000x1xf32>
+}
+
diff --git a/linalg_ops/matmul/generated/f16_into_f32/matmul_f16_into_f32_large_calls.mlir b/linalg_ops/matmul/generated/f16_into_f32/matmul_f16_into_f32_large_calls.mlir
new file mode 100644
index 0000000..eb94823
--- /dev/null
+++ b/linalg_ops/matmul/generated/f16_into_f32/matmul_f16_into_f32_large_calls.mlir
@@ -0,0 +1,882 @@
+builtin.module @calls attributes {
+
+} {
+
+func.func private @matmul_test.generate_random_matrix(%device: !hal.device, %dim0: i64, %dim1: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view
+func.func private @matmul_test.check_matmul_results(%device: !hal.device, %m: i64, %k: i64, %n: i64, %transpose_rhs: i32, %lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view)
+
+func.func private @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_accumulate_512x128xf16_times_128x512xf16_into_512x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_512x128xf16_times_128x512xf16_into_512x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_1000x4xf16_times_4x512xf16_into_1000x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_4x1000xf16_times_1000x512xf16_into_4x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_512x1000xf16_times_1000x4xf16_into_512x4xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_512x128xf16_times_128x500xf16_into_512x500xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_457x330xf16_times_330x512xf16_into_457x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_457x330xf16_times_330x514xf16_into_457x514xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_438x330xf16_times_330x514xf16_into_438x514xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_540x332xf16_times_332x516xf16_into_540x516xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_654x321xf16_times_321x234xf16_into_654x234xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_457x160xf16_times_160x512xf16_into_457x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_512x330xf16_times_330x512xf16_into_512x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_accumulate_1x1000xf16_times_1000x1000xf16_into_1x1000xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_accumulate_1000x1000xf16_times_1000x1xf16_into_1000x1xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_1000x1000xf16_times_1000x1xf16_into_1000x1xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+
+func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_512_128_512_acc_0() attributes {
+  iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"}
+} {
+  %device_index = arith.constant 0 : index
+  %device = hal.devices.get %device_index : !hal.device
+  %lhs_dim0 = arith.constant 512 : i64
+  %lhs_dim1 = arith.constant 128 : i64
+  %lhs_element_type = hal.element_type<f16> : i32
+  %lhs_seed = arith.constant 2 : i32
+  %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+
%rhs_dim0 = arith.constant 128 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 3 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 512 : i64 + %acc_dim1 = arith.constant 512 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 512 : i64 + %acc_copy_dim1 = arith.constant 512 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_512x128xf16_times_128x512xf16_into_512x512xf32_512_128_512_acc_1() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 5 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 128 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 6 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 512 : i64 + %acc_dim1 = arith.constant 512 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 512 : i64 + %acc_copy_dim1 = arith.constant 512 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_512x128xf16_times_128x512xf16_into_512x512xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + 
call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_512_128_512_2() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 8 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 128 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 9 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_512x128xf16_times_128x512xf16_into_512x512xf32_512_128_512_3() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 10 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 128 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 11 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_512x128xf16_times_128x512xf16_into_512x512xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_1000_4_512_4() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x4x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 4 : i64 + 
%lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 12 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 13 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_1000x4xf16_times_4x512xf16_into_1000x512xf32_1000_4_512_5() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x4x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 4 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 14 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 15 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_1000x4xf16_times_4x512xf16_into_1000x512xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_4_1000_512_6() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 4x1000x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 4 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 16 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 17 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : 
!hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_4x1000xf16_times_1000x512xf16_into_4x512xf32_4_1000_512_7() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 4x1000x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 4 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 18 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 19 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_4x1000xf16_times_1000x512xf16_into_4x512xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_512_1000_4_8() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x1000x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 20 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 21 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func 
@matmul_512x1000xf16_times_1000x4xf16_into_512x4xf32_512_1000_4_9() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x1000x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 22 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 23 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_512x1000xf16_times_1000x4xf16_into_512x4xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_512_128_500_10() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x500"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 24 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 128 : i64 + %rhs_dim1 = arith.constant 500 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 25 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 500 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_512x128xf16_times_128x500xf16_into_512x500xf32_512_128_500_11() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x500"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 26 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view 
+ %rhs_dim0 = arith.constant 128 : i64 + %rhs_dim1 = arith.constant 500 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 27 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_512x128xf16_times_128x500xf16_into_512x500xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 500 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_457_330_512_12() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x330x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 28 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 29 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_457x330xf16_times_330x512xf16_into_457x512xf32_457_330_512_13() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x330x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 30 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 31 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_457x330xf16_times_330x512xf16_into_457x512xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 330 : i64 
+ %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_457_330_514_14() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x330x514"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 32 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 514 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 33 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 514 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_457x330xf16_times_330x514xf16_into_457x514xf32_457_330_514_15() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x330x514"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 34 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 514 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 35 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_457x330xf16_times_330x514xf16_into_457x514xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 514 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_438_330_514_16() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 438x330x514"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + 
%lhs_dim0 = arith.constant 438 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 36 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 514 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 37 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 438 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 514 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_438x330xf16_times_330x514xf16_into_438x514xf32_438_330_514_17() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 438x330x514"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 438 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 38 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 514 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 39 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_438x330xf16_times_330x514xf16_into_438x514xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 438 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 514 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_540_332_516_18() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 540x332x516"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 540 : i64 + %lhs_dim1 = arith.constant 332 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 40 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 332 : i64 + %rhs_dim1 = arith.constant 516 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 41 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, 
%rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 540 : i64 + %k = arith.constant 332 : i64 + %n = arith.constant 516 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_540x332xf16_times_332x516xf16_into_540x516xf32_540_332_516_19() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 540x332x516"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 540 : i64 + %lhs_dim1 = arith.constant 332 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 42 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 332 : i64 + %rhs_dim1 = arith.constant 516 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 43 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_540x332xf16_times_332x516xf16_into_540x516xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 540 : i64 + %k = arith.constant 332 : i64 + %n = arith.constant 516 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_654_321_234_20() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 654x321x234"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 654 : i64 + %lhs_dim1 = arith.constant 321 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 44 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 321 : i64 + %rhs_dim1 = arith.constant 234 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 45 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 654 : i64 + %k = arith.constant 321 : i64 + %n = arith.constant 234 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, 
!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_654x321xf16_times_321x234xf16_into_654x234xf32_654_321_234_21() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 654x321x234"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 654 : i64 + %lhs_dim1 = arith.constant 321 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 46 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 321 : i64 + %rhs_dim1 = arith.constant 234 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 47 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_654x321xf16_times_321x234xf16_into_654x234xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 654 : i64 + %k = arith.constant 321 : i64 + %n = arith.constant 234 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_457_160_512_22() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x160x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 160 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 48 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 160 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 49 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 160 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_457x160xf16_times_160x512xf16_into_457x512xf32_457_160_512_23() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x160x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 160 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 50 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, 
%lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 160 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 51 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_457x160xf16_times_160x512xf16_into_457x512xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 160 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_512_330_512_24() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x330x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 52 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 53 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_512x330xf16_times_330x512xf16_into_512x512xf32_512_330_512_25() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x330x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 54 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 55 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_512x330xf16_times_330x512xf16_into_512x512xf32(%lhs, %rhs) : (!hal.buffer_view, 
!hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_1_1000_1000_acc_26() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1000x1000"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 56 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 57 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1000 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 58 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1000 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 58 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1000 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_1x1000xf16_times_1000x1000xf16_into_1x1000xf32_1_1000_1000_acc_27() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1000x1000"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 59 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 60 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> 
!hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1000 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 61 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1000 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 61 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_1x1000xf16_times_1000x1000xf16_into_1x1000xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1000 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_1000_1000_1_acc_28() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x1000x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 62 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 63 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1000 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 64 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1000 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 64 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func 
@matmul_accumulate_1000x1000xf16_times_1000x1xf16_into_1000x1xf32_1000_1000_1_acc_29() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x1000x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 65 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 66 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1000 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 67 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1000 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 67 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_1000x1000xf16_times_1000x1xf16_into_1000x1xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_1000_1000_1_30() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x1000x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 68 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 69 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : 
(!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_1000x1000xf16_times_1000x1xf16_into_1000x1xf32_1000_1000_1_31() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x1000x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type<f16> : i32 + %lhs_seed = arith.constant 70 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type<f16> : i32 + %rhs_seed = arith.constant 71 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_1000x1000xf16_times_1000x1xf16_into_1000x1xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + + +} diff --git a/linalg_ops/matmul/generated/f16_into_f32/matmul_f16_into_f32_small.mlir b/linalg_ops/matmul/generated/f16_into_f32/matmul_f16_into_f32_small.mlir new file mode 100644 index 0000000..e70d950 --- /dev/null +++ b/linalg_ops/matmul/generated/f16_into_f32/matmul_f16_into_f32_small.mlir @@ -0,0 +1,99 @@ +func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs: tensor<?x?xf16>, %rhs: tensor<?x?xf16>, %acc: tensor<?x?xf32>) -> tensor<?x?xf32> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<?x?xf16>, tensor<?x?xf16>) outs(%acc: tensor<?x?xf32>) -> tensor<?x?xf32> + return %result: tensor<?x?xf32> +} + +func.func @matmul_accumulate_1x1xf16_times_1x1xf16_into_1x1xf32(%lhs: tensor<1x1xf16>, %rhs: tensor<1x1xf16>, %acc: tensor<1x1xf32>) -> tensor<1x1xf32> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<1x1xf16>, tensor<1x1xf16>) outs(%acc: tensor<1x1xf32>) -> tensor<1x1xf32> + return %result: tensor<1x1xf32> +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs: tensor<?x?xf16>, %rhs: tensor<?x?xf16>) -> tensor<?x?xf32> { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %acc_dim0 = tensor.dim %lhs, %c0 : tensor<?x?xf16> + %acc_dim1 = tensor.dim %rhs, %c1 : tensor<?x?xf16> + %init_acc = tensor.empty(%acc_dim0, %acc_dim1) : tensor<?x?xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<?x?xf32>) -> tensor<?x?xf32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<?x?xf16>, tensor<?x?xf16>) outs(%acc: tensor<?x?xf32>) -> tensor<?x?xf32> + return %result: tensor<?x?xf32> +} + +func.func @matmul_1x1xf16_times_1x1xf16_into_1x1xf32(%lhs: tensor<1x1xf16>, %rhs: tensor<1x1xf16>) -> tensor<1x1xf32> { + %init_acc = tensor.empty() : tensor<1x1xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<1x1xf32>) -> tensor<1x1xf32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<1x1xf16>, tensor<1x1xf16>) outs(%acc: tensor<1x1xf32>) -> tensor<1x1xf32> + return %result: tensor<1x1xf32> +} + +func.func
@matmul_accumulate_2x2xf16_times_2x2xf16_into_2x2xf32(%lhs: tensor<2x2xf16>, %rhs: tensor<2x2xf16>, %acc: tensor<2x2xf32>) -> tensor<2x2xf32> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<2x2xf16>, tensor<2x2xf16>) outs(%acc: tensor<2x2xf32>) -> tensor<2x2xf32> + return %result: tensor<2x2xf32> +} + +func.func @matmul_accumulate_4x4xf16_times_4x4xf16_into_4x4xf32(%lhs: tensor<4x4xf16>, %rhs: tensor<4x4xf16>, %acc: tensor<4x4xf32>) -> tensor<4x4xf32> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<4x4xf16>, tensor<4x4xf16>) outs(%acc: tensor<4x4xf32>) -> tensor<4x4xf32> + return %result: tensor<4x4xf32> +} + +func.func @matmul_accumulate_8x8xf16_times_8x8xf16_into_8x8xf32(%lhs: tensor<8x8xf16>, %rhs: tensor<8x8xf16>, %acc: tensor<8x8xf32>) -> tensor<8x8xf32> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<8x8xf16>, tensor<8x8xf16>) outs(%acc: tensor<8x8xf32>) -> tensor<8x8xf32> + return %result: tensor<8x8xf32> +} + +func.func @matmul_accumulate_9x9xf16_times_9x9xf16_into_9x9xf32(%lhs: tensor<9x9xf16>, %rhs: tensor<9x9xf16>, %acc: tensor<9x9xf32>) -> tensor<9x9xf32> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<9x9xf16>, tensor<9x9xf16>) outs(%acc: tensor<9x9xf32>) -> tensor<9x9xf32> + return %result: tensor<9x9xf32> +} + +func.func @matmul_accumulate_6x13xf16_times_13x3xf16_into_6x3xf32(%lhs: tensor<6x13xf16>, %rhs: tensor<13x3xf16>, %acc: tensor<6x3xf32>) -> tensor<6x3xf32> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<6x13xf16>, tensor<13x3xf16>) outs(%acc: tensor<6x3xf32>) -> tensor<6x3xf32> + return %result: tensor<6x3xf32> +} + +func.func @matmul_15x37xf16_times_37x7xf16_into_15x7xf32(%lhs: tensor<15x37xf16>, %rhs: tensor<37x7xf16>) -> tensor<15x7xf32> { + %init_acc = tensor.empty() : tensor<15x7xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<15x7xf32>) -> tensor<15x7xf32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<15x37xf16>, tensor<37x7xf16>) outs(%acc: tensor<15x7xf32>) -> tensor<15x7xf32> + return %result: tensor<15x7xf32> +} + +func.func @matmul_accumulate_81x19xf16_times_19x41xf16_into_81x41xf32(%lhs: tensor<81x19xf16>, %rhs: tensor<19x41xf16>, %acc: tensor<81x41xf32>) -> tensor<81x41xf32> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<81x19xf16>, tensor<19x41xf16>) outs(%acc: tensor<81x41xf32>) -> tensor<81x41xf32> + return %result: tensor<81x41xf32> +} + +func.func @matmul_accumulate_1x10xf16_times_10x10xf16_into_1x10xf32(%lhs: tensor<1x10xf16>, %rhs: tensor<10x10xf16>, %acc: tensor<1x10xf32>) -> tensor<1x10xf32> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<1x10xf16>, tensor<10x10xf16>) outs(%acc: tensor<1x10xf32>) -> tensor<1x10xf32> + return %result: tensor<1x10xf32> +} + +func.func @matmul_1x10xf16_times_10x10xf16_into_1x10xf32(%lhs: tensor<1x10xf16>, %rhs: tensor<10x10xf16>) -> tensor<1x10xf32> { + %init_acc = tensor.empty() : tensor<1x10xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<1x10xf32>) -> tensor<1x10xf32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<1x10xf16>, tensor<10x10xf16>) outs(%acc: tensor<1x10xf32>) -> tensor<1x10xf32> + return %result: tensor<1x10xf32> +} + +func.func @matmul_accumulate_10x1xf16_times_1x10xf16_into_10x10xf32(%lhs: tensor<10x1xf16>, %rhs: tensor<1x10xf16>, %acc: tensor<10x10xf32>) -> tensor<10x10xf32> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<10x1xf16>, tensor<1x10xf16>) outs(%acc: tensor<10x10xf32>) -> tensor<10x10xf32> + return %result: tensor<10x10xf32> 
+} + +func.func @matmul_accumulate_10x10xf16_times_10x1xf16_into_10x1xf32(%lhs: tensor<10x10xf16>, %rhs: tensor<10x1xf16>, %acc: tensor<10x1xf32>) -> tensor<10x1xf32> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<10x10xf16>, tensor<10x1xf16>) outs(%acc: tensor<10x1xf32>) -> tensor<10x1xf32> + return %result: tensor<10x1xf32> +} + +func.func @matmul_10x10xf16_times_10x1xf16_into_10x1xf32(%lhs: tensor<10x10xf16>, %rhs: tensor<10x1xf16>) -> tensor<10x1xf32> { + %init_acc = tensor.empty() : tensor<10x1xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<10x1xf32>) -> tensor<10x1xf32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<10x10xf16>, tensor<10x1xf16>) outs(%acc: tensor<10x1xf32>) -> tensor<10x1xf32> + return %result: tensor<10x1xf32> +} + diff --git a/linalg_ops/matmul/generated/f16_into_f32/matmul_f16_into_f32_small_calls.mlir b/linalg_ops/matmul/generated/f16_into_f32/matmul_f16_into_f32_small_calls.mlir new file mode 100644 index 0000000..051ac2b --- /dev/null +++ b/linalg_ops/matmul/generated/f16_into_f32/matmul_f16_into_f32_small_calls.mlir @@ -0,0 +1,906 @@ +builtin.module @calls attributes { + +} { + +func.func private @matmul_test.generate_random_matrix(%device: !hal.device, %dim0: i64, %dim1: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @matmul_test.check_matmul_results(%device: !hal.device, %m: i64, %k: i64, %n: i64, %transpose_rhs: i32, %lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) + +func.func private @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_1x1xf16_times_1x1xf16_into_1x1xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_1x1xf16_times_1x1xf16_into_1x1xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_2x2xf16_times_2x2xf16_into_2x2xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_4x4xf16_times_4x4xf16_into_4x4xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_8x8xf16_times_8x8xf16_into_8x8xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_9x9xf16_times_9x9xf16_into_9x9xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_6x13xf16_times_13x3xf16_into_6x3xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_15x37xf16_times_37x7xf16_into_15x7xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_81x19xf16_times_19x41xf16_into_81x41xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_1x10xf16_times_10x10xf16_into_1x10xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: 
!hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_1x10xf16_times_10x10xf16_into_1x10xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_10x1xf16_times_1x10xf16_into_10x10xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_10x10xf16_times_10x1xf16_into_10x1xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_10x10xf16_times_10x1xf16_into_10x1xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view + +func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_1_1_1_acc_0() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 2 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 3 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_1x1xf16_times_1x1xf16_into_1x1xf32_1_1_1_acc_1() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 5 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + 
%rhs_seed = arith.constant 6 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_1x1xf16_times_1x1xf16_into_1x1xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_1_1_1_2() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 8 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 9 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_1x1xf16_times_1x1xf16_into_1x1xf32_1_1_1_3() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 10 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1 : i64 + 
%rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 11 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_1x1xf16_times_1x1xf16_into_1x1xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_2_2_2_acc_4() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 2x2x2"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 2 : i64 + %lhs_dim1 = arith.constant 2 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 12 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 2 : i64 + %rhs_dim1 = arith.constant 2 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 13 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 2 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 14 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 2 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 14 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 2 : i64 + %k = arith.constant 2 : i64 + %n = arith.constant 2 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_2x2xf16_times_2x2xf16_into_2x2xf32_2_2_2_acc_5() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 2x2x2"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 2 : i64 + %lhs_dim1 = arith.constant 2 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 15 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + 
%rhs_dim0 = arith.constant 2 : i64 + %rhs_dim1 = arith.constant 2 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 16 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 2 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 17 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 2 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 17 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_2x2xf16_times_2x2xf16_into_2x2xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 2 : i64 + %k = arith.constant 2 : i64 + %n = arith.constant 2 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_4_4_4_acc_6() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 4x4x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 4 : i64 + %lhs_dim1 = arith.constant 4 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 18 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 19 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 4 : i64 + %acc_dim1 = arith.constant 4 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 20 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 4 : i64 + %acc_copy_dim1 = arith.constant 4 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 20 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, 
%n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_4x4xf16_times_4x4xf16_into_4x4xf32_4_4_4_acc_7() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 4x4x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 4 : i64 + %lhs_dim1 = arith.constant 4 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 21 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 22 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 4 : i64 + %acc_dim1 = arith.constant 4 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 23 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 4 : i64 + %acc_copy_dim1 = arith.constant 4 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 23 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_4x4xf16_times_4x4xf16_into_4x4xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_8_8_8_acc_8() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 8x8x8"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 8 : i64 + %lhs_dim1 = arith.constant 8 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 24 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 8 : i64 + %rhs_dim1 = arith.constant 8 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 25 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 8 : i64 + %acc_dim1 = arith.constant 8 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 26 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> 
!hal.buffer_view + %acc_copy_dim0 = arith.constant 8 : i64 + %acc_copy_dim1 = arith.constant 8 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 26 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 8 : i64 + %k = arith.constant 8 : i64 + %n = arith.constant 8 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_8x8xf16_times_8x8xf16_into_8x8xf32_8_8_8_acc_9() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 8x8x8"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 8 : i64 + %lhs_dim1 = arith.constant 8 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 27 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 8 : i64 + %rhs_dim1 = arith.constant 8 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 28 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 8 : i64 + %acc_dim1 = arith.constant 8 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 29 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 8 : i64 + %acc_copy_dim1 = arith.constant 8 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 29 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_8x8xf16_times_8x8xf16_into_8x8xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 8 : i64 + %k = arith.constant 8 : i64 + %n = arith.constant 8 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_9_9_9_acc_10() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 9x9x9"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 9 : i64 + %lhs_dim1 = arith.constant 9 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 30 : i32 + %lhs = call 
@matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 9 : i64 + %rhs_dim1 = arith.constant 9 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 31 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 9 : i64 + %acc_dim1 = arith.constant 9 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 32 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 9 : i64 + %acc_copy_dim1 = arith.constant 9 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 32 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 9 : i64 + %k = arith.constant 9 : i64 + %n = arith.constant 9 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_9x9xf16_times_9x9xf16_into_9x9xf32_9_9_9_acc_11() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 9x9x9"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 9 : i64 + %lhs_dim1 = arith.constant 9 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 33 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 9 : i64 + %rhs_dim1 = arith.constant 9 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 34 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 9 : i64 + %acc_dim1 = arith.constant 9 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 35 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 9 : i64 + %acc_copy_dim1 = arith.constant 9 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 35 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_9x9xf16_times_9x9xf16_into_9x9xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 9 : i64 + %k = 
arith.constant 9 : i64 + %n = arith.constant 9 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_6_13_3_acc_12() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 6x13x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 6 : i64 + %lhs_dim1 = arith.constant 13 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 36 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 13 : i64 + %rhs_dim1 = arith.constant 3 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 37 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 6 : i64 + %acc_dim1 = arith.constant 3 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 38 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 6 : i64 + %acc_copy_dim1 = arith.constant 3 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 38 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 6 : i64 + %k = arith.constant 13 : i64 + %n = arith.constant 3 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_6x13xf16_times_13x3xf16_into_6x3xf32_6_13_3_acc_13() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 6x13x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 6 : i64 + %lhs_dim1 = arith.constant 13 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 39 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 13 : i64 + %rhs_dim1 = arith.constant 3 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 40 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 6 : i64 + %acc_dim1 = arith.constant 3 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = 
arith.constant 41 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 6 : i64 + %acc_copy_dim1 = arith.constant 3 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 41 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_6x13xf16_times_13x3xf16_into_6x3xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 6 : i64 + %k = arith.constant 13 : i64 + %n = arith.constant 3 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_15_37_7_14() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 15x37x7"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 15 : i64 + %lhs_dim1 = arith.constant 37 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 42 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 37 : i64 + %rhs_dim1 = arith.constant 7 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 43 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 15 : i64 + %k = arith.constant 37 : i64 + %n = arith.constant 7 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_15x37xf16_times_37x7xf16_into_15x7xf32_15_37_7_15() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 15x37x7"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 15 : i64 + %lhs_dim1 = arith.constant 37 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 44 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 37 : i64 + %rhs_dim1 = arith.constant 7 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 45 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call 
@module.matmul_15x37xf16_times_37x7xf16_into_15x7xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 15 : i64 + %k = arith.constant 37 : i64 + %n = arith.constant 7 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_81_19_41_acc_16() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 81x19x41"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 81 : i64 + %lhs_dim1 = arith.constant 19 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 46 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 19 : i64 + %rhs_dim1 = arith.constant 41 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 47 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 81 : i64 + %acc_dim1 = arith.constant 41 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 48 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 81 : i64 + %acc_copy_dim1 = arith.constant 41 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 48 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 81 : i64 + %k = arith.constant 19 : i64 + %n = arith.constant 41 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_81x19xf16_times_19x41xf16_into_81x41xf32_81_19_41_acc_17() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 81x19x41"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 81 : i64 + %lhs_dim1 = arith.constant 19 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 49 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 19 : i64 + %rhs_dim1 = arith.constant 41 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 50 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : 
(!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 81 : i64 + %acc_dim1 = arith.constant 41 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 51 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 81 : i64 + %acc_copy_dim1 = arith.constant 41 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 51 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_81x19xf16_times_19x41xf16_into_81x41xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 81 : i64 + %k = arith.constant 19 : i64 + %n = arith.constant 41 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_1_10_10_acc_18() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 52 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 53 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 10 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 54 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 10 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 54 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func 
@matmul_accumulate_1x10xf16_times_10x10xf16_into_1x10xf32_1_10_10_acc_19() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 55 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 56 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 10 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 57 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 10 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 57 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_1x10xf16_times_10x10xf16_into_1x10xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_1_10_10_20() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 58 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 59 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, 
!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_1x10xf16_times_10x10xf16_into_1x10xf32_1_10_10_21() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 60 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 61 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_1x10xf16_times_10x10xf16_into_1x10xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_10_1_10_acc_22() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x1x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 62 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 63 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 10 : i64 + %acc_dim1 = arith.constant 10 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 64 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 10 : i64 + %acc_copy_dim1 = arith.constant 10 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 64 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, 
%transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_10x1xf16_times_1x10xf16_into_10x10xf32_10_1_10_acc_23() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x1x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 65 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 66 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 10 : i64 + %acc_dim1 = arith.constant 10 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 67 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 10 : i64 + %acc_copy_dim1 = arith.constant 10 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 67 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_10x1xf16_times_1x10xf16_into_10x10xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_10_10_1_acc_24() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 68 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 69 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 10 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 70 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, 
i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 10 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 70 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_10x10xf16_times_10x1xf16_into_10x1xf32_10_10_1_acc_25() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 71 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 72 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 10 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 73 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 10 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 73 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_10x10xf16_times_10x1xf16_into_10x1xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_10_10_1_26() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 74 : 
i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 75 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_10x10xf16_times_10x1xf16_into_10x1xf32_10_10_1_27() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 76 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 77 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_10x10xf16_times_10x1xf16_into_10x1xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + + +} diff --git a/linalg_ops/matmul/generated/f16_into_f32/matmul_transpose_b_f16_into_f32_large.mlir b/linalg_ops/matmul/generated/f16_into_f32/matmul_transpose_b_f16_into_f32_large.mlir new file mode 100644 index 0000000..fb930f7 --- /dev/null +++ b/linalg_ops/matmul/generated/f16_into_f32/matmul_transpose_b_f16_into_f32_large.mlir @@ -0,0 +1,136 @@ +func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs: tensor<?x?xf16>, %rhs: tensor<?x?xf16>, %acc: tensor<?x?xf32>) -> tensor<?x?xf32> { + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<?x?xf16>, tensor<?x?xf16>) outs(%acc: tensor<?x?xf32>) -> tensor<?x?xf32> + return %result: tensor<?x?xf32> +} + +func.func @matmul_accumulate_512x128xf16_times_512x128xf16_into_512x512xf32(%lhs: tensor<512x128xf16>, %rhs: tensor<512x128xf16>, %acc: tensor<512x512xf32>) -> tensor<512x512xf32> { + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<512x128xf16>, tensor<512x128xf16>) outs(%acc: tensor<512x512xf32>) -> tensor<512x512xf32> + return %result: tensor<512x512xf32> +} + +func.func
@matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs: tensor<?x?xf16>, %rhs: tensor<?x?xf16>) -> tensor<?x?xf32> { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %acc_dim0 = tensor.dim %lhs, %c0 : tensor<?x?xf16> + %acc_dim1 = tensor.dim %rhs, %c1 : tensor<?x?xf16> + %init_acc = tensor.empty(%acc_dim0, %acc_dim1) : tensor<?x?xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<?x?xf32>) -> tensor<?x?xf32> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<?x?xf16>, tensor<?x?xf16>) outs(%acc: tensor<?x?xf32>) -> tensor<?x?xf32> + return %result: tensor<?x?xf32> +} + +func.func @matmul_512x128xf16_times_512x128xf16_into_512x512xf32(%lhs: tensor<512x128xf16>, %rhs: tensor<512x128xf16>) -> tensor<512x512xf32> { + %init_acc = tensor.empty() : tensor<512x512xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<512x512xf32>) -> tensor<512x512xf32> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<512x128xf16>, tensor<512x128xf16>) outs(%acc: tensor<512x512xf32>) -> tensor<512x512xf32> + return %result: tensor<512x512xf32> +} + +func.func @matmul_1000x4xf16_times_512x4xf16_into_1000x512xf32(%lhs: tensor<1000x4xf16>, %rhs: tensor<512x4xf16>) -> tensor<1000x512xf32> { + %init_acc = tensor.empty() : tensor<1000x512xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<1000x512xf32>) -> tensor<1000x512xf32> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<1000x4xf16>, tensor<512x4xf16>) outs(%acc: tensor<1000x512xf32>) -> tensor<1000x512xf32> + return %result: tensor<1000x512xf32> +} + +func.func @matmul_4x1000xf16_times_512x1000xf16_into_4x512xf32(%lhs: tensor<4x1000xf16>, %rhs: tensor<512x1000xf16>) -> tensor<4x512xf32> { + %init_acc = tensor.empty() : tensor<4x512xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<4x512xf32>) -> tensor<4x512xf32> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<4x1000xf16>, tensor<512x1000xf16>) outs(%acc: tensor<4x512xf32>) -> tensor<4x512xf32> + return %result: tensor<4x512xf32> +} + +func.func @matmul_512x1000xf16_times_4x1000xf16_into_512x4xf32(%lhs: tensor<512x1000xf16>, %rhs: tensor<4x1000xf16>) -> tensor<512x4xf32> { + %init_acc = tensor.empty() : tensor<512x4xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<512x4xf32>) -> tensor<512x4xf32> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<512x1000xf16>, tensor<4x1000xf16>) outs(%acc: tensor<512x4xf32>) -> tensor<512x4xf32> + return %result: tensor<512x4xf32> +} + +func.func @matmul_512x128xf16_times_500x128xf16_into_512x500xf32(%lhs: tensor<512x128xf16>, %rhs: tensor<500x128xf16>) -> tensor<512x500xf32> { + %init_acc = tensor.empty() : tensor<512x500xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<512x500xf32>) -> tensor<512x500xf32> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<512x128xf16>, tensor<500x128xf16>) outs(%acc: tensor<512x500xf32>) -> tensor<512x500xf32> + return %result: tensor<512x500xf32> +} + +func.func @matmul_457x330xf16_times_512x330xf16_into_457x512xf32(%lhs: tensor<457x330xf16>, %rhs: tensor<512x330xf16>) -> tensor<457x512xf32> { + %init_acc = tensor.empty() : tensor<457x512xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<457x512xf32>) -> tensor<457x512xf32> +
%result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<457x330xf16>, tensor<512x330xf16>) outs(%acc: tensor<457x512xf32>) -> tensor<457x512xf32> + return %result: tensor<457x512xf32> +} + +func.func @matmul_457x330xf16_times_514x330xf16_into_457x514xf32(%lhs: tensor<457x330xf16>, %rhs: tensor<514x330xf16>) -> tensor<457x514xf32> { + %init_acc = tensor.empty() : tensor<457x514xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<457x514xf32>) -> tensor<457x514xf32> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<457x330xf16>, tensor<514x330xf16>) outs(%acc: tensor<457x514xf32>) -> tensor<457x514xf32> + return %result: tensor<457x514xf32> +} + +func.func @matmul_438x330xf16_times_514x330xf16_into_438x514xf32(%lhs: tensor<438x330xf16>, %rhs: tensor<514x330xf16>) -> tensor<438x514xf32> { + %init_acc = tensor.empty() : tensor<438x514xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<438x514xf32>) -> tensor<438x514xf32> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<438x330xf16>, tensor<514x330xf16>) outs(%acc: tensor<438x514xf32>) -> tensor<438x514xf32> + return %result: tensor<438x514xf32> +} + +func.func @matmul_540x332xf16_times_516x332xf16_into_540x516xf32(%lhs: tensor<540x332xf16>, %rhs: tensor<516x332xf16>) -> tensor<540x516xf32> { + %init_acc = tensor.empty() : tensor<540x516xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<540x516xf32>) -> tensor<540x516xf32> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<540x332xf16>, tensor<516x332xf16>) outs(%acc: tensor<540x516xf32>) -> tensor<540x516xf32> + return %result: tensor<540x516xf32> +} + +func.func @matmul_654x321xf16_times_234x321xf16_into_654x234xf32(%lhs: tensor<654x321xf16>, %rhs: tensor<234x321xf16>) -> tensor<654x234xf32> { + %init_acc = tensor.empty() : tensor<654x234xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<654x234xf32>) -> tensor<654x234xf32> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<654x321xf16>, tensor<234x321xf16>) outs(%acc: tensor<654x234xf32>) -> tensor<654x234xf32> + return %result: tensor<654x234xf32> +} + +func.func @matmul_457x160xf16_times_512x160xf16_into_457x512xf32(%lhs: tensor<457x160xf16>, %rhs: tensor<512x160xf16>) -> tensor<457x512xf32> { + %init_acc = tensor.empty() : tensor<457x512xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<457x512xf32>) -> tensor<457x512xf32> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<457x160xf16>, tensor<512x160xf16>) outs(%acc: tensor<457x512xf32>) -> tensor<457x512xf32> + return %result: tensor<457x512xf32> +} + +func.func @matmul_512x330xf16_times_512x330xf16_into_512x512xf32(%lhs: tensor<512x330xf16>, %rhs: tensor<512x330xf16>) -> tensor<512x512xf32> { + %init_acc = tensor.empty() : tensor<512x512xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<512x512xf32>) -> tensor<512x512xf32> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<512x330xf16>, tensor<512x330xf16>) outs(%acc: tensor<512x512xf32>) -> tensor<512x512xf32> + return %result: tensor<512x512xf32> +} + +func.func @matmul_accumulate_1x1000xf16_times_1000x1000xf16_into_1x1000xf32(%lhs: tensor<1x1000xf16>, %rhs: tensor<1000x1000xf16>, %acc: 
tensor<1x1000xf32>) -> tensor<1x1000xf32> { + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<1x1000xf16>, tensor<1000x1000xf16>) outs(%acc: tensor<1x1000xf32>) -> tensor<1x1000xf32> + return %result: tensor<1x1000xf32> +} + +func.func @matmul_accumulate_1000x1000xf16_times_1x1000xf16_into_1000x1xf32(%lhs: tensor<1000x1000xf16>, %rhs: tensor<1x1000xf16>, %acc: tensor<1000x1xf32>) -> tensor<1000x1xf32> { + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<1000x1000xf16>, tensor<1x1000xf16>) outs(%acc: tensor<1000x1xf32>) -> tensor<1000x1xf32> + return %result: tensor<1000x1xf32> +} + +func.func @matmul_1000x1000xf16_times_1x1000xf16_into_1000x1xf32(%lhs: tensor<1000x1000xf16>, %rhs: tensor<1x1000xf16>) -> tensor<1000x1xf32> { + %init_acc = tensor.empty() : tensor<1000x1xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<1000x1xf32>) -> tensor<1000x1xf32> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<1000x1000xf16>, tensor<1x1000xf16>) outs(%acc: tensor<1000x1xf32>) -> tensor<1000x1xf32> + return %result: tensor<1000x1xf32> +} + diff --git a/linalg_ops/matmul/generated/f16_into_f32/matmul_transpose_b_f16_into_f32_large_calls.mlir b/linalg_ops/matmul/generated/f16_into_f32/matmul_transpose_b_f16_into_f32_large_calls.mlir new file mode 100644 index 0000000..e644cc3 --- /dev/null +++ b/linalg_ops/matmul/generated/f16_into_f32/matmul_transpose_b_f16_into_f32_large_calls.mlir @@ -0,0 +1,882 @@ +builtin.module @calls attributes { + +} { + +func.func private @matmul_test.generate_random_matrix(%device: !hal.device, %dim0: i64, %dim1: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @matmul_test.check_matmul_results(%device: !hal.device, %m: i64, %k: i64, %n: i64, %transpose_rhs: i32, %lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) + +func.func private @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_512x128xf16_times_512x128xf16_into_512x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_512x128xf16_times_512x128xf16_into_512x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_1000x4xf16_times_512x4xf16_into_1000x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_4x1000xf16_times_512x1000xf16_into_4x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_512x1000xf16_times_4x1000xf16_into_512x4xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_512x128xf16_times_500x128xf16_into_512x500xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_457x330xf16_times_512x330xf16_into_457x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_457x330xf16_times_514x330xf16_into_457x514xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private 
@module.matmul_438x330xf16_times_514x330xf16_into_438x514xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_540x332xf16_times_516x332xf16_into_540x516xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_654x321xf16_times_234x321xf16_into_654x234xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_457x160xf16_times_512x160xf16_into_457x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_512x330xf16_times_512x330xf16_into_512x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_1x1000xf16_times_1000x1000xf16_into_1x1000xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_1000x1000xf16_times_1x1000xf16_into_1000x1xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_1000x1000xf16_times_1x1000xf16_into_1000x1xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view + +func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_512_128_512_acc_0() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 2 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 128 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 3 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 512 : i64 + %acc_dim1 = arith.constant 512 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 512 : i64 + %acc_copy_dim1 = arith.constant 512 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_512x128xf16_times_512x128xf16_into_512x512xf32_512_128_512_acc_1() attributes { 
+ iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 5 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 128 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 6 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 512 : i64 + %acc_dim1 = arith.constant 512 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 512 : i64 + %acc_copy_dim1 = arith.constant 512 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_512x128xf16_times_512x128xf16_into_512x512xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_512_128_512_2() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 8 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 128 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 9 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) 
-> () + return +} + +func.func @matmul_512x128xf16_times_512x128xf16_into_512x512xf32_512_128_512_3() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 10 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 128 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 11 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_512x128xf16_times_512x128xf16_into_512x512xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_1000_4_512_4() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x4x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 4 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 12 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 13 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_1000x4xf16_times_512x4xf16_into_1000x512xf32_1000_4_512_5() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x4x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 4 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 14 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, 
i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 15 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_1000x4xf16_times_512x4xf16_into_1000x512xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_4_1000_512_6() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 4x1000x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 4 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 16 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 17 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_4x1000xf16_times_512x1000xf16_into_4x512xf32_4_1000_512_7() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 4x1000x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 4 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 18 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 19 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_4x1000xf16_times_512x1000xf16_into_4x512xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 
1000 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_512_1000_4_8() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x1000x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 20 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 21 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_512x1000xf16_times_4x1000xf16_into_512x4xf32_512_1000_4_9() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x1000x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 22 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 23 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_512x1000xf16_times_4x1000xf16_into_512x4xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_512_128_500_10() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x500"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + 
%lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 24 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 500 : i64 + %rhs_dim1 = arith.constant 128 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 25 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 500 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_512x128xf16_times_500x128xf16_into_512x500xf32_512_128_500_11() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x500"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 26 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 500 : i64 + %rhs_dim1 = arith.constant 128 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 27 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_512x128xf16_times_500x128xf16_into_512x500xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 500 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_457_330_512_12() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x330x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 28 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 29 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, 
%rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_457x330xf16_times_512x330xf16_into_457x512xf32_457_330_512_13() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x330x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 30 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 31 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_457x330xf16_times_512x330xf16_into_457x512xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_457_330_514_14() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x330x514"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 32 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 514 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 33 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 514 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, 
!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_457x330xf16_times_514x330xf16_into_457x514xf32_457_330_514_15() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x330x514"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 34 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 514 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 35 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_457x330xf16_times_514x330xf16_into_457x514xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 514 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_438_330_514_16() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 438x330x514"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 438 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 36 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 514 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 37 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 438 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 514 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_438x330xf16_times_514x330xf16_into_438x514xf32_438_330_514_17() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 438x330x514"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 438 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 38 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, 
%lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 514 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 39 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_438x330xf16_times_514x330xf16_into_438x514xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 438 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 514 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_540_332_516_18() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 540x332x516"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 540 : i64 + %lhs_dim1 = arith.constant 332 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 40 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 516 : i64 + %rhs_dim1 = arith.constant 332 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 41 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 540 : i64 + %k = arith.constant 332 : i64 + %n = arith.constant 516 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_540x332xf16_times_516x332xf16_into_540x516xf32_540_332_516_19() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 540x332x516"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 540 : i64 + %lhs_dim1 = arith.constant 332 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 42 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 516 : i64 + %rhs_dim1 = arith.constant 332 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 43 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_540x332xf16_times_516x332xf16_into_540x516xf32(%lhs, %rhs) : (!hal.buffer_view, 
!hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 540 : i64 + %k = arith.constant 332 : i64 + %n = arith.constant 516 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_654_321_234_20() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 654x321x234"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 654 : i64 + %lhs_dim1 = arith.constant 321 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 44 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 234 : i64 + %rhs_dim1 = arith.constant 321 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 45 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 654 : i64 + %k = arith.constant 321 : i64 + %n = arith.constant 234 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_654x321xf16_times_234x321xf16_into_654x234xf32_654_321_234_21() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 654x321x234"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 654 : i64 + %lhs_dim1 = arith.constant 321 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 46 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 234 : i64 + %rhs_dim1 = arith.constant 321 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 47 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_654x321xf16_times_234x321xf16_into_654x234xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 654 : i64 + %k = arith.constant 321 : i64 + %n = arith.constant 234 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_457_160_512_22() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x160x512"} +} 
{ + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 160 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 48 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 160 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 49 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 160 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_457x160xf16_times_512x160xf16_into_457x512xf32_457_160_512_23() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x160x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 160 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 50 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 160 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 51 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_457x160xf16_times_512x160xf16_into_457x512xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 160 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_512_330_512_24() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x330x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 52 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 
53 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_512x330xf16_times_512x330xf16_into_512x512xf32_512_330_512_25() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x330x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 54 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 55 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_512x330xf16_times_512x330xf16_into_512x512xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_1_1000_1000_acc_26() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1000x1000"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 56 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 57 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1000 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 58 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = 
arith.constant 1000 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 58 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1000 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_1x1000xf16_times_1000x1000xf16_into_1x1000xf32_1_1000_1000_acc_27() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1000x1000"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 59 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 60 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1000 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 61 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1000 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 61 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_1x1000xf16_times_1000x1000xf16_into_1x1000xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1000 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_1000_1000_1_acc_28() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x1000x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 62 : i32 + %lhs = call 
@matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 63 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1000 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 64 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1000 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 64 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_1000x1000xf16_times_1x1000xf16_into_1000x1xf32_1000_1000_1_acc_29() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x1000x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 65 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 66 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1000 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 67 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1000 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 67 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_1000x1000xf16_times_1x1000xf16_into_1000x1xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) 
-> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_1000_1000_1_30() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x1000x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 68 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 69 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_1000x1000xf16_times_1x1000xf16_into_1000x1xf32_1000_1000_1_31() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x1000x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 70 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 71 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_1000x1000xf16_times_1x1000xf16_into_1000x1xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + + +} diff --git a/linalg_ops/matmul/generated/f16_into_f32/matmul_transpose_b_f16_into_f32_small.mlir b/linalg_ops/matmul/generated/f16_into_f32/matmul_transpose_b_f16_into_f32_small.mlir 
new file mode 100644 index 0000000..70ed2e4 --- /dev/null +++ b/linalg_ops/matmul/generated/f16_into_f32/matmul_transpose_b_f16_into_f32_small.mlir @@ -0,0 +1,99 @@ +func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs: tensor<?x?xf16>, %rhs: tensor<?x?xf16>, %acc: tensor<?x?xf32>) -> tensor<?x?xf32> { + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<?x?xf16>, tensor<?x?xf16>) outs(%acc: tensor<?x?xf32>) -> tensor<?x?xf32> + return %result: tensor<?x?xf32> +} + +func.func @matmul_accumulate_1x1xf16_times_1x1xf16_into_1x1xf32(%lhs: tensor<1x1xf16>, %rhs: tensor<1x1xf16>, %acc: tensor<1x1xf32>) -> tensor<1x1xf32> { + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<1x1xf16>, tensor<1x1xf16>) outs(%acc: tensor<1x1xf32>) -> tensor<1x1xf32> + return %result: tensor<1x1xf32> +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs: tensor<?x?xf16>, %rhs: tensor<?x?xf16>) -> tensor<?x?xf32> { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %acc_dim0 = tensor.dim %lhs, %c0 : tensor<?x?xf16> + %acc_dim1 = tensor.dim %rhs, %c1 : tensor<?x?xf16> + %init_acc = tensor.empty(%acc_dim0, %acc_dim1) : tensor<?x?xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<?x?xf32>) -> tensor<?x?xf32> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<?x?xf16>, tensor<?x?xf16>) outs(%acc: tensor<?x?xf32>) -> tensor<?x?xf32> + return %result: tensor<?x?xf32> +} + +func.func @matmul_1x1xf16_times_1x1xf16_into_1x1xf32(%lhs: tensor<1x1xf16>, %rhs: tensor<1x1xf16>) -> tensor<1x1xf32> { + %init_acc = tensor.empty() : tensor<1x1xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<1x1xf32>) -> tensor<1x1xf32> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<1x1xf16>, tensor<1x1xf16>) outs(%acc: tensor<1x1xf32>) -> tensor<1x1xf32> + return %result: tensor<1x1xf32> +} + +func.func @matmul_accumulate_2x2xf16_times_2x2xf16_into_2x2xf32(%lhs: tensor<2x2xf16>, %rhs: tensor<2x2xf16>, %acc: tensor<2x2xf32>) -> tensor<2x2xf32> { + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<2x2xf16>, tensor<2x2xf16>) outs(%acc: tensor<2x2xf32>) -> tensor<2x2xf32> + return %result: tensor<2x2xf32> +} + +func.func @matmul_accumulate_4x4xf16_times_4x4xf16_into_4x4xf32(%lhs: tensor<4x4xf16>, %rhs: tensor<4x4xf16>, %acc: tensor<4x4xf32>) -> tensor<4x4xf32> { + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<4x4xf16>, tensor<4x4xf16>) outs(%acc: tensor<4x4xf32>) -> tensor<4x4xf32> + return %result: tensor<4x4xf32> +} + +func.func @matmul_accumulate_8x8xf16_times_8x8xf16_into_8x8xf32(%lhs: tensor<8x8xf16>, %rhs: tensor<8x8xf16>, %acc: tensor<8x8xf32>) -> tensor<8x8xf32> { + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<8x8xf16>, tensor<8x8xf16>) outs(%acc: tensor<8x8xf32>) -> tensor<8x8xf32> + return %result: tensor<8x8xf32> +} + +func.func @matmul_accumulate_9x9xf16_times_9x9xf16_into_9x9xf32(%lhs: tensor<9x9xf16>, %rhs: tensor<9x9xf16>, %acc: tensor<9x9xf32>) -> tensor<9x9xf32> { + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<9x9xf16>, tensor<9x9xf16>) outs(%acc: tensor<9x9xf32>) -> tensor<9x9xf32> + return %result: tensor<9x9xf32> +} + +func.func @matmul_accumulate_6x13xf16_times_3x13xf16_into_6x3xf32(%lhs: tensor<6x13xf16>, %rhs: tensor<3x13xf16>, %acc: tensor<6x3xf32>) -> tensor<6x3xf32> { + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<6x13xf16>, tensor<3x13xf16>) outs(%acc: tensor<6x3xf32>) -> tensor<6x3xf32> + return %result: tensor<6x3xf32> +} + +func.func @matmul_15x37xf16_times_7x37xf16_into_15x7xf32(%lhs: tensor<15x37xf16>, %rhs: 
tensor<7x37xf16>) -> tensor<15x7xf32> { + %init_acc = tensor.empty() : tensor<15x7xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<15x7xf32>) -> tensor<15x7xf32> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<15x37xf16>, tensor<7x37xf16>) outs(%acc: tensor<15x7xf32>) -> tensor<15x7xf32> + return %result: tensor<15x7xf32> +} + +func.func @matmul_accumulate_81x19xf16_times_41x19xf16_into_81x41xf32(%lhs: tensor<81x19xf16>, %rhs: tensor<41x19xf16>, %acc: tensor<81x41xf32>) -> tensor<81x41xf32> { + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<81x19xf16>, tensor<41x19xf16>) outs(%acc: tensor<81x41xf32>) -> tensor<81x41xf32> + return %result: tensor<81x41xf32> +} + +func.func @matmul_accumulate_1x10xf16_times_10x10xf16_into_1x10xf32(%lhs: tensor<1x10xf16>, %rhs: tensor<10x10xf16>, %acc: tensor<1x10xf32>) -> tensor<1x10xf32> { + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<1x10xf16>, tensor<10x10xf16>) outs(%acc: tensor<1x10xf32>) -> tensor<1x10xf32> + return %result: tensor<1x10xf32> +} + +func.func @matmul_1x10xf16_times_10x10xf16_into_1x10xf32(%lhs: tensor<1x10xf16>, %rhs: tensor<10x10xf16>) -> tensor<1x10xf32> { + %init_acc = tensor.empty() : tensor<1x10xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<1x10xf32>) -> tensor<1x10xf32> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<1x10xf16>, tensor<10x10xf16>) outs(%acc: tensor<1x10xf32>) -> tensor<1x10xf32> + return %result: tensor<1x10xf32> +} + +func.func @matmul_accumulate_10x1xf16_times_10x1xf16_into_10x10xf32(%lhs: tensor<10x1xf16>, %rhs: tensor<10x1xf16>, %acc: tensor<10x10xf32>) -> tensor<10x10xf32> { + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<10x1xf16>, tensor<10x1xf16>) outs(%acc: tensor<10x10xf32>) -> tensor<10x10xf32> + return %result: tensor<10x10xf32> +} + +func.func @matmul_accumulate_10x10xf16_times_1x10xf16_into_10x1xf32(%lhs: tensor<10x10xf16>, %rhs: tensor<1x10xf16>, %acc: tensor<10x1xf32>) -> tensor<10x1xf32> { + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<10x10xf16>, tensor<1x10xf16>) outs(%acc: tensor<10x1xf32>) -> tensor<10x1xf32> + return %result: tensor<10x1xf32> +} + +func.func @matmul_10x10xf16_times_1x10xf16_into_10x1xf32(%lhs: tensor<10x10xf16>, %rhs: tensor<1x10xf16>) -> tensor<10x1xf32> { + %init_acc = tensor.empty() : tensor<10x1xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<10x1xf32>) -> tensor<10x1xf32> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<10x10xf16>, tensor<1x10xf16>) outs(%acc: tensor<10x1xf32>) -> tensor<10x1xf32> + return %result: tensor<10x1xf32> +} + diff --git a/linalg_ops/matmul/generated/f16_into_f32/matmul_transpose_b_f16_into_f32_small_calls.mlir b/linalg_ops/matmul/generated/f16_into_f32/matmul_transpose_b_f16_into_f32_small_calls.mlir new file mode 100644 index 0000000..27ee342 --- /dev/null +++ b/linalg_ops/matmul/generated/f16_into_f32/matmul_transpose_b_f16_into_f32_small_calls.mlir @@ -0,0 +1,906 @@ +builtin.module @calls attributes { + +} { + +func.func private @matmul_test.generate_random_matrix(%device: !hal.device, %dim0: i64, %dim1: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @matmul_test.check_matmul_results(%device: !hal.device, %m: i64, %k: i64, %n: i64, %transpose_rhs: i32, %lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view, 
%actual_result: !hal.buffer_view) + +func.func private @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_1x1xf16_times_1x1xf16_into_1x1xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_1x1xf16_times_1x1xf16_into_1x1xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_2x2xf16_times_2x2xf16_into_2x2xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_4x4xf16_times_4x4xf16_into_4x4xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_8x8xf16_times_8x8xf16_into_8x8xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_9x9xf16_times_9x9xf16_into_9x9xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_6x13xf16_times_3x13xf16_into_6x3xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_15x37xf16_times_7x37xf16_into_15x7xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_81x19xf16_times_41x19xf16_into_81x41xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_1x10xf16_times_10x10xf16_into_1x10xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_1x10xf16_times_10x10xf16_into_1x10xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_10x1xf16_times_10x1xf16_into_10x10xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_10x10xf16_times_1x10xf16_into_10x1xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_10x10xf16_times_1x10xf16_into_10x1xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view + +func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_1_1_1_acc_0() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 2 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 3 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, 
i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_1x1xf16_times_1x1xf16_into_1x1xf32_1_1_1_acc_1() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 5 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 6 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_1x1xf16_times_1x1xf16_into_1x1xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_1_1_1_2() attributes { + iree.reflection = {description = 
"Matmul shape (MxKxN): 1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 8 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 9 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_1x1xf16_times_1x1xf16_into_1x1xf32_1_1_1_3() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 10 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 11 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_1x1xf16_times_1x1xf16_into_1x1xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_2_2_2_acc_4() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 2x2x2"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 2 : i64 + %lhs_dim1 = arith.constant 2 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 12 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 2 : i64 + %rhs_dim1 = arith.constant 2 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 13 : i32 + %rhs = call 
@matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 2 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 14 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 2 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 14 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 2 : i64 + %k = arith.constant 2 : i64 + %n = arith.constant 2 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_2x2xf16_times_2x2xf16_into_2x2xf32_2_2_2_acc_5() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 2x2x2"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 2 : i64 + %lhs_dim1 = arith.constant 2 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 15 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 2 : i64 + %rhs_dim1 = arith.constant 2 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 16 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 2 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 17 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 2 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 17 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_2x2xf16_times_2x2xf16_into_2x2xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 2 : i64 + %k = arith.constant 2 : i64 + %n = arith.constant 2 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + 
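+// Every test entry point below follows the same generated pattern: select device 0,
+// build deterministically seeded inputs with @matmul_test.generate_random_matrix,
+// call the compiled matmul under test from @module, and verify the returned buffer
+// with @matmul_test.check_matmul_results (transpose_rhs = 1, i.e. the rhs buffer is
+// laid out NxK). The *_acc_* accumulate variants generate the accumulator twice with
+// the same seed so that the checker receives an untouched copy of the input %acc.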
+func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_4_4_4_acc_6() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 4x4x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 4 : i64 + %lhs_dim1 = arith.constant 4 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 18 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 19 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 4 : i64 + %acc_dim1 = arith.constant 4 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 20 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 4 : i64 + %acc_copy_dim1 = arith.constant 4 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 20 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_4x4xf16_times_4x4xf16_into_4x4xf32_4_4_4_acc_7() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 4x4x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 4 : i64 + %lhs_dim1 = arith.constant 4 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 21 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 22 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 4 : i64 + %acc_dim1 = arith.constant 4 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 23 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 4 : i64 + %acc_copy_dim1 = arith.constant 4 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed 
= arith.constant 23 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_4x4xf16_times_4x4xf16_into_4x4xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_8_8_8_acc_8() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 8x8x8"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 8 : i64 + %lhs_dim1 = arith.constant 8 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 24 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 8 : i64 + %rhs_dim1 = arith.constant 8 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 25 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 8 : i64 + %acc_dim1 = arith.constant 8 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 26 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 8 : i64 + %acc_copy_dim1 = arith.constant 8 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 26 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 8 : i64 + %k = arith.constant 8 : i64 + %n = arith.constant 8 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_8x8xf16_times_8x8xf16_into_8x8xf32_8_8_8_acc_9() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 8x8x8"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 8 : i64 + %lhs_dim1 = arith.constant 8 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 27 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 8 : i64 
+ %rhs_dim1 = arith.constant 8 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 28 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 8 : i64 + %acc_dim1 = arith.constant 8 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 29 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 8 : i64 + %acc_copy_dim1 = arith.constant 8 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 29 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_8x8xf16_times_8x8xf16_into_8x8xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 8 : i64 + %k = arith.constant 8 : i64 + %n = arith.constant 8 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_9_9_9_acc_10() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 9x9x9"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 9 : i64 + %lhs_dim1 = arith.constant 9 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 30 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 9 : i64 + %rhs_dim1 = arith.constant 9 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 31 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 9 : i64 + %acc_dim1 = arith.constant 9 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 32 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 9 : i64 + %acc_copy_dim1 = arith.constant 9 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 32 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 9 : i64 + %k = arith.constant 9 : i64 + %n = arith.constant 9 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, 
%acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_9x9xf16_times_9x9xf16_into_9x9xf32_9_9_9_acc_11() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 9x9x9"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 9 : i64 + %lhs_dim1 = arith.constant 9 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 33 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 9 : i64 + %rhs_dim1 = arith.constant 9 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 34 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 9 : i64 + %acc_dim1 = arith.constant 9 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 35 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 9 : i64 + %acc_copy_dim1 = arith.constant 9 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 35 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_9x9xf16_times_9x9xf16_into_9x9xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 9 : i64 + %k = arith.constant 9 : i64 + %n = arith.constant 9 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_6_13_3_acc_12() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 6x13x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 6 : i64 + %lhs_dim1 = arith.constant 13 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 36 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 3 : i64 + %rhs_dim1 = arith.constant 13 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 37 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 6 : i64 + %acc_dim1 = arith.constant 3 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 38 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + 
%acc_copy_dim0 = arith.constant 6 : i64 + %acc_copy_dim1 = arith.constant 3 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 38 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 6 : i64 + %k = arith.constant 13 : i64 + %n = arith.constant 3 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_6x13xf16_times_3x13xf16_into_6x3xf32_6_13_3_acc_13() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 6x13x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 6 : i64 + %lhs_dim1 = arith.constant 13 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 39 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 3 : i64 + %rhs_dim1 = arith.constant 13 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 40 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 6 : i64 + %acc_dim1 = arith.constant 3 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 41 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 6 : i64 + %acc_copy_dim1 = arith.constant 3 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 41 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_6x13xf16_times_3x13xf16_into_6x3xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 6 : i64 + %k = arith.constant 13 : i64 + %n = arith.constant 3 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_15_37_7_14() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 15x37x7"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 15 : i64 + %lhs_dim1 = arith.constant 37 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 42 : i32 + %lhs = call 
@matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 7 : i64 + %rhs_dim1 = arith.constant 37 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 43 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 15 : i64 + %k = arith.constant 37 : i64 + %n = arith.constant 7 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_15x37xf16_times_7x37xf16_into_15x7xf32_15_37_7_15() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 15x37x7"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 15 : i64 + %lhs_dim1 = arith.constant 37 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 44 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 7 : i64 + %rhs_dim1 = arith.constant 37 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 45 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_15x37xf16_times_7x37xf16_into_15x7xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 15 : i64 + %k = arith.constant 37 : i64 + %n = arith.constant 7 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_81_19_41_acc_16() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 81x19x41"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 81 : i64 + %lhs_dim1 = arith.constant 19 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 46 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 41 : i64 + %rhs_dim1 = arith.constant 19 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 47 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 81 : i64 + %acc_dim1 = arith.constant 41 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = 
arith.constant 48 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 81 : i64 + %acc_copy_dim1 = arith.constant 41 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 48 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 81 : i64 + %k = arith.constant 19 : i64 + %n = arith.constant 41 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_81x19xf16_times_41x19xf16_into_81x41xf32_81_19_41_acc_17() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 81x19x41"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 81 : i64 + %lhs_dim1 = arith.constant 19 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 49 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 41 : i64 + %rhs_dim1 = arith.constant 19 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 50 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 81 : i64 + %acc_dim1 = arith.constant 41 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 51 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 81 : i64 + %acc_copy_dim1 = arith.constant 41 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 51 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_81x19xf16_times_41x19xf16_into_81x41xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 81 : i64 + %k = arith.constant 19 : i64 + %n = arith.constant 41 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_1_10_10_acc_18() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get 
%device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 52 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 53 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 10 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 54 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 10 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 54 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_1x10xf16_times_10x10xf16_into_1x10xf32_1_10_10_acc_19() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 55 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 56 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 10 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 57 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 10 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 57 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + 
%result = call @module.matmul_accumulate_1x10xf16_times_10x10xf16_into_1x10xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_1_10_10_20() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 58 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 59 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_1x10xf16_times_10x10xf16_into_1x10xf32_1_10_10_21() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 60 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 61 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_1x10xf16_times_10x10xf16_into_1x10xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func 
@matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_10_1_10_acc_22() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x1x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 62 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 63 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 10 : i64 + %acc_dim1 = arith.constant 10 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 64 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 10 : i64 + %acc_copy_dim1 = arith.constant 10 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 64 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_10x1xf16_times_10x1xf16_into_10x10xf32_10_1_10_acc_23() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x1x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 65 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 66 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 10 : i64 + %acc_dim1 = arith.constant 10 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 67 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 10 : i64 + %acc_copy_dim1 = arith.constant 10 : i64 + %acc_copy_element_type = hal.element_type : i32 
+ %acc_copy_seed = arith.constant 67 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_10x1xf16_times_10x1xf16_into_10x10xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_10_10_1_acc_24() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 68 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 69 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 10 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 70 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 10 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 70 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_10x10xf16_times_1x10xf16_into_10x1xf32_10_10_1_acc_25() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 71 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> 
!hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 72 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 10 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 73 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 10 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 73 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_10x10xf16_times_1x10xf16_into_10x1xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32_10_10_1_26() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 74 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 75 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_10x10xf16_times_1x10xf16_into_10x1xf32_10_10_1_27() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 76 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, 
%lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type<f16> : i32 + %rhs_seed = arith.constant 77 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_10x10xf16_times_1x10xf16_into_10x1xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + + +} diff --git a/linalg_ops/matmul/generated/f32_into_f32/matmul_f32_into_f32_large.mlir b/linalg_ops/matmul/generated/f32_into_f32/matmul_f32_into_f32_large.mlir new file mode 100644 index 0000000..c68c1d1 --- /dev/null +++ b/linalg_ops/matmul/generated/f32_into_f32/matmul_f32_into_f32_large.mlir @@ -0,0 +1,136 @@ +func.func @matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs: tensor<?x?xf32>, %rhs: tensor<?x?xf32>, %acc: tensor<?x?xf32>) -> tensor<?x?xf32> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<?x?xf32>, tensor<?x?xf32>) outs(%acc: tensor<?x?xf32>) -> tensor<?x?xf32> + return %result: tensor<?x?xf32> +} + +func.func @matmul_accumulate_512x128xf32_times_128x512xf32_into_512x512xf32(%lhs: tensor<512x128xf32>, %rhs: tensor<128x512xf32>, %acc: tensor<512x512xf32>) -> tensor<512x512xf32> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<512x128xf32>, tensor<128x512xf32>) outs(%acc: tensor<512x512xf32>) -> tensor<512x512xf32> + return %result: tensor<512x512xf32> +} + +func.func @matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs: tensor<?x?xf32>, %rhs: tensor<?x?xf32>) -> tensor<?x?xf32> { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %acc_dim0 = tensor.dim %lhs, %c0 : tensor<?x?xf32> + %acc_dim1 = tensor.dim %rhs, %c1 : tensor<?x?xf32> + %init_acc = tensor.empty(%acc_dim0, %acc_dim1) : tensor<?x?xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<?x?xf32>) -> tensor<?x?xf32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<?x?xf32>, tensor<?x?xf32>) outs(%acc: tensor<?x?xf32>) -> tensor<?x?xf32> + return %result: tensor<?x?xf32> +} + +func.func @matmul_512x128xf32_times_128x512xf32_into_512x512xf32(%lhs: tensor<512x128xf32>, %rhs: tensor<128x512xf32>) -> tensor<512x512xf32> { + %init_acc = tensor.empty() : tensor<512x512xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<512x512xf32>) -> tensor<512x512xf32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<512x128xf32>, tensor<128x512xf32>) outs(%acc: tensor<512x512xf32>) -> tensor<512x512xf32> + return %result: tensor<512x512xf32> +} + +func.func @matmul_1000x4xf32_times_4x512xf32_into_1000x512xf32(%lhs: tensor<1000x4xf32>, %rhs: tensor<4x512xf32>) -> tensor<1000x512xf32> { + %init_acc = tensor.empty() : tensor<1000x512xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<1000x512xf32>) -> tensor<1000x512xf32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<1000x4xf32>, tensor<4x512xf32>) outs(%acc: tensor<1000x512xf32>) -> tensor<1000x512xf32> + return %result: tensor<1000x512xf32> +} + +func.func
@matmul_4x1000xf32_times_1000x512xf32_into_4x512xf32(%lhs: tensor<4x1000xf32>, %rhs: tensor<1000x512xf32>) -> tensor<4x512xf32> { + %init_acc = tensor.empty() : tensor<4x512xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<4x512xf32>) -> tensor<4x512xf32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<4x1000xf32>, tensor<1000x512xf32>) outs(%acc: tensor<4x512xf32>) -> tensor<4x512xf32> + return %result: tensor<4x512xf32> +} + +func.func @matmul_512x1000xf32_times_1000x4xf32_into_512x4xf32(%lhs: tensor<512x1000xf32>, %rhs: tensor<1000x4xf32>) -> tensor<512x4xf32> { + %init_acc = tensor.empty() : tensor<512x4xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<512x4xf32>) -> tensor<512x4xf32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<512x1000xf32>, tensor<1000x4xf32>) outs(%acc: tensor<512x4xf32>) -> tensor<512x4xf32> + return %result: tensor<512x4xf32> +} + +func.func @matmul_512x128xf32_times_128x500xf32_into_512x500xf32(%lhs: tensor<512x128xf32>, %rhs: tensor<128x500xf32>) -> tensor<512x500xf32> { + %init_acc = tensor.empty() : tensor<512x500xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<512x500xf32>) -> tensor<512x500xf32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<512x128xf32>, tensor<128x500xf32>) outs(%acc: tensor<512x500xf32>) -> tensor<512x500xf32> + return %result: tensor<512x500xf32> +} + +func.func @matmul_457x330xf32_times_330x512xf32_into_457x512xf32(%lhs: tensor<457x330xf32>, %rhs: tensor<330x512xf32>) -> tensor<457x512xf32> { + %init_acc = tensor.empty() : tensor<457x512xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<457x512xf32>) -> tensor<457x512xf32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<457x330xf32>, tensor<330x512xf32>) outs(%acc: tensor<457x512xf32>) -> tensor<457x512xf32> + return %result: tensor<457x512xf32> +} + +func.func @matmul_457x330xf32_times_330x514xf32_into_457x514xf32(%lhs: tensor<457x330xf32>, %rhs: tensor<330x514xf32>) -> tensor<457x514xf32> { + %init_acc = tensor.empty() : tensor<457x514xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<457x514xf32>) -> tensor<457x514xf32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<457x330xf32>, tensor<330x514xf32>) outs(%acc: tensor<457x514xf32>) -> tensor<457x514xf32> + return %result: tensor<457x514xf32> +} + +func.func @matmul_438x330xf32_times_330x514xf32_into_438x514xf32(%lhs: tensor<438x330xf32>, %rhs: tensor<330x514xf32>) -> tensor<438x514xf32> { + %init_acc = tensor.empty() : tensor<438x514xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<438x514xf32>) -> tensor<438x514xf32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<438x330xf32>, tensor<330x514xf32>) outs(%acc: tensor<438x514xf32>) -> tensor<438x514xf32> + return %result: tensor<438x514xf32> +} + +func.func @matmul_540x332xf32_times_332x516xf32_into_540x516xf32(%lhs: tensor<540x332xf32>, %rhs: tensor<332x516xf32>) -> tensor<540x516xf32> { + %init_acc = tensor.empty() : tensor<540x516xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<540x516xf32>) -> tensor<540x516xf32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<540x332xf32>, tensor<332x516xf32>) outs(%acc: 
tensor<540x516xf32>) -> tensor<540x516xf32> + return %result: tensor<540x516xf32> +} + +func.func @matmul_654x321xf32_times_321x234xf32_into_654x234xf32(%lhs: tensor<654x321xf32>, %rhs: tensor<321x234xf32>) -> tensor<654x234xf32> { + %init_acc = tensor.empty() : tensor<654x234xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<654x234xf32>) -> tensor<654x234xf32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<654x321xf32>, tensor<321x234xf32>) outs(%acc: tensor<654x234xf32>) -> tensor<654x234xf32> + return %result: tensor<654x234xf32> +} + +func.func @matmul_457x160xf32_times_160x512xf32_into_457x512xf32(%lhs: tensor<457x160xf32>, %rhs: tensor<160x512xf32>) -> tensor<457x512xf32> { + %init_acc = tensor.empty() : tensor<457x512xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<457x512xf32>) -> tensor<457x512xf32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<457x160xf32>, tensor<160x512xf32>) outs(%acc: tensor<457x512xf32>) -> tensor<457x512xf32> + return %result: tensor<457x512xf32> +} + +func.func @matmul_512x330xf32_times_330x512xf32_into_512x512xf32(%lhs: tensor<512x330xf32>, %rhs: tensor<330x512xf32>) -> tensor<512x512xf32> { + %init_acc = tensor.empty() : tensor<512x512xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<512x512xf32>) -> tensor<512x512xf32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<512x330xf32>, tensor<330x512xf32>) outs(%acc: tensor<512x512xf32>) -> tensor<512x512xf32> + return %result: tensor<512x512xf32> +} + +func.func @matmul_accumulate_1x1000xf32_times_1000x1000xf32_into_1x1000xf32(%lhs: tensor<1x1000xf32>, %rhs: tensor<1000x1000xf32>, %acc: tensor<1x1000xf32>) -> tensor<1x1000xf32> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<1x1000xf32>, tensor<1000x1000xf32>) outs(%acc: tensor<1x1000xf32>) -> tensor<1x1000xf32> + return %result: tensor<1x1000xf32> +} + +func.func @matmul_accumulate_1000x1000xf32_times_1000x1xf32_into_1000x1xf32(%lhs: tensor<1000x1000xf32>, %rhs: tensor<1000x1xf32>, %acc: tensor<1000x1xf32>) -> tensor<1000x1xf32> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<1000x1000xf32>, tensor<1000x1xf32>) outs(%acc: tensor<1000x1xf32>) -> tensor<1000x1xf32> + return %result: tensor<1000x1xf32> +} + +func.func @matmul_1000x1000xf32_times_1000x1xf32_into_1000x1xf32(%lhs: tensor<1000x1000xf32>, %rhs: tensor<1000x1xf32>) -> tensor<1000x1xf32> { + %init_acc = tensor.empty() : tensor<1000x1xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<1000x1xf32>) -> tensor<1000x1xf32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<1000x1000xf32>, tensor<1000x1xf32>) outs(%acc: tensor<1000x1xf32>) -> tensor<1000x1xf32> + return %result: tensor<1000x1xf32> +} + diff --git a/linalg_ops/matmul/generated/f32_into_f32/matmul_f32_into_f32_large_calls.mlir b/linalg_ops/matmul/generated/f32_into_f32/matmul_f32_into_f32_large_calls.mlir new file mode 100644 index 0000000..28136c6 --- /dev/null +++ b/linalg_ops/matmul/generated/f32_into_f32/matmul_f32_into_f32_large_calls.mlir @@ -0,0 +1,882 @@ +builtin.module @calls attributes { + +} { + +func.func private @matmul_test.generate_random_matrix(%device: !hal.device, %dim0: i64, %dim1: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @matmul_test.check_matmul_results(%device: !hal.device, %m: i64, %k: i64, %n: i64, %transpose_rhs: 
i32, %lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) + +func.func private @module.matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_512x128xf32_times_128x512xf32_into_512x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_512x128xf32_times_128x512xf32_into_512x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_1000x4xf32_times_4x512xf32_into_1000x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_4x1000xf32_times_1000x512xf32_into_4x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_512x1000xf32_times_1000x4xf32_into_512x4xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_512x128xf32_times_128x500xf32_into_512x500xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_457x330xf32_times_330x512xf32_into_457x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_457x330xf32_times_330x514xf32_into_457x514xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_438x330xf32_times_330x514xf32_into_438x514xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_540x332xf32_times_332x516xf32_into_540x516xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_654x321xf32_times_321x234xf32_into_654x234xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_457x160xf32_times_160x512xf32_into_457x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_512x330xf32_times_330x512xf32_into_512x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_1x1000xf32_times_1000x1000xf32_into_1x1000xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_1000x1000xf32_times_1000x1xf32_into_1000x1xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_1000x1000xf32_times_1000x1xf32_into_1000x1xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view + +func.func @matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_512_128_512_acc_0() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 2 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + 
%rhs_dim0 = arith.constant 128 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 3 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 512 : i64 + %acc_dim1 = arith.constant 512 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 512 : i64 + %acc_copy_dim1 = arith.constant 512 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_512x128xf32_times_128x512xf32_into_512x512xf32_512_128_512_acc_1() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 5 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 128 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 6 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 512 : i64 + %acc_dim1 = arith.constant 512 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 512 : i64 + %acc_copy_dim1 = arith.constant 512 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_512x128xf32_times_128x512xf32_into_512x512xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + 
call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_512_128_512_2() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 8 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 128 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 9 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_512x128xf32_times_128x512xf32_into_512x512xf32_512_128_512_3() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 10 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 128 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 11 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_512x128xf32_times_128x512xf32_into_512x512xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_1000_4_512_4() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x4x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 4 : i64 + 
%lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 12 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 13 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_1000x4xf32_times_4x512xf32_into_1000x512xf32_1000_4_512_5() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x4x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 4 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 14 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 15 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_1000x4xf32_times_4x512xf32_into_1000x512xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_4_1000_512_6() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 4x1000x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 4 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 16 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 17 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : 
!hal.buffer_view + %result = call @module.matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_4x1000xf32_times_1000x512xf32_into_4x512xf32_4_1000_512_7() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 4x1000x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 4 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 18 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 19 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_4x1000xf32_times_1000x512xf32_into_4x512xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_512_1000_4_8() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x1000x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 20 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 21 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func 
@matmul_512x1000xf32_times_1000x4xf32_into_512x4xf32_512_1000_4_9() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x1000x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 22 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 23 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_512x1000xf32_times_1000x4xf32_into_512x4xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_512_128_500_10() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x500"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 24 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 128 : i64 + %rhs_dim1 = arith.constant 500 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 25 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 500 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_512x128xf32_times_128x500xf32_into_512x500xf32_512_128_500_11() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x500"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 26 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view 
+ %rhs_dim0 = arith.constant 128 : i64 + %rhs_dim1 = arith.constant 500 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 27 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_512x128xf32_times_128x500xf32_into_512x500xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 500 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_457_330_512_12() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x330x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 28 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 29 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_457x330xf32_times_330x512xf32_into_457x512xf32_457_330_512_13() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x330x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 30 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 31 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_457x330xf32_times_330x512xf32_into_457x512xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 330 : i64 
+ %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_457_330_514_14() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x330x514"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 32 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 514 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 33 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 514 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_457x330xf32_times_330x514xf32_into_457x514xf32_457_330_514_15() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x330x514"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 34 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 514 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 35 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_457x330xf32_times_330x514xf32_into_457x514xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 514 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_438_330_514_16() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 438x330x514"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + 
%lhs_dim0 = arith.constant 438 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 36 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 514 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 37 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 438 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 514 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_438x330xf32_times_330x514xf32_into_438x514xf32_438_330_514_17() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 438x330x514"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 438 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 38 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 514 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 39 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_438x330xf32_times_330x514xf32_into_438x514xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 438 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 514 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_540_332_516_18() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 540x332x516"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 540 : i64 + %lhs_dim1 = arith.constant 332 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 40 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 332 : i64 + %rhs_dim1 = arith.constant 516 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 41 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, 
%rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 540 : i64 + %k = arith.constant 332 : i64 + %n = arith.constant 516 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_540x332xf32_times_332x516xf32_into_540x516xf32_540_332_516_19() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 540x332x516"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 540 : i64 + %lhs_dim1 = arith.constant 332 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 42 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 332 : i64 + %rhs_dim1 = arith.constant 516 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 43 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_540x332xf32_times_332x516xf32_into_540x516xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 540 : i64 + %k = arith.constant 332 : i64 + %n = arith.constant 516 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_654_321_234_20() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 654x321x234"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 654 : i64 + %lhs_dim1 = arith.constant 321 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 44 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 321 : i64 + %rhs_dim1 = arith.constant 234 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 45 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 654 : i64 + %k = arith.constant 321 : i64 + %n = arith.constant 234 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, 
!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_654x321xf32_times_321x234xf32_into_654x234xf32_654_321_234_21() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 654x321x234"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 654 : i64 + %lhs_dim1 = arith.constant 321 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 46 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 321 : i64 + %rhs_dim1 = arith.constant 234 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 47 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_654x321xf32_times_321x234xf32_into_654x234xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 654 : i64 + %k = arith.constant 321 : i64 + %n = arith.constant 234 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_457_160_512_22() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x160x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 160 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 48 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 160 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 49 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 160 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_457x160xf32_times_160x512xf32_into_457x512xf32_457_160_512_23() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x160x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 160 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 50 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, 
%lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 160 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 51 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_457x160xf32_times_160x512xf32_into_457x512xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 160 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_512_330_512_24() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x330x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 52 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 53 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_512x330xf32_times_330x512xf32_into_512x512xf32_512_330_512_25() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x330x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 54 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 55 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_512x330xf32_times_330x512xf32_into_512x512xf32(%lhs, %rhs) : (!hal.buffer_view, 
!hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_1_1000_1000_acc_26() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1000x1000"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 56 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 57 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1000 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 58 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1000 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 58 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1000 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_1x1000xf32_times_1000x1000xf32_into_1x1000xf32_1_1000_1000_acc_27() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1000x1000"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 59 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 60 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> 
!hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1000 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 61 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1000 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 61 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_1x1000xf32_times_1000x1000xf32_into_1x1000xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1000 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_1000_1000_1_acc_28() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x1000x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 62 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 63 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1000 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 64 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1000 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 64 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func 
@matmul_accumulate_1000x1000xf32_times_1000x1xf32_into_1000x1xf32_1000_1000_1_acc_29() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x1000x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 65 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 66 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1000 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 67 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1000 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 67 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_1000x1000xf32_times_1000x1xf32_into_1000x1xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_1000_1000_1_30() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x1000x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 68 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 69 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : 
(!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_1000x1000xf32_times_1000x1xf32_into_1000x1xf32_1000_1000_1_31() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x1000x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type<f32> : i32 + %lhs_seed = arith.constant 70 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type<f32> : i32 + %rhs_seed = arith.constant 71 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_1000x1000xf32_times_1000x1xf32_into_1000x1xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + + +} diff --git a/linalg_ops/matmul/generated/f32_into_f32/matmul_f32_into_f32_small.mlir b/linalg_ops/matmul/generated/f32_into_f32/matmul_f32_into_f32_small.mlir new file mode 100644 index 0000000..ff5e347 --- /dev/null +++ b/linalg_ops/matmul/generated/f32_into_f32/matmul_f32_into_f32_small.mlir @@ -0,0 +1,99 @@ +func.func @matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs: tensor<?x?xf32>, %rhs: tensor<?x?xf32>, %acc: tensor<?x?xf32>) -> tensor<?x?xf32> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<?x?xf32>, tensor<?x?xf32>) outs(%acc: tensor<?x?xf32>) -> tensor<?x?xf32> + return %result: tensor<?x?xf32> +} + +func.func @matmul_accumulate_1x1xf32_times_1x1xf32_into_1x1xf32(%lhs: tensor<1x1xf32>, %rhs: tensor<1x1xf32>, %acc: tensor<1x1xf32>) -> tensor<1x1xf32> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<1x1xf32>, tensor<1x1xf32>) outs(%acc: tensor<1x1xf32>) -> tensor<1x1xf32> + return %result: tensor<1x1xf32> +} + +func.func @matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs: tensor<?x?xf32>, %rhs: tensor<?x?xf32>) -> tensor<?x?xf32> { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %acc_dim0 = tensor.dim %lhs, %c0 : tensor<?x?xf32> + %acc_dim1 = tensor.dim %rhs, %c1 : tensor<?x?xf32> + %init_acc = tensor.empty(%acc_dim0, %acc_dim1) : tensor<?x?xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<?x?xf32>) -> tensor<?x?xf32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<?x?xf32>, tensor<?x?xf32>) outs(%acc: tensor<?x?xf32>) -> tensor<?x?xf32> + return %result: tensor<?x?xf32> +} + +func.func @matmul_1x1xf32_times_1x1xf32_into_1x1xf32(%lhs: tensor<1x1xf32>, %rhs: tensor<1x1xf32>) -> tensor<1x1xf32> { + %init_acc = tensor.empty() : tensor<1x1xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<1x1xf32>) -> tensor<1x1xf32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<1x1xf32>, tensor<1x1xf32>) outs(%acc: tensor<1x1xf32>) -> tensor<1x1xf32> + return %result: tensor<1x1xf32> +} + +func.func
@matmul_accumulate_2x2xf32_times_2x2xf32_into_2x2xf32(%lhs: tensor<2x2xf32>, %rhs: tensor<2x2xf32>, %acc: tensor<2x2xf32>) -> tensor<2x2xf32> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<2x2xf32>, tensor<2x2xf32>) outs(%acc: tensor<2x2xf32>) -> tensor<2x2xf32> + return %result: tensor<2x2xf32> +} + +func.func @matmul_accumulate_4x4xf32_times_4x4xf32_into_4x4xf32(%lhs: tensor<4x4xf32>, %rhs: tensor<4x4xf32>, %acc: tensor<4x4xf32>) -> tensor<4x4xf32> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<4x4xf32>, tensor<4x4xf32>) outs(%acc: tensor<4x4xf32>) -> tensor<4x4xf32> + return %result: tensor<4x4xf32> +} + +func.func @matmul_accumulate_8x8xf32_times_8x8xf32_into_8x8xf32(%lhs: tensor<8x8xf32>, %rhs: tensor<8x8xf32>, %acc: tensor<8x8xf32>) -> tensor<8x8xf32> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<8x8xf32>, tensor<8x8xf32>) outs(%acc: tensor<8x8xf32>) -> tensor<8x8xf32> + return %result: tensor<8x8xf32> +} + +func.func @matmul_accumulate_9x9xf32_times_9x9xf32_into_9x9xf32(%lhs: tensor<9x9xf32>, %rhs: tensor<9x9xf32>, %acc: tensor<9x9xf32>) -> tensor<9x9xf32> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<9x9xf32>, tensor<9x9xf32>) outs(%acc: tensor<9x9xf32>) -> tensor<9x9xf32> + return %result: tensor<9x9xf32> +} + +func.func @matmul_accumulate_6x13xf32_times_13x3xf32_into_6x3xf32(%lhs: tensor<6x13xf32>, %rhs: tensor<13x3xf32>, %acc: tensor<6x3xf32>) -> tensor<6x3xf32> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<6x13xf32>, tensor<13x3xf32>) outs(%acc: tensor<6x3xf32>) -> tensor<6x3xf32> + return %result: tensor<6x3xf32> +} + +func.func @matmul_15x37xf32_times_37x7xf32_into_15x7xf32(%lhs: tensor<15x37xf32>, %rhs: tensor<37x7xf32>) -> tensor<15x7xf32> { + %init_acc = tensor.empty() : tensor<15x7xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<15x7xf32>) -> tensor<15x7xf32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<15x37xf32>, tensor<37x7xf32>) outs(%acc: tensor<15x7xf32>) -> tensor<15x7xf32> + return %result: tensor<15x7xf32> +} + +func.func @matmul_accumulate_81x19xf32_times_19x41xf32_into_81x41xf32(%lhs: tensor<81x19xf32>, %rhs: tensor<19x41xf32>, %acc: tensor<81x41xf32>) -> tensor<81x41xf32> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<81x19xf32>, tensor<19x41xf32>) outs(%acc: tensor<81x41xf32>) -> tensor<81x41xf32> + return %result: tensor<81x41xf32> +} + +func.func @matmul_accumulate_1x10xf32_times_10x10xf32_into_1x10xf32(%lhs: tensor<1x10xf32>, %rhs: tensor<10x10xf32>, %acc: tensor<1x10xf32>) -> tensor<1x10xf32> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<1x10xf32>, tensor<10x10xf32>) outs(%acc: tensor<1x10xf32>) -> tensor<1x10xf32> + return %result: tensor<1x10xf32> +} + +func.func @matmul_1x10xf32_times_10x10xf32_into_1x10xf32(%lhs: tensor<1x10xf32>, %rhs: tensor<10x10xf32>) -> tensor<1x10xf32> { + %init_acc = tensor.empty() : tensor<1x10xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<1x10xf32>) -> tensor<1x10xf32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<1x10xf32>, tensor<10x10xf32>) outs(%acc: tensor<1x10xf32>) -> tensor<1x10xf32> + return %result: tensor<1x10xf32> +} + +func.func @matmul_accumulate_10x1xf32_times_1x10xf32_into_10x10xf32(%lhs: tensor<10x1xf32>, %rhs: tensor<1x10xf32>, %acc: tensor<10x10xf32>) -> tensor<10x10xf32> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<10x1xf32>, tensor<1x10xf32>) outs(%acc: tensor<10x10xf32>) -> tensor<10x10xf32> + return %result: tensor<10x10xf32> 
+} + +func.func @matmul_accumulate_10x10xf32_times_10x1xf32_into_10x1xf32(%lhs: tensor<10x10xf32>, %rhs: tensor<10x1xf32>, %acc: tensor<10x1xf32>) -> tensor<10x1xf32> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<10x10xf32>, tensor<10x1xf32>) outs(%acc: tensor<10x1xf32>) -> tensor<10x1xf32> + return %result: tensor<10x1xf32> +} + +func.func @matmul_10x10xf32_times_10x1xf32_into_10x1xf32(%lhs: tensor<10x10xf32>, %rhs: tensor<10x1xf32>) -> tensor<10x1xf32> { + %init_acc = tensor.empty() : tensor<10x1xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<10x1xf32>) -> tensor<10x1xf32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<10x10xf32>, tensor<10x1xf32>) outs(%acc: tensor<10x1xf32>) -> tensor<10x1xf32> + return %result: tensor<10x1xf32> +} + diff --git a/linalg_ops/matmul/generated/f32_into_f32/matmul_f32_into_f32_small_calls.mlir b/linalg_ops/matmul/generated/f32_into_f32/matmul_f32_into_f32_small_calls.mlir new file mode 100644 index 0000000..e764f76 --- /dev/null +++ b/linalg_ops/matmul/generated/f32_into_f32/matmul_f32_into_f32_small_calls.mlir @@ -0,0 +1,906 @@ +builtin.module @calls attributes { + +} { + +func.func private @matmul_test.generate_random_matrix(%device: !hal.device, %dim0: i64, %dim1: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @matmul_test.check_matmul_results(%device: !hal.device, %m: i64, %k: i64, %n: i64, %transpose_rhs: i32, %lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) + +func.func private @module.matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_1x1xf32_times_1x1xf32_into_1x1xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_1x1xf32_times_1x1xf32_into_1x1xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_2x2xf32_times_2x2xf32_into_2x2xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_4x4xf32_times_4x4xf32_into_4x4xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_8x8xf32_times_8x8xf32_into_8x8xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_9x9xf32_times_9x9xf32_into_9x9xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_6x13xf32_times_13x3xf32_into_6x3xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_15x37xf32_times_37x7xf32_into_15x7xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_81x19xf32_times_19x41xf32_into_81x41xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_1x10xf32_times_10x10xf32_into_1x10xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: 
!hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_1x10xf32_times_10x10xf32_into_1x10xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_10x1xf32_times_1x10xf32_into_10x10xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_10x10xf32_times_10x1xf32_into_10x1xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_10x10xf32_times_10x1xf32_into_10x1xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view + +func.func @matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_1_1_1_acc_0() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 2 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 3 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_1x1xf32_times_1x1xf32_into_1x1xf32_1_1_1_acc_1() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 5 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + 
%rhs_seed = arith.constant 6 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_1x1xf32_times_1x1xf32_into_1x1xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_1_1_1_2() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 8 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 9 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_1x1xf32_times_1x1xf32_into_1x1xf32_1_1_1_3() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 10 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1 : i64 + 
%rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 11 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_1x1xf32_times_1x1xf32_into_1x1xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_2_2_2_acc_4() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 2x2x2"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 2 : i64 + %lhs_dim1 = arith.constant 2 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 12 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 2 : i64 + %rhs_dim1 = arith.constant 2 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 13 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 2 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 14 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 2 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 14 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 2 : i64 + %k = arith.constant 2 : i64 + %n = arith.constant 2 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_2x2xf32_times_2x2xf32_into_2x2xf32_2_2_2_acc_5() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 2x2x2"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 2 : i64 + %lhs_dim1 = arith.constant 2 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 15 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + 
%rhs_dim0 = arith.constant 2 : i64 + %rhs_dim1 = arith.constant 2 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 16 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 2 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 17 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 2 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 17 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_2x2xf32_times_2x2xf32_into_2x2xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 2 : i64 + %k = arith.constant 2 : i64 + %n = arith.constant 2 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_4_4_4_acc_6() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 4x4x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 4 : i64 + %lhs_dim1 = arith.constant 4 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 18 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 19 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 4 : i64 + %acc_dim1 = arith.constant 4 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 20 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 4 : i64 + %acc_copy_dim1 = arith.constant 4 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 20 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, 
%n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_4x4xf32_times_4x4xf32_into_4x4xf32_4_4_4_acc_7() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 4x4x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 4 : i64 + %lhs_dim1 = arith.constant 4 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 21 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 22 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 4 : i64 + %acc_dim1 = arith.constant 4 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 23 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 4 : i64 + %acc_copy_dim1 = arith.constant 4 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 23 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_4x4xf32_times_4x4xf32_into_4x4xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_8_8_8_acc_8() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 8x8x8"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 8 : i64 + %lhs_dim1 = arith.constant 8 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 24 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 8 : i64 + %rhs_dim1 = arith.constant 8 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 25 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 8 : i64 + %acc_dim1 = arith.constant 8 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 26 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> 
!hal.buffer_view + %acc_copy_dim0 = arith.constant 8 : i64 + %acc_copy_dim1 = arith.constant 8 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 26 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 8 : i64 + %k = arith.constant 8 : i64 + %n = arith.constant 8 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_8x8xf32_times_8x8xf32_into_8x8xf32_8_8_8_acc_9() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 8x8x8"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 8 : i64 + %lhs_dim1 = arith.constant 8 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 27 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 8 : i64 + %rhs_dim1 = arith.constant 8 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 28 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 8 : i64 + %acc_dim1 = arith.constant 8 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 29 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 8 : i64 + %acc_copy_dim1 = arith.constant 8 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 29 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_8x8xf32_times_8x8xf32_into_8x8xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 8 : i64 + %k = arith.constant 8 : i64 + %n = arith.constant 8 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_9_9_9_acc_10() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 9x9x9"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 9 : i64 + %lhs_dim1 = arith.constant 9 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 30 : i32 + %lhs = call 
@matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 9 : i64 + %rhs_dim1 = arith.constant 9 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 31 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 9 : i64 + %acc_dim1 = arith.constant 9 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 32 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 9 : i64 + %acc_copy_dim1 = arith.constant 9 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 32 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 9 : i64 + %k = arith.constant 9 : i64 + %n = arith.constant 9 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_9x9xf32_times_9x9xf32_into_9x9xf32_9_9_9_acc_11() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 9x9x9"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 9 : i64 + %lhs_dim1 = arith.constant 9 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 33 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 9 : i64 + %rhs_dim1 = arith.constant 9 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 34 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 9 : i64 + %acc_dim1 = arith.constant 9 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 35 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 9 : i64 + %acc_copy_dim1 = arith.constant 9 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 35 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_9x9xf32_times_9x9xf32_into_9x9xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 9 : i64 + %k = 
arith.constant 9 : i64 + %n = arith.constant 9 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_6_13_3_acc_12() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 6x13x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 6 : i64 + %lhs_dim1 = arith.constant 13 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 36 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 13 : i64 + %rhs_dim1 = arith.constant 3 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 37 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 6 : i64 + %acc_dim1 = arith.constant 3 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 38 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 6 : i64 + %acc_copy_dim1 = arith.constant 3 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 38 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 6 : i64 + %k = arith.constant 13 : i64 + %n = arith.constant 3 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_6x13xf32_times_13x3xf32_into_6x3xf32_6_13_3_acc_13() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 6x13x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 6 : i64 + %lhs_dim1 = arith.constant 13 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 39 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 13 : i64 + %rhs_dim1 = arith.constant 3 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 40 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 6 : i64 + %acc_dim1 = arith.constant 3 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = 
arith.constant 41 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 6 : i64 + %acc_copy_dim1 = arith.constant 3 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 41 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_6x13xf32_times_13x3xf32_into_6x3xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 6 : i64 + %k = arith.constant 13 : i64 + %n = arith.constant 3 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_15_37_7_14() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 15x37x7"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 15 : i64 + %lhs_dim1 = arith.constant 37 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 42 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 37 : i64 + %rhs_dim1 = arith.constant 7 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 43 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 15 : i64 + %k = arith.constant 37 : i64 + %n = arith.constant 7 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_15x37xf32_times_37x7xf32_into_15x7xf32_15_37_7_15() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 15x37x7"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 15 : i64 + %lhs_dim1 = arith.constant 37 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 44 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 37 : i64 + %rhs_dim1 = arith.constant 7 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 45 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call 
@module.matmul_15x37xf32_times_37x7xf32_into_15x7xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 15 : i64 + %k = arith.constant 37 : i64 + %n = arith.constant 7 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_81_19_41_acc_16() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 81x19x41"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 81 : i64 + %lhs_dim1 = arith.constant 19 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 46 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 19 : i64 + %rhs_dim1 = arith.constant 41 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 47 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 81 : i64 + %acc_dim1 = arith.constant 41 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 48 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 81 : i64 + %acc_copy_dim1 = arith.constant 41 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 48 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 81 : i64 + %k = arith.constant 19 : i64 + %n = arith.constant 41 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_81x19xf32_times_19x41xf32_into_81x41xf32_81_19_41_acc_17() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 81x19x41"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 81 : i64 + %lhs_dim1 = arith.constant 19 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 49 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 19 : i64 + %rhs_dim1 = arith.constant 41 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 50 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : 
(!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 81 : i64 + %acc_dim1 = arith.constant 41 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 51 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 81 : i64 + %acc_copy_dim1 = arith.constant 41 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 51 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_81x19xf32_times_19x41xf32_into_81x41xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 81 : i64 + %k = arith.constant 19 : i64 + %n = arith.constant 41 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_1_10_10_acc_18() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 52 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 53 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 10 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 54 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 10 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 54 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func 
@matmul_accumulate_1x10xf32_times_10x10xf32_into_1x10xf32_1_10_10_acc_19() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 55 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 56 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 10 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 57 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 10 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 57 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_1x10xf32_times_10x10xf32_into_1x10xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_1_10_10_20() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 58 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 59 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, 
!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_1x10xf32_times_10x10xf32_into_1x10xf32_1_10_10_21() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 60 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 61 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_1x10xf32_times_10x10xf32_into_1x10xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_10_1_10_acc_22() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x1x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 62 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 63 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 10 : i64 + %acc_dim1 = arith.constant 10 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 64 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 10 : i64 + %acc_copy_dim1 = arith.constant 10 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 64 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, 
%transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_10x1xf32_times_1x10xf32_into_10x10xf32_10_1_10_acc_23() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x1x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 65 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 66 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 10 : i64 + %acc_dim1 = arith.constant 10 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 67 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 10 : i64 + %acc_copy_dim1 = arith.constant 10 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 67 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_10x1xf32_times_1x10xf32_into_10x10xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_10_10_1_acc_24() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 68 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 69 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 10 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 70 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, 
i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 10 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 70 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_10x10xf32_times_10x1xf32_into_10x1xf32_10_10_1_acc_25() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 71 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 72 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 10 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 73 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 10 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 73 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_10x10xf32_times_10x1xf32_into_10x1xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_10_10_1_26() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 74 : 
i32
+ %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+ %rhs_dim0 = arith.constant 10 : i64
+ %rhs_dim1 = arith.constant 1 : i64
+ %rhs_element_type = hal.element_type<f32> : i32
+ %rhs_seed = arith.constant 75 : i32
+ %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+ %acc = util.null : !hal.buffer_view
+ %result = call @module.matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view
+ %m = arith.constant 10 : i64
+ %k = arith.constant 10 : i64
+ %n = arith.constant 1 : i64
+ %transpose_rhs = arith.constant 0 : i32
+ call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()
+ return
+}
+
+func.func @matmul_10x10xf32_times_10x1xf32_into_10x1xf32_10_10_1_27() attributes {
+ iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"}
+} {
+ %device_index = arith.constant 0 : index
+ %device = hal.devices.get %device_index : !hal.device
+ %lhs_dim0 = arith.constant 10 : i64
+ %lhs_dim1 = arith.constant 10 : i64
+ %lhs_element_type = hal.element_type<f32> : i32
+ %lhs_seed = arith.constant 76 : i32
+ %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+ %rhs_dim0 = arith.constant 10 : i64
+ %rhs_dim1 = arith.constant 1 : i64
+ %rhs_element_type = hal.element_type<f32> : i32
+ %rhs_seed = arith.constant 77 : i32
+ %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+ %acc = util.null : !hal.buffer_view
+ %result = call @module.matmul_10x10xf32_times_10x1xf32_into_10x1xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view
+ %m = arith.constant 10 : i64
+ %k = arith.constant 10 : i64
+ %n = arith.constant 1 : i64
+ %transpose_rhs = arith.constant 0 : i32
+ call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()
+ return
+}
+
+
+}
diff --git a/linalg_ops/matmul/generated/f32_into_f32/matmul_transpose_b_f32_into_f32_large.mlir b/linalg_ops/matmul/generated/f32_into_f32/matmul_transpose_b_f32_into_f32_large.mlir
new file mode 100644
index 0000000..63c71a7
--- /dev/null
+++ b/linalg_ops/matmul/generated/f32_into_f32/matmul_transpose_b_f32_into_f32_large.mlir
@@ -0,0 +1,136 @@
+func.func @matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs: tensor<?x?xf32>, %rhs: tensor<?x?xf32>, %acc: tensor<?x?xf32>) -> tensor<?x?xf32> {
+ %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<?x?xf32>, tensor<?x?xf32>) outs(%acc: tensor<?x?xf32>) -> tensor<?x?xf32>
+ return %result: tensor<?x?xf32>
+}
+
+func.func @matmul_accumulate_512x128xf32_times_512x128xf32_into_512x512xf32(%lhs: tensor<512x128xf32>, %rhs: tensor<512x128xf32>, %acc: tensor<512x512xf32>) -> tensor<512x512xf32> {
+ %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<512x128xf32>, tensor<512x128xf32>) outs(%acc: tensor<512x512xf32>) -> tensor<512x512xf32>
+ return %result: tensor<512x512xf32>
+}
+
+func.func @matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs: tensor<?x?xf32>, %rhs: tensor<?x?xf32>) -> tensor<?x?xf32> {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %acc_dim0 = tensor.dim %lhs, %c0 : tensor<?x?xf32>
+ %acc_dim1 = tensor.dim %rhs, %c1 : tensor<?x?xf32>
+ %init_acc = tensor.empty(%acc_dim0, %acc_dim1) : tensor<?x?xf32>
+ %c0_acc_type = arith.constant 0.0: f32
+ %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<?x?xf32>) -> tensor<?x?xf32>
+ %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<?x?xf32>, tensor<?x?xf32>) outs(%acc: tensor<?x?xf32>) -> tensor<?x?xf32>
+ return %result: tensor<?x?xf32>
+}
+
+func.func @matmul_512x128xf32_times_512x128xf32_into_512x512xf32(%lhs: tensor<512x128xf32>, %rhs: tensor<512x128xf32>) -> tensor<512x512xf32> {
+ %init_acc = tensor.empty() : tensor<512x512xf32>
+ %c0_acc_type = arith.constant 0.0: f32
+ %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<512x512xf32>) -> tensor<512x512xf32>
+ %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<512x128xf32>, tensor<512x128xf32>) outs(%acc: tensor<512x512xf32>) -> tensor<512x512xf32>
+ return %result: tensor<512x512xf32>
+}
+
+func.func @matmul_1000x4xf32_times_512x4xf32_into_1000x512xf32(%lhs: tensor<1000x4xf32>, %rhs: tensor<512x4xf32>) -> tensor<1000x512xf32> {
+ %init_acc = tensor.empty() : tensor<1000x512xf32>
+ %c0_acc_type = arith.constant 0.0: f32
+ %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<1000x512xf32>) -> tensor<1000x512xf32>
+ %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<1000x4xf32>, tensor<512x4xf32>) outs(%acc: tensor<1000x512xf32>) -> tensor<1000x512xf32>
+ return %result: tensor<1000x512xf32>
+}
+
+func.func @matmul_4x1000xf32_times_512x1000xf32_into_4x512xf32(%lhs: tensor<4x1000xf32>, %rhs: tensor<512x1000xf32>) -> tensor<4x512xf32> {
+ %init_acc = tensor.empty() : tensor<4x512xf32>
+ %c0_acc_type = arith.constant 0.0: f32
+ %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<4x512xf32>) -> tensor<4x512xf32>
+ %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<4x1000xf32>, tensor<512x1000xf32>) outs(%acc: tensor<4x512xf32>) -> tensor<4x512xf32>
+ return %result: tensor<4x512xf32>
+}
+
+func.func @matmul_512x1000xf32_times_4x1000xf32_into_512x4xf32(%lhs: tensor<512x1000xf32>, %rhs: tensor<4x1000xf32>) -> tensor<512x4xf32> {
+ %init_acc = tensor.empty() : tensor<512x4xf32>
+ %c0_acc_type = arith.constant 0.0: f32
+ %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<512x4xf32>) -> tensor<512x4xf32>
+ %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<512x1000xf32>, tensor<4x1000xf32>) outs(%acc: tensor<512x4xf32>) -> tensor<512x4xf32>
+ return %result: tensor<512x4xf32>
+}
+
+func.func @matmul_512x128xf32_times_500x128xf32_into_512x500xf32(%lhs: tensor<512x128xf32>, %rhs: tensor<500x128xf32>) -> tensor<512x500xf32> {
+ %init_acc = tensor.empty() : tensor<512x500xf32>
+ %c0_acc_type = arith.constant 0.0: f32
+ %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<512x500xf32>) -> tensor<512x500xf32>
+ %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<512x128xf32>, tensor<500x128xf32>) outs(%acc: tensor<512x500xf32>) -> tensor<512x500xf32>
+ return %result: tensor<512x500xf32>
+}
+
+func.func @matmul_457x330xf32_times_512x330xf32_into_457x512xf32(%lhs: tensor<457x330xf32>, %rhs: tensor<512x330xf32>) -> tensor<457x512xf32> {
+ %init_acc = tensor.empty() : tensor<457x512xf32>
+ %c0_acc_type = arith.constant 0.0: f32
+ %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<457x512xf32>) -> tensor<457x512xf32>
+
%result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<457x330xf32>, tensor<512x330xf32>) outs(%acc: tensor<457x512xf32>) -> tensor<457x512xf32> + return %result: tensor<457x512xf32> +} + +func.func @matmul_457x330xf32_times_514x330xf32_into_457x514xf32(%lhs: tensor<457x330xf32>, %rhs: tensor<514x330xf32>) -> tensor<457x514xf32> { + %init_acc = tensor.empty() : tensor<457x514xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<457x514xf32>) -> tensor<457x514xf32> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<457x330xf32>, tensor<514x330xf32>) outs(%acc: tensor<457x514xf32>) -> tensor<457x514xf32> + return %result: tensor<457x514xf32> +} + +func.func @matmul_438x330xf32_times_514x330xf32_into_438x514xf32(%lhs: tensor<438x330xf32>, %rhs: tensor<514x330xf32>) -> tensor<438x514xf32> { + %init_acc = tensor.empty() : tensor<438x514xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<438x514xf32>) -> tensor<438x514xf32> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<438x330xf32>, tensor<514x330xf32>) outs(%acc: tensor<438x514xf32>) -> tensor<438x514xf32> + return %result: tensor<438x514xf32> +} + +func.func @matmul_540x332xf32_times_516x332xf32_into_540x516xf32(%lhs: tensor<540x332xf32>, %rhs: tensor<516x332xf32>) -> tensor<540x516xf32> { + %init_acc = tensor.empty() : tensor<540x516xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<540x516xf32>) -> tensor<540x516xf32> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<540x332xf32>, tensor<516x332xf32>) outs(%acc: tensor<540x516xf32>) -> tensor<540x516xf32> + return %result: tensor<540x516xf32> +} + +func.func @matmul_654x321xf32_times_234x321xf32_into_654x234xf32(%lhs: tensor<654x321xf32>, %rhs: tensor<234x321xf32>) -> tensor<654x234xf32> { + %init_acc = tensor.empty() : tensor<654x234xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<654x234xf32>) -> tensor<654x234xf32> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<654x321xf32>, tensor<234x321xf32>) outs(%acc: tensor<654x234xf32>) -> tensor<654x234xf32> + return %result: tensor<654x234xf32> +} + +func.func @matmul_457x160xf32_times_512x160xf32_into_457x512xf32(%lhs: tensor<457x160xf32>, %rhs: tensor<512x160xf32>) -> tensor<457x512xf32> { + %init_acc = tensor.empty() : tensor<457x512xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<457x512xf32>) -> tensor<457x512xf32> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<457x160xf32>, tensor<512x160xf32>) outs(%acc: tensor<457x512xf32>) -> tensor<457x512xf32> + return %result: tensor<457x512xf32> +} + +func.func @matmul_512x330xf32_times_512x330xf32_into_512x512xf32(%lhs: tensor<512x330xf32>, %rhs: tensor<512x330xf32>) -> tensor<512x512xf32> { + %init_acc = tensor.empty() : tensor<512x512xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<512x512xf32>) -> tensor<512x512xf32> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<512x330xf32>, tensor<512x330xf32>) outs(%acc: tensor<512x512xf32>) -> tensor<512x512xf32> + return %result: tensor<512x512xf32> +} + +func.func @matmul_accumulate_1x1000xf32_times_1000x1000xf32_into_1x1000xf32(%lhs: tensor<1x1000xf32>, %rhs: tensor<1000x1000xf32>, %acc: 
tensor<1x1000xf32>) -> tensor<1x1000xf32> { + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<1x1000xf32>, tensor<1000x1000xf32>) outs(%acc: tensor<1x1000xf32>) -> tensor<1x1000xf32> + return %result: tensor<1x1000xf32> +} + +func.func @matmul_accumulate_1000x1000xf32_times_1x1000xf32_into_1000x1xf32(%lhs: tensor<1000x1000xf32>, %rhs: tensor<1x1000xf32>, %acc: tensor<1000x1xf32>) -> tensor<1000x1xf32> { + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<1000x1000xf32>, tensor<1x1000xf32>) outs(%acc: tensor<1000x1xf32>) -> tensor<1000x1xf32> + return %result: tensor<1000x1xf32> +} + +func.func @matmul_1000x1000xf32_times_1x1000xf32_into_1000x1xf32(%lhs: tensor<1000x1000xf32>, %rhs: tensor<1x1000xf32>) -> tensor<1000x1xf32> { + %init_acc = tensor.empty() : tensor<1000x1xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<1000x1xf32>) -> tensor<1000x1xf32> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<1000x1000xf32>, tensor<1x1000xf32>) outs(%acc: tensor<1000x1xf32>) -> tensor<1000x1xf32> + return %result: tensor<1000x1xf32> +} + diff --git a/linalg_ops/matmul/generated/f32_into_f32/matmul_transpose_b_f32_into_f32_large_calls.mlir b/linalg_ops/matmul/generated/f32_into_f32/matmul_transpose_b_f32_into_f32_large_calls.mlir new file mode 100644 index 0000000..1ecf2c6 --- /dev/null +++ b/linalg_ops/matmul/generated/f32_into_f32/matmul_transpose_b_f32_into_f32_large_calls.mlir @@ -0,0 +1,882 @@ +builtin.module @calls attributes { + +} { + +func.func private @matmul_test.generate_random_matrix(%device: !hal.device, %dim0: i64, %dim1: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @matmul_test.check_matmul_results(%device: !hal.device, %m: i64, %k: i64, %n: i64, %transpose_rhs: i32, %lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) + +func.func private @module.matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_512x128xf32_times_512x128xf32_into_512x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_512x128xf32_times_512x128xf32_into_512x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_1000x4xf32_times_512x4xf32_into_1000x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_4x1000xf32_times_512x1000xf32_into_4x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_512x1000xf32_times_4x1000xf32_into_512x4xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_512x128xf32_times_500x128xf32_into_512x500xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_457x330xf32_times_512x330xf32_into_457x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_457x330xf32_times_514x330xf32_into_457x514xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private 
@module.matmul_438x330xf32_times_514x330xf32_into_438x514xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_540x332xf32_times_516x332xf32_into_540x516xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_654x321xf32_times_234x321xf32_into_654x234xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_457x160xf32_times_512x160xf32_into_457x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_512x330xf32_times_512x330xf32_into_512x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_1x1000xf32_times_1000x1000xf32_into_1x1000xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_1000x1000xf32_times_1x1000xf32_into_1000x1xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_1000x1000xf32_times_1x1000xf32_into_1000x1xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view + +func.func @matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_512_128_512_acc_0() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 2 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 128 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 3 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 512 : i64 + %acc_dim1 = arith.constant 512 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 512 : i64 + %acc_copy_dim1 = arith.constant 512 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_512x128xf32_times_512x128xf32_into_512x512xf32_512_128_512_acc_1() attributes { 
+ iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 5 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 128 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 6 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 512 : i64 + %acc_dim1 = arith.constant 512 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 512 : i64 + %acc_copy_dim1 = arith.constant 512 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_512x128xf32_times_512x128xf32_into_512x512xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_512_128_512_2() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 8 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 128 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 9 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) 
-> () + return +} + +func.func @matmul_512x128xf32_times_512x128xf32_into_512x512xf32_512_128_512_3() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 10 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 128 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 11 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_512x128xf32_times_512x128xf32_into_512x512xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_1000_4_512_4() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x4x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 4 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 12 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 13 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_1000x4xf32_times_512x4xf32_into_1000x512xf32_1000_4_512_5() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x4x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 4 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 14 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, 
i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 15 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_1000x4xf32_times_512x4xf32_into_1000x512xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_4_1000_512_6() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 4x1000x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 4 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 16 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 17 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_4x1000xf32_times_512x1000xf32_into_4x512xf32_4_1000_512_7() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 4x1000x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 4 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 18 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 19 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_4x1000xf32_times_512x1000xf32_into_4x512xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 
1000 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_512_1000_4_8() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x1000x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 20 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 21 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_512x1000xf32_times_4x1000xf32_into_512x4xf32_512_1000_4_9() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x1000x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 22 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 23 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_512x1000xf32_times_4x1000xf32_into_512x4xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_512_128_500_10() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x500"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + 
%lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 24 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 500 : i64 + %rhs_dim1 = arith.constant 128 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 25 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 500 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_512x128xf32_times_500x128xf32_into_512x500xf32_512_128_500_11() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x500"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 26 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 500 : i64 + %rhs_dim1 = arith.constant 128 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 27 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_512x128xf32_times_500x128xf32_into_512x500xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 500 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_457_330_512_12() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x330x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 28 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 29 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, 
%rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_457x330xf32_times_512x330xf32_into_457x512xf32_457_330_512_13() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x330x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 30 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 31 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_457x330xf32_times_512x330xf32_into_457x512xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_457_330_514_14() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x330x514"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 32 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 514 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 33 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 514 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, 
!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_457x330xf32_times_514x330xf32_into_457x514xf32_457_330_514_15() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x330x514"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 34 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 514 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 35 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_457x330xf32_times_514x330xf32_into_457x514xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 514 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_438_330_514_16() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 438x330x514"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 438 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 36 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 514 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 37 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 438 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 514 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_438x330xf32_times_514x330xf32_into_438x514xf32_438_330_514_17() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 438x330x514"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 438 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 38 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, 
%lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 514 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 39 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_438x330xf32_times_514x330xf32_into_438x514xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 438 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 514 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_540_332_516_18() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 540x332x516"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 540 : i64 + %lhs_dim1 = arith.constant 332 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 40 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 516 : i64 + %rhs_dim1 = arith.constant 332 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 41 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 540 : i64 + %k = arith.constant 332 : i64 + %n = arith.constant 516 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_540x332xf32_times_516x332xf32_into_540x516xf32_540_332_516_19() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 540x332x516"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 540 : i64 + %lhs_dim1 = arith.constant 332 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 42 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 516 : i64 + %rhs_dim1 = arith.constant 332 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 43 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_540x332xf32_times_516x332xf32_into_540x516xf32(%lhs, %rhs) : (!hal.buffer_view, 
!hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 540 : i64 + %k = arith.constant 332 : i64 + %n = arith.constant 516 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_654_321_234_20() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 654x321x234"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 654 : i64 + %lhs_dim1 = arith.constant 321 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 44 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 234 : i64 + %rhs_dim1 = arith.constant 321 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 45 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 654 : i64 + %k = arith.constant 321 : i64 + %n = arith.constant 234 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_654x321xf32_times_234x321xf32_into_654x234xf32_654_321_234_21() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 654x321x234"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 654 : i64 + %lhs_dim1 = arith.constant 321 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 46 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 234 : i64 + %rhs_dim1 = arith.constant 321 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 47 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_654x321xf32_times_234x321xf32_into_654x234xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 654 : i64 + %k = arith.constant 321 : i64 + %n = arith.constant 234 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_457_160_512_22() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x160x512"} +} 
{ + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 160 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 48 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 160 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 49 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 160 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_457x160xf32_times_512x160xf32_into_457x512xf32_457_160_512_23() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x160x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 160 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 50 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 160 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 51 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_457x160xf32_times_512x160xf32_into_457x512xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 160 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_512_330_512_24() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x330x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 52 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 
53 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_512x330xf32_times_512x330xf32_into_512x512xf32_512_330_512_25() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x330x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 54 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 55 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_512x330xf32_times_512x330xf32_into_512x512xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_1_1000_1000_acc_26() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1000x1000"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 56 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 57 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1000 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 58 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = 
arith.constant 1000 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 58 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1000 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_1x1000xf32_times_1000x1000xf32_into_1x1000xf32_1_1000_1000_acc_27() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1000x1000"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 59 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 60 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1000 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 61 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1000 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 61 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_1x1000xf32_times_1000x1000xf32_into_1x1000xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1000 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_1000_1000_1_acc_28() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x1000x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 62 : i32 + %lhs = call 
@matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 63 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1000 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 64 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1000 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 64 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_1000x1000xf32_times_1x1000xf32_into_1000x1xf32_1000_1000_1_acc_29() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x1000x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 65 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 66 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1000 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 67 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1000 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 67 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_1000x1000xf32_times_1x1000xf32_into_1000x1xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) 
-> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_1000_1000_1_30() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x1000x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 68 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 69 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_1000x1000xf32_times_1x1000xf32_into_1000x1xf32_1000_1000_1_31() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x1000x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 70 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 71 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_1000x1000xf32_times_1x1000xf32_into_1000x1xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + + +} diff --git a/linalg_ops/matmul/generated/f32_into_f32/matmul_transpose_b_f32_into_f32_small.mlir b/linalg_ops/matmul/generated/f32_into_f32/matmul_transpose_b_f32_into_f32_small.mlir 
new file mode 100644
index 0000000..4a31253
--- /dev/null
+++ b/linalg_ops/matmul/generated/f32_into_f32/matmul_transpose_b_f32_into_f32_small.mlir
@@ -0,0 +1,99 @@
+func.func @matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs: tensor<?x?xf32>, %rhs: tensor<?x?xf32>, %acc: tensor<?x?xf32>) -> tensor<?x?xf32> {
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<?x?xf32>, tensor<?x?xf32>) outs(%acc: tensor<?x?xf32>) -> tensor<?x?xf32>
+  return %result: tensor<?x?xf32>
+}
+
+func.func @matmul_accumulate_1x1xf32_times_1x1xf32_into_1x1xf32(%lhs: tensor<1x1xf32>, %rhs: tensor<1x1xf32>, %acc: tensor<1x1xf32>) -> tensor<1x1xf32> {
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<1x1xf32>, tensor<1x1xf32>) outs(%acc: tensor<1x1xf32>) -> tensor<1x1xf32>
+  return %result: tensor<1x1xf32>
+}
+
+func.func @matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs: tensor<?x?xf32>, %rhs: tensor<?x?xf32>) -> tensor<?x?xf32> {
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+  %acc_dim0 = tensor.dim %lhs, %c0 : tensor<?x?xf32>
+  %acc_dim1 = tensor.dim %rhs, %c1 : tensor<?x?xf32>
+  %init_acc = tensor.empty(%acc_dim0, %acc_dim1) : tensor<?x?xf32>
+  %c0_acc_type = arith.constant 0.0: f32
+  %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<?x?xf32>) -> tensor<?x?xf32>
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<?x?xf32>, tensor<?x?xf32>) outs(%acc: tensor<?x?xf32>) -> tensor<?x?xf32>
+  return %result: tensor<?x?xf32>
+}
+
+func.func @matmul_1x1xf32_times_1x1xf32_into_1x1xf32(%lhs: tensor<1x1xf32>, %rhs: tensor<1x1xf32>) -> tensor<1x1xf32> {
+  %init_acc = tensor.empty() : tensor<1x1xf32>
+  %c0_acc_type = arith.constant 0.0: f32
+  %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<1x1xf32>) -> tensor<1x1xf32>
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<1x1xf32>, tensor<1x1xf32>) outs(%acc: tensor<1x1xf32>) -> tensor<1x1xf32>
+  return %result: tensor<1x1xf32>
+}
+
+func.func @matmul_accumulate_2x2xf32_times_2x2xf32_into_2x2xf32(%lhs: tensor<2x2xf32>, %rhs: tensor<2x2xf32>, %acc: tensor<2x2xf32>) -> tensor<2x2xf32> {
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<2x2xf32>, tensor<2x2xf32>) outs(%acc: tensor<2x2xf32>) -> tensor<2x2xf32>
+  return %result: tensor<2x2xf32>
+}
+
+func.func @matmul_accumulate_4x4xf32_times_4x4xf32_into_4x4xf32(%lhs: tensor<4x4xf32>, %rhs: tensor<4x4xf32>, %acc: tensor<4x4xf32>) -> tensor<4x4xf32> {
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<4x4xf32>, tensor<4x4xf32>) outs(%acc: tensor<4x4xf32>) -> tensor<4x4xf32>
+  return %result: tensor<4x4xf32>
+}
+
+func.func @matmul_accumulate_8x8xf32_times_8x8xf32_into_8x8xf32(%lhs: tensor<8x8xf32>, %rhs: tensor<8x8xf32>, %acc: tensor<8x8xf32>) -> tensor<8x8xf32> {
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<8x8xf32>, tensor<8x8xf32>) outs(%acc: tensor<8x8xf32>) -> tensor<8x8xf32>
+  return %result: tensor<8x8xf32>
+}
+
+func.func @matmul_accumulate_9x9xf32_times_9x9xf32_into_9x9xf32(%lhs: tensor<9x9xf32>, %rhs: tensor<9x9xf32>, %acc: tensor<9x9xf32>) -> tensor<9x9xf32> {
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<9x9xf32>, tensor<9x9xf32>) outs(%acc: tensor<9x9xf32>) -> tensor<9x9xf32>
+  return %result: tensor<9x9xf32>
+}
+
+func.func @matmul_accumulate_6x13xf32_times_3x13xf32_into_6x3xf32(%lhs: tensor<6x13xf32>, %rhs: tensor<3x13xf32>, %acc: tensor<6x3xf32>) -> tensor<6x3xf32> {
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<6x13xf32>, tensor<3x13xf32>) outs(%acc: tensor<6x3xf32>) -> tensor<6x3xf32>
+  return %result: tensor<6x3xf32>
+}
+
+func.func @matmul_15x37xf32_times_7x37xf32_into_15x7xf32(%lhs: tensor<15x37xf32>, %rhs: tensor<7x37xf32>) -> tensor<15x7xf32> {
+  %init_acc = tensor.empty() : tensor<15x7xf32>
+  %c0_acc_type = arith.constant 0.0: f32
+  %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<15x7xf32>) -> tensor<15x7xf32>
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<15x37xf32>, tensor<7x37xf32>) outs(%acc: tensor<15x7xf32>) -> tensor<15x7xf32>
+  return %result: tensor<15x7xf32>
+}
+
+func.func @matmul_accumulate_81x19xf32_times_41x19xf32_into_81x41xf32(%lhs: tensor<81x19xf32>, %rhs: tensor<41x19xf32>, %acc: tensor<81x41xf32>) -> tensor<81x41xf32> {
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<81x19xf32>, tensor<41x19xf32>) outs(%acc: tensor<81x41xf32>) -> tensor<81x41xf32>
+  return %result: tensor<81x41xf32>
+}
+
+func.func @matmul_accumulate_1x10xf32_times_10x10xf32_into_1x10xf32(%lhs: tensor<1x10xf32>, %rhs: tensor<10x10xf32>, %acc: tensor<1x10xf32>) -> tensor<1x10xf32> {
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<1x10xf32>, tensor<10x10xf32>) outs(%acc: tensor<1x10xf32>) -> tensor<1x10xf32>
+  return %result: tensor<1x10xf32>
+}
+
+func.func @matmul_1x10xf32_times_10x10xf32_into_1x10xf32(%lhs: tensor<1x10xf32>, %rhs: tensor<10x10xf32>) -> tensor<1x10xf32> {
+  %init_acc = tensor.empty() : tensor<1x10xf32>
+  %c0_acc_type = arith.constant 0.0: f32
+  %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<1x10xf32>) -> tensor<1x10xf32>
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<1x10xf32>, tensor<10x10xf32>) outs(%acc: tensor<1x10xf32>) -> tensor<1x10xf32>
+  return %result: tensor<1x10xf32>
+}
+
+func.func @matmul_accumulate_10x1xf32_times_10x1xf32_into_10x10xf32(%lhs: tensor<10x1xf32>, %rhs: tensor<10x1xf32>, %acc: tensor<10x10xf32>) -> tensor<10x10xf32> {
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<10x1xf32>, tensor<10x1xf32>) outs(%acc: tensor<10x10xf32>) -> tensor<10x10xf32>
+  return %result: tensor<10x10xf32>
+}
+
+func.func @matmul_accumulate_10x10xf32_times_1x10xf32_into_10x1xf32(%lhs: tensor<10x10xf32>, %rhs: tensor<1x10xf32>, %acc: tensor<10x1xf32>) -> tensor<10x1xf32> {
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<10x10xf32>, tensor<1x10xf32>) outs(%acc: tensor<10x1xf32>) -> tensor<10x1xf32>
+  return %result: tensor<10x1xf32>
+}
+
+func.func @matmul_10x10xf32_times_1x10xf32_into_10x1xf32(%lhs: tensor<10x10xf32>, %rhs: tensor<1x10xf32>) -> tensor<10x1xf32> {
+  %init_acc = tensor.empty() : tensor<10x1xf32>
+  %c0_acc_type = arith.constant 0.0: f32
+  %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<10x1xf32>) -> tensor<10x1xf32>
+  %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<10x10xf32>, tensor<1x10xf32>) outs(%acc: tensor<10x1xf32>) -> tensor<10x1xf32>
+  return %result: tensor<10x1xf32>
+}
+
diff --git a/linalg_ops/matmul/generated/f32_into_f32/matmul_transpose_b_f32_into_f32_small_calls.mlir b/linalg_ops/matmul/generated/f32_into_f32/matmul_transpose_b_f32_into_f32_small_calls.mlir
new file mode 100644
index 0000000..d10f372
--- /dev/null
+++ b/linalg_ops/matmul/generated/f32_into_f32/matmul_transpose_b_f32_into_f32_small_calls.mlir
@@ -0,0 +1,906 @@
+builtin.module @calls attributes {
+
+} {
+
+func.func private @matmul_test.generate_random_matrix(%device: !hal.device, %dim0: i64, %dim1: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view
+func.func private @matmul_test.check_matmul_results(%device: !hal.device, %m: i64, %k: i64, %n: i64, %transpose_rhs: i32, %lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view,
%actual_result: !hal.buffer_view) + +func.func private @module.matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_1x1xf32_times_1x1xf32_into_1x1xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_1x1xf32_times_1x1xf32_into_1x1xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_2x2xf32_times_2x2xf32_into_2x2xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_4x4xf32_times_4x4xf32_into_4x4xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_8x8xf32_times_8x8xf32_into_8x8xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_9x9xf32_times_9x9xf32_into_9x9xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_6x13xf32_times_3x13xf32_into_6x3xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_15x37xf32_times_7x37xf32_into_15x7xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_81x19xf32_times_41x19xf32_into_81x41xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_1x10xf32_times_10x10xf32_into_1x10xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_1x10xf32_times_10x10xf32_into_1x10xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_10x1xf32_times_10x1xf32_into_10x10xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_10x10xf32_times_1x10xf32_into_10x1xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_10x10xf32_times_1x10xf32_into_10x1xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view + +func.func @matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_1_1_1_acc_0() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 2 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 3 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, 
i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_1x1xf32_times_1x1xf32_into_1x1xf32_1_1_1_acc_1() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 5 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 6 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_1x1xf32_times_1x1xf32_into_1x1xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_1_1_1_2() attributes { + iree.reflection = {description = 
"Matmul shape (MxKxN): 1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 8 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 9 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_1x1xf32_times_1x1xf32_into_1x1xf32_1_1_1_3() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 10 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 11 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_1x1xf32_times_1x1xf32_into_1x1xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_2_2_2_acc_4() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 2x2x2"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 2 : i64 + %lhs_dim1 = arith.constant 2 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 12 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 2 : i64 + %rhs_dim1 = arith.constant 2 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 13 : i32 + %rhs = call 
@matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 2 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 14 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 2 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 14 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 2 : i64 + %k = arith.constant 2 : i64 + %n = arith.constant 2 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_2x2xf32_times_2x2xf32_into_2x2xf32_2_2_2_acc_5() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 2x2x2"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 2 : i64 + %lhs_dim1 = arith.constant 2 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 15 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 2 : i64 + %rhs_dim1 = arith.constant 2 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 16 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 2 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 17 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 2 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 17 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_2x2xf32_times_2x2xf32_into_2x2xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 2 : i64 + %k = arith.constant 2 : i64 + %n = arith.constant 2 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + 
+func.func @matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_4_4_4_acc_6() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 4x4x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 4 : i64 + %lhs_dim1 = arith.constant 4 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 18 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 19 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 4 : i64 + %acc_dim1 = arith.constant 4 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 20 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 4 : i64 + %acc_copy_dim1 = arith.constant 4 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 20 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_4x4xf32_times_4x4xf32_into_4x4xf32_4_4_4_acc_7() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 4x4x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 4 : i64 + %lhs_dim1 = arith.constant 4 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 21 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 22 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 4 : i64 + %acc_dim1 = arith.constant 4 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 23 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 4 : i64 + %acc_copy_dim1 = arith.constant 4 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed 
= arith.constant 23 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_4x4xf32_times_4x4xf32_into_4x4xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_8_8_8_acc_8() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 8x8x8"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 8 : i64 + %lhs_dim1 = arith.constant 8 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 24 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 8 : i64 + %rhs_dim1 = arith.constant 8 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 25 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 8 : i64 + %acc_dim1 = arith.constant 8 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 26 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 8 : i64 + %acc_copy_dim1 = arith.constant 8 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 26 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 8 : i64 + %k = arith.constant 8 : i64 + %n = arith.constant 8 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_8x8xf32_times_8x8xf32_into_8x8xf32_8_8_8_acc_9() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 8x8x8"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 8 : i64 + %lhs_dim1 = arith.constant 8 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 27 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 8 : i64 
+ %rhs_dim1 = arith.constant 8 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 28 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 8 : i64 + %acc_dim1 = arith.constant 8 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 29 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 8 : i64 + %acc_copy_dim1 = arith.constant 8 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 29 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_8x8xf32_times_8x8xf32_into_8x8xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 8 : i64 + %k = arith.constant 8 : i64 + %n = arith.constant 8 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_9_9_9_acc_10() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 9x9x9"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 9 : i64 + %lhs_dim1 = arith.constant 9 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 30 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 9 : i64 + %rhs_dim1 = arith.constant 9 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 31 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 9 : i64 + %acc_dim1 = arith.constant 9 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 32 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 9 : i64 + %acc_copy_dim1 = arith.constant 9 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 32 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 9 : i64 + %k = arith.constant 9 : i64 + %n = arith.constant 9 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, 
%acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_9x9xf32_times_9x9xf32_into_9x9xf32_9_9_9_acc_11() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 9x9x9"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 9 : i64 + %lhs_dim1 = arith.constant 9 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 33 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 9 : i64 + %rhs_dim1 = arith.constant 9 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 34 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 9 : i64 + %acc_dim1 = arith.constant 9 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 35 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 9 : i64 + %acc_copy_dim1 = arith.constant 9 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 35 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_9x9xf32_times_9x9xf32_into_9x9xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 9 : i64 + %k = arith.constant 9 : i64 + %n = arith.constant 9 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_6_13_3_acc_12() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 6x13x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 6 : i64 + %lhs_dim1 = arith.constant 13 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 36 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 3 : i64 + %rhs_dim1 = arith.constant 13 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 37 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 6 : i64 + %acc_dim1 = arith.constant 3 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 38 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + 
%acc_copy_dim0 = arith.constant 6 : i64 + %acc_copy_dim1 = arith.constant 3 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 38 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 6 : i64 + %k = arith.constant 13 : i64 + %n = arith.constant 3 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_6x13xf32_times_3x13xf32_into_6x3xf32_6_13_3_acc_13() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 6x13x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 6 : i64 + %lhs_dim1 = arith.constant 13 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 39 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 3 : i64 + %rhs_dim1 = arith.constant 13 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 40 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 6 : i64 + %acc_dim1 = arith.constant 3 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 41 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 6 : i64 + %acc_copy_dim1 = arith.constant 3 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 41 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_6x13xf32_times_3x13xf32_into_6x3xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 6 : i64 + %k = arith.constant 13 : i64 + %n = arith.constant 3 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_15_37_7_14() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 15x37x7"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 15 : i64 + %lhs_dim1 = arith.constant 37 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 42 : i32 + %lhs = call 
@matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 7 : i64 + %rhs_dim1 = arith.constant 37 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 43 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 15 : i64 + %k = arith.constant 37 : i64 + %n = arith.constant 7 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_15x37xf32_times_7x37xf32_into_15x7xf32_15_37_7_15() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 15x37x7"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 15 : i64 + %lhs_dim1 = arith.constant 37 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 44 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 7 : i64 + %rhs_dim1 = arith.constant 37 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 45 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_15x37xf32_times_7x37xf32_into_15x7xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 15 : i64 + %k = arith.constant 37 : i64 + %n = arith.constant 7 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_81_19_41_acc_16() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 81x19x41"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 81 : i64 + %lhs_dim1 = arith.constant 19 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 46 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 41 : i64 + %rhs_dim1 = arith.constant 19 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 47 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 81 : i64 + %acc_dim1 = arith.constant 41 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = 
arith.constant 48 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 81 : i64 + %acc_copy_dim1 = arith.constant 41 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 48 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 81 : i64 + %k = arith.constant 19 : i64 + %n = arith.constant 41 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_81x19xf32_times_41x19xf32_into_81x41xf32_81_19_41_acc_17() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 81x19x41"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 81 : i64 + %lhs_dim1 = arith.constant 19 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 49 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 41 : i64 + %rhs_dim1 = arith.constant 19 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 50 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 81 : i64 + %acc_dim1 = arith.constant 41 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 51 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 81 : i64 + %acc_copy_dim1 = arith.constant 41 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 51 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_81x19xf32_times_41x19xf32_into_81x41xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 81 : i64 + %k = arith.constant 19 : i64 + %n = arith.constant 41 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_1_10_10_acc_18() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get 
%device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 52 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 53 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 10 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 54 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 10 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 54 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_1x10xf32_times_10x10xf32_into_1x10xf32_1_10_10_acc_19() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 55 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 56 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 10 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 57 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 10 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 57 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + 
%result = call @module.matmul_accumulate_1x10xf32_times_10x10xf32_into_1x10xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_1_10_10_20() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 58 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 59 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_1x10xf32_times_10x10xf32_into_1x10xf32_1_10_10_21() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 60 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 61 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_1x10xf32_times_10x10xf32_into_1x10xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func 
@matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_10_1_10_acc_22() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x1x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 62 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 63 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 10 : i64 + %acc_dim1 = arith.constant 10 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 64 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 10 : i64 + %acc_copy_dim1 = arith.constant 10 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 64 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_10x1xf32_times_10x1xf32_into_10x10xf32_10_1_10_acc_23() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x1x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 65 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 66 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 10 : i64 + %acc_dim1 = arith.constant 10 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 67 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 10 : i64 + %acc_copy_dim1 = arith.constant 10 : i64 + %acc_copy_element_type = hal.element_type : i32 
+ %acc_copy_seed = arith.constant 67 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_10x1xf32_times_10x1xf32_into_10x10xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_10_10_1_acc_24() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 68 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 69 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 10 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 70 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 10 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 70 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_10x10xf32_times_1x10xf32_into_10x1xf32_10_10_1_acc_25() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 71 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> 
!hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 72 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 10 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 73 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 10 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 73 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_10x10xf32_times_1x10xf32_into_10x1xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32_10_10_1_26() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 74 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 75 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf32_times_DYNxDYNxf32_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_10x10xf32_times_1x10xf32_into_10x1xf32_10_10_1_27() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 76 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, 
%lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type<f32> : i32 + %rhs_seed = arith.constant 77 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_10x10xf32_times_1x10xf32_into_10x1xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + + +} diff --git a/linalg_ops/matmul/generated/f8E4M3FNUZ_into_f32/matmul_f8E4M3FNUZ_into_f32_large.mlir b/linalg_ops/matmul/generated/f8E4M3FNUZ_into_f32/matmul_f8E4M3FNUZ_into_f32_large.mlir new file mode 100644 index 0000000..5e8b633 --- /dev/null +++ b/linalg_ops/matmul/generated/f8E4M3FNUZ_into_f32/matmul_f8E4M3FNUZ_into_f32_large.mlir @@ -0,0 +1,172 @@ +func.func @matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs: tensor<?x?xf32>, %rhs: tensor<?x?xf32>, %acc: tensor<?x?xf32>) -> tensor<?x?xf32> { + %lhs_casted = arith.truncf %lhs: tensor<?x?xf32> to tensor<?x?xf8E4M3FNUZ> + %rhs_casted = arith.truncf %rhs: tensor<?x?xf32> to tensor<?x?xf8E4M3FNUZ> + %result = linalg.matmul ins(%lhs_casted, %rhs_casted: tensor<?x?xf8E4M3FNUZ>, tensor<?x?xf8E4M3FNUZ>) outs(%acc: tensor<?x?xf32>) -> tensor<?x?xf32> + return %result: tensor<?x?xf32> +} + +func.func @matmul_accumulate_512x128xf8E4M3FNUZ_times_128x512xf8E4M3FNUZ_into_512x512xf32(%lhs: tensor<512x128xf32>, %rhs: tensor<128x512xf32>, %acc: tensor<512x512xf32>) -> tensor<512x512xf32> { + %lhs_casted = arith.truncf %lhs: tensor<512x128xf32> to tensor<512x128xf8E4M3FNUZ> + %rhs_casted = arith.truncf %rhs: tensor<128x512xf32> to tensor<128x512xf8E4M3FNUZ> + %result = linalg.matmul ins(%lhs_casted, %rhs_casted: tensor<512x128xf8E4M3FNUZ>, tensor<128x512xf8E4M3FNUZ>) outs(%acc: tensor<512x512xf32>) -> tensor<512x512xf32> + return %result: tensor<512x512xf32> +} + +func.func @matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs: tensor<?x?xf32>, %rhs: tensor<?x?xf32>) -> tensor<?x?xf32> { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %acc_dim0 = tensor.dim %lhs, %c0 : tensor<?x?xf32> + %acc_dim1 = tensor.dim %rhs, %c1 : tensor<?x?xf32> + %init_acc = tensor.empty(%acc_dim0, %acc_dim1) : tensor<?x?xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<?x?xf32>) -> tensor<?x?xf32> + %lhs_casted = arith.truncf %lhs: tensor<?x?xf32> to tensor<?x?xf8E4M3FNUZ> + %rhs_casted = arith.truncf %rhs: tensor<?x?xf32> to tensor<?x?xf8E4M3FNUZ> + %result = linalg.matmul ins(%lhs_casted, %rhs_casted: tensor<?x?xf8E4M3FNUZ>, tensor<?x?xf8E4M3FNUZ>) outs(%acc: tensor<?x?xf32>) -> tensor<?x?xf32> + return %result: tensor<?x?xf32> +} + +func.func @matmul_512x128xf8E4M3FNUZ_times_128x512xf8E4M3FNUZ_into_512x512xf32(%lhs: tensor<512x128xf32>, %rhs: tensor<128x512xf32>) -> tensor<512x512xf32> { + %init_acc = tensor.empty() : tensor<512x512xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<512x512xf32>) -> tensor<512x512xf32> + %lhs_casted = arith.truncf %lhs: tensor<512x128xf32> to tensor<512x128xf8E4M3FNUZ> + %rhs_casted = arith.truncf %rhs: tensor<128x512xf32> to tensor<128x512xf8E4M3FNUZ> + %result = linalg.matmul ins(%lhs_casted, %rhs_casted:
tensor<512x128xf8E4M3FNUZ>, tensor<128x512xf8E4M3FNUZ>) outs(%acc: tensor<512x512xf32>) -> tensor<512x512xf32> + return %result: tensor<512x512xf32> +} + +func.func @matmul_1000x4xf8E4M3FNUZ_times_4x512xf8E4M3FNUZ_into_1000x512xf32(%lhs: tensor<1000x4xf32>, %rhs: tensor<4x512xf32>) -> tensor<1000x512xf32> { + %init_acc = tensor.empty() : tensor<1000x512xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<1000x512xf32>) -> tensor<1000x512xf32> + %lhs_casted = arith.truncf %lhs: tensor<1000x4xf32> to tensor<1000x4xf8E4M3FNUZ> + %rhs_casted = arith.truncf %rhs: tensor<4x512xf32> to tensor<4x512xf8E4M3FNUZ> + %result = linalg.matmul ins(%lhs_casted, %rhs_casted: tensor<1000x4xf8E4M3FNUZ>, tensor<4x512xf8E4M3FNUZ>) outs(%acc: tensor<1000x512xf32>) -> tensor<1000x512xf32> + return %result: tensor<1000x512xf32> +} + +func.func @matmul_4x1000xf8E4M3FNUZ_times_1000x512xf8E4M3FNUZ_into_4x512xf32(%lhs: tensor<4x1000xf32>, %rhs: tensor<1000x512xf32>) -> tensor<4x512xf32> { + %init_acc = tensor.empty() : tensor<4x512xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<4x512xf32>) -> tensor<4x512xf32> + %lhs_casted = arith.truncf %lhs: tensor<4x1000xf32> to tensor<4x1000xf8E4M3FNUZ> + %rhs_casted = arith.truncf %rhs: tensor<1000x512xf32> to tensor<1000x512xf8E4M3FNUZ> + %result = linalg.matmul ins(%lhs_casted, %rhs_casted: tensor<4x1000xf8E4M3FNUZ>, tensor<1000x512xf8E4M3FNUZ>) outs(%acc: tensor<4x512xf32>) -> tensor<4x512xf32> + return %result: tensor<4x512xf32> +} + +func.func @matmul_512x1000xf8E4M3FNUZ_times_1000x4xf8E4M3FNUZ_into_512x4xf32(%lhs: tensor<512x1000xf32>, %rhs: tensor<1000x4xf32>) -> tensor<512x4xf32> { + %init_acc = tensor.empty() : tensor<512x4xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<512x4xf32>) -> tensor<512x4xf32> + %lhs_casted = arith.truncf %lhs: tensor<512x1000xf32> to tensor<512x1000xf8E4M3FNUZ> + %rhs_casted = arith.truncf %rhs: tensor<1000x4xf32> to tensor<1000x4xf8E4M3FNUZ> + %result = linalg.matmul ins(%lhs_casted, %rhs_casted: tensor<512x1000xf8E4M3FNUZ>, tensor<1000x4xf8E4M3FNUZ>) outs(%acc: tensor<512x4xf32>) -> tensor<512x4xf32> + return %result: tensor<512x4xf32> +} + +func.func @matmul_512x128xf8E4M3FNUZ_times_128x500xf8E4M3FNUZ_into_512x500xf32(%lhs: tensor<512x128xf32>, %rhs: tensor<128x500xf32>) -> tensor<512x500xf32> { + %init_acc = tensor.empty() : tensor<512x500xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<512x500xf32>) -> tensor<512x500xf32> + %lhs_casted = arith.truncf %lhs: tensor<512x128xf32> to tensor<512x128xf8E4M3FNUZ> + %rhs_casted = arith.truncf %rhs: tensor<128x500xf32> to tensor<128x500xf8E4M3FNUZ> + %result = linalg.matmul ins(%lhs_casted, %rhs_casted: tensor<512x128xf8E4M3FNUZ>, tensor<128x500xf8E4M3FNUZ>) outs(%acc: tensor<512x500xf32>) -> tensor<512x500xf32> + return %result: tensor<512x500xf32> +} + +func.func @matmul_457x330xf8E4M3FNUZ_times_330x512xf8E4M3FNUZ_into_457x512xf32(%lhs: tensor<457x330xf32>, %rhs: tensor<330x512xf32>) -> tensor<457x512xf32> { + %init_acc = tensor.empty() : tensor<457x512xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<457x512xf32>) -> tensor<457x512xf32> + %lhs_casted = arith.truncf %lhs: tensor<457x330xf32> to tensor<457x330xf8E4M3FNUZ> + %rhs_casted = arith.truncf %rhs: 
tensor<330x512xf32> to tensor<330x512xf8E4M3FNUZ> + %result = linalg.matmul ins(%lhs_casted, %rhs_casted: tensor<457x330xf8E4M3FNUZ>, tensor<330x512xf8E4M3FNUZ>) outs(%acc: tensor<457x512xf32>) -> tensor<457x512xf32> + return %result: tensor<457x512xf32> +} + +func.func @matmul_457x330xf8E4M3FNUZ_times_330x514xf8E4M3FNUZ_into_457x514xf32(%lhs: tensor<457x330xf32>, %rhs: tensor<330x514xf32>) -> tensor<457x514xf32> { + %init_acc = tensor.empty() : tensor<457x514xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<457x514xf32>) -> tensor<457x514xf32> + %lhs_casted = arith.truncf %lhs: tensor<457x330xf32> to tensor<457x330xf8E4M3FNUZ> + %rhs_casted = arith.truncf %rhs: tensor<330x514xf32> to tensor<330x514xf8E4M3FNUZ> + %result = linalg.matmul ins(%lhs_casted, %rhs_casted: tensor<457x330xf8E4M3FNUZ>, tensor<330x514xf8E4M3FNUZ>) outs(%acc: tensor<457x514xf32>) -> tensor<457x514xf32> + return %result: tensor<457x514xf32> +} + +func.func @matmul_438x330xf8E4M3FNUZ_times_330x514xf8E4M3FNUZ_into_438x514xf32(%lhs: tensor<438x330xf32>, %rhs: tensor<330x514xf32>) -> tensor<438x514xf32> { + %init_acc = tensor.empty() : tensor<438x514xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<438x514xf32>) -> tensor<438x514xf32> + %lhs_casted = arith.truncf %lhs: tensor<438x330xf32> to tensor<438x330xf8E4M3FNUZ> + %rhs_casted = arith.truncf %rhs: tensor<330x514xf32> to tensor<330x514xf8E4M3FNUZ> + %result = linalg.matmul ins(%lhs_casted, %rhs_casted: tensor<438x330xf8E4M3FNUZ>, tensor<330x514xf8E4M3FNUZ>) outs(%acc: tensor<438x514xf32>) -> tensor<438x514xf32> + return %result: tensor<438x514xf32> +} + +func.func @matmul_540x332xf8E4M3FNUZ_times_332x516xf8E4M3FNUZ_into_540x516xf32(%lhs: tensor<540x332xf32>, %rhs: tensor<332x516xf32>) -> tensor<540x516xf32> { + %init_acc = tensor.empty() : tensor<540x516xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<540x516xf32>) -> tensor<540x516xf32> + %lhs_casted = arith.truncf %lhs: tensor<540x332xf32> to tensor<540x332xf8E4M3FNUZ> + %rhs_casted = arith.truncf %rhs: tensor<332x516xf32> to tensor<332x516xf8E4M3FNUZ> + %result = linalg.matmul ins(%lhs_casted, %rhs_casted: tensor<540x332xf8E4M3FNUZ>, tensor<332x516xf8E4M3FNUZ>) outs(%acc: tensor<540x516xf32>) -> tensor<540x516xf32> + return %result: tensor<540x516xf32> +} + +func.func @matmul_654x321xf8E4M3FNUZ_times_321x234xf8E4M3FNUZ_into_654x234xf32(%lhs: tensor<654x321xf32>, %rhs: tensor<321x234xf32>) -> tensor<654x234xf32> { + %init_acc = tensor.empty() : tensor<654x234xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<654x234xf32>) -> tensor<654x234xf32> + %lhs_casted = arith.truncf %lhs: tensor<654x321xf32> to tensor<654x321xf8E4M3FNUZ> + %rhs_casted = arith.truncf %rhs: tensor<321x234xf32> to tensor<321x234xf8E4M3FNUZ> + %result = linalg.matmul ins(%lhs_casted, %rhs_casted: tensor<654x321xf8E4M3FNUZ>, tensor<321x234xf8E4M3FNUZ>) outs(%acc: tensor<654x234xf32>) -> tensor<654x234xf32> + return %result: tensor<654x234xf32> +} + +func.func @matmul_457x160xf8E4M3FNUZ_times_160x512xf8E4M3FNUZ_into_457x512xf32(%lhs: tensor<457x160xf32>, %rhs: tensor<160x512xf32>) -> tensor<457x512xf32> { + %init_acc = tensor.empty() : tensor<457x512xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<457x512xf32>) -> 
tensor<457x512xf32> + %lhs_casted = arith.truncf %lhs: tensor<457x160xf32> to tensor<457x160xf8E4M3FNUZ> + %rhs_casted = arith.truncf %rhs: tensor<160x512xf32> to tensor<160x512xf8E4M3FNUZ> + %result = linalg.matmul ins(%lhs_casted, %rhs_casted: tensor<457x160xf8E4M3FNUZ>, tensor<160x512xf8E4M3FNUZ>) outs(%acc: tensor<457x512xf32>) -> tensor<457x512xf32> + return %result: tensor<457x512xf32> +} + +func.func @matmul_512x330xf8E4M3FNUZ_times_330x512xf8E4M3FNUZ_into_512x512xf32(%lhs: tensor<512x330xf32>, %rhs: tensor<330x512xf32>) -> tensor<512x512xf32> { + %init_acc = tensor.empty() : tensor<512x512xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<512x512xf32>) -> tensor<512x512xf32> + %lhs_casted = arith.truncf %lhs: tensor<512x330xf32> to tensor<512x330xf8E4M3FNUZ> + %rhs_casted = arith.truncf %rhs: tensor<330x512xf32> to tensor<330x512xf8E4M3FNUZ> + %result = linalg.matmul ins(%lhs_casted, %rhs_casted: tensor<512x330xf8E4M3FNUZ>, tensor<330x512xf8E4M3FNUZ>) outs(%acc: tensor<512x512xf32>) -> tensor<512x512xf32> + return %result: tensor<512x512xf32> +} + +func.func @matmul_accumulate_1x1000xf8E4M3FNUZ_times_1000x1000xf8E4M3FNUZ_into_1x1000xf32(%lhs: tensor<1x1000xf32>, %rhs: tensor<1000x1000xf32>, %acc: tensor<1x1000xf32>) -> tensor<1x1000xf32> { + %lhs_casted = arith.truncf %lhs: tensor<1x1000xf32> to tensor<1x1000xf8E4M3FNUZ> + %rhs_casted = arith.truncf %rhs: tensor<1000x1000xf32> to tensor<1000x1000xf8E4M3FNUZ> + %result = linalg.matmul ins(%lhs_casted, %rhs_casted: tensor<1x1000xf8E4M3FNUZ>, tensor<1000x1000xf8E4M3FNUZ>) outs(%acc: tensor<1x1000xf32>) -> tensor<1x1000xf32> + return %result: tensor<1x1000xf32> +} + +func.func @matmul_accumulate_1000x1000xf8E4M3FNUZ_times_1000x1xf8E4M3FNUZ_into_1000x1xf32(%lhs: tensor<1000x1000xf32>, %rhs: tensor<1000x1xf32>, %acc: tensor<1000x1xf32>) -> tensor<1000x1xf32> { + %lhs_casted = arith.truncf %lhs: tensor<1000x1000xf32> to tensor<1000x1000xf8E4M3FNUZ> + %rhs_casted = arith.truncf %rhs: tensor<1000x1xf32> to tensor<1000x1xf8E4M3FNUZ> + %result = linalg.matmul ins(%lhs_casted, %rhs_casted: tensor<1000x1000xf8E4M3FNUZ>, tensor<1000x1xf8E4M3FNUZ>) outs(%acc: tensor<1000x1xf32>) -> tensor<1000x1xf32> + return %result: tensor<1000x1xf32> +} + +func.func @matmul_1000x1000xf8E4M3FNUZ_times_1000x1xf8E4M3FNUZ_into_1000x1xf32(%lhs: tensor<1000x1000xf32>, %rhs: tensor<1000x1xf32>) -> tensor<1000x1xf32> { + %init_acc = tensor.empty() : tensor<1000x1xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<1000x1xf32>) -> tensor<1000x1xf32> + %lhs_casted = arith.truncf %lhs: tensor<1000x1000xf32> to tensor<1000x1000xf8E4M3FNUZ> + %rhs_casted = arith.truncf %rhs: tensor<1000x1xf32> to tensor<1000x1xf8E4M3FNUZ> + %result = linalg.matmul ins(%lhs_casted, %rhs_casted: tensor<1000x1000xf8E4M3FNUZ>, tensor<1000x1xf8E4M3FNUZ>) outs(%acc: tensor<1000x1xf32>) -> tensor<1000x1xf32> + return %result: tensor<1000x1xf32> +} + diff --git a/linalg_ops/matmul/generated/f8E4M3FNUZ_into_f32/matmul_f8E4M3FNUZ_into_f32_large_calls.mlir b/linalg_ops/matmul/generated/f8E4M3FNUZ_into_f32/matmul_f8E4M3FNUZ_into_f32_large_calls.mlir new file mode 100644 index 0000000..5317e96 --- /dev/null +++ b/linalg_ops/matmul/generated/f8E4M3FNUZ_into_f32/matmul_f8E4M3FNUZ_into_f32_large_calls.mlir @@ -0,0 +1,882 @@ +builtin.module @calls attributes { + +} { + +func.func private @matmul_test.generate_random_matrix(%device: !hal.device, %dim0: i64, %dim1: i64, 
%element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @matmul_test.check_matmul_results(%device: !hal.device, %m: i64, %k: i64, %n: i64, %transpose_rhs: i32, %lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) + +func.func private @module.matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_512x128xf8E4M3FNUZ_times_128x512xf8E4M3FNUZ_into_512x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_512x128xf8E4M3FNUZ_times_128x512xf8E4M3FNUZ_into_512x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_1000x4xf8E4M3FNUZ_times_4x512xf8E4M3FNUZ_into_1000x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_4x1000xf8E4M3FNUZ_times_1000x512xf8E4M3FNUZ_into_4x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_512x1000xf8E4M3FNUZ_times_1000x4xf8E4M3FNUZ_into_512x4xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_512x128xf8E4M3FNUZ_times_128x500xf8E4M3FNUZ_into_512x500xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_457x330xf8E4M3FNUZ_times_330x512xf8E4M3FNUZ_into_457x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_457x330xf8E4M3FNUZ_times_330x514xf8E4M3FNUZ_into_457x514xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_438x330xf8E4M3FNUZ_times_330x514xf8E4M3FNUZ_into_438x514xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_540x332xf8E4M3FNUZ_times_332x516xf8E4M3FNUZ_into_540x516xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_654x321xf8E4M3FNUZ_times_321x234xf8E4M3FNUZ_into_654x234xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_457x160xf8E4M3FNUZ_times_160x512xf8E4M3FNUZ_into_457x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_512x330xf8E4M3FNUZ_times_330x512xf8E4M3FNUZ_into_512x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_1x1000xf8E4M3FNUZ_times_1000x1000xf8E4M3FNUZ_into_1x1000xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_1000x1000xf8E4M3FNUZ_times_1000x1xf8E4M3FNUZ_into_1000x1xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_1000x1000xf8E4M3FNUZ_times_1000x1xf8E4M3FNUZ_into_1000x1xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view + +func.func @matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_512_128_512_acc_0() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"} +} { 
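+ // Descriptive comment: this test case, like those that follow, builds pseudo-random LHS/RHS (and accumulator) buffers from fixed seeds via matmul_test.generate_random_matrix, invokes the compiled matmul under test, and verifies the result with matmul_test.check_matmul_results.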
+ %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 2 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 128 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 3 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 512 : i64 + %acc_dim1 = arith.constant 512 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 512 : i64 + %acc_copy_dim1 = arith.constant 512 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_512x128xf8E4M3FNUZ_times_128x512xf8E4M3FNUZ_into_512x512xf32_512_128_512_acc_1() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 5 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 128 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 6 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 512 : i64 + %acc_dim1 = arith.constant 512 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 512 : i64 + %acc_copy_dim1 = arith.constant 512 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, 
%acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_512x128xf8E4M3FNUZ_times_128x512xf8E4M3FNUZ_into_512x512xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_512_128_512_2() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 8 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 128 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 9 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_512x128xf8E4M3FNUZ_times_128x512xf8E4M3FNUZ_into_512x512xf32_512_128_512_3() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 10 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 128 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 11 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_512x128xf8E4M3FNUZ_times_128x512xf8E4M3FNUZ_into_512x512xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, 
%n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_1000_4_512_4() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x4x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 4 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 12 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 13 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_1000x4xf8E4M3FNUZ_times_4x512xf8E4M3FNUZ_into_1000x512xf32_1000_4_512_5() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x4x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 4 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 14 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 15 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_1000x4xf8E4M3FNUZ_times_4x512xf8E4M3FNUZ_into_1000x512xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_4_1000_512_6() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 4x1000x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 4 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + 
%lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 16 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 17 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_4x1000xf8E4M3FNUZ_times_1000x512xf8E4M3FNUZ_into_4x512xf32_4_1000_512_7() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 4x1000x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 4 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 18 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 19 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_4x1000xf8E4M3FNUZ_times_1000x512xf8E4M3FNUZ_into_4x512xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_512_1000_4_8() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x1000x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 20 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 21 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, 
i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_512x1000xf8E4M3FNUZ_times_1000x4xf8E4M3FNUZ_into_512x4xf32_512_1000_4_9() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x1000x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 22 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 23 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_512x1000xf8E4M3FNUZ_times_1000x4xf8E4M3FNUZ_into_512x4xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_512_128_500_10() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x500"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 24 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 128 : i64 + %rhs_dim1 = arith.constant 500 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 25 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 500 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, 
!hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_512x128xf8E4M3FNUZ_times_128x500xf8E4M3FNUZ_into_512x500xf32_512_128_500_11() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x500"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 26 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 128 : i64 + %rhs_dim1 = arith.constant 500 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 27 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_512x128xf8E4M3FNUZ_times_128x500xf8E4M3FNUZ_into_512x500xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 500 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_457_330_512_12() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x330x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 28 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 29 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_457x330xf8E4M3FNUZ_times_330x512xf8E4M3FNUZ_into_457x512xf32_457_330_512_13() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x330x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = 
arith.constant 30 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 31 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_457x330xf8E4M3FNUZ_times_330x512xf8E4M3FNUZ_into_457x512xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_457_330_514_14() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x330x514"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 32 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 514 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 33 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 514 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_457x330xf8E4M3FNUZ_times_330x514xf8E4M3FNUZ_into_457x514xf32_457_330_514_15() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x330x514"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 34 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 514 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 35 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + 
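// Descriptive comment: the non-accumulating variants pass a null accumulator view below; the checker presumably treats a null %acc as a zero-initialized accumulator, matching the linalg.fill in the generated function under test. +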
%acc = util.null : !hal.buffer_view + %result = call @module.matmul_457x330xf8E4M3FNUZ_times_330x514xf8E4M3FNUZ_into_457x514xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 514 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_438_330_514_16() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 438x330x514"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 438 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 36 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 514 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 37 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 438 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 514 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_438x330xf8E4M3FNUZ_times_330x514xf8E4M3FNUZ_into_438x514xf32_438_330_514_17() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 438x330x514"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 438 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 38 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 514 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 39 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_438x330xf8E4M3FNUZ_times_330x514xf8E4M3FNUZ_into_438x514xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 438 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 514 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, 
!hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_540_332_516_18() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 540x332x516"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 540 : i64 + %lhs_dim1 = arith.constant 332 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 40 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 332 : i64 + %rhs_dim1 = arith.constant 516 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 41 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 540 : i64 + %k = arith.constant 332 : i64 + %n = arith.constant 516 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_540x332xf8E4M3FNUZ_times_332x516xf8E4M3FNUZ_into_540x516xf32_540_332_516_19() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 540x332x516"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 540 : i64 + %lhs_dim1 = arith.constant 332 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 42 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 332 : i64 + %rhs_dim1 = arith.constant 516 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 43 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_540x332xf8E4M3FNUZ_times_332x516xf8E4M3FNUZ_into_540x516xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 540 : i64 + %k = arith.constant 332 : i64 + %n = arith.constant 516 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_654_321_234_20() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 654x321x234"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 654 : i64 + %lhs_dim1 = arith.constant 321 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 44 : i32 + %lhs = call 
@matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 321 : i64 + %rhs_dim1 = arith.constant 234 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 45 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 654 : i64 + %k = arith.constant 321 : i64 + %n = arith.constant 234 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_654x321xf8E4M3FNUZ_times_321x234xf8E4M3FNUZ_into_654x234xf32_654_321_234_21() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 654x321x234"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 654 : i64 + %lhs_dim1 = arith.constant 321 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 46 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 321 : i64 + %rhs_dim1 = arith.constant 234 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 47 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_654x321xf8E4M3FNUZ_times_321x234xf8E4M3FNUZ_into_654x234xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 654 : i64 + %k = arith.constant 321 : i64 + %n = arith.constant 234 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_457_160_512_22() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x160x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 160 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 48 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 160 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 49 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + 
%result = call @module.matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 160 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_457x160xf8E4M3FNUZ_times_160x512xf8E4M3FNUZ_into_457x512xf32_457_160_512_23() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x160x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 160 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 50 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 160 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 51 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_457x160xf8E4M3FNUZ_times_160x512xf8E4M3FNUZ_into_457x512xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 160 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_512_330_512_24() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x330x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 52 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 53 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + 
return +} + +func.func @matmul_512x330xf8E4M3FNUZ_times_330x512xf8E4M3FNUZ_into_512x512xf32_512_330_512_25() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x330x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 54 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 55 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_512x330xf8E4M3FNUZ_times_330x512xf8E4M3FNUZ_into_512x512xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_1_1000_1000_acc_26() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1000x1000"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 56 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 57 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1000 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 58 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1000 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 58 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1000 : i64 + %transpose_rhs = arith.constant 0 : i32 + call 
@matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_1x1000xf8E4M3FNUZ_times_1000x1000xf8E4M3FNUZ_into_1x1000xf32_1_1000_1000_acc_27() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1000x1000"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 59 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 60 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1000 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 61 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1000 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 61 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_1x1000xf8E4M3FNUZ_times_1000x1000xf8E4M3FNUZ_into_1x1000xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1000 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_1000_1000_1_acc_28() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x1000x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 62 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 63 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1000 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = 
arith.constant 64 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1000 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 64 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_1000x1000xf8E4M3FNUZ_times_1000x1xf8E4M3FNUZ_into_1000x1xf32_1000_1000_1_acc_29() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x1000x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 65 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 66 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1000 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 67 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1000 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 67 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_1000x1000xf8E4M3FNUZ_times_1000x1xf8E4M3FNUZ_into_1000x1xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_1000_1000_1_30() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x1000x1"} +} 
{
+  %device_index = arith.constant 0 : index
+  %device = hal.devices.get %device_index : !hal.device
+  %lhs_dim0 = arith.constant 1000 : i64
+  %lhs_dim1 = arith.constant 1000 : i64
+  %lhs_element_type = hal.element_type : i32
+  %lhs_seed = arith.constant 68 : i32
+  %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+  %rhs_dim0 = arith.constant 1000 : i64
+  %rhs_dim1 = arith.constant 1 : i64
+  %rhs_element_type = hal.element_type : i32
+  %rhs_seed = arith.constant 69 : i32
+  %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+  %acc = util.null : !hal.buffer_view
+  %result = call @module.matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view
+  %m = arith.constant 1000 : i64
+  %k = arith.constant 1000 : i64
+  %n = arith.constant 1 : i64
+  %transpose_rhs = arith.constant 0 : i32
+  call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()
+  return
+}
+
+func.func @matmul_1000x1000xf8E4M3FNUZ_times_1000x1xf8E4M3FNUZ_into_1000x1xf32_1000_1000_1_31() attributes {
+  iree.reflection = {description = "Matmul shape (MxKxN): 1000x1000x1"}
+} {
+  %device_index = arith.constant 0 : index
+  %device = hal.devices.get %device_index : !hal.device
+  %lhs_dim0 = arith.constant 1000 : i64
+  %lhs_dim1 = arith.constant 1000 : i64
+  %lhs_element_type = hal.element_type : i32
+  %lhs_seed = arith.constant 70 : i32
+  %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+  %rhs_dim0 = arith.constant 1000 : i64
+  %rhs_dim1 = arith.constant 1 : i64
+  %rhs_element_type = hal.element_type : i32
+  %rhs_seed = arith.constant 71 : i32
+  %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+  %acc = util.null : !hal.buffer_view
+  %result = call @module.matmul_1000x1000xf8E4M3FNUZ_times_1000x1xf8E4M3FNUZ_into_1000x1xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view
+  %m = arith.constant 1000 : i64
+  %k = arith.constant 1000 : i64
+  %n = arith.constant 1 : i64
+  %transpose_rhs = arith.constant 0 : i32
+  call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()
+  return
+}
+
+
+}
diff --git a/linalg_ops/matmul/generated/f8E4M3FNUZ_into_f32/matmul_f8E4M3FNUZ_into_f32_small.mlir b/linalg_ops/matmul/generated/f8E4M3FNUZ_into_f32/matmul_f8E4M3FNUZ_into_f32_small.mlir
new file mode 100644
index 0000000..c5a2caa
--- /dev/null
+++ b/linalg_ops/matmul/generated/f8E4M3FNUZ_into_f32/matmul_f8E4M3FNUZ_into_f32_small.mlir
@@ -0,0 +1,131 @@
+func.func @matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs: tensor<?x?xf32>, %rhs: tensor<?x?xf32>, %acc: tensor<?x?xf32>) -> tensor<?x?xf32> {
+  %lhs_casted = arith.truncf %lhs: tensor<?x?xf32> to tensor<?x?xf8E4M3FNUZ>
+  %rhs_casted = arith.truncf %rhs: tensor<?x?xf32> to tensor<?x?xf8E4M3FNUZ>
+  %result = linalg.matmul ins(%lhs_casted, %rhs_casted: tensor<?x?xf8E4M3FNUZ>, tensor<?x?xf8E4M3FNUZ>) outs(%acc: tensor<?x?xf32>) -> tensor<?x?xf32>
+  return %result: tensor<?x?xf32>
+}
+
+func.func @matmul_accumulate_1x1xf8E4M3FNUZ_times_1x1xf8E4M3FNUZ_into_1x1xf32(%lhs: tensor<1x1xf32>, %rhs: tensor<1x1xf32>, %acc: tensor<1x1xf32>) -> tensor<1x1xf32> {
+  %lhs_casted = arith.truncf %lhs: tensor<1x1xf32> to tensor<1x1xf8E4M3FNUZ>
+  %rhs_casted = arith.truncf %rhs: tensor<1x1xf32> to tensor<1x1xf8E4M3FNUZ>
+  %result = linalg.matmul ins(%lhs_casted, %rhs_casted: tensor<1x1xf8E4M3FNUZ>, tensor<1x1xf8E4M3FNUZ>) outs(%acc: tensor<1x1xf32>) -> tensor<1x1xf32>
+  return %result: tensor<1x1xf32>
+}
+
+func.func @matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs: tensor<?x?xf32>, %rhs: tensor<?x?xf32>) -> tensor<?x?xf32> {
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+  %acc_dim0 = tensor.dim %lhs, %c0 : tensor<?x?xf32>
+  %acc_dim1 = tensor.dim %rhs, %c1 : tensor<?x?xf32>
+  %init_acc = tensor.empty(%acc_dim0, %acc_dim1) : tensor<?x?xf32>
+  %c0_acc_type = arith.constant 0.0: f32
+  %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<?x?xf32>) -> tensor<?x?xf32>
+  %lhs_casted = arith.truncf %lhs: tensor<?x?xf32> to tensor<?x?xf8E4M3FNUZ>
+  %rhs_casted = arith.truncf %rhs: tensor<?x?xf32> to tensor<?x?xf8E4M3FNUZ>
+  %result = linalg.matmul ins(%lhs_casted, %rhs_casted: tensor<?x?xf8E4M3FNUZ>, tensor<?x?xf8E4M3FNUZ>) outs(%acc: tensor<?x?xf32>) -> tensor<?x?xf32>
+  return %result: tensor<?x?xf32>
+}
+
+func.func @matmul_1x1xf8E4M3FNUZ_times_1x1xf8E4M3FNUZ_into_1x1xf32(%lhs: tensor<1x1xf32>, %rhs: tensor<1x1xf32>) -> tensor<1x1xf32> {
+  %init_acc = tensor.empty() : tensor<1x1xf32>
+  %c0_acc_type = arith.constant 0.0: f32
+  %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<1x1xf32>) -> tensor<1x1xf32>
+  %lhs_casted = arith.truncf %lhs: tensor<1x1xf32> to tensor<1x1xf8E4M3FNUZ>
+  %rhs_casted = arith.truncf %rhs: tensor<1x1xf32> to tensor<1x1xf8E4M3FNUZ>
+  %result = linalg.matmul ins(%lhs_casted, %rhs_casted: tensor<1x1xf8E4M3FNUZ>, tensor<1x1xf8E4M3FNUZ>) outs(%acc: tensor<1x1xf32>) -> tensor<1x1xf32>
+  return %result: tensor<1x1xf32>
+}
+
+func.func @matmul_accumulate_2x2xf8E4M3FNUZ_times_2x2xf8E4M3FNUZ_into_2x2xf32(%lhs: tensor<2x2xf32>, %rhs: tensor<2x2xf32>, %acc: tensor<2x2xf32>) -> tensor<2x2xf32> {
+  %lhs_casted = arith.truncf %lhs: tensor<2x2xf32> to tensor<2x2xf8E4M3FNUZ>
+  %rhs_casted = arith.truncf %rhs: tensor<2x2xf32> to tensor<2x2xf8E4M3FNUZ>
+  %result = linalg.matmul ins(%lhs_casted, %rhs_casted: tensor<2x2xf8E4M3FNUZ>, tensor<2x2xf8E4M3FNUZ>) outs(%acc: tensor<2x2xf32>) -> tensor<2x2xf32>
+  return %result: tensor<2x2xf32>
+}
+
+func.func @matmul_accumulate_4x4xf8E4M3FNUZ_times_4x4xf8E4M3FNUZ_into_4x4xf32(%lhs: tensor<4x4xf32>, %rhs: tensor<4x4xf32>, %acc: tensor<4x4xf32>) -> tensor<4x4xf32> {
+  %lhs_casted = arith.truncf %lhs: tensor<4x4xf32> to tensor<4x4xf8E4M3FNUZ>
+  %rhs_casted = arith.truncf %rhs: tensor<4x4xf32> to tensor<4x4xf8E4M3FNUZ>
+  %result = linalg.matmul ins(%lhs_casted, %rhs_casted: tensor<4x4xf8E4M3FNUZ>, tensor<4x4xf8E4M3FNUZ>) outs(%acc: tensor<4x4xf32>) -> tensor<4x4xf32>
+  return %result: tensor<4x4xf32>
+}
+
+func.func @matmul_accumulate_8x8xf8E4M3FNUZ_times_8x8xf8E4M3FNUZ_into_8x8xf32(%lhs: tensor<8x8xf32>, %rhs: tensor<8x8xf32>, %acc: tensor<8x8xf32>) -> tensor<8x8xf32> {
+  %lhs_casted = arith.truncf %lhs: tensor<8x8xf32> to tensor<8x8xf8E4M3FNUZ>
+  %rhs_casted = arith.truncf %rhs: tensor<8x8xf32> to tensor<8x8xf8E4M3FNUZ>
+  %result = linalg.matmul ins(%lhs_casted, %rhs_casted: tensor<8x8xf8E4M3FNUZ>, tensor<8x8xf8E4M3FNUZ>) outs(%acc: tensor<8x8xf32>) -> tensor<8x8xf32>
+  return %result: tensor<8x8xf32>
+}
+
+func.func @matmul_accumulate_9x9xf8E4M3FNUZ_times_9x9xf8E4M3FNUZ_into_9x9xf32(%lhs: tensor<9x9xf32>, %rhs: tensor<9x9xf32>, %acc: tensor<9x9xf32>) -> tensor<9x9xf32> {
+  %lhs_casted = arith.truncf %lhs: tensor<9x9xf32> to tensor<9x9xf8E4M3FNUZ>
+  %rhs_casted = arith.truncf %rhs: tensor<9x9xf32> to tensor<9x9xf8E4M3FNUZ>
+  %result = linalg.matmul ins(%lhs_casted, %rhs_casted: tensor<9x9xf8E4M3FNUZ>, tensor<9x9xf8E4M3FNUZ>) outs(%acc: tensor<9x9xf32>) -> tensor<9x9xf32>
+  return %result: tensor<9x9xf32>
+}
+
+func.func @matmul_accumulate_6x13xf8E4M3FNUZ_times_13x3xf8E4M3FNUZ_into_6x3xf32(%lhs: tensor<6x13xf32>, %rhs: tensor<13x3xf32>, %acc: tensor<6x3xf32>) -> tensor<6x3xf32> {
+  %lhs_casted = arith.truncf %lhs: tensor<6x13xf32> to tensor<6x13xf8E4M3FNUZ>
+  %rhs_casted = arith.truncf %rhs: tensor<13x3xf32> to tensor<13x3xf8E4M3FNUZ>
+  %result = linalg.matmul ins(%lhs_casted, %rhs_casted: tensor<6x13xf8E4M3FNUZ>, tensor<13x3xf8E4M3FNUZ>) outs(%acc: tensor<6x3xf32>) -> tensor<6x3xf32>
+  return %result: tensor<6x3xf32>
+}
+
+func.func @matmul_15x37xf8E4M3FNUZ_times_37x7xf8E4M3FNUZ_into_15x7xf32(%lhs: tensor<15x37xf32>, %rhs: tensor<37x7xf32>) -> tensor<15x7xf32> {
+  %init_acc = tensor.empty() : tensor<15x7xf32>
+  %c0_acc_type = arith.constant 0.0: f32
+  %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<15x7xf32>) -> tensor<15x7xf32>
+  %lhs_casted = arith.truncf %lhs: tensor<15x37xf32> to tensor<15x37xf8E4M3FNUZ>
+  %rhs_casted = arith.truncf %rhs: tensor<37x7xf32> to tensor<37x7xf8E4M3FNUZ>
+  %result = linalg.matmul ins(%lhs_casted, %rhs_casted: tensor<15x37xf8E4M3FNUZ>, tensor<37x7xf8E4M3FNUZ>) outs(%acc: tensor<15x7xf32>) -> tensor<15x7xf32>
+  return %result: tensor<15x7xf32>
+}
+
+func.func @matmul_accumulate_81x19xf8E4M3FNUZ_times_19x41xf8E4M3FNUZ_into_81x41xf32(%lhs: tensor<81x19xf32>, %rhs: tensor<19x41xf32>, %acc: tensor<81x41xf32>) -> tensor<81x41xf32> {
+  %lhs_casted = arith.truncf %lhs: tensor<81x19xf32> to tensor<81x19xf8E4M3FNUZ>
+  %rhs_casted = arith.truncf %rhs: tensor<19x41xf32> to tensor<19x41xf8E4M3FNUZ>
+  %result = linalg.matmul ins(%lhs_casted, %rhs_casted: tensor<81x19xf8E4M3FNUZ>, tensor<19x41xf8E4M3FNUZ>) outs(%acc: tensor<81x41xf32>) -> tensor<81x41xf32>
+  return %result: tensor<81x41xf32>
+}
+
+func.func @matmul_accumulate_1x10xf8E4M3FNUZ_times_10x10xf8E4M3FNUZ_into_1x10xf32(%lhs: tensor<1x10xf32>, %rhs: tensor<10x10xf32>, %acc: tensor<1x10xf32>) -> tensor<1x10xf32> {
+  %lhs_casted = arith.truncf %lhs: tensor<1x10xf32> to tensor<1x10xf8E4M3FNUZ>
+  %rhs_casted = arith.truncf %rhs: tensor<10x10xf32> to tensor<10x10xf8E4M3FNUZ>
+  %result = linalg.matmul ins(%lhs_casted, %rhs_casted: tensor<1x10xf8E4M3FNUZ>, tensor<10x10xf8E4M3FNUZ>) outs(%acc: tensor<1x10xf32>) -> tensor<1x10xf32>
+  return %result: tensor<1x10xf32>
+}
+
+func.func @matmul_1x10xf8E4M3FNUZ_times_10x10xf8E4M3FNUZ_into_1x10xf32(%lhs: tensor<1x10xf32>, %rhs: tensor<10x10xf32>) -> tensor<1x10xf32> {
+  %init_acc = tensor.empty() : tensor<1x10xf32>
+  %c0_acc_type = arith.constant 0.0: f32
+  %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<1x10xf32>) -> tensor<1x10xf32>
+  %lhs_casted = arith.truncf %lhs: tensor<1x10xf32> to tensor<1x10xf8E4M3FNUZ>
+  %rhs_casted = arith.truncf %rhs: tensor<10x10xf32> to tensor<10x10xf8E4M3FNUZ>
+  %result = linalg.matmul ins(%lhs_casted, %rhs_casted: tensor<1x10xf8E4M3FNUZ>, tensor<10x10xf8E4M3FNUZ>) outs(%acc: tensor<1x10xf32>) -> tensor<1x10xf32>
+  return %result: tensor<1x10xf32>
+}
+
+func.func @matmul_accumulate_10x1xf8E4M3FNUZ_times_1x10xf8E4M3FNUZ_into_10x10xf32(%lhs: tensor<10x1xf32>, %rhs: tensor<1x10xf32>, %acc: tensor<10x10xf32>) -> tensor<10x10xf32> {
+  %lhs_casted = arith.truncf %lhs: tensor<10x1xf32> to tensor<10x1xf8E4M3FNUZ>
+  %rhs_casted = arith.truncf %rhs: tensor<1x10xf32> to tensor<1x10xf8E4M3FNUZ>
+  %result = linalg.matmul ins(%lhs_casted, %rhs_casted: tensor<10x1xf8E4M3FNUZ>, tensor<1x10xf8E4M3FNUZ>) outs(%acc: tensor<10x10xf32>) -> tensor<10x10xf32>
+  return %result: tensor<10x10xf32>
+}
+
+func.func @matmul_accumulate_10x10xf8E4M3FNUZ_times_10x1xf8E4M3FNUZ_into_10x1xf32(%lhs: tensor<10x10xf32>, %rhs: tensor<10x1xf32>, %acc: tensor<10x1xf32>) -> tensor<10x1xf32> {
+  %lhs_casted = arith.truncf %lhs: tensor<10x10xf32> to tensor<10x10xf8E4M3FNUZ>
+  %rhs_casted = arith.truncf %rhs: tensor<10x1xf32> to tensor<10x1xf8E4M3FNUZ>
+  %result = linalg.matmul ins(%lhs_casted, %rhs_casted: tensor<10x10xf8E4M3FNUZ>, tensor<10x1xf8E4M3FNUZ>) outs(%acc: tensor<10x1xf32>) -> tensor<10x1xf32>
+  return %result: tensor<10x1xf32>
+}
+
+func.func @matmul_10x10xf8E4M3FNUZ_times_10x1xf8E4M3FNUZ_into_10x1xf32(%lhs: tensor<10x10xf32>, %rhs: tensor<10x1xf32>) -> tensor<10x1xf32> {
+  %init_acc = tensor.empty() : tensor<10x1xf32>
+  %c0_acc_type = arith.constant 0.0: f32
+  %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<10x1xf32>) -> tensor<10x1xf32>
+  %lhs_casted = arith.truncf %lhs: tensor<10x10xf32> to tensor<10x10xf8E4M3FNUZ>
+  %rhs_casted = arith.truncf %rhs: tensor<10x1xf32> to tensor<10x1xf8E4M3FNUZ>
+  %result = linalg.matmul ins(%lhs_casted, %rhs_casted: tensor<10x10xf8E4M3FNUZ>, tensor<10x1xf8E4M3FNUZ>) outs(%acc: tensor<10x1xf32>) -> tensor<10x1xf32>
+  return %result: tensor<10x1xf32>
+}
+
diff --git a/linalg_ops/matmul/generated/f8E4M3FNUZ_into_f32/matmul_f8E4M3FNUZ_into_f32_small_calls.mlir b/linalg_ops/matmul/generated/f8E4M3FNUZ_into_f32/matmul_f8E4M3FNUZ_into_f32_small_calls.mlir
new file mode 100644
index 0000000..758ec4a
--- /dev/null
+++ b/linalg_ops/matmul/generated/f8E4M3FNUZ_into_f32/matmul_f8E4M3FNUZ_into_f32_small_calls.mlir
@@ -0,0 +1,906 @@
+builtin.module @calls attributes {
+
+} {
+
+func.func private @matmul_test.generate_random_matrix(%device: !hal.device, %dim0: i64, %dim1: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view
+func.func private @matmul_test.check_matmul_results(%device: !hal.device, %m: i64, %k: i64, %n: i64, %transpose_rhs: i32, %lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view)
+
+func.func private @module.matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_accumulate_1x1xf8E4M3FNUZ_times_1x1xf8E4M3FNUZ_into_1x1xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_1x1xf8E4M3FNUZ_times_1x1xf8E4M3FNUZ_into_1x1xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_accumulate_2x2xf8E4M3FNUZ_times_2x2xf8E4M3FNUZ_into_2x2xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_accumulate_4x4xf8E4M3FNUZ_times_4x4xf8E4M3FNUZ_into_4x4xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private 
@module.matmul_accumulate_8x8xf8E4M3FNUZ_times_8x8xf8E4M3FNUZ_into_8x8xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_9x9xf8E4M3FNUZ_times_9x9xf8E4M3FNUZ_into_9x9xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_6x13xf8E4M3FNUZ_times_13x3xf8E4M3FNUZ_into_6x3xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_15x37xf8E4M3FNUZ_times_37x7xf8E4M3FNUZ_into_15x7xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_81x19xf8E4M3FNUZ_times_19x41xf8E4M3FNUZ_into_81x41xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_1x10xf8E4M3FNUZ_times_10x10xf8E4M3FNUZ_into_1x10xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_1x10xf8E4M3FNUZ_times_10x10xf8E4M3FNUZ_into_1x10xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_10x1xf8E4M3FNUZ_times_1x10xf8E4M3FNUZ_into_10x10xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_10x10xf8E4M3FNUZ_times_10x1xf8E4M3FNUZ_into_10x1xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_10x10xf8E4M3FNUZ_times_10x1xf8E4M3FNUZ_into_10x1xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view + +func.func @matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_1_1_1_acc_0() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 2 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 3 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, 
!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_1x1xf8E4M3FNUZ_times_1x1xf8E4M3FNUZ_into_1x1xf32_1_1_1_acc_1() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 5 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 6 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_1x1xf8E4M3FNUZ_times_1x1xf8E4M3FNUZ_into_1x1xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_1_1_1_2() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 8 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 9 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + 
%result = call @module.matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_1x1xf8E4M3FNUZ_times_1x1xf8E4M3FNUZ_into_1x1xf32_1_1_1_3() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 10 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 11 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_1x1xf8E4M3FNUZ_times_1x1xf8E4M3FNUZ_into_1x1xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_2_2_2_acc_4() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 2x2x2"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 2 : i64 + %lhs_dim1 = arith.constant 2 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 12 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 2 : i64 + %rhs_dim1 = arith.constant 2 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 13 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 2 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 14 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 2 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 14 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, 
%acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 2 : i64 + %k = arith.constant 2 : i64 + %n = arith.constant 2 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_2x2xf8E4M3FNUZ_times_2x2xf8E4M3FNUZ_into_2x2xf32_2_2_2_acc_5() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 2x2x2"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 2 : i64 + %lhs_dim1 = arith.constant 2 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 15 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 2 : i64 + %rhs_dim1 = arith.constant 2 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 16 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 2 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 17 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 2 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 17 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_2x2xf8E4M3FNUZ_times_2x2xf8E4M3FNUZ_into_2x2xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 2 : i64 + %k = arith.constant 2 : i64 + %n = arith.constant 2 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_4_4_4_acc_6() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 4x4x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 4 : i64 + %lhs_dim1 = arith.constant 4 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 18 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : i32 + 
%rhs_seed = arith.constant 19 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 4 : i64 + %acc_dim1 = arith.constant 4 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 20 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 4 : i64 + %acc_copy_dim1 = arith.constant 4 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 20 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_4x4xf8E4M3FNUZ_times_4x4xf8E4M3FNUZ_into_4x4xf32_4_4_4_acc_7() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 4x4x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 4 : i64 + %lhs_dim1 = arith.constant 4 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 21 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 22 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 4 : i64 + %acc_dim1 = arith.constant 4 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 23 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 4 : i64 + %acc_copy_dim1 = arith.constant 4 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 23 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_4x4xf8E4M3FNUZ_times_4x4xf8E4M3FNUZ_into_4x4xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, 
!hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_8_8_8_acc_8() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 8x8x8"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 8 : i64 + %lhs_dim1 = arith.constant 8 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 24 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 8 : i64 + %rhs_dim1 = arith.constant 8 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 25 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 8 : i64 + %acc_dim1 = arith.constant 8 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 26 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 8 : i64 + %acc_copy_dim1 = arith.constant 8 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 26 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 8 : i64 + %k = arith.constant 8 : i64 + %n = arith.constant 8 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_8x8xf8E4M3FNUZ_times_8x8xf8E4M3FNUZ_into_8x8xf32_8_8_8_acc_9() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 8x8x8"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 8 : i64 + %lhs_dim1 = arith.constant 8 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 27 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 8 : i64 + %rhs_dim1 = arith.constant 8 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 28 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 8 : i64 + %acc_dim1 = arith.constant 8 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 29 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 
= arith.constant 8 : i64 + %acc_copy_dim1 = arith.constant 8 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 29 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_8x8xf8E4M3FNUZ_times_8x8xf8E4M3FNUZ_into_8x8xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 8 : i64 + %k = arith.constant 8 : i64 + %n = arith.constant 8 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_9_9_9_acc_10() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 9x9x9"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 9 : i64 + %lhs_dim1 = arith.constant 9 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 30 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 9 : i64 + %rhs_dim1 = arith.constant 9 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 31 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 9 : i64 + %acc_dim1 = arith.constant 9 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 32 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 9 : i64 + %acc_copy_dim1 = arith.constant 9 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 32 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 9 : i64 + %k = arith.constant 9 : i64 + %n = arith.constant 9 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_9x9xf8E4M3FNUZ_times_9x9xf8E4M3FNUZ_into_9x9xf32_9_9_9_acc_11() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 9x9x9"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 9 : i64 + %lhs_dim1 = arith.constant 9 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 33 : i32 + %lhs = 
call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 9 : i64 + %rhs_dim1 = arith.constant 9 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 34 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 9 : i64 + %acc_dim1 = arith.constant 9 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 35 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 9 : i64 + %acc_copy_dim1 = arith.constant 9 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 35 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_9x9xf8E4M3FNUZ_times_9x9xf8E4M3FNUZ_into_9x9xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 9 : i64 + %k = arith.constant 9 : i64 + %n = arith.constant 9 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_6_13_3_acc_12() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 6x13x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 6 : i64 + %lhs_dim1 = arith.constant 13 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 36 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 13 : i64 + %rhs_dim1 = arith.constant 3 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 37 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 6 : i64 + %acc_dim1 = arith.constant 3 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 38 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 6 : i64 + %acc_copy_dim1 = arith.constant 3 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 38 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> 
!hal.buffer_view + %m = arith.constant 6 : i64 + %k = arith.constant 13 : i64 + %n = arith.constant 3 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_6x13xf8E4M3FNUZ_times_13x3xf8E4M3FNUZ_into_6x3xf32_6_13_3_acc_13() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 6x13x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 6 : i64 + %lhs_dim1 = arith.constant 13 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 39 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 13 : i64 + %rhs_dim1 = arith.constant 3 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 40 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 6 : i64 + %acc_dim1 = arith.constant 3 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 41 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 6 : i64 + %acc_copy_dim1 = arith.constant 3 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 41 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_6x13xf8E4M3FNUZ_times_13x3xf8E4M3FNUZ_into_6x3xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 6 : i64 + %k = arith.constant 13 : i64 + %n = arith.constant 3 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_15_37_7_14() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 15x37x7"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 15 : i64 + %lhs_dim1 = arith.constant 37 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 42 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 37 : i64 + %rhs_dim1 = arith.constant 7 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 43 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call 
@module.matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 15 : i64 + %k = arith.constant 37 : i64 + %n = arith.constant 7 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_15x37xf8E4M3FNUZ_times_37x7xf8E4M3FNUZ_into_15x7xf32_15_37_7_15() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 15x37x7"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 15 : i64 + %lhs_dim1 = arith.constant 37 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 44 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 37 : i64 + %rhs_dim1 = arith.constant 7 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 45 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_15x37xf8E4M3FNUZ_times_37x7xf8E4M3FNUZ_into_15x7xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 15 : i64 + %k = arith.constant 37 : i64 + %n = arith.constant 7 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_81_19_41_acc_16() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 81x19x41"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 81 : i64 + %lhs_dim1 = arith.constant 19 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 46 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 19 : i64 + %rhs_dim1 = arith.constant 41 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 47 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 81 : i64 + %acc_dim1 = arith.constant 41 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 48 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 81 : i64 + %acc_copy_dim1 = arith.constant 41 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 48 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, 
%acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 81 : i64 + %k = arith.constant 19 : i64 + %n = arith.constant 41 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_81x19xf8E4M3FNUZ_times_19x41xf8E4M3FNUZ_into_81x41xf32_81_19_41_acc_17() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 81x19x41"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 81 : i64 + %lhs_dim1 = arith.constant 19 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 49 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 19 : i64 + %rhs_dim1 = arith.constant 41 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 50 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 81 : i64 + %acc_dim1 = arith.constant 41 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 51 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 81 : i64 + %acc_copy_dim1 = arith.constant 41 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 51 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_81x19xf8E4M3FNUZ_times_19x41xf8E4M3FNUZ_into_81x41xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 81 : i64 + %k = arith.constant 19 : i64 + %n = arith.constant 41 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_1_10_10_acc_18() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 52 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = 
arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 53 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 10 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 54 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 10 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 54 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_1x10xf8E4M3FNUZ_times_10x10xf8E4M3FNUZ_into_1x10xf32_1_10_10_acc_19() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 55 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 56 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 10 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 57 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 10 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 57 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_1x10xf8E4M3FNUZ_times_10x10xf8E4M3FNUZ_into_1x10xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, 
%k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_1_10_10_20() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 58 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 59 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_1x10xf8E4M3FNUZ_times_10x10xf8E4M3FNUZ_into_1x10xf32_1_10_10_21() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 60 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 61 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_1x10xf8E4M3FNUZ_times_10x10xf8E4M3FNUZ_into_1x10xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_10_1_10_acc_22() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x1x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = 
hal.element_type : i32 + %lhs_seed = arith.constant 62 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 63 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 10 : i64 + %acc_dim1 = arith.constant 10 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 64 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 10 : i64 + %acc_copy_dim1 = arith.constant 10 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 64 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_10x1xf8E4M3FNUZ_times_1x10xf8E4M3FNUZ_into_10x10xf32_10_1_10_acc_23() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x1x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 65 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 66 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 10 : i64 + %acc_dim1 = arith.constant 10 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 67 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 10 : i64 + %acc_copy_dim1 = arith.constant 10 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 67 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_10x1xf8E4M3FNUZ_times_1x10xf8E4M3FNUZ_into_10x10xf32(%lhs, 
%rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_10_10_1_acc_24() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 68 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 69 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 10 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 70 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 10 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 70 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_10x10xf8E4M3FNUZ_times_10x1xf8E4M3FNUZ_into_10x1xf32_10_10_1_acc_25() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 71 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 72 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : 
(!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 10 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 73 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 10 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 73 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_10x10xf8E4M3FNUZ_times_10x1xf8E4M3FNUZ_into_10x1xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_10_10_1_26() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 74 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 75 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_10x10xf8E4M3FNUZ_times_10x1xf8E4M3FNUZ_into_10x1xf32_10_10_1_27() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 76 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 
77 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_10x10xf8E4M3FNUZ_times_10x1xf8E4M3FNUZ_into_10x1xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + + +} diff --git a/linalg_ops/matmul/generated/f8E4M3FNUZ_into_f32/matmul_transpose_b_f8E4M3FNUZ_into_f32_large.mlir b/linalg_ops/matmul/generated/f8E4M3FNUZ_into_f32/matmul_transpose_b_f8E4M3FNUZ_into_f32_large.mlir new file mode 100644 index 0000000..c2c9702 --- /dev/null +++ b/linalg_ops/matmul/generated/f8E4M3FNUZ_into_f32/matmul_transpose_b_f8E4M3FNUZ_into_f32_large.mlir @@ -0,0 +1,172 @@ +func.func @matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs: tensor<?x?xf32>, %rhs: tensor<?x?xf32>, %acc: tensor<?x?xf32>) -> tensor<?x?xf32> { + %lhs_casted = arith.truncf %lhs: tensor<?x?xf32> to tensor<?x?xf8E4M3FNUZ> + %rhs_casted = arith.truncf %rhs: tensor<?x?xf32> to tensor<?x?xf8E4M3FNUZ> + %result = linalg.matmul_transpose_b ins(%lhs_casted, %rhs_casted: tensor<?x?xf8E4M3FNUZ>, tensor<?x?xf8E4M3FNUZ>) outs(%acc: tensor<?x?xf32>) -> tensor<?x?xf32> + return %result: tensor<?x?xf32> +} + +func.func @matmul_accumulate_512x128xf8E4M3FNUZ_times_512x128xf8E4M3FNUZ_into_512x512xf32(%lhs: tensor<512x128xf32>, %rhs: tensor<512x128xf32>, %acc: tensor<512x512xf32>) -> tensor<512x512xf32> { + %lhs_casted = arith.truncf %lhs: tensor<512x128xf32> to tensor<512x128xf8E4M3FNUZ> + %rhs_casted = arith.truncf %rhs: tensor<512x128xf32> to tensor<512x128xf8E4M3FNUZ> + %result = linalg.matmul_transpose_b ins(%lhs_casted, %rhs_casted: tensor<512x128xf8E4M3FNUZ>, tensor<512x128xf8E4M3FNUZ>) outs(%acc: tensor<512x512xf32>) -> tensor<512x512xf32> + return %result: tensor<512x512xf32> +} + +func.func @matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs: tensor<?x?xf32>, %rhs: tensor<?x?xf32>) -> tensor<?x?xf32> { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %acc_dim0 = tensor.dim %lhs, %c0 : tensor<?x?xf32> + %acc_dim1 = tensor.dim %rhs, %c1 : tensor<?x?xf32> + %init_acc = tensor.empty(%acc_dim0, %acc_dim1) : tensor<?x?xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<?x?xf32>) -> tensor<?x?xf32> + %lhs_casted = arith.truncf %lhs: tensor<?x?xf32> to tensor<?x?xf8E4M3FNUZ> + %rhs_casted = arith.truncf %rhs: tensor<?x?xf32> to tensor<?x?xf8E4M3FNUZ> + %result = linalg.matmul_transpose_b ins(%lhs_casted, %rhs_casted: tensor<?x?xf8E4M3FNUZ>, tensor<?x?xf8E4M3FNUZ>) outs(%acc: tensor<?x?xf32>) -> tensor<?x?xf32> + return %result: tensor<?x?xf32> +} + +func.func @matmul_512x128xf8E4M3FNUZ_times_512x128xf8E4M3FNUZ_into_512x512xf32(%lhs: tensor<512x128xf32>, %rhs: tensor<512x128xf32>) -> tensor<512x512xf32> { + %init_acc = tensor.empty() : tensor<512x512xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<512x512xf32>) -> tensor<512x512xf32> + %lhs_casted = arith.truncf %lhs: tensor<512x128xf32> to tensor<512x128xf8E4M3FNUZ> + %rhs_casted = arith.truncf %rhs: tensor<512x128xf32> to tensor<512x128xf8E4M3FNUZ> + %result = linalg.matmul_transpose_b ins(%lhs_casted, %rhs_casted: tensor<512x128xf8E4M3FNUZ>, tensor<512x128xf8E4M3FNUZ>) outs(%acc: tensor<512x512xf32>) -> tensor<512x512xf32> + return %result:
tensor<512x512xf32> +} + +func.func @matmul_1000x4xf8E4M3FNUZ_times_512x4xf8E4M3FNUZ_into_1000x512xf32(%lhs: tensor<1000x4xf32>, %rhs: tensor<512x4xf32>) -> tensor<1000x512xf32> { + %init_acc = tensor.empty() : tensor<1000x512xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<1000x512xf32>) -> tensor<1000x512xf32> + %lhs_casted = arith.truncf %lhs: tensor<1000x4xf32> to tensor<1000x4xf8E4M3FNUZ> + %rhs_casted = arith.truncf %rhs: tensor<512x4xf32> to tensor<512x4xf8E4M3FNUZ> + %result = linalg.matmul_transpose_b ins(%lhs_casted, %rhs_casted: tensor<1000x4xf8E4M3FNUZ>, tensor<512x4xf8E4M3FNUZ>) outs(%acc: tensor<1000x512xf32>) -> tensor<1000x512xf32> + return %result: tensor<1000x512xf32> +} + +func.func @matmul_4x1000xf8E4M3FNUZ_times_512x1000xf8E4M3FNUZ_into_4x512xf32(%lhs: tensor<4x1000xf32>, %rhs: tensor<512x1000xf32>) -> tensor<4x512xf32> { + %init_acc = tensor.empty() : tensor<4x512xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<4x512xf32>) -> tensor<4x512xf32> + %lhs_casted = arith.truncf %lhs: tensor<4x1000xf32> to tensor<4x1000xf8E4M3FNUZ> + %rhs_casted = arith.truncf %rhs: tensor<512x1000xf32> to tensor<512x1000xf8E4M3FNUZ> + %result = linalg.matmul_transpose_b ins(%lhs_casted, %rhs_casted: tensor<4x1000xf8E4M3FNUZ>, tensor<512x1000xf8E4M3FNUZ>) outs(%acc: tensor<4x512xf32>) -> tensor<4x512xf32> + return %result: tensor<4x512xf32> +} + +func.func @matmul_512x1000xf8E4M3FNUZ_times_4x1000xf8E4M3FNUZ_into_512x4xf32(%lhs: tensor<512x1000xf32>, %rhs: tensor<4x1000xf32>) -> tensor<512x4xf32> { + %init_acc = tensor.empty() : tensor<512x4xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<512x4xf32>) -> tensor<512x4xf32> + %lhs_casted = arith.truncf %lhs: tensor<512x1000xf32> to tensor<512x1000xf8E4M3FNUZ> + %rhs_casted = arith.truncf %rhs: tensor<4x1000xf32> to tensor<4x1000xf8E4M3FNUZ> + %result = linalg.matmul_transpose_b ins(%lhs_casted, %rhs_casted: tensor<512x1000xf8E4M3FNUZ>, tensor<4x1000xf8E4M3FNUZ>) outs(%acc: tensor<512x4xf32>) -> tensor<512x4xf32> + return %result: tensor<512x4xf32> +} + +func.func @matmul_512x128xf8E4M3FNUZ_times_500x128xf8E4M3FNUZ_into_512x500xf32(%lhs: tensor<512x128xf32>, %rhs: tensor<500x128xf32>) -> tensor<512x500xf32> { + %init_acc = tensor.empty() : tensor<512x500xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<512x500xf32>) -> tensor<512x500xf32> + %lhs_casted = arith.truncf %lhs: tensor<512x128xf32> to tensor<512x128xf8E4M3FNUZ> + %rhs_casted = arith.truncf %rhs: tensor<500x128xf32> to tensor<500x128xf8E4M3FNUZ> + %result = linalg.matmul_transpose_b ins(%lhs_casted, %rhs_casted: tensor<512x128xf8E4M3FNUZ>, tensor<500x128xf8E4M3FNUZ>) outs(%acc: tensor<512x500xf32>) -> tensor<512x500xf32> + return %result: tensor<512x500xf32> +} + +func.func @matmul_457x330xf8E4M3FNUZ_times_512x330xf8E4M3FNUZ_into_457x512xf32(%lhs: tensor<457x330xf32>, %rhs: tensor<512x330xf32>) -> tensor<457x512xf32> { + %init_acc = tensor.empty() : tensor<457x512xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<457x512xf32>) -> tensor<457x512xf32> + %lhs_casted = arith.truncf %lhs: tensor<457x330xf32> to tensor<457x330xf8E4M3FNUZ> + %rhs_casted = arith.truncf %rhs: tensor<512x330xf32> to tensor<512x330xf8E4M3FNUZ> + %result = linalg.matmul_transpose_b 
ins(%lhs_casted, %rhs_casted: tensor<457x330xf8E4M3FNUZ>, tensor<512x330xf8E4M3FNUZ>) outs(%acc: tensor<457x512xf32>) -> tensor<457x512xf32> + return %result: tensor<457x512xf32> +} + +func.func @matmul_457x330xf8E4M3FNUZ_times_514x330xf8E4M3FNUZ_into_457x514xf32(%lhs: tensor<457x330xf32>, %rhs: tensor<514x330xf32>) -> tensor<457x514xf32> { + %init_acc = tensor.empty() : tensor<457x514xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<457x514xf32>) -> tensor<457x514xf32> + %lhs_casted = arith.truncf %lhs: tensor<457x330xf32> to tensor<457x330xf8E4M3FNUZ> + %rhs_casted = arith.truncf %rhs: tensor<514x330xf32> to tensor<514x330xf8E4M3FNUZ> + %result = linalg.matmul_transpose_b ins(%lhs_casted, %rhs_casted: tensor<457x330xf8E4M3FNUZ>, tensor<514x330xf8E4M3FNUZ>) outs(%acc: tensor<457x514xf32>) -> tensor<457x514xf32> + return %result: tensor<457x514xf32> +} + +func.func @matmul_438x330xf8E4M3FNUZ_times_514x330xf8E4M3FNUZ_into_438x514xf32(%lhs: tensor<438x330xf32>, %rhs: tensor<514x330xf32>) -> tensor<438x514xf32> { + %init_acc = tensor.empty() : tensor<438x514xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<438x514xf32>) -> tensor<438x514xf32> + %lhs_casted = arith.truncf %lhs: tensor<438x330xf32> to tensor<438x330xf8E4M3FNUZ> + %rhs_casted = arith.truncf %rhs: tensor<514x330xf32> to tensor<514x330xf8E4M3FNUZ> + %result = linalg.matmul_transpose_b ins(%lhs_casted, %rhs_casted: tensor<438x330xf8E4M3FNUZ>, tensor<514x330xf8E4M3FNUZ>) outs(%acc: tensor<438x514xf32>) -> tensor<438x514xf32> + return %result: tensor<438x514xf32> +} + +func.func @matmul_540x332xf8E4M3FNUZ_times_516x332xf8E4M3FNUZ_into_540x516xf32(%lhs: tensor<540x332xf32>, %rhs: tensor<516x332xf32>) -> tensor<540x516xf32> { + %init_acc = tensor.empty() : tensor<540x516xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<540x516xf32>) -> tensor<540x516xf32> + %lhs_casted = arith.truncf %lhs: tensor<540x332xf32> to tensor<540x332xf8E4M3FNUZ> + %rhs_casted = arith.truncf %rhs: tensor<516x332xf32> to tensor<516x332xf8E4M3FNUZ> + %result = linalg.matmul_transpose_b ins(%lhs_casted, %rhs_casted: tensor<540x332xf8E4M3FNUZ>, tensor<516x332xf8E4M3FNUZ>) outs(%acc: tensor<540x516xf32>) -> tensor<540x516xf32> + return %result: tensor<540x516xf32> +} + +func.func @matmul_654x321xf8E4M3FNUZ_times_234x321xf8E4M3FNUZ_into_654x234xf32(%lhs: tensor<654x321xf32>, %rhs: tensor<234x321xf32>) -> tensor<654x234xf32> { + %init_acc = tensor.empty() : tensor<654x234xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<654x234xf32>) -> tensor<654x234xf32> + %lhs_casted = arith.truncf %lhs: tensor<654x321xf32> to tensor<654x321xf8E4M3FNUZ> + %rhs_casted = arith.truncf %rhs: tensor<234x321xf32> to tensor<234x321xf8E4M3FNUZ> + %result = linalg.matmul_transpose_b ins(%lhs_casted, %rhs_casted: tensor<654x321xf8E4M3FNUZ>, tensor<234x321xf8E4M3FNUZ>) outs(%acc: tensor<654x234xf32>) -> tensor<654x234xf32> + return %result: tensor<654x234xf32> +} + +func.func @matmul_457x160xf8E4M3FNUZ_times_512x160xf8E4M3FNUZ_into_457x512xf32(%lhs: tensor<457x160xf32>, %rhs: tensor<512x160xf32>) -> tensor<457x512xf32> { + %init_acc = tensor.empty() : tensor<457x512xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<457x512xf32>) -> tensor<457x512xf32> + %lhs_casted = 
arith.truncf %lhs: tensor<457x160xf32> to tensor<457x160xf8E4M3FNUZ> + %rhs_casted = arith.truncf %rhs: tensor<512x160xf32> to tensor<512x160xf8E4M3FNUZ> + %result = linalg.matmul_transpose_b ins(%lhs_casted, %rhs_casted: tensor<457x160xf8E4M3FNUZ>, tensor<512x160xf8E4M3FNUZ>) outs(%acc: tensor<457x512xf32>) -> tensor<457x512xf32> + return %result: tensor<457x512xf32> +} + +func.func @matmul_512x330xf8E4M3FNUZ_times_512x330xf8E4M3FNUZ_into_512x512xf32(%lhs: tensor<512x330xf32>, %rhs: tensor<512x330xf32>) -> tensor<512x512xf32> { + %init_acc = tensor.empty() : tensor<512x512xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<512x512xf32>) -> tensor<512x512xf32> + %lhs_casted = arith.truncf %lhs: tensor<512x330xf32> to tensor<512x330xf8E4M3FNUZ> + %rhs_casted = arith.truncf %rhs: tensor<512x330xf32> to tensor<512x330xf8E4M3FNUZ> + %result = linalg.matmul_transpose_b ins(%lhs_casted, %rhs_casted: tensor<512x330xf8E4M3FNUZ>, tensor<512x330xf8E4M3FNUZ>) outs(%acc: tensor<512x512xf32>) -> tensor<512x512xf32> + return %result: tensor<512x512xf32> +} + +func.func @matmul_accumulate_1x1000xf8E4M3FNUZ_times_1000x1000xf8E4M3FNUZ_into_1x1000xf32(%lhs: tensor<1x1000xf32>, %rhs: tensor<1000x1000xf32>, %acc: tensor<1x1000xf32>) -> tensor<1x1000xf32> { + %lhs_casted = arith.truncf %lhs: tensor<1x1000xf32> to tensor<1x1000xf8E4M3FNUZ> + %rhs_casted = arith.truncf %rhs: tensor<1000x1000xf32> to tensor<1000x1000xf8E4M3FNUZ> + %result = linalg.matmul_transpose_b ins(%lhs_casted, %rhs_casted: tensor<1x1000xf8E4M3FNUZ>, tensor<1000x1000xf8E4M3FNUZ>) outs(%acc: tensor<1x1000xf32>) -> tensor<1x1000xf32> + return %result: tensor<1x1000xf32> +} + +func.func @matmul_accumulate_1000x1000xf8E4M3FNUZ_times_1x1000xf8E4M3FNUZ_into_1000x1xf32(%lhs: tensor<1000x1000xf32>, %rhs: tensor<1x1000xf32>, %acc: tensor<1000x1xf32>) -> tensor<1000x1xf32> { + %lhs_casted = arith.truncf %lhs: tensor<1000x1000xf32> to tensor<1000x1000xf8E4M3FNUZ> + %rhs_casted = arith.truncf %rhs: tensor<1x1000xf32> to tensor<1x1000xf8E4M3FNUZ> + %result = linalg.matmul_transpose_b ins(%lhs_casted, %rhs_casted: tensor<1000x1000xf8E4M3FNUZ>, tensor<1x1000xf8E4M3FNUZ>) outs(%acc: tensor<1000x1xf32>) -> tensor<1000x1xf32> + return %result: tensor<1000x1xf32> +} + +func.func @matmul_1000x1000xf8E4M3FNUZ_times_1x1000xf8E4M3FNUZ_into_1000x1xf32(%lhs: tensor<1000x1000xf32>, %rhs: tensor<1x1000xf32>) -> tensor<1000x1xf32> { + %init_acc = tensor.empty() : tensor<1000x1xf32> + %c0_acc_type = arith.constant 0.0: f32 + %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<1000x1xf32>) -> tensor<1000x1xf32> + %lhs_casted = arith.truncf %lhs: tensor<1000x1000xf32> to tensor<1000x1000xf8E4M3FNUZ> + %rhs_casted = arith.truncf %rhs: tensor<1x1000xf32> to tensor<1x1000xf8E4M3FNUZ> + %result = linalg.matmul_transpose_b ins(%lhs_casted, %rhs_casted: tensor<1000x1000xf8E4M3FNUZ>, tensor<1x1000xf8E4M3FNUZ>) outs(%acc: tensor<1000x1xf32>) -> tensor<1000x1xf32> + return %result: tensor<1000x1xf32> +} + diff --git a/linalg_ops/matmul/generated/f8E4M3FNUZ_into_f32/matmul_transpose_b_f8E4M3FNUZ_into_f32_large_calls.mlir b/linalg_ops/matmul/generated/f8E4M3FNUZ_into_f32/matmul_transpose_b_f8E4M3FNUZ_into_f32_large_calls.mlir new file mode 100644 index 0000000..249c528 --- /dev/null +++ b/linalg_ops/matmul/generated/f8E4M3FNUZ_into_f32/matmul_transpose_b_f8E4M3FNUZ_into_f32_large_calls.mlir @@ -0,0 +1,882 @@ +builtin.module @calls attributes { + +} { + +func.func private 
@matmul_test.generate_random_matrix(%device: !hal.device, %dim0: i64, %dim1: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @matmul_test.check_matmul_results(%device: !hal.device, %m: i64, %k: i64, %n: i64, %transpose_rhs: i32, %lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) + +func.func private @module.matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_512x128xf8E4M3FNUZ_times_512x128xf8E4M3FNUZ_into_512x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_512x128xf8E4M3FNUZ_times_512x128xf8E4M3FNUZ_into_512x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_1000x4xf8E4M3FNUZ_times_512x4xf8E4M3FNUZ_into_1000x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_4x1000xf8E4M3FNUZ_times_512x1000xf8E4M3FNUZ_into_4x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_512x1000xf8E4M3FNUZ_times_4x1000xf8E4M3FNUZ_into_512x4xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_512x128xf8E4M3FNUZ_times_500x128xf8E4M3FNUZ_into_512x500xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_457x330xf8E4M3FNUZ_times_512x330xf8E4M3FNUZ_into_457x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_457x330xf8E4M3FNUZ_times_514x330xf8E4M3FNUZ_into_457x514xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_438x330xf8E4M3FNUZ_times_514x330xf8E4M3FNUZ_into_438x514xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_540x332xf8E4M3FNUZ_times_516x332xf8E4M3FNUZ_into_540x516xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_654x321xf8E4M3FNUZ_times_234x321xf8E4M3FNUZ_into_654x234xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_457x160xf8E4M3FNUZ_times_512x160xf8E4M3FNUZ_into_457x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_512x330xf8E4M3FNUZ_times_512x330xf8E4M3FNUZ_into_512x512xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_1x1000xf8E4M3FNUZ_times_1000x1000xf8E4M3FNUZ_into_1x1000xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_1000x1000xf8E4M3FNUZ_times_1x1000xf8E4M3FNUZ_into_1000x1xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_1000x1000xf8E4M3FNUZ_times_1x1000xf8E4M3FNUZ_into_1000x1xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view + +func.func @matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_512_128_512_acc_0() 
attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type<f8E4M3FNUZ> : i32 + %lhs_seed = arith.constant 2 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 128 : i64 + %rhs_element_type = hal.element_type<f8E4M3FNUZ> : i32 + %rhs_seed = arith.constant 3 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 512 : i64 + %acc_dim1 = arith.constant 512 : i64 + %acc_element_type = hal.element_type<f32> : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 512 : i64 + %acc_copy_dim1 = arith.constant 512 : i64 + %acc_copy_element_type = hal.element_type<f32> : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_512x128xf8E4M3FNUZ_times_512x128xf8E4M3FNUZ_into_512x512xf32_512_128_512_acc_1() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type<f8E4M3FNUZ> : i32 + %lhs_seed = arith.constant 5 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 128 : i64 + %rhs_element_type = hal.element_type<f8E4M3FNUZ> : i32 + %rhs_seed = arith.constant 6 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 512 : i64 + %acc_dim1 = arith.constant 512 : i64 + %acc_element_type = hal.element_type<f32> : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 512 : i64 + %acc_copy_dim1 = arith.constant 512 : i64 + %acc_copy_element_type = hal.element_type<f32> : i32 + %acc_copy_seed =
arith.constant 7 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_512x128xf8E4M3FNUZ_times_512x128xf8E4M3FNUZ_into_512x512xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_512_128_512_2() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 8 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 128 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 9 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_512x128xf8E4M3FNUZ_times_512x128xf8E4M3FNUZ_into_512x512xf32_512_128_512_3() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 10 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 128 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 11 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_512x128xf8E4M3FNUZ_times_512x128xf8E4M3FNUZ_into_512x512xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 512 : i64 + 
%transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_1000_4_512_4() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x4x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 4 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 12 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 13 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_1000x4xf8E4M3FNUZ_times_512x4xf8E4M3FNUZ_into_1000x512xf32_1000_4_512_5() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x4x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 4 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 14 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 15 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_1000x4xf8E4M3FNUZ_times_512x4xf8E4M3FNUZ_into_1000x512xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_4_1000_512_6() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 4x1000x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index 
: !hal.device + %lhs_dim0 = arith.constant 4 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 16 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 17 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_4x1000xf8E4M3FNUZ_times_512x1000xf8E4M3FNUZ_into_4x512xf32_4_1000_512_7() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 4x1000x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 4 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 18 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 19 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_4x1000xf8E4M3FNUZ_times_512x1000xf8E4M3FNUZ_into_4x512xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_512_1000_4_8() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x1000x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 20 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 21 : i32 + %rhs = call 
@matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_512x1000xf8E4M3FNUZ_times_4x1000xf8E4M3FNUZ_into_512x4xf32_512_1000_4_9() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x1000x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 22 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 23 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_512x1000xf8E4M3FNUZ_times_4x1000xf8E4M3FNUZ_into_512x4xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_512_128_500_10() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x500"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 24 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 500 : i64 + %rhs_dim1 = arith.constant 128 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 25 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 500 : i64 + %transpose_rhs = arith.constant 1 : i32 + call 
@matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_512x128xf8E4M3FNUZ_times_500x128xf8E4M3FNUZ_into_512x500xf32_512_128_500_11() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x500"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 26 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 500 : i64 + %rhs_dim1 = arith.constant 128 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 27 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_512x128xf8E4M3FNUZ_times_500x128xf8E4M3FNUZ_into_512x500xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 500 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_457_330_512_12() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x330x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 28 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 29 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_457x330xf8E4M3FNUZ_times_512x330xf8E4M3FNUZ_into_457x512xf32_457_330_512_13() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x330x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 
= arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 30 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 31 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_457x330xf8E4M3FNUZ_times_512x330xf8E4M3FNUZ_into_457x512xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_457_330_514_14() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x330x514"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 32 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 514 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 33 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 514 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_457x330xf8E4M3FNUZ_times_514x330xf8E4M3FNUZ_into_457x514xf32_457_330_514_15() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x330x514"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 34 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 514 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 35 : i32 + %rhs = call 
@matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_457x330xf8E4M3FNUZ_times_514x330xf8E4M3FNUZ_into_457x514xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 514 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_438_330_514_16() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 438x330x514"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 438 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 36 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 514 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 37 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 438 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 514 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_438x330xf8E4M3FNUZ_times_514x330xf8E4M3FNUZ_into_438x514xf32_438_330_514_17() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 438x330x514"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 438 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 38 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 514 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 39 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_438x330xf8E4M3FNUZ_times_514x330xf8E4M3FNUZ_into_438x514xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 438 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 514 : i64 + %transpose_rhs = arith.constant 1 : i32 + call 
@matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_540_332_516_18() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 540x332x516"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 540 : i64 + %lhs_dim1 = arith.constant 332 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 40 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 516 : i64 + %rhs_dim1 = arith.constant 332 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 41 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 540 : i64 + %k = arith.constant 332 : i64 + %n = arith.constant 516 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_540x332xf8E4M3FNUZ_times_516x332xf8E4M3FNUZ_into_540x516xf32_540_332_516_19() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 540x332x516"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 540 : i64 + %lhs_dim1 = arith.constant 332 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 42 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 516 : i64 + %rhs_dim1 = arith.constant 332 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 43 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_540x332xf8E4M3FNUZ_times_516x332xf8E4M3FNUZ_into_540x516xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 540 : i64 + %k = arith.constant 332 : i64 + %n = arith.constant 516 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_654_321_234_20() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 654x321x234"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 
= arith.constant 654 : i64 + %lhs_dim1 = arith.constant 321 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 44 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 234 : i64 + %rhs_dim1 = arith.constant 321 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 45 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 654 : i64 + %k = arith.constant 321 : i64 + %n = arith.constant 234 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_654x321xf8E4M3FNUZ_times_234x321xf8E4M3FNUZ_into_654x234xf32_654_321_234_21() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 654x321x234"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 654 : i64 + %lhs_dim1 = arith.constant 321 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 46 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 234 : i64 + %rhs_dim1 = arith.constant 321 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 47 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_654x321xf8E4M3FNUZ_times_234x321xf8E4M3FNUZ_into_654x234xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 654 : i64 + %k = arith.constant 321 : i64 + %n = arith.constant 234 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_457_160_512_22() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x160x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 160 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 48 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 160 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 49 : i32 + %rhs = call 
@matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 160 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_457x160xf8E4M3FNUZ_times_512x160xf8E4M3FNUZ_into_457x512xf32_457_160_512_23() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x160x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 160 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 50 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 160 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 51 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_457x160xf8E4M3FNUZ_times_512x160xf8E4M3FNUZ_into_457x512xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 160 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_512_330_512_24() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x330x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 52 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 53 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call 
@matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_512x330xf8E4M3FNUZ_times_512x330xf8E4M3FNUZ_into_512x512xf32_512_330_512_25() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x330x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 54 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 55 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_512x330xf8E4M3FNUZ_times_512x330xf8E4M3FNUZ_into_512x512xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_1_1000_1000_acc_26() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1000x1000"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 56 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 57 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1000 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 58 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1000 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 58 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, 
!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1000 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_1x1000xf8E4M3FNUZ_times_1000x1000xf8E4M3FNUZ_into_1x1000xf32_1_1000_1000_acc_27() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1000x1000"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 59 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 60 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1000 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 61 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1000 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 61 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_1x1000xf8E4M3FNUZ_times_1000x1000xf8E4M3FNUZ_into_1x1000xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1000 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_1000_1000_1_acc_28() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x1000x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 62 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 63 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, 
%rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1000 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 64 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1000 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 64 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_1000x1000xf8E4M3FNUZ_times_1x1000xf8E4M3FNUZ_into_1000x1xf32_1000_1000_1_acc_29() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x1000x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 65 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 66 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1000 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 67 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1000 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 67 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_1000x1000xf8E4M3FNUZ_times_1x1000xf8E4M3FNUZ_into_1000x1xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) 
-> ()
+  return
+}
+
+func.func @matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_1000_1000_1_30() attributes {
+  iree.reflection = {description = "Matmul shape (MxKxN): 1000x1000x1"}
+} {
+  %device_index = arith.constant 0 : index
+  %device = hal.devices.get %device_index : !hal.device
+  %lhs_dim0 = arith.constant 1000 : i64
+  %lhs_dim1 = arith.constant 1000 : i64
+  %lhs_element_type = hal.element_type<f8E4M3FNUZ> : i32
+  %lhs_seed = arith.constant 68 : i32
+  %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+  %rhs_dim0 = arith.constant 1 : i64
+  %rhs_dim1 = arith.constant 1000 : i64
+  %rhs_element_type = hal.element_type<f8E4M3FNUZ> : i32
+  %rhs_seed = arith.constant 69 : i32
+  %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+  %acc = util.null : !hal.buffer_view
+  %result = call @module.matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view
+  %m = arith.constant 1000 : i64
+  %k = arith.constant 1000 : i64
+  %n = arith.constant 1 : i64
+  %transpose_rhs = arith.constant 1 : i32
+  call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()
+  return
+}
+
+func.func @matmul_1000x1000xf8E4M3FNUZ_times_1x1000xf8E4M3FNUZ_into_1000x1xf32_1000_1000_1_31() attributes {
+  iree.reflection = {description = "Matmul shape (MxKxN): 1000x1000x1"}
+} {
+  %device_index = arith.constant 0 : index
+  %device = hal.devices.get %device_index : !hal.device
+  %lhs_dim0 = arith.constant 1000 : i64
+  %lhs_dim1 = arith.constant 1000 : i64
+  %lhs_element_type = hal.element_type<f8E4M3FNUZ> : i32
+  %lhs_seed = arith.constant 70 : i32
+  %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+  %rhs_dim0 = arith.constant 1 : i64
+  %rhs_dim1 = arith.constant 1000 : i64
+  %rhs_element_type = hal.element_type<f8E4M3FNUZ> : i32
+  %rhs_seed = arith.constant 71 : i32
+  %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+  %acc = util.null : !hal.buffer_view
+  %result = call @module.matmul_1000x1000xf8E4M3FNUZ_times_1x1000xf8E4M3FNUZ_into_1000x1xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view
+  %m = arith.constant 1000 : i64
+  %k = arith.constant 1000 : i64
+  %n = arith.constant 1 : i64
+  %transpose_rhs = arith.constant 1 : i32
+  call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()
+  return
+}
+
+
+}
diff --git a/linalg_ops/matmul/generated/f8E4M3FNUZ_into_f32/matmul_transpose_b_f8E4M3FNUZ_into_f32_small.mlir b/linalg_ops/matmul/generated/f8E4M3FNUZ_into_f32/matmul_transpose_b_f8E4M3FNUZ_into_f32_small.mlir
new file mode 100644
index 0000000..6b56445
--- /dev/null
+++ b/linalg_ops/matmul/generated/f8E4M3FNUZ_into_f32/matmul_transpose_b_f8E4M3FNUZ_into_f32_small.mlir
@@ -0,0 +1,131 @@
+func.func @matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs: tensor<?x?xf32>, %rhs: tensor<?x?xf32>, %acc: tensor<?x?xf32>) -> tensor<?x?xf32> {
+  %lhs_casted = arith.truncf %lhs: tensor<?x?xf32> to tensor<?x?xf8E4M3FNUZ>
+  %rhs_casted = arith.truncf %rhs: tensor<?x?xf32> to tensor<?x?xf8E4M3FNUZ>
+  %result = linalg.matmul_transpose_b ins(%lhs_casted, %rhs_casted: tensor<?x?xf8E4M3FNUZ>, tensor<?x?xf8E4M3FNUZ>) outs(%acc: tensor<?x?xf32>) -> tensor<?x?xf32>
+  return %result: tensor<?x?xf32>
+}
+
+func.func @matmul_accumulate_1x1xf8E4M3FNUZ_times_1x1xf8E4M3FNUZ_into_1x1xf32(%lhs: tensor<1x1xf32>, %rhs: tensor<1x1xf32>, %acc: tensor<1x1xf32>) -> tensor<1x1xf32> {
+  %lhs_casted = arith.truncf %lhs: tensor<1x1xf32> to tensor<1x1xf8E4M3FNUZ>
+  %rhs_casted = arith.truncf %rhs: tensor<1x1xf32> to tensor<1x1xf8E4M3FNUZ>
+  %result = linalg.matmul_transpose_b ins(%lhs_casted, %rhs_casted: tensor<1x1xf8E4M3FNUZ>, tensor<1x1xf8E4M3FNUZ>) outs(%acc: tensor<1x1xf32>) -> tensor<1x1xf32>
+  return %result: tensor<1x1xf32>
+}
+
+func.func @matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs: tensor<?x?xf32>, %rhs: tensor<?x?xf32>) -> tensor<?x?xf32> {
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+  %acc_dim0 = tensor.dim %lhs, %c0 : tensor<?x?xf32>
+  %acc_dim1 = tensor.dim %rhs, %c1 : tensor<?x?xf32>
+  %init_acc = tensor.empty(%acc_dim0, %acc_dim1) : tensor<?x?xf32>
+  %c0_acc_type = arith.constant 0.0: f32
+  %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<?x?xf32>) -> tensor<?x?xf32>
+  %lhs_casted = arith.truncf %lhs: tensor<?x?xf32> to tensor<?x?xf8E4M3FNUZ>
+  %rhs_casted = arith.truncf %rhs: tensor<?x?xf32> to tensor<?x?xf8E4M3FNUZ>
+  %result = linalg.matmul_transpose_b ins(%lhs_casted, %rhs_casted: tensor<?x?xf8E4M3FNUZ>, tensor<?x?xf8E4M3FNUZ>) outs(%acc: tensor<?x?xf32>) -> tensor<?x?xf32>
+  return %result: tensor<?x?xf32>
+}
+
+func.func @matmul_1x1xf8E4M3FNUZ_times_1x1xf8E4M3FNUZ_into_1x1xf32(%lhs: tensor<1x1xf32>, %rhs: tensor<1x1xf32>) -> tensor<1x1xf32> {
+  %init_acc = tensor.empty() : tensor<1x1xf32>
+  %c0_acc_type = arith.constant 0.0: f32
+  %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<1x1xf32>) -> tensor<1x1xf32>
+  %lhs_casted = arith.truncf %lhs: tensor<1x1xf32> to tensor<1x1xf8E4M3FNUZ>
+  %rhs_casted = arith.truncf %rhs: tensor<1x1xf32> to tensor<1x1xf8E4M3FNUZ>
+  %result = linalg.matmul_transpose_b ins(%lhs_casted, %rhs_casted: tensor<1x1xf8E4M3FNUZ>, tensor<1x1xf8E4M3FNUZ>) outs(%acc: tensor<1x1xf32>) -> tensor<1x1xf32>
+  return %result: tensor<1x1xf32>
+}
+
+func.func @matmul_accumulate_2x2xf8E4M3FNUZ_times_2x2xf8E4M3FNUZ_into_2x2xf32(%lhs: tensor<2x2xf32>, %rhs: tensor<2x2xf32>, %acc: tensor<2x2xf32>) -> tensor<2x2xf32> {
+  %lhs_casted = arith.truncf %lhs: tensor<2x2xf32> to tensor<2x2xf8E4M3FNUZ>
+  %rhs_casted = arith.truncf %rhs: tensor<2x2xf32> to tensor<2x2xf8E4M3FNUZ>
+  %result = linalg.matmul_transpose_b ins(%lhs_casted, %rhs_casted: tensor<2x2xf8E4M3FNUZ>, tensor<2x2xf8E4M3FNUZ>) outs(%acc: tensor<2x2xf32>) -> tensor<2x2xf32>
+  return %result: tensor<2x2xf32>
+}
+
+func.func @matmul_accumulate_4x4xf8E4M3FNUZ_times_4x4xf8E4M3FNUZ_into_4x4xf32(%lhs: tensor<4x4xf32>, %rhs: tensor<4x4xf32>, %acc: tensor<4x4xf32>) -> tensor<4x4xf32> {
+  %lhs_casted = arith.truncf %lhs: tensor<4x4xf32> to tensor<4x4xf8E4M3FNUZ>
+  %rhs_casted = arith.truncf %rhs: tensor<4x4xf32> to tensor<4x4xf8E4M3FNUZ>
+  %result = linalg.matmul_transpose_b ins(%lhs_casted, %rhs_casted: tensor<4x4xf8E4M3FNUZ>, tensor<4x4xf8E4M3FNUZ>) outs(%acc: tensor<4x4xf32>) -> tensor<4x4xf32>
+  return %result: tensor<4x4xf32>
+}
+
+func.func @matmul_accumulate_8x8xf8E4M3FNUZ_times_8x8xf8E4M3FNUZ_into_8x8xf32(%lhs: tensor<8x8xf32>, %rhs: tensor<8x8xf32>, %acc: tensor<8x8xf32>) -> tensor<8x8xf32> {
+  %lhs_casted = arith.truncf %lhs: tensor<8x8xf32> to tensor<8x8xf8E4M3FNUZ>
+  %rhs_casted = arith.truncf %rhs: tensor<8x8xf32> to tensor<8x8xf8E4M3FNUZ>
+  %result = linalg.matmul_transpose_b ins(%lhs_casted, %rhs_casted: tensor<8x8xf8E4M3FNUZ>, tensor<8x8xf8E4M3FNUZ>) outs(%acc: tensor<8x8xf32>) -> tensor<8x8xf32>
+  return %result: tensor<8x8xf32>
+}
+
+func.func @matmul_accumulate_9x9xf8E4M3FNUZ_times_9x9xf8E4M3FNUZ_into_9x9xf32(%lhs: tensor<9x9xf32>, %rhs: tensor<9x9xf32>, %acc: tensor<9x9xf32>) -> tensor<9x9xf32> {
+  %lhs_casted = arith.truncf %lhs: tensor<9x9xf32> to tensor<9x9xf8E4M3FNUZ>
+  %rhs_casted = arith.truncf %rhs: tensor<9x9xf32> to tensor<9x9xf8E4M3FNUZ>
+  %result = linalg.matmul_transpose_b ins(%lhs_casted, %rhs_casted: tensor<9x9xf8E4M3FNUZ>, tensor<9x9xf8E4M3FNUZ>) outs(%acc: tensor<9x9xf32>) -> tensor<9x9xf32>
+  return %result: tensor<9x9xf32>
+}
+
+func.func @matmul_accumulate_6x13xf8E4M3FNUZ_times_3x13xf8E4M3FNUZ_into_6x3xf32(%lhs: tensor<6x13xf32>, %rhs: tensor<3x13xf32>, %acc: tensor<6x3xf32>) -> tensor<6x3xf32> {
+  %lhs_casted = arith.truncf %lhs: tensor<6x13xf32> to tensor<6x13xf8E4M3FNUZ>
+  %rhs_casted = arith.truncf %rhs: tensor<3x13xf32> to tensor<3x13xf8E4M3FNUZ>
+  %result = linalg.matmul_transpose_b ins(%lhs_casted, %rhs_casted: tensor<6x13xf8E4M3FNUZ>, tensor<3x13xf8E4M3FNUZ>) outs(%acc: tensor<6x3xf32>) -> tensor<6x3xf32>
+  return %result: tensor<6x3xf32>
+}
+
+func.func @matmul_15x37xf8E4M3FNUZ_times_7x37xf8E4M3FNUZ_into_15x7xf32(%lhs: tensor<15x37xf32>, %rhs: tensor<7x37xf32>) -> tensor<15x7xf32> {
+  %init_acc = tensor.empty() : tensor<15x7xf32>
+  %c0_acc_type = arith.constant 0.0: f32
+  %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<15x7xf32>) -> tensor<15x7xf32>
+  %lhs_casted = arith.truncf %lhs: tensor<15x37xf32> to tensor<15x37xf8E4M3FNUZ>
+  %rhs_casted = arith.truncf %rhs: tensor<7x37xf32> to tensor<7x37xf8E4M3FNUZ>
+  %result = linalg.matmul_transpose_b ins(%lhs_casted, %rhs_casted: tensor<15x37xf8E4M3FNUZ>, tensor<7x37xf8E4M3FNUZ>) outs(%acc: tensor<15x7xf32>) -> tensor<15x7xf32>
+  return %result: tensor<15x7xf32>
+}
+
+func.func @matmul_accumulate_81x19xf8E4M3FNUZ_times_41x19xf8E4M3FNUZ_into_81x41xf32(%lhs: tensor<81x19xf32>, %rhs: tensor<41x19xf32>, %acc: tensor<81x41xf32>) -> tensor<81x41xf32> {
+  %lhs_casted = arith.truncf %lhs: tensor<81x19xf32> to tensor<81x19xf8E4M3FNUZ>
+  %rhs_casted = arith.truncf %rhs: tensor<41x19xf32> to tensor<41x19xf8E4M3FNUZ>
+  %result = linalg.matmul_transpose_b ins(%lhs_casted, %rhs_casted: tensor<81x19xf8E4M3FNUZ>, tensor<41x19xf8E4M3FNUZ>) outs(%acc: tensor<81x41xf32>) -> tensor<81x41xf32>
+  return %result: tensor<81x41xf32>
+}
+
+func.func @matmul_accumulate_1x10xf8E4M3FNUZ_times_10x10xf8E4M3FNUZ_into_1x10xf32(%lhs: tensor<1x10xf32>, %rhs: tensor<10x10xf32>, %acc: tensor<1x10xf32>) -> tensor<1x10xf32> {
+  %lhs_casted = arith.truncf %lhs: tensor<1x10xf32> to tensor<1x10xf8E4M3FNUZ>
+  %rhs_casted = arith.truncf %rhs: tensor<10x10xf32> to tensor<10x10xf8E4M3FNUZ>
+  %result = linalg.matmul_transpose_b ins(%lhs_casted, %rhs_casted: tensor<1x10xf8E4M3FNUZ>, tensor<10x10xf8E4M3FNUZ>) outs(%acc: tensor<1x10xf32>) -> tensor<1x10xf32>
+  return %result: tensor<1x10xf32>
+}
+
+func.func @matmul_1x10xf8E4M3FNUZ_times_10x10xf8E4M3FNUZ_into_1x10xf32(%lhs: tensor<1x10xf32>, %rhs: tensor<10x10xf32>) -> tensor<1x10xf32> {
+  %init_acc = tensor.empty() : tensor<1x10xf32>
+  %c0_acc_type = arith.constant 0.0: f32
+  %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<1x10xf32>) -> tensor<1x10xf32>
+  %lhs_casted = arith.truncf %lhs: tensor<1x10xf32> to tensor<1x10xf8E4M3FNUZ>
+  %rhs_casted = arith.truncf %rhs: tensor<10x10xf32> to tensor<10x10xf8E4M3FNUZ>
+  %result = linalg.matmul_transpose_b ins(%lhs_casted, %rhs_casted: tensor<1x10xf8E4M3FNUZ>, tensor<10x10xf8E4M3FNUZ>) outs(%acc: tensor<1x10xf32>) -> tensor<1x10xf32>
+  return %result: tensor<1x10xf32>
+}
+
+func.func @matmul_accumulate_10x1xf8E4M3FNUZ_times_10x1xf8E4M3FNUZ_into_10x10xf32(%lhs: tensor<10x1xf32>, %rhs: tensor<10x1xf32>, %acc: tensor<10x10xf32>) -> tensor<10x10xf32> {
+  %lhs_casted = arith.truncf %lhs: tensor<10x1xf32> to tensor<10x1xf8E4M3FNUZ>
+  %rhs_casted = arith.truncf %rhs: tensor<10x1xf32> to tensor<10x1xf8E4M3FNUZ>
+  %result = linalg.matmul_transpose_b ins(%lhs_casted, %rhs_casted: tensor<10x1xf8E4M3FNUZ>, tensor<10x1xf8E4M3FNUZ>) outs(%acc: tensor<10x10xf32>) -> tensor<10x10xf32>
+  return %result: tensor<10x10xf32>
+}
+
+func.func @matmul_accumulate_10x10xf8E4M3FNUZ_times_1x10xf8E4M3FNUZ_into_10x1xf32(%lhs: tensor<10x10xf32>, %rhs: tensor<1x10xf32>, %acc: tensor<10x1xf32>) -> tensor<10x1xf32> {
+  %lhs_casted = arith.truncf %lhs: tensor<10x10xf32> to tensor<10x10xf8E4M3FNUZ>
+  %rhs_casted = arith.truncf %rhs: tensor<1x10xf32> to tensor<1x10xf8E4M3FNUZ>
+  %result = linalg.matmul_transpose_b ins(%lhs_casted, %rhs_casted: tensor<10x10xf8E4M3FNUZ>, tensor<1x10xf8E4M3FNUZ>) outs(%acc: tensor<10x1xf32>) -> tensor<10x1xf32>
+  return %result: tensor<10x1xf32>
+}
+
+func.func @matmul_10x10xf8E4M3FNUZ_times_1x10xf8E4M3FNUZ_into_10x1xf32(%lhs: tensor<10x10xf32>, %rhs: tensor<1x10xf32>) -> tensor<10x1xf32> {
+  %init_acc = tensor.empty() : tensor<10x1xf32>
+  %c0_acc_type = arith.constant 0.0: f32
+  %acc = linalg.fill ins(%c0_acc_type : f32) outs(%init_acc : tensor<10x1xf32>) -> tensor<10x1xf32>
+  %lhs_casted = arith.truncf %lhs: tensor<10x10xf32> to tensor<10x10xf8E4M3FNUZ>
+  %rhs_casted = arith.truncf %rhs: tensor<1x10xf32> to tensor<1x10xf8E4M3FNUZ>
+  %result = linalg.matmul_transpose_b ins(%lhs_casted, %rhs_casted: tensor<10x10xf8E4M3FNUZ>, tensor<1x10xf8E4M3FNUZ>) outs(%acc: tensor<10x1xf32>) -> tensor<10x1xf32>
+  return %result: tensor<10x1xf32>
+}
+
diff --git a/linalg_ops/matmul/generated/f8E4M3FNUZ_into_f32/matmul_transpose_b_f8E4M3FNUZ_into_f32_small_calls.mlir b/linalg_ops/matmul/generated/f8E4M3FNUZ_into_f32/matmul_transpose_b_f8E4M3FNUZ_into_f32_small_calls.mlir
new file mode 100644
index 0000000..84f89f3
--- /dev/null
+++ b/linalg_ops/matmul/generated/f8E4M3FNUZ_into_f32/matmul_transpose_b_f8E4M3FNUZ_into_f32_small_calls.mlir
@@ -0,0 +1,906 @@
+builtin.module @calls attributes {
+
+} {
+
+func.func private @matmul_test.generate_random_matrix(%device: !hal.device, %dim0: i64, %dim1: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view
+func.func private @matmul_test.check_matmul_results(%device: !hal.device, %m: i64, %k: i64, %n: i64, %transpose_rhs: i32, %lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view)
+
+func.func private @module.matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_accumulate_1x1xf8E4M3FNUZ_times_1x1xf8E4M3FNUZ_into_1x1xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_1x1xf8E4M3FNUZ_times_1x1xf8E4M3FNUZ_into_1x1xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) ->
!hal.buffer_view +func.func private @module.matmul_accumulate_2x2xf8E4M3FNUZ_times_2x2xf8E4M3FNUZ_into_2x2xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_4x4xf8E4M3FNUZ_times_4x4xf8E4M3FNUZ_into_4x4xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_8x8xf8E4M3FNUZ_times_8x8xf8E4M3FNUZ_into_8x8xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_9x9xf8E4M3FNUZ_times_9x9xf8E4M3FNUZ_into_9x9xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_6x13xf8E4M3FNUZ_times_3x13xf8E4M3FNUZ_into_6x3xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_15x37xf8E4M3FNUZ_times_7x37xf8E4M3FNUZ_into_15x7xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_81x19xf8E4M3FNUZ_times_41x19xf8E4M3FNUZ_into_81x41xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_1x10xf8E4M3FNUZ_times_10x10xf8E4M3FNUZ_into_1x10xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_1x10xf8E4M3FNUZ_times_10x10xf8E4M3FNUZ_into_1x10xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_10x1xf8E4M3FNUZ_times_10x1xf8E4M3FNUZ_into_10x10xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_10x10xf8E4M3FNUZ_times_1x10xf8E4M3FNUZ_into_10x1xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_10x10xf8E4M3FNUZ_times_1x10xf8E4M3FNUZ_into_10x1xf32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view + +func.func @matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_1_1_1_acc_0() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 2 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 3 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = 
hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_1x1xf8E4M3FNUZ_times_1x1xf8E4M3FNUZ_into_1x1xf32_1_1_1_acc_1() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 5 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 6 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_1x1xf8E4M3FNUZ_times_1x1xf8E4M3FNUZ_into_1x1xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_1_1_1_2() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 8 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, 
i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 9 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_1x1xf8E4M3FNUZ_times_1x1xf8E4M3FNUZ_into_1x1xf32_1_1_1_3() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 10 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 11 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_1x1xf8E4M3FNUZ_times_1x1xf8E4M3FNUZ_into_1x1xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_2_2_2_acc_4() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 2x2x2"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 2 : i64 + %lhs_dim1 = arith.constant 2 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 12 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 2 : i64 + %rhs_dim1 = arith.constant 2 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 13 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 2 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 14 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, 
%acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 2 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 14 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 2 : i64 + %k = arith.constant 2 : i64 + %n = arith.constant 2 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_2x2xf8E4M3FNUZ_times_2x2xf8E4M3FNUZ_into_2x2xf32_2_2_2_acc_5() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 2x2x2"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 2 : i64 + %lhs_dim1 = arith.constant 2 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 15 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 2 : i64 + %rhs_dim1 = arith.constant 2 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 16 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 2 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 17 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 2 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 17 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_2x2xf8E4M3FNUZ_times_2x2xf8E4M3FNUZ_into_2x2xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 2 : i64 + %k = arith.constant 2 : i64 + %n = arith.constant 2 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_4_4_4_acc_6() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 4x4x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 4 : i64 + 
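+  // Comment (descriptive of the generated code): the DYNxDYNx... ("dynamic shape") cases
+  // still materialize fixed-size buffers in the driver (4x4 for this test); only the
+  // compiled entry point under test declares its operands with dynamic dimensions.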
%lhs_dim1 = arith.constant 4 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 18 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 19 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 4 : i64 + %acc_dim1 = arith.constant 4 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 20 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 4 : i64 + %acc_copy_dim1 = arith.constant 4 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 20 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_4x4xf8E4M3FNUZ_times_4x4xf8E4M3FNUZ_into_4x4xf32_4_4_4_acc_7() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 4x4x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 4 : i64 + %lhs_dim1 = arith.constant 4 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 21 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 22 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 4 : i64 + %acc_dim1 = arith.constant 4 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 23 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 4 : i64 + %acc_copy_dim1 = arith.constant 4 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 23 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call 
@module.matmul_accumulate_4x4xf8E4M3FNUZ_times_4x4xf8E4M3FNUZ_into_4x4xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_8_8_8_acc_8() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 8x8x8"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 8 : i64 + %lhs_dim1 = arith.constant 8 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 24 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 8 : i64 + %rhs_dim1 = arith.constant 8 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 25 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 8 : i64 + %acc_dim1 = arith.constant 8 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 26 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 8 : i64 + %acc_copy_dim1 = arith.constant 8 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 26 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 8 : i64 + %k = arith.constant 8 : i64 + %n = arith.constant 8 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_8x8xf8E4M3FNUZ_times_8x8xf8E4M3FNUZ_into_8x8xf32_8_8_8_acc_9() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 8x8x8"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 8 : i64 + %lhs_dim1 = arith.constant 8 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 27 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 8 : i64 + %rhs_dim1 = arith.constant 8 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 28 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, 
%rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 8 : i64 + %acc_dim1 = arith.constant 8 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 29 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 8 : i64 + %acc_copy_dim1 = arith.constant 8 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 29 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_8x8xf8E4M3FNUZ_times_8x8xf8E4M3FNUZ_into_8x8xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 8 : i64 + %k = arith.constant 8 : i64 + %n = arith.constant 8 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_9_9_9_acc_10() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 9x9x9"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 9 : i64 + %lhs_dim1 = arith.constant 9 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 30 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 9 : i64 + %rhs_dim1 = arith.constant 9 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 31 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 9 : i64 + %acc_dim1 = arith.constant 9 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 32 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 9 : i64 + %acc_copy_dim1 = arith.constant 9 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 32 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 9 : i64 + %k = arith.constant 9 : i64 + %n = arith.constant 9 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} 
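+
+// Note on the generated structure: each shape/variant below appears twice, once calling
+// the dynamic-shape (DYNxDYNx...) compiled entry point and once calling the statically
+// shaped entry point, with the trailing _acc_<N> / _<N> suffix acting as a running
+// test-case index. Accumulate tests seed an explicit %acc input, while non-accumulate
+// tests pass util.null as the accumulator argument to the checker.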
+ +func.func @matmul_accumulate_9x9xf8E4M3FNUZ_times_9x9xf8E4M3FNUZ_into_9x9xf32_9_9_9_acc_11() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 9x9x9"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 9 : i64 + %lhs_dim1 = arith.constant 9 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 33 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 9 : i64 + %rhs_dim1 = arith.constant 9 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 34 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 9 : i64 + %acc_dim1 = arith.constant 9 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 35 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 9 : i64 + %acc_copy_dim1 = arith.constant 9 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 35 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_9x9xf8E4M3FNUZ_times_9x9xf8E4M3FNUZ_into_9x9xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 9 : i64 + %k = arith.constant 9 : i64 + %n = arith.constant 9 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_6_13_3_acc_12() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 6x13x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 6 : i64 + %lhs_dim1 = arith.constant 13 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 36 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 3 : i64 + %rhs_dim1 = arith.constant 13 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 37 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 6 : i64 + %acc_dim1 = arith.constant 3 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 38 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 6 : i64 + %acc_copy_dim1 = arith.constant 3 : i64 + %acc_copy_element_type = 
hal.element_type : i32 + %acc_copy_seed = arith.constant 38 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 6 : i64 + %k = arith.constant 13 : i64 + %n = arith.constant 3 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_6x13xf8E4M3FNUZ_times_3x13xf8E4M3FNUZ_into_6x3xf32_6_13_3_acc_13() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 6x13x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 6 : i64 + %lhs_dim1 = arith.constant 13 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 39 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 3 : i64 + %rhs_dim1 = arith.constant 13 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 40 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 6 : i64 + %acc_dim1 = arith.constant 3 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 41 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 6 : i64 + %acc_copy_dim1 = arith.constant 3 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 41 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_6x13xf8E4M3FNUZ_times_3x13xf8E4M3FNUZ_into_6x3xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 6 : i64 + %k = arith.constant 13 : i64 + %n = arith.constant 3 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_15_37_7_14() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 15x37x7"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 15 : i64 + %lhs_dim1 = arith.constant 37 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 42 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, 
%lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 7 : i64 + %rhs_dim1 = arith.constant 37 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 43 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 15 : i64 + %k = arith.constant 37 : i64 + %n = arith.constant 7 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_15x37xf8E4M3FNUZ_times_7x37xf8E4M3FNUZ_into_15x7xf32_15_37_7_15() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 15x37x7"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 15 : i64 + %lhs_dim1 = arith.constant 37 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 44 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 7 : i64 + %rhs_dim1 = arith.constant 37 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 45 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_15x37xf8E4M3FNUZ_times_7x37xf8E4M3FNUZ_into_15x7xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 15 : i64 + %k = arith.constant 37 : i64 + %n = arith.constant 7 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_81_19_41_acc_16() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 81x19x41"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 81 : i64 + %lhs_dim1 = arith.constant 19 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 46 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 41 : i64 + %rhs_dim1 = arith.constant 19 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 47 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 81 : i64 + %acc_dim1 = arith.constant 41 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 48 : i32 + 
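+  // %acc and %acc_copy are generated from the same seed (48) and so hold identical data:
+  // the accumulating module call receives %acc_copy, while the untouched %acc is passed
+  // to @matmul_test.check_matmul_results together with %lhs and %rhs.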
%acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 81 : i64 + %acc_copy_dim1 = arith.constant 41 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 48 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 81 : i64 + %k = arith.constant 19 : i64 + %n = arith.constant 41 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_81x19xf8E4M3FNUZ_times_41x19xf8E4M3FNUZ_into_81x41xf32_81_19_41_acc_17() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 81x19x41"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 81 : i64 + %lhs_dim1 = arith.constant 19 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 49 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 41 : i64 + %rhs_dim1 = arith.constant 19 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 50 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 81 : i64 + %acc_dim1 = arith.constant 41 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 51 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 81 : i64 + %acc_copy_dim1 = arith.constant 41 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 51 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_81x19xf8E4M3FNUZ_times_41x19xf8E4M3FNUZ_into_81x41xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 81 : i64 + %k = arith.constant 19 : i64 + %n = arith.constant 41 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_1_10_10_acc_18() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + 
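+  // Transposed-RHS layout: %rhs below is generated as NxK (41x19) rather than KxN, and
+  // %transpose_rhs = 1 is forwarded to @matmul_test.check_matmul_results.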
%device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 52 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 53 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 10 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 54 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 10 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 54 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_1x10xf8E4M3FNUZ_times_10x10xf8E4M3FNUZ_into_1x10xf32_1_10_10_acc_19() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 55 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 56 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 10 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 57 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 10 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 57 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : 
(!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_1x10xf8E4M3FNUZ_times_10x10xf8E4M3FNUZ_into_1x10xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_1_10_10_20() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 58 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 59 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_1x10xf8E4M3FNUZ_times_10x10xf8E4M3FNUZ_into_1x10xf32_1_10_10_21() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 60 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 61 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_1x10xf8E4M3FNUZ_times_10x10xf8E4M3FNUZ_into_1x10xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, 
!hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_10_1_10_acc_22() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x1x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 62 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 63 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 10 : i64 + %acc_dim1 = arith.constant 10 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 64 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 10 : i64 + %acc_copy_dim1 = arith.constant 10 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 64 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_10x1xf8E4M3FNUZ_times_10x1xf8E4M3FNUZ_into_10x10xf32_10_1_10_acc_23() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x1x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 65 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 66 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 10 : i64 + %acc_dim1 = arith.constant 10 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 67 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = 
arith.constant 10 : i64 + %acc_copy_dim1 = arith.constant 10 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 67 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_10x1xf8E4M3FNUZ_times_10x1xf8E4M3FNUZ_into_10x10xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_10_10_1_acc_24() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 68 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 69 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 10 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 70 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 10 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 70 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_10x10xf8E4M3FNUZ_times_1x10xf8E4M3FNUZ_into_10x1xf32_10_10_1_acc_25() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = 
arith.constant 71 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 72 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 10 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 73 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 10 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 73 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_10x10xf8E4M3FNUZ_times_1x10xf8E4M3FNUZ_into_10x1xf32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32_10_10_1_26() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 74 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 75 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxf8E4M3FNUZ_times_DYNxDYNxf8E4M3FNUZ_into_DYNxDYNxf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_10x10xf8E4M3FNUZ_times_1x10xf8E4M3FNUZ_into_10x1xf32_10_10_1_27() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 
= arith.constant 10 : i64
+  %lhs_dim1 = arith.constant 10 : i64
+  %lhs_element_type = hal.element_type<f8E4M3FNUZ> : i32
+  %lhs_seed = arith.constant 76 : i32
+  %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+  %rhs_dim0 = arith.constant 1 : i64
+  %rhs_dim1 = arith.constant 10 : i64
+  %rhs_element_type = hal.element_type<f8E4M3FNUZ> : i32
+  %rhs_seed = arith.constant 77 : i32
+  %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+  %acc = util.null : !hal.buffer_view
+  %result = call @module.matmul_10x10xf8E4M3FNUZ_times_1x10xf8E4M3FNUZ_into_10x1xf32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view
+  %m = arith.constant 10 : i64
+  %k = arith.constant 10 : i64
+  %n = arith.constant 1 : i64
+  %transpose_rhs = arith.constant 1 : i32
+  call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()
+  return
+}
+
+
+}
diff --git a/linalg_ops/matmul/generated/i8_into_i32/matmul_i8_into_i32_large.mlir b/linalg_ops/matmul/generated/i8_into_i32/matmul_i8_into_i32_large.mlir
new file mode 100644
index 0000000..5fa3c90
--- /dev/null
+++ b/linalg_ops/matmul/generated/i8_into_i32/matmul_i8_into_i32_large.mlir
@@ -0,0 +1,136 @@
+func.func @matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs: tensor<?x?xi8>, %rhs: tensor<?x?xi8>, %acc: tensor<?x?xi32>) -> tensor<?x?xi32> {
+  %result = linalg.matmul ins(%lhs, %rhs: tensor<?x?xi8>, tensor<?x?xi8>) outs(%acc: tensor<?x?xi32>) -> tensor<?x?xi32>
+  return %result: tensor<?x?xi32>
+}
+
+func.func @matmul_accumulate_512x128xi8_times_128x512xi8_into_512x512xi32(%lhs: tensor<512x128xi8>, %rhs: tensor<128x512xi8>, %acc: tensor<512x512xi32>) -> tensor<512x512xi32> {
+  %result = linalg.matmul ins(%lhs, %rhs: tensor<512x128xi8>, tensor<128x512xi8>) outs(%acc: tensor<512x512xi32>) -> tensor<512x512xi32>
+  return %result: tensor<512x512xi32>
+}
+
+func.func @matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs: tensor<?x?xi8>, %rhs: tensor<?x?xi8>) -> tensor<?x?xi32> {
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+  %acc_dim0 = tensor.dim %lhs, %c0 : tensor<?x?xi8>
+  %acc_dim1 = tensor.dim %rhs, %c1 : tensor<?x?xi8>
+  %init_acc = tensor.empty(%acc_dim0, %acc_dim1) : tensor<?x?xi32>
+  %c0_acc_type = arith.constant 0: i32
+  %acc = linalg.fill ins(%c0_acc_type : i32) outs(%init_acc : tensor<?x?xi32>) -> tensor<?x?xi32>
+  %result = linalg.matmul ins(%lhs, %rhs: tensor<?x?xi8>, tensor<?x?xi8>) outs(%acc: tensor<?x?xi32>) -> tensor<?x?xi32>
+  return %result: tensor<?x?xi32>
+}
+
+func.func @matmul_512x128xi8_times_128x512xi8_into_512x512xi32(%lhs: tensor<512x128xi8>, %rhs: tensor<128x512xi8>) -> tensor<512x512xi32> {
+  %init_acc = tensor.empty() : tensor<512x512xi32>
+  %c0_acc_type = arith.constant 0: i32
+  %acc = linalg.fill ins(%c0_acc_type : i32) outs(%init_acc : tensor<512x512xi32>) -> tensor<512x512xi32>
+  %result = linalg.matmul ins(%lhs, %rhs: tensor<512x128xi8>, tensor<128x512xi8>) outs(%acc: tensor<512x512xi32>) -> tensor<512x512xi32>
+  return %result: tensor<512x512xi32>
+}
+
+func.func @matmul_1000x4xi8_times_4x512xi8_into_1000x512xi32(%lhs: tensor<1000x4xi8>, %rhs: tensor<4x512xi8>) -> tensor<1000x512xi32> {
+  %init_acc = tensor.empty() : tensor<1000x512xi32>
+  %c0_acc_type = arith.constant 0: i32
+  %acc = linalg.fill ins(%c0_acc_type : i32) outs(%init_acc : tensor<1000x512xi32>) -> tensor<1000x512xi32>
+  %result = linalg.matmul
ins(%lhs, %rhs: tensor<1000x4xi8>, tensor<4x512xi8>) outs(%acc: tensor<1000x512xi32>) -> tensor<1000x512xi32> + return %result: tensor<1000x512xi32> +} + +func.func @matmul_4x1000xi8_times_1000x512xi8_into_4x512xi32(%lhs: tensor<4x1000xi8>, %rhs: tensor<1000x512xi8>) -> tensor<4x512xi32> { + %init_acc = tensor.empty() : tensor<4x512xi32> + %c0_acc_type = arith.constant 0: i32 + %acc = linalg.fill ins(%c0_acc_type : i32) outs(%init_acc : tensor<4x512xi32>) -> tensor<4x512xi32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<4x1000xi8>, tensor<1000x512xi8>) outs(%acc: tensor<4x512xi32>) -> tensor<4x512xi32> + return %result: tensor<4x512xi32> +} + +func.func @matmul_512x1000xi8_times_1000x4xi8_into_512x4xi32(%lhs: tensor<512x1000xi8>, %rhs: tensor<1000x4xi8>) -> tensor<512x4xi32> { + %init_acc = tensor.empty() : tensor<512x4xi32> + %c0_acc_type = arith.constant 0: i32 + %acc = linalg.fill ins(%c0_acc_type : i32) outs(%init_acc : tensor<512x4xi32>) -> tensor<512x4xi32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<512x1000xi8>, tensor<1000x4xi8>) outs(%acc: tensor<512x4xi32>) -> tensor<512x4xi32> + return %result: tensor<512x4xi32> +} + +func.func @matmul_512x128xi8_times_128x500xi8_into_512x500xi32(%lhs: tensor<512x128xi8>, %rhs: tensor<128x500xi8>) -> tensor<512x500xi32> { + %init_acc = tensor.empty() : tensor<512x500xi32> + %c0_acc_type = arith.constant 0: i32 + %acc = linalg.fill ins(%c0_acc_type : i32) outs(%init_acc : tensor<512x500xi32>) -> tensor<512x500xi32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<512x128xi8>, tensor<128x500xi8>) outs(%acc: tensor<512x500xi32>) -> tensor<512x500xi32> + return %result: tensor<512x500xi32> +} + +func.func @matmul_457x330xi8_times_330x512xi8_into_457x512xi32(%lhs: tensor<457x330xi8>, %rhs: tensor<330x512xi8>) -> tensor<457x512xi32> { + %init_acc = tensor.empty() : tensor<457x512xi32> + %c0_acc_type = arith.constant 0: i32 + %acc = linalg.fill ins(%c0_acc_type : i32) outs(%init_acc : tensor<457x512xi32>) -> tensor<457x512xi32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<457x330xi8>, tensor<330x512xi8>) outs(%acc: tensor<457x512xi32>) -> tensor<457x512xi32> + return %result: tensor<457x512xi32> +} + +func.func @matmul_457x330xi8_times_330x514xi8_into_457x514xi32(%lhs: tensor<457x330xi8>, %rhs: tensor<330x514xi8>) -> tensor<457x514xi32> { + %init_acc = tensor.empty() : tensor<457x514xi32> + %c0_acc_type = arith.constant 0: i32 + %acc = linalg.fill ins(%c0_acc_type : i32) outs(%init_acc : tensor<457x514xi32>) -> tensor<457x514xi32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<457x330xi8>, tensor<330x514xi8>) outs(%acc: tensor<457x514xi32>) -> tensor<457x514xi32> + return %result: tensor<457x514xi32> +} + +func.func @matmul_438x330xi8_times_330x514xi8_into_438x514xi32(%lhs: tensor<438x330xi8>, %rhs: tensor<330x514xi8>) -> tensor<438x514xi32> { + %init_acc = tensor.empty() : tensor<438x514xi32> + %c0_acc_type = arith.constant 0: i32 + %acc = linalg.fill ins(%c0_acc_type : i32) outs(%init_acc : tensor<438x514xi32>) -> tensor<438x514xi32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<438x330xi8>, tensor<330x514xi8>) outs(%acc: tensor<438x514xi32>) -> tensor<438x514xi32> + return %result: tensor<438x514xi32> +} + +func.func @matmul_540x332xi8_times_332x516xi8_into_540x516xi32(%lhs: tensor<540x332xi8>, %rhs: tensor<332x516xi8>) -> tensor<540x516xi32> { + %init_acc = tensor.empty() : tensor<540x516xi32> + %c0_acc_type = arith.constant 0: i32 + %acc = linalg.fill ins(%c0_acc_type : i32) outs(%init_acc : tensor<540x516xi32>) -> 
tensor<540x516xi32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<540x332xi8>, tensor<332x516xi8>) outs(%acc: tensor<540x516xi32>) -> tensor<540x516xi32> + return %result: tensor<540x516xi32> +} + +func.func @matmul_654x321xi8_times_321x234xi8_into_654x234xi32(%lhs: tensor<654x321xi8>, %rhs: tensor<321x234xi8>) -> tensor<654x234xi32> { + %init_acc = tensor.empty() : tensor<654x234xi32> + %c0_acc_type = arith.constant 0: i32 + %acc = linalg.fill ins(%c0_acc_type : i32) outs(%init_acc : tensor<654x234xi32>) -> tensor<654x234xi32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<654x321xi8>, tensor<321x234xi8>) outs(%acc: tensor<654x234xi32>) -> tensor<654x234xi32> + return %result: tensor<654x234xi32> +} + +func.func @matmul_457x160xi8_times_160x512xi8_into_457x512xi32(%lhs: tensor<457x160xi8>, %rhs: tensor<160x512xi8>) -> tensor<457x512xi32> { + %init_acc = tensor.empty() : tensor<457x512xi32> + %c0_acc_type = arith.constant 0: i32 + %acc = linalg.fill ins(%c0_acc_type : i32) outs(%init_acc : tensor<457x512xi32>) -> tensor<457x512xi32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<457x160xi8>, tensor<160x512xi8>) outs(%acc: tensor<457x512xi32>) -> tensor<457x512xi32> + return %result: tensor<457x512xi32> +} + +func.func @matmul_512x330xi8_times_330x512xi8_into_512x512xi32(%lhs: tensor<512x330xi8>, %rhs: tensor<330x512xi8>) -> tensor<512x512xi32> { + %init_acc = tensor.empty() : tensor<512x512xi32> + %c0_acc_type = arith.constant 0: i32 + %acc = linalg.fill ins(%c0_acc_type : i32) outs(%init_acc : tensor<512x512xi32>) -> tensor<512x512xi32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<512x330xi8>, tensor<330x512xi8>) outs(%acc: tensor<512x512xi32>) -> tensor<512x512xi32> + return %result: tensor<512x512xi32> +} + +func.func @matmul_accumulate_1x1000xi8_times_1000x1000xi8_into_1x1000xi32(%lhs: tensor<1x1000xi8>, %rhs: tensor<1000x1000xi8>, %acc: tensor<1x1000xi32>) -> tensor<1x1000xi32> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<1x1000xi8>, tensor<1000x1000xi8>) outs(%acc: tensor<1x1000xi32>) -> tensor<1x1000xi32> + return %result: tensor<1x1000xi32> +} + +func.func @matmul_accumulate_1000x1000xi8_times_1000x1xi8_into_1000x1xi32(%lhs: tensor<1000x1000xi8>, %rhs: tensor<1000x1xi8>, %acc: tensor<1000x1xi32>) -> tensor<1000x1xi32> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<1000x1000xi8>, tensor<1000x1xi8>) outs(%acc: tensor<1000x1xi32>) -> tensor<1000x1xi32> + return %result: tensor<1000x1xi32> +} + +func.func @matmul_1000x1000xi8_times_1000x1xi8_into_1000x1xi32(%lhs: tensor<1000x1000xi8>, %rhs: tensor<1000x1xi8>) -> tensor<1000x1xi32> { + %init_acc = tensor.empty() : tensor<1000x1xi32> + %c0_acc_type = arith.constant 0: i32 + %acc = linalg.fill ins(%c0_acc_type : i32) outs(%init_acc : tensor<1000x1xi32>) -> tensor<1000x1xi32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<1000x1000xi8>, tensor<1000x1xi8>) outs(%acc: tensor<1000x1xi32>) -> tensor<1000x1xi32> + return %result: tensor<1000x1xi32> +} + diff --git a/linalg_ops/matmul/generated/i8_into_i32/matmul_i8_into_i32_large_calls.mlir b/linalg_ops/matmul/generated/i8_into_i32/matmul_i8_into_i32_large_calls.mlir new file mode 100644 index 0000000..575772e --- /dev/null +++ b/linalg_ops/matmul/generated/i8_into_i32/matmul_i8_into_i32_large_calls.mlir @@ -0,0 +1,882 @@ +builtin.module @calls attributes { + +} { + +func.func private @matmul_test.generate_random_matrix(%device: !hal.device, %dim0: i64, %dim1: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private 
@matmul_test.check_matmul_results(%device: !hal.device, %m: i64, %k: i64, %n: i64, %transpose_rhs: i32, %lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view)
+
+func.func private @module.matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_accumulate_512x128xi8_times_128x512xi8_into_512x512xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_512x128xi8_times_128x512xi8_into_512x512xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_1000x4xi8_times_4x512xi8_into_1000x512xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_4x1000xi8_times_1000x512xi8_into_4x512xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_512x1000xi8_times_1000x4xi8_into_512x4xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_512x128xi8_times_128x500xi8_into_512x500xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_457x330xi8_times_330x512xi8_into_457x512xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_457x330xi8_times_330x514xi8_into_457x514xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_438x330xi8_times_330x514xi8_into_438x514xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_540x332xi8_times_332x516xi8_into_540x516xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_654x321xi8_times_321x234xi8_into_654x234xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_457x160xi8_times_160x512xi8_into_457x512xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_512x330xi8_times_330x512xi8_into_512x512xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_accumulate_1x1000xi8_times_1000x1000xi8_into_1x1000xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_accumulate_1000x1000xi8_times_1000x1xi8_into_1000x1xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.matmul_1000x1000xi8_times_1000x1xi8_into_1000x1xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view
+
+func.func @matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_512_128_512_acc_0() attributes {
+  iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"}
+} {
+  %device_index = arith.constant 0 : index
+  %device = hal.devices.get %device_index : !hal.device
+  %lhs_dim0 = arith.constant 512 : i64
+  %lhs_dim1 = arith.constant 128 : i64
+  %lhs_element_type = hal.element_type<i8> : i32
+  %lhs_seed = arith.constant 2 : i32
+  %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) :
(!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 128 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 3 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 512 : i64 + %acc_dim1 = arith.constant 512 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 512 : i64 + %acc_copy_dim1 = arith.constant 512 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_512x128xi8_times_128x512xi8_into_512x512xi32_512_128_512_acc_1() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 5 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 128 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 6 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 512 : i64 + %acc_dim1 = arith.constant 512 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 512 : i64 + %acc_copy_dim1 = arith.constant 512 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_512x128xi8_times_128x512xi8_into_512x512xi32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 512 
: i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_512_128_512_2() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 8 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 128 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 9 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_512x128xi8_times_128x512xi8_into_512x512xi32_512_128_512_3() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 10 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 128 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 11 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_512x128xi8_times_128x512xi8_into_512x512xi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_1000_4_512_4() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x4x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + 
%lhs_dim1 = arith.constant 4 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 12 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 13 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_1000x4xi8_times_4x512xi8_into_1000x512xi32_1000_4_512_5() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x4x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 4 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 14 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 15 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_1000x4xi8_times_4x512xi8_into_1000x512xi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_4_1000_512_6() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 4x1000x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 4 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 16 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 17 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> 
!hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_4x1000xi8_times_1000x512xi8_into_4x512xi32_4_1000_512_7() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 4x1000x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 4 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 18 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 19 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_4x1000xi8_times_1000x512xi8_into_4x512xi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_512_1000_4_8() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x1000x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 20 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 21 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func 
@matmul_512x1000xi8_times_1000x4xi8_into_512x4xi32_512_1000_4_9() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x1000x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 22 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 23 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_512x1000xi8_times_1000x4xi8_into_512x4xi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_512_128_500_10() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x500"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 24 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 128 : i64 + %rhs_dim1 = arith.constant 500 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 25 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 500 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_512x128xi8_times_128x500xi8_into_512x500xi32_512_128_500_11() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x500"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 26 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + 
%rhs_dim0 = arith.constant 128 : i64 + %rhs_dim1 = arith.constant 500 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 27 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_512x128xi8_times_128x500xi8_into_512x500xi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 500 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_457_330_512_12() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x330x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 28 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 29 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_457x330xi8_times_330x512xi8_into_457x512xi32_457_330_512_13() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x330x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 30 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 31 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_457x330xi8_times_330x512xi8_into_457x512xi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 330 : i64 + %n = 
arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_457_330_514_14() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x330x514"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 32 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 514 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 33 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 514 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_457x330xi8_times_330x514xi8_into_457x514xi32_457_330_514_15() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x330x514"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 34 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 514 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 35 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_457x330xi8_times_330x514xi8_into_457x514xi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 514 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_438_330_514_16() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 438x330x514"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = 
arith.constant 438 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 36 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 514 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 37 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 438 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 514 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_438x330xi8_times_330x514xi8_into_438x514xi32_438_330_514_17() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 438x330x514"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 438 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 38 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 514 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 39 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_438x330xi8_times_330x514xi8_into_438x514xi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 438 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 514 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_540_332_516_18() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 540x332x516"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 540 : i64 + %lhs_dim1 = arith.constant 332 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 40 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 332 : i64 + %rhs_dim1 = arith.constant 516 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 41 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : 
(!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 540 : i64 + %k = arith.constant 332 : i64 + %n = arith.constant 516 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_540x332xi8_times_332x516xi8_into_540x516xi32_540_332_516_19() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 540x332x516"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 540 : i64 + %lhs_dim1 = arith.constant 332 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 42 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 332 : i64 + %rhs_dim1 = arith.constant 516 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 43 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_540x332xi8_times_332x516xi8_into_540x516xi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 540 : i64 + %k = arith.constant 332 : i64 + %n = arith.constant 516 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_654_321_234_20() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 654x321x234"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 654 : i64 + %lhs_dim1 = arith.constant 321 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 44 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 321 : i64 + %rhs_dim1 = arith.constant 234 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 45 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 654 : i64 + %k = arith.constant 321 : i64 + %n = arith.constant 234 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, 
!hal.buffer_view) -> () + return +} + +func.func @matmul_654x321xi8_times_321x234xi8_into_654x234xi32_654_321_234_21() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 654x321x234"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 654 : i64 + %lhs_dim1 = arith.constant 321 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 46 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 321 : i64 + %rhs_dim1 = arith.constant 234 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 47 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_654x321xi8_times_321x234xi8_into_654x234xi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 654 : i64 + %k = arith.constant 321 : i64 + %n = arith.constant 234 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_457_160_512_22() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x160x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 160 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 48 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 160 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 49 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 160 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_457x160xi8_times_160x512xi8_into_457x512xi32_457_160_512_23() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x160x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 160 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 50 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : 
(!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 160 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 51 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_457x160xi8_times_160x512xi8_into_457x512xi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 160 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_512_330_512_24() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x330x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 52 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 53 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_512x330xi8_times_330x512xi8_into_512x512xi32_512_330_512_25() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x330x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 54 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 330 : i64 + %rhs_dim1 = arith.constant 512 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 55 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_512x330xi8_times_330x512xi8_into_512x512xi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = 
arith.constant 512 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_1_1000_1000_acc_26() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1000x1000"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 56 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 57 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1000 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 58 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1000 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 58 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1000 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_1x1000xi8_times_1000x1000xi8_into_1x1000xi32_1_1000_1000_acc_27() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1000x1000"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 59 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 60 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 
= arith.constant 1000 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 61 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1000 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 61 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_1x1000xi8_times_1000x1000xi8_into_1x1000xi32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1000 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_1000_1000_1_acc_28() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x1000x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 62 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 63 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1000 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 64 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1000 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 64 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_1000x1000xi8_times_1000x1xi8_into_1000x1xi32_1000_1000_1_acc_29() attributes { + iree.reflection = {description = 
"Matmul shape (MxKxN): 1000x1000x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 65 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 66 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1000 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 67 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1000 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 67 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_1000x1000xi8_times_1000x1xi8_into_1000x1xi32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_1000_1000_1_30() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x1000x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 68 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 69 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func 
@matmul_1000x1000xi8_times_1000x1xi8_into_1000x1xi32_1000_1000_1_31() attributes {
+  iree.reflection = {description = "Matmul shape (MxKxN): 1000x1000x1"}
+} {
+  %device_index = arith.constant 0 : index
+  %device = hal.devices.get %device_index : !hal.device
+  %lhs_dim0 = arith.constant 1000 : i64
+  %lhs_dim1 = arith.constant 1000 : i64
+  %lhs_element_type = hal.element_type<i8> : i32
+  %lhs_seed = arith.constant 70 : i32
+  %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+  %rhs_dim0 = arith.constant 1000 : i64
+  %rhs_dim1 = arith.constant 1 : i64
+  %rhs_element_type = hal.element_type<i8> : i32
+  %rhs_seed = arith.constant 71 : i32
+  %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+  %acc = util.null : !hal.buffer_view
+  %result = call @module.matmul_1000x1000xi8_times_1000x1xi8_into_1000x1xi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view
+  %m = arith.constant 1000 : i64
+  %k = arith.constant 1000 : i64
+  %n = arith.constant 1 : i64
+  %transpose_rhs = arith.constant 0 : i32
+  call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()
+  return
+}
+
+
+}
diff --git a/linalg_ops/matmul/generated/i8_into_i32/matmul_i8_into_i32_small.mlir b/linalg_ops/matmul/generated/i8_into_i32/matmul_i8_into_i32_small.mlir
new file mode 100644
index 0000000..b89d848
--- /dev/null
+++ b/linalg_ops/matmul/generated/i8_into_i32/matmul_i8_into_i32_small.mlir
@@ -0,0 +1,99 @@
+func.func @matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs: tensor<?x?xi8>, %rhs: tensor<?x?xi8>, %acc: tensor<?x?xi32>) -> tensor<?x?xi32> {
+  %result = linalg.matmul ins(%lhs, %rhs: tensor<?x?xi8>, tensor<?x?xi8>) outs(%acc: tensor<?x?xi32>) -> tensor<?x?xi32>
+  return %result: tensor<?x?xi32>
+}
+
+func.func @matmul_accumulate_1x1xi8_times_1x1xi8_into_1x1xi32(%lhs: tensor<1x1xi8>, %rhs: tensor<1x1xi8>, %acc: tensor<1x1xi32>) -> tensor<1x1xi32> {
+  %result = linalg.matmul ins(%lhs, %rhs: tensor<1x1xi8>, tensor<1x1xi8>) outs(%acc: tensor<1x1xi32>) -> tensor<1x1xi32>
+  return %result: tensor<1x1xi32>
+}
+
+func.func @matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs: tensor<?x?xi8>, %rhs: tensor<?x?xi8>) -> tensor<?x?xi32> {
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+  %acc_dim0 = tensor.dim %lhs, %c0 : tensor<?x?xi8>
+  %acc_dim1 = tensor.dim %rhs, %c1 : tensor<?x?xi8>
+  %init_acc = tensor.empty(%acc_dim0, %acc_dim1) : tensor<?x?xi32>
+  %c0_acc_type = arith.constant 0: i32
+  %acc = linalg.fill ins(%c0_acc_type : i32) outs(%init_acc : tensor<?x?xi32>) -> tensor<?x?xi32>
+  %result = linalg.matmul ins(%lhs, %rhs: tensor<?x?xi8>, tensor<?x?xi8>) outs(%acc: tensor<?x?xi32>) -> tensor<?x?xi32>
+  return %result: tensor<?x?xi32>
+}
+
+func.func @matmul_1x1xi8_times_1x1xi8_into_1x1xi32(%lhs: tensor<1x1xi8>, %rhs: tensor<1x1xi8>) -> tensor<1x1xi32> {
+  %init_acc = tensor.empty() : tensor<1x1xi32>
+  %c0_acc_type = arith.constant 0: i32
+  %acc = linalg.fill ins(%c0_acc_type : i32) outs(%init_acc : tensor<1x1xi32>) -> tensor<1x1xi32>
+  %result = linalg.matmul ins(%lhs, %rhs: tensor<1x1xi8>, tensor<1x1xi8>) outs(%acc: tensor<1x1xi32>) -> tensor<1x1xi32>
+  return %result: tensor<1x1xi32>
+}
+
+func.func @matmul_accumulate_2x2xi8_times_2x2xi8_into_2x2xi32(%lhs: tensor<2x2xi8>, %rhs: tensor<2x2xi8>, %acc: tensor<2x2xi32>) -> tensor<2x2xi32> {
+  %result = linalg.matmul ins(%lhs, %rhs:
tensor<2x2xi8>, tensor<2x2xi8>) outs(%acc: tensor<2x2xi32>) -> tensor<2x2xi32> + return %result: tensor<2x2xi32> +} + +func.func @matmul_accumulate_4x4xi8_times_4x4xi8_into_4x4xi32(%lhs: tensor<4x4xi8>, %rhs: tensor<4x4xi8>, %acc: tensor<4x4xi32>) -> tensor<4x4xi32> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<4x4xi8>, tensor<4x4xi8>) outs(%acc: tensor<4x4xi32>) -> tensor<4x4xi32> + return %result: tensor<4x4xi32> +} + +func.func @matmul_accumulate_8x8xi8_times_8x8xi8_into_8x8xi32(%lhs: tensor<8x8xi8>, %rhs: tensor<8x8xi8>, %acc: tensor<8x8xi32>) -> tensor<8x8xi32> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<8x8xi8>, tensor<8x8xi8>) outs(%acc: tensor<8x8xi32>) -> tensor<8x8xi32> + return %result: tensor<8x8xi32> +} + +func.func @matmul_accumulate_9x9xi8_times_9x9xi8_into_9x9xi32(%lhs: tensor<9x9xi8>, %rhs: tensor<9x9xi8>, %acc: tensor<9x9xi32>) -> tensor<9x9xi32> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<9x9xi8>, tensor<9x9xi8>) outs(%acc: tensor<9x9xi32>) -> tensor<9x9xi32> + return %result: tensor<9x9xi32> +} + +func.func @matmul_accumulate_6x13xi8_times_13x3xi8_into_6x3xi32(%lhs: tensor<6x13xi8>, %rhs: tensor<13x3xi8>, %acc: tensor<6x3xi32>) -> tensor<6x3xi32> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<6x13xi8>, tensor<13x3xi8>) outs(%acc: tensor<6x3xi32>) -> tensor<6x3xi32> + return %result: tensor<6x3xi32> +} + +func.func @matmul_15x37xi8_times_37x7xi8_into_15x7xi32(%lhs: tensor<15x37xi8>, %rhs: tensor<37x7xi8>) -> tensor<15x7xi32> { + %init_acc = tensor.empty() : tensor<15x7xi32> + %c0_acc_type = arith.constant 0: i32 + %acc = linalg.fill ins(%c0_acc_type : i32) outs(%init_acc : tensor<15x7xi32>) -> tensor<15x7xi32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<15x37xi8>, tensor<37x7xi8>) outs(%acc: tensor<15x7xi32>) -> tensor<15x7xi32> + return %result: tensor<15x7xi32> +} + +func.func @matmul_accumulate_81x19xi8_times_19x41xi8_into_81x41xi32(%lhs: tensor<81x19xi8>, %rhs: tensor<19x41xi8>, %acc: tensor<81x41xi32>) -> tensor<81x41xi32> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<81x19xi8>, tensor<19x41xi8>) outs(%acc: tensor<81x41xi32>) -> tensor<81x41xi32> + return %result: tensor<81x41xi32> +} + +func.func @matmul_accumulate_1x10xi8_times_10x10xi8_into_1x10xi32(%lhs: tensor<1x10xi8>, %rhs: tensor<10x10xi8>, %acc: tensor<1x10xi32>) -> tensor<1x10xi32> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<1x10xi8>, tensor<10x10xi8>) outs(%acc: tensor<1x10xi32>) -> tensor<1x10xi32> + return %result: tensor<1x10xi32> +} + +func.func @matmul_1x10xi8_times_10x10xi8_into_1x10xi32(%lhs: tensor<1x10xi8>, %rhs: tensor<10x10xi8>) -> tensor<1x10xi32> { + %init_acc = tensor.empty() : tensor<1x10xi32> + %c0_acc_type = arith.constant 0: i32 + %acc = linalg.fill ins(%c0_acc_type : i32) outs(%init_acc : tensor<1x10xi32>) -> tensor<1x10xi32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<1x10xi8>, tensor<10x10xi8>) outs(%acc: tensor<1x10xi32>) -> tensor<1x10xi32> + return %result: tensor<1x10xi32> +} + +func.func @matmul_accumulate_10x1xi8_times_1x10xi8_into_10x10xi32(%lhs: tensor<10x1xi8>, %rhs: tensor<1x10xi8>, %acc: tensor<10x10xi32>) -> tensor<10x10xi32> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<10x1xi8>, tensor<1x10xi8>) outs(%acc: tensor<10x10xi32>) -> tensor<10x10xi32> + return %result: tensor<10x10xi32> +} + +func.func @matmul_accumulate_10x10xi8_times_10x1xi8_into_10x1xi32(%lhs: tensor<10x10xi8>, %rhs: tensor<10x1xi8>, %acc: tensor<10x1xi32>) -> tensor<10x1xi32> { + %result = linalg.matmul ins(%lhs, %rhs: tensor<10x10xi8>, tensor<10x1xi8>) 
outs(%acc: tensor<10x1xi32>) -> tensor<10x1xi32> + return %result: tensor<10x1xi32> +} + +func.func @matmul_10x10xi8_times_10x1xi8_into_10x1xi32(%lhs: tensor<10x10xi8>, %rhs: tensor<10x1xi8>) -> tensor<10x1xi32> { + %init_acc = tensor.empty() : tensor<10x1xi32> + %c0_acc_type = arith.constant 0: i32 + %acc = linalg.fill ins(%c0_acc_type : i32) outs(%init_acc : tensor<10x1xi32>) -> tensor<10x1xi32> + %result = linalg.matmul ins(%lhs, %rhs: tensor<10x10xi8>, tensor<10x1xi8>) outs(%acc: tensor<10x1xi32>) -> tensor<10x1xi32> + return %result: tensor<10x1xi32> +} + diff --git a/linalg_ops/matmul/generated/i8_into_i32/matmul_i8_into_i32_small_calls.mlir b/linalg_ops/matmul/generated/i8_into_i32/matmul_i8_into_i32_small_calls.mlir new file mode 100644 index 0000000..3b93cbe --- /dev/null +++ b/linalg_ops/matmul/generated/i8_into_i32/matmul_i8_into_i32_small_calls.mlir @@ -0,0 +1,906 @@ +builtin.module @calls attributes { + +} { + +func.func private @matmul_test.generate_random_matrix(%device: !hal.device, %dim0: i64, %dim1: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @matmul_test.check_matmul_results(%device: !hal.device, %m: i64, %k: i64, %n: i64, %transpose_rhs: i32, %lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) + +func.func private @module.matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_1x1xi8_times_1x1xi8_into_1x1xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_1x1xi8_times_1x1xi8_into_1x1xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_2x2xi8_times_2x2xi8_into_2x2xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_4x4xi8_times_4x4xi8_into_4x4xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_8x8xi8_times_8x8xi8_into_8x8xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_9x9xi8_times_9x9xi8_into_9x9xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_6x13xi8_times_13x3xi8_into_6x3xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_15x37xi8_times_37x7xi8_into_15x7xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_81x19xi8_times_19x41xi8_into_81x41xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_1x10xi8_times_10x10xi8_into_1x10xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_1x10xi8_times_10x10xi8_into_1x10xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_10x1xi8_times_1x10xi8_into_10x10xi32(%lhs: !hal.buffer_view, %rhs: 
!hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_10x10xi8_times_10x1xi8_into_10x1xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_10x10xi8_times_10x1xi8_into_10x1xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view + +func.func @matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_1_1_1_acc_0() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 2 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 3 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_1x1xi8_times_1x1xi8_into_1x1xi32_1_1_1_acc_1() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 5 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 6 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = 
hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_1x1xi8_times_1x1xi8_into_1x1xi32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_1_1_1_2() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 8 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 9 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_1x1xi8_times_1x1xi8_into_1x1xi32_1_1_1_3() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 10 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 11 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call 
@module.matmul_1x1xi8_times_1x1xi8_into_1x1xi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_2_2_2_acc_4() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 2x2x2"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 2 : i64 + %lhs_dim1 = arith.constant 2 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 12 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 2 : i64 + %rhs_dim1 = arith.constant 2 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 13 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 2 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 14 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 2 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 14 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 2 : i64 + %k = arith.constant 2 : i64 + %n = arith.constant 2 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_2x2xi8_times_2x2xi8_into_2x2xi32_2_2_2_acc_5() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 2x2x2"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 2 : i64 + %lhs_dim1 = arith.constant 2 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 15 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 2 : i64 + %rhs_dim1 = arith.constant 2 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 16 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> 
!hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 2 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 17 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 2 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 17 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_2x2xi8_times_2x2xi8_into_2x2xi32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 2 : i64 + %k = arith.constant 2 : i64 + %n = arith.constant 2 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_4_4_4_acc_6() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 4x4x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 4 : i64 + %lhs_dim1 = arith.constant 4 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 18 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 19 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 4 : i64 + %acc_dim1 = arith.constant 4 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 20 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 4 : i64 + %acc_copy_dim1 = arith.constant 4 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 20 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_4x4xi8_times_4x4xi8_into_4x4xi32_4_4_4_acc_7() attributes { + iree.reflection = {description = "Matmul 
shape (MxKxN): 4x4x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 4 : i64 + %lhs_dim1 = arith.constant 4 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 21 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 22 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 4 : i64 + %acc_dim1 = arith.constant 4 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 23 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 4 : i64 + %acc_copy_dim1 = arith.constant 4 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 23 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_4x4xi8_times_4x4xi8_into_4x4xi32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_8_8_8_acc_8() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 8x8x8"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 8 : i64 + %lhs_dim1 = arith.constant 8 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 24 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 8 : i64 + %rhs_dim1 = arith.constant 8 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 25 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 8 : i64 + %acc_dim1 = arith.constant 8 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 26 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 8 : i64 + %acc_copy_dim1 = arith.constant 8 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 26 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, 
%acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 8 : i64 + %k = arith.constant 8 : i64 + %n = arith.constant 8 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_8x8xi8_times_8x8xi8_into_8x8xi32_8_8_8_acc_9() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 8x8x8"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 8 : i64 + %lhs_dim1 = arith.constant 8 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 27 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 8 : i64 + %rhs_dim1 = arith.constant 8 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 28 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 8 : i64 + %acc_dim1 = arith.constant 8 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 29 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 8 : i64 + %acc_copy_dim1 = arith.constant 8 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 29 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_8x8xi8_times_8x8xi8_into_8x8xi32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 8 : i64 + %k = arith.constant 8 : i64 + %n = arith.constant 8 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_9_9_9_acc_10() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 9x9x9"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 9 : i64 + %lhs_dim1 = arith.constant 9 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 30 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 9 : i64 + %rhs_dim1 = arith.constant 9 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 31 : i32 + %rhs = call 
@matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 9 : i64 + %acc_dim1 = arith.constant 9 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 32 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 9 : i64 + %acc_copy_dim1 = arith.constant 9 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 32 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 9 : i64 + %k = arith.constant 9 : i64 + %n = arith.constant 9 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_9x9xi8_times_9x9xi8_into_9x9xi32_9_9_9_acc_11() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 9x9x9"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 9 : i64 + %lhs_dim1 = arith.constant 9 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 33 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 9 : i64 + %rhs_dim1 = arith.constant 9 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 34 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 9 : i64 + %acc_dim1 = arith.constant 9 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 35 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 9 : i64 + %acc_copy_dim1 = arith.constant 9 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 35 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_9x9xi8_times_9x9xi8_into_9x9xi32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 9 : i64 + %k = arith.constant 9 : i64 + %n = arith.constant 9 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func 
@matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_6_13_3_acc_12() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 6x13x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 6 : i64 + %lhs_dim1 = arith.constant 13 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 36 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 13 : i64 + %rhs_dim1 = arith.constant 3 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 37 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 6 : i64 + %acc_dim1 = arith.constant 3 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 38 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 6 : i64 + %acc_copy_dim1 = arith.constant 3 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 38 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 6 : i64 + %k = arith.constant 13 : i64 + %n = arith.constant 3 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_6x13xi8_times_13x3xi8_into_6x3xi32_6_13_3_acc_13() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 6x13x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 6 : i64 + %lhs_dim1 = arith.constant 13 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 39 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 13 : i64 + %rhs_dim1 = arith.constant 3 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 40 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 6 : i64 + %acc_dim1 = arith.constant 3 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 41 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 6 : i64 + %acc_copy_dim1 = arith.constant 3 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = 
arith.constant 41 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_6x13xi8_times_13x3xi8_into_6x3xi32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 6 : i64 + %k = arith.constant 13 : i64 + %n = arith.constant 3 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_15_37_7_14() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 15x37x7"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 15 : i64 + %lhs_dim1 = arith.constant 37 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 42 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 37 : i64 + %rhs_dim1 = arith.constant 7 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 43 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 15 : i64 + %k = arith.constant 37 : i64 + %n = arith.constant 7 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_15x37xi8_times_37x7xi8_into_15x7xi32_15_37_7_15() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 15x37x7"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 15 : i64 + %lhs_dim1 = arith.constant 37 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 44 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 37 : i64 + %rhs_dim1 = arith.constant 7 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 45 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_15x37xi8_times_37x7xi8_into_15x7xi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 15 : i64 + %k = arith.constant 37 : i64 + %n = arith.constant 7 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : 
(!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_81_19_41_acc_16() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 81x19x41"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 81 : i64 + %lhs_dim1 = arith.constant 19 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 46 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 19 : i64 + %rhs_dim1 = arith.constant 41 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 47 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 81 : i64 + %acc_dim1 = arith.constant 41 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 48 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 81 : i64 + %acc_copy_dim1 = arith.constant 41 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 48 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 81 : i64 + %k = arith.constant 19 : i64 + %n = arith.constant 41 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_81x19xi8_times_19x41xi8_into_81x41xi32_81_19_41_acc_17() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 81x19x41"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 81 : i64 + %lhs_dim1 = arith.constant 19 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 49 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 19 : i64 + %rhs_dim1 = arith.constant 41 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 50 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 81 : i64 + %acc_dim1 = arith.constant 41 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 51 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> 
!hal.buffer_view + %acc_copy_dim0 = arith.constant 81 : i64 + %acc_copy_dim1 = arith.constant 41 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 51 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_81x19xi8_times_19x41xi8_into_81x41xi32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 81 : i64 + %k = arith.constant 19 : i64 + %n = arith.constant 41 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_1_10_10_acc_18() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 52 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 53 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 10 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 54 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 10 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 54 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_1x10xi8_times_10x10xi8_into_1x10xi32_1_10_10_acc_19() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 55 : i32 + %lhs = 
call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 56 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 10 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 57 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 10 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 57 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_1x10xi8_times_10x10xi8_into_1x10xi32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_1_10_10_20() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 58 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 59 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_1x10xi8_times_10x10xi8_into_1x10xi32_1_10_10_21() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = 
hal.element_type : i32 + %lhs_seed = arith.constant 60 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 61 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_1x10xi8_times_10x10xi8_into_1x10xi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_10_1_10_acc_22() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x1x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 62 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 63 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 10 : i64 + %acc_dim1 = arith.constant 10 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 64 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 10 : i64 + %acc_copy_dim1 = arith.constant 10 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 64 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_10x1xi8_times_1x10xi8_into_10x10xi32_10_1_10_acc_23() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x1x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + 
%lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 65 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 66 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 10 : i64 + %acc_dim1 = arith.constant 10 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 67 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 10 : i64 + %acc_copy_dim1 = arith.constant 10 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 67 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_10x1xi8_times_1x10xi8_into_10x10xi32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_10_10_1_acc_24() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 68 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 69 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 10 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 70 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 10 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 70 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call 
@module.matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_10x10xi8_times_10x1xi8_into_10x1xi32_10_10_1_acc_25() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 71 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 72 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 10 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 73 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 10 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 73 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_10x10xi8_times_10x1xi8_into_10x1xi32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 10 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 0 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_10_10_1_26() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 74 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 75 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, 
%rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+ %acc = util.null : !hal.buffer_view
+ %result = call @module.matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view
+ %m = arith.constant 10 : i64
+ %k = arith.constant 10 : i64
+ %n = arith.constant 1 : i64
+ %transpose_rhs = arith.constant 0 : i32
+ call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()
+ return
+}
+
+func.func @matmul_10x10xi8_times_10x1xi8_into_10x1xi32_10_10_1_27() attributes {
+ iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"}
+} {
+ %device_index = arith.constant 0 : index
+ %device = hal.devices.get %device_index : !hal.device
+ %lhs_dim0 = arith.constant 10 : i64
+ %lhs_dim1 = arith.constant 10 : i64
+ %lhs_element_type = hal.element_type<i8> : i32
+ %lhs_seed = arith.constant 76 : i32
+ %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+ %rhs_dim0 = arith.constant 10 : i64
+ %rhs_dim1 = arith.constant 1 : i64
+ %rhs_element_type = hal.element_type<i8> : i32
+ %rhs_seed = arith.constant 77 : i32
+ %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+ %acc = util.null : !hal.buffer_view
+ %result = call @module.matmul_10x10xi8_times_10x1xi8_into_10x1xi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view
+ %m = arith.constant 10 : i64
+ %k = arith.constant 10 : i64
+ %n = arith.constant 1 : i64
+ %transpose_rhs = arith.constant 0 : i32
+ call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()
+ return
+}
+
+
+}
diff --git a/linalg_ops/matmul/generated/i8_into_i32/matmul_transpose_b_i8_into_i32_large.mlir b/linalg_ops/matmul/generated/i8_into_i32/matmul_transpose_b_i8_into_i32_large.mlir
new file mode 100644
index 0000000..1879a13
--- /dev/null
+++ b/linalg_ops/matmul/generated/i8_into_i32/matmul_transpose_b_i8_into_i32_large.mlir
@@ -0,0 +1,136 @@
+func.func @matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs: tensor<?x?xi8>, %rhs: tensor<?x?xi8>, %acc: tensor<?x?xi32>) -> tensor<?x?xi32> {
+ %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<?x?xi8>, tensor<?x?xi8>) outs(%acc: tensor<?x?xi32>) -> tensor<?x?xi32>
+ return %result: tensor<?x?xi32>
+}
+
+func.func @matmul_accumulate_512x128xi8_times_512x128xi8_into_512x512xi32(%lhs: tensor<512x128xi8>, %rhs: tensor<512x128xi8>, %acc: tensor<512x512xi32>) -> tensor<512x512xi32> {
+ %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<512x128xi8>, tensor<512x128xi8>) outs(%acc: tensor<512x512xi32>) -> tensor<512x512xi32>
+ return %result: tensor<512x512xi32>
+}
+
+func.func @matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs: tensor<?x?xi8>, %rhs: tensor<?x?xi8>) -> tensor<?x?xi32> {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %acc_dim0 = tensor.dim %lhs, %c0 : tensor<?x?xi8>
+ %acc_dim1 = tensor.dim %rhs, %c1 : tensor<?x?xi8>
+ %init_acc = tensor.empty(%acc_dim0, %acc_dim1) : tensor<?x?xi32>
+ %c0_acc_type = arith.constant 0: i32
+ %acc = linalg.fill ins(%c0_acc_type : i32) outs(%init_acc : tensor<?x?xi32>) -> tensor<?x?xi32>
+ %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<?x?xi8>, tensor<?x?xi8>)
outs(%acc: tensor<?x?xi32>) -> tensor<?x?xi32> + return %result: tensor<?x?xi32> +} + +func.func @matmul_512x128xi8_times_512x128xi8_into_512x512xi32(%lhs: tensor<512x128xi8>, %rhs: tensor<512x128xi8>) -> tensor<512x512xi32> { + %init_acc = tensor.empty() : tensor<512x512xi32> + %c0_acc_type = arith.constant 0: i32 + %acc = linalg.fill ins(%c0_acc_type : i32) outs(%init_acc : tensor<512x512xi32>) -> tensor<512x512xi32> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<512x128xi8>, tensor<512x128xi8>) outs(%acc: tensor<512x512xi32>) -> tensor<512x512xi32> + return %result: tensor<512x512xi32> +} + +func.func @matmul_1000x4xi8_times_512x4xi8_into_1000x512xi32(%lhs: tensor<1000x4xi8>, %rhs: tensor<512x4xi8>) -> tensor<1000x512xi32> { + %init_acc = tensor.empty() : tensor<1000x512xi32> + %c0_acc_type = arith.constant 0: i32 + %acc = linalg.fill ins(%c0_acc_type : i32) outs(%init_acc : tensor<1000x512xi32>) -> tensor<1000x512xi32> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<1000x4xi8>, tensor<512x4xi8>) outs(%acc: tensor<1000x512xi32>) -> tensor<1000x512xi32> + return %result: tensor<1000x512xi32> +} + +func.func @matmul_4x1000xi8_times_512x1000xi8_into_4x512xi32(%lhs: tensor<4x1000xi8>, %rhs: tensor<512x1000xi8>) -> tensor<4x512xi32> { + %init_acc = tensor.empty() : tensor<4x512xi32> + %c0_acc_type = arith.constant 0: i32 + %acc = linalg.fill ins(%c0_acc_type : i32) outs(%init_acc : tensor<4x512xi32>) -> tensor<4x512xi32> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<4x1000xi8>, tensor<512x1000xi8>) outs(%acc: tensor<4x512xi32>) -> tensor<4x512xi32> + return %result: tensor<4x512xi32> +} + +func.func @matmul_512x1000xi8_times_4x1000xi8_into_512x4xi32(%lhs: tensor<512x1000xi8>, %rhs: tensor<4x1000xi8>) -> tensor<512x4xi32> { + %init_acc = tensor.empty() : tensor<512x4xi32> + %c0_acc_type = arith.constant 0: i32 + %acc = linalg.fill ins(%c0_acc_type : i32) outs(%init_acc : tensor<512x4xi32>) -> tensor<512x4xi32> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<512x1000xi8>, tensor<4x1000xi8>) outs(%acc: tensor<512x4xi32>) -> tensor<512x4xi32> + return %result: tensor<512x4xi32> +} + +func.func @matmul_512x128xi8_times_500x128xi8_into_512x500xi32(%lhs: tensor<512x128xi8>, %rhs: tensor<500x128xi8>) -> tensor<512x500xi32> { + %init_acc = tensor.empty() : tensor<512x500xi32> + %c0_acc_type = arith.constant 0: i32 + %acc = linalg.fill ins(%c0_acc_type : i32) outs(%init_acc : tensor<512x500xi32>) -> tensor<512x500xi32> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<512x128xi8>, tensor<500x128xi8>) outs(%acc: tensor<512x500xi32>) -> tensor<512x500xi32> + return %result: tensor<512x500xi32> +} + +func.func @matmul_457x330xi8_times_512x330xi8_into_457x512xi32(%lhs: tensor<457x330xi8>, %rhs: tensor<512x330xi8>) -> tensor<457x512xi32> { + %init_acc = tensor.empty() : tensor<457x512xi32> + %c0_acc_type = arith.constant 0: i32 + %acc = linalg.fill ins(%c0_acc_type : i32) outs(%init_acc : tensor<457x512xi32>) -> tensor<457x512xi32> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<457x330xi8>, tensor<512x330xi8>) outs(%acc: tensor<457x512xi32>) -> tensor<457x512xi32> + return %result: tensor<457x512xi32> +} + +func.func @matmul_457x330xi8_times_514x330xi8_into_457x514xi32(%lhs: tensor<457x330xi8>, %rhs: tensor<514x330xi8>) -> tensor<457x514xi32> { + %init_acc = tensor.empty() : tensor<457x514xi32> + %c0_acc_type = arith.constant 0: i32 + %acc = linalg.fill ins(%c0_acc_type : i32) outs(%init_acc : tensor<457x514xi32>) -> tensor<457x514xi32> + %result = 
linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<457x330xi8>, tensor<514x330xi8>) outs(%acc: tensor<457x514xi32>) -> tensor<457x514xi32> + return %result: tensor<457x514xi32> +} + +func.func @matmul_438x330xi8_times_514x330xi8_into_438x514xi32(%lhs: tensor<438x330xi8>, %rhs: tensor<514x330xi8>) -> tensor<438x514xi32> { + %init_acc = tensor.empty() : tensor<438x514xi32> + %c0_acc_type = arith.constant 0: i32 + %acc = linalg.fill ins(%c0_acc_type : i32) outs(%init_acc : tensor<438x514xi32>) -> tensor<438x514xi32> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<438x330xi8>, tensor<514x330xi8>) outs(%acc: tensor<438x514xi32>) -> tensor<438x514xi32> + return %result: tensor<438x514xi32> +} + +func.func @matmul_540x332xi8_times_516x332xi8_into_540x516xi32(%lhs: tensor<540x332xi8>, %rhs: tensor<516x332xi8>) -> tensor<540x516xi32> { + %init_acc = tensor.empty() : tensor<540x516xi32> + %c0_acc_type = arith.constant 0: i32 + %acc = linalg.fill ins(%c0_acc_type : i32) outs(%init_acc : tensor<540x516xi32>) -> tensor<540x516xi32> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<540x332xi8>, tensor<516x332xi8>) outs(%acc: tensor<540x516xi32>) -> tensor<540x516xi32> + return %result: tensor<540x516xi32> +} + +func.func @matmul_654x321xi8_times_234x321xi8_into_654x234xi32(%lhs: tensor<654x321xi8>, %rhs: tensor<234x321xi8>) -> tensor<654x234xi32> { + %init_acc = tensor.empty() : tensor<654x234xi32> + %c0_acc_type = arith.constant 0: i32 + %acc = linalg.fill ins(%c0_acc_type : i32) outs(%init_acc : tensor<654x234xi32>) -> tensor<654x234xi32> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<654x321xi8>, tensor<234x321xi8>) outs(%acc: tensor<654x234xi32>) -> tensor<654x234xi32> + return %result: tensor<654x234xi32> +} + +func.func @matmul_457x160xi8_times_512x160xi8_into_457x512xi32(%lhs: tensor<457x160xi8>, %rhs: tensor<512x160xi8>) -> tensor<457x512xi32> { + %init_acc = tensor.empty() : tensor<457x512xi32> + %c0_acc_type = arith.constant 0: i32 + %acc = linalg.fill ins(%c0_acc_type : i32) outs(%init_acc : tensor<457x512xi32>) -> tensor<457x512xi32> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<457x160xi8>, tensor<512x160xi8>) outs(%acc: tensor<457x512xi32>) -> tensor<457x512xi32> + return %result: tensor<457x512xi32> +} + +func.func @matmul_512x330xi8_times_512x330xi8_into_512x512xi32(%lhs: tensor<512x330xi8>, %rhs: tensor<512x330xi8>) -> tensor<512x512xi32> { + %init_acc = tensor.empty() : tensor<512x512xi32> + %c0_acc_type = arith.constant 0: i32 + %acc = linalg.fill ins(%c0_acc_type : i32) outs(%init_acc : tensor<512x512xi32>) -> tensor<512x512xi32> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<512x330xi8>, tensor<512x330xi8>) outs(%acc: tensor<512x512xi32>) -> tensor<512x512xi32> + return %result: tensor<512x512xi32> +} + +func.func @matmul_accumulate_1x1000xi8_times_1000x1000xi8_into_1x1000xi32(%lhs: tensor<1x1000xi8>, %rhs: tensor<1000x1000xi8>, %acc: tensor<1x1000xi32>) -> tensor<1x1000xi32> { + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<1x1000xi8>, tensor<1000x1000xi8>) outs(%acc: tensor<1x1000xi32>) -> tensor<1x1000xi32> + return %result: tensor<1x1000xi32> +} + +func.func @matmul_accumulate_1000x1000xi8_times_1x1000xi8_into_1000x1xi32(%lhs: tensor<1000x1000xi8>, %rhs: tensor<1x1000xi8>, %acc: tensor<1000x1xi32>) -> tensor<1000x1xi32> { + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<1000x1000xi8>, tensor<1x1000xi8>) outs(%acc: tensor<1000x1xi32>) -> tensor<1000x1xi32> + return %result: 
tensor<1000x1xi32> +} + +func.func @matmul_1000x1000xi8_times_1x1000xi8_into_1000x1xi32(%lhs: tensor<1000x1000xi8>, %rhs: tensor<1x1000xi8>) -> tensor<1000x1xi32> { + %init_acc = tensor.empty() : tensor<1000x1xi32> + %c0_acc_type = arith.constant 0: i32 + %acc = linalg.fill ins(%c0_acc_type : i32) outs(%init_acc : tensor<1000x1xi32>) -> tensor<1000x1xi32> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<1000x1000xi8>, tensor<1x1000xi8>) outs(%acc: tensor<1000x1xi32>) -> tensor<1000x1xi32> + return %result: tensor<1000x1xi32> +} + diff --git a/linalg_ops/matmul/generated/i8_into_i32/matmul_transpose_b_i8_into_i32_large_calls.mlir b/linalg_ops/matmul/generated/i8_into_i32/matmul_transpose_b_i8_into_i32_large_calls.mlir new file mode 100644 index 0000000..20ae545 --- /dev/null +++ b/linalg_ops/matmul/generated/i8_into_i32/matmul_transpose_b_i8_into_i32_large_calls.mlir @@ -0,0 +1,882 @@ +builtin.module @calls attributes { + +} { + +func.func private @matmul_test.generate_random_matrix(%device: !hal.device, %dim0: i64, %dim1: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @matmul_test.check_matmul_results(%device: !hal.device, %m: i64, %k: i64, %n: i64, %transpose_rhs: i32, %lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) + +func.func private @module.matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_512x128xi8_times_512x128xi8_into_512x512xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_512x128xi8_times_512x128xi8_into_512x512xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_1000x4xi8_times_512x4xi8_into_1000x512xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_4x1000xi8_times_512x1000xi8_into_4x512xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_512x1000xi8_times_4x1000xi8_into_512x4xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_512x128xi8_times_500x128xi8_into_512x500xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_457x330xi8_times_512x330xi8_into_457x512xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_457x330xi8_times_514x330xi8_into_457x514xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_438x330xi8_times_514x330xi8_into_438x514xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_540x332xi8_times_516x332xi8_into_540x516xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_654x321xi8_times_234x321xi8_into_654x234xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_457x160xi8_times_512x160xi8_into_457x512xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_512x330xi8_times_512x330xi8_into_512x512xi32(%lhs: 
!hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_1x1000xi8_times_1000x1000xi8_into_1x1000xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_1000x1000xi8_times_1x1000xi8_into_1000x1xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_1000x1000xi8_times_1x1000xi8_into_1000x1xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view + +func.func @matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_512_128_512_acc_0() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 2 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 128 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 3 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 512 : i64 + %acc_dim1 = arith.constant 512 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 512 : i64 + %acc_copy_dim1 = arith.constant 512 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_512x128xi8_times_512x128xi8_into_512x512xi32_512_128_512_acc_1() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 5 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 128 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 6 : i32 + %rhs = 
call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 512 : i64 + %acc_dim1 = arith.constant 512 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 512 : i64 + %acc_copy_dim1 = arith.constant 512 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_512x128xi8_times_512x128xi8_into_512x512xi32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_512_128_512_2() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 8 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 128 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 9 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_512x128xi8_times_512x128xi8_into_512x512xi32_512_128_512_3() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 10 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = 
arith.constant 128 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 11 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_512x128xi8_times_512x128xi8_into_512x512xi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_1000_4_512_4() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x4x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 4 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 12 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 13 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_1000x4xi8_times_512x4xi8_into_1000x512xi32_1000_4_512_5() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x4x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 4 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 14 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 15 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_1000x4xi8_times_512x4xi8_into_1000x512xi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call 
@matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_4_1000_512_6() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 4x1000x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 4 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 16 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 17 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_4x1000xi8_times_512x1000xi8_into_4x512xi32_4_1000_512_7() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 4x1000x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 4 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 18 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 19 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_4x1000xi8_times_512x1000xi8_into_4x512xi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_512_1000_4_8() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x1000x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = 
hal.element_type : i32 + %lhs_seed = arith.constant 20 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 21 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_512x1000xi8_times_4x1000xi8_into_512x4xi32_512_1000_4_9() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x1000x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 22 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 23 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_512x1000xi8_times_4x1000xi8_into_512x4xi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_512_128_500_10() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x500"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 24 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 500 : i64 + %rhs_dim1 = arith.constant 128 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 25 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + 
%result = call @module.matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 500 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_512x128xi8_times_500x128xi8_into_512x500xi32_512_128_500_11() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x128x500"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 128 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 26 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 500 : i64 + %rhs_dim1 = arith.constant 128 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 27 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_512x128xi8_times_500x128xi8_into_512x500xi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 128 : i64 + %n = arith.constant 500 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_457_330_512_12() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x330x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 28 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 29 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func 
@matmul_457x330xi8_times_512x330xi8_into_457x512xi32_457_330_512_13() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x330x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 30 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 31 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_457x330xi8_times_512x330xi8_into_457x512xi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_457_330_514_14() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x330x514"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 32 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 514 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 33 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 514 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_457x330xi8_times_514x330xi8_into_457x514xi32_457_330_514_15() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x330x514"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 34 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + 
%rhs_dim0 = arith.constant 514 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 35 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_457x330xi8_times_514x330xi8_into_457x514xi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 514 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_438_330_514_16() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 438x330x514"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 438 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 36 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 514 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 37 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 438 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 514 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_438x330xi8_times_514x330xi8_into_438x514xi32_438_330_514_17() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 438x330x514"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 438 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 38 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 514 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 39 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_438x330xi8_times_514x330xi8_into_438x514xi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 438 : i64 + %k = arith.constant 330 : i64 + %n = 
arith.constant 514 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_540_332_516_18() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 540x332x516"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 540 : i64 + %lhs_dim1 = arith.constant 332 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 40 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 516 : i64 + %rhs_dim1 = arith.constant 332 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 41 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 540 : i64 + %k = arith.constant 332 : i64 + %n = arith.constant 516 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_540x332xi8_times_516x332xi8_into_540x516xi32_540_332_516_19() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 540x332x516"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 540 : i64 + %lhs_dim1 = arith.constant 332 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 42 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 516 : i64 + %rhs_dim1 = arith.constant 332 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 43 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_540x332xi8_times_516x332xi8_into_540x516xi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 540 : i64 + %k = arith.constant 332 : i64 + %n = arith.constant 516 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_654_321_234_20() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 654x321x234"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = 
arith.constant 654 : i64 + %lhs_dim1 = arith.constant 321 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 44 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 234 : i64 + %rhs_dim1 = arith.constant 321 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 45 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 654 : i64 + %k = arith.constant 321 : i64 + %n = arith.constant 234 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_654x321xi8_times_234x321xi8_into_654x234xi32_654_321_234_21() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 654x321x234"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 654 : i64 + %lhs_dim1 = arith.constant 321 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 46 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 234 : i64 + %rhs_dim1 = arith.constant 321 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 47 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_654x321xi8_times_234x321xi8_into_654x234xi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 654 : i64 + %k = arith.constant 321 : i64 + %n = arith.constant 234 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_457_160_512_22() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x160x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 160 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 48 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 160 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 49 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : 
(!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 160 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_457x160xi8_times_512x160xi8_into_457x512xi32_457_160_512_23() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 457x160x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 457 : i64 + %lhs_dim1 = arith.constant 160 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 50 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 160 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 51 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_457x160xi8_times_512x160xi8_into_457x512xi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 457 : i64 + %k = arith.constant 160 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_512_330_512_24() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x330x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 52 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 53 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, 
!hal.buffer_view) -> () + return +} + +func.func @matmul_512x330xi8_times_512x330xi8_into_512x512xi32_512_330_512_25() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 512x330x512"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 512 : i64 + %lhs_dim1 = arith.constant 330 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 54 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 512 : i64 + %rhs_dim1 = arith.constant 330 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 55 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_512x330xi8_times_512x330xi8_into_512x512xi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 512 : i64 + %k = arith.constant 330 : i64 + %n = arith.constant 512 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_1_1000_1000_acc_26() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1000x1000"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 56 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 57 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1000 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 58 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1000 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 58 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1000 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, 
%transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_1x1000xi8_times_1000x1000xi8_into_1x1000xi32_1_1000_1000_acc_27() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1000x1000"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 59 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1000 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 60 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1000 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 61 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1000 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 61 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_1x1000xi8_times_1000x1000xi8_into_1x1000xi32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1000 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_1000_1000_1_acc_28() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x1000x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 62 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 63 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1000 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 64 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, 
%acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1000 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 64 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_1000x1000xi8_times_1x1000xi8_into_1000x1xi32_1000_1000_1_acc_29() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x1000x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 65 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 66 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1000 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 67 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1000 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 67 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_1000x1000xi8_times_1x1000xi8_into_1000x1xi32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_1000_1000_1_30() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x1000x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = 
arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 68 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 69 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_1000x1000xi8_times_1x1000xi8_into_1000x1xi32_1000_1000_1_31() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1000x1000x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1000 : i64 + %lhs_dim1 = arith.constant 1000 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 70 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1000 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 71 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_1000x1000xi8_times_1x1000xi8_into_1000x1xi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1000 : i64 + %k = arith.constant 1000 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + + +} diff --git a/linalg_ops/matmul/generated/i8_into_i32/matmul_transpose_b_i8_into_i32_small.mlir b/linalg_ops/matmul/generated/i8_into_i32/matmul_transpose_b_i8_into_i32_small.mlir new file mode 100644 index 0000000..2a0da4f --- /dev/null +++ b/linalg_ops/matmul/generated/i8_into_i32/matmul_transpose_b_i8_into_i32_small.mlir @@ -0,0 +1,99 @@ +func.func @matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs: tensor<?x?xi8>, %rhs: tensor<?x?xi8>, %acc: tensor<?x?xi32>) -> tensor<?x?xi32> { + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<?x?xi8>, tensor<?x?xi8>) outs(%acc: tensor<?x?xi32>) -> tensor<?x?xi32> + return %result: tensor<?x?xi32> +} + +func.func @matmul_accumulate_1x1xi8_times_1x1xi8_into_1x1xi32(%lhs: tensor<1x1xi8>, %rhs: tensor<1x1xi8>, %acc: tensor<1x1xi32>) -> tensor<1x1xi32> { + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<1x1xi8>, tensor<1x1xi8>) outs(%acc: tensor<1x1xi32>) -> tensor<1x1xi32> + return %result:
tensor<1x1xi32> +} + +func.func @matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs: tensor<?x?xi8>, %rhs: tensor<?x?xi8>) -> tensor<?x?xi32> { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %acc_dim0 = tensor.dim %lhs, %c0 : tensor<?x?xi8> + %acc_dim1 = tensor.dim %rhs, %c1 : tensor<?x?xi8> + %init_acc = tensor.empty(%acc_dim0, %acc_dim1) : tensor<?x?xi32> + %c0_acc_type = arith.constant 0: i32 + %acc = linalg.fill ins(%c0_acc_type : i32) outs(%init_acc : tensor<?x?xi32>) -> tensor<?x?xi32> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<?x?xi8>, tensor<?x?xi8>) outs(%acc: tensor<?x?xi32>) -> tensor<?x?xi32> + return %result: tensor<?x?xi32> +} + +func.func @matmul_1x1xi8_times_1x1xi8_into_1x1xi32(%lhs: tensor<1x1xi8>, %rhs: tensor<1x1xi8>) -> tensor<1x1xi32> { + %init_acc = tensor.empty() : tensor<1x1xi32> + %c0_acc_type = arith.constant 0: i32 + %acc = linalg.fill ins(%c0_acc_type : i32) outs(%init_acc : tensor<1x1xi32>) -> tensor<1x1xi32> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<1x1xi8>, tensor<1x1xi8>) outs(%acc: tensor<1x1xi32>) -> tensor<1x1xi32> + return %result: tensor<1x1xi32> +} + +func.func @matmul_accumulate_2x2xi8_times_2x2xi8_into_2x2xi32(%lhs: tensor<2x2xi8>, %rhs: tensor<2x2xi8>, %acc: tensor<2x2xi32>) -> tensor<2x2xi32> { + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<2x2xi8>, tensor<2x2xi8>) outs(%acc: tensor<2x2xi32>) -> tensor<2x2xi32> + return %result: tensor<2x2xi32> +} + +func.func @matmul_accumulate_4x4xi8_times_4x4xi8_into_4x4xi32(%lhs: tensor<4x4xi8>, %rhs: tensor<4x4xi8>, %acc: tensor<4x4xi32>) -> tensor<4x4xi32> { + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<4x4xi8>, tensor<4x4xi8>) outs(%acc: tensor<4x4xi32>) -> tensor<4x4xi32> + return %result: tensor<4x4xi32> +} + +func.func @matmul_accumulate_8x8xi8_times_8x8xi8_into_8x8xi32(%lhs: tensor<8x8xi8>, %rhs: tensor<8x8xi8>, %acc: tensor<8x8xi32>) -> tensor<8x8xi32> { + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<8x8xi8>, tensor<8x8xi8>) outs(%acc: tensor<8x8xi32>) -> tensor<8x8xi32> + return %result: tensor<8x8xi32> +} + +func.func @matmul_accumulate_9x9xi8_times_9x9xi8_into_9x9xi32(%lhs: tensor<9x9xi8>, %rhs: tensor<9x9xi8>, %acc: tensor<9x9xi32>) -> tensor<9x9xi32> { + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<9x9xi8>, tensor<9x9xi8>) outs(%acc: tensor<9x9xi32>) -> tensor<9x9xi32> + return %result: tensor<9x9xi32> +} + +func.func @matmul_accumulate_6x13xi8_times_3x13xi8_into_6x3xi32(%lhs: tensor<6x13xi8>, %rhs: tensor<3x13xi8>, %acc: tensor<6x3xi32>) -> tensor<6x3xi32> { + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<6x13xi8>, tensor<3x13xi8>) outs(%acc: tensor<6x3xi32>) -> tensor<6x3xi32> + return %result: tensor<6x3xi32> +} + +func.func @matmul_15x37xi8_times_7x37xi8_into_15x7xi32(%lhs: tensor<15x37xi8>, %rhs: tensor<7x37xi8>) -> tensor<15x7xi32> { + %init_acc = tensor.empty() : tensor<15x7xi32> + %c0_acc_type = arith.constant 0: i32 + %acc = linalg.fill ins(%c0_acc_type : i32) outs(%init_acc : tensor<15x7xi32>) -> tensor<15x7xi32> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<15x37xi8>, tensor<7x37xi8>) outs(%acc: tensor<15x7xi32>) -> tensor<15x7xi32> + return %result: tensor<15x7xi32> +} + +func.func @matmul_accumulate_81x19xi8_times_41x19xi8_into_81x41xi32(%lhs: tensor<81x19xi8>, %rhs: tensor<41x19xi8>, %acc: tensor<81x41xi32>) -> tensor<81x41xi32> { + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<81x19xi8>, tensor<41x19xi8>) outs(%acc: tensor<81x41xi32>) -> tensor<81x41xi32> + return %result: tensor<81x41xi32> +} + +func.func
@matmul_accumulate_1x10xi8_times_10x10xi8_into_1x10xi32(%lhs: tensor<1x10xi8>, %rhs: tensor<10x10xi8>, %acc: tensor<1x10xi32>) -> tensor<1x10xi32> { + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<1x10xi8>, tensor<10x10xi8>) outs(%acc: tensor<1x10xi32>) -> tensor<1x10xi32> + return %result: tensor<1x10xi32> +} + +func.func @matmul_1x10xi8_times_10x10xi8_into_1x10xi32(%lhs: tensor<1x10xi8>, %rhs: tensor<10x10xi8>) -> tensor<1x10xi32> { + %init_acc = tensor.empty() : tensor<1x10xi32> + %c0_acc_type = arith.constant 0: i32 + %acc = linalg.fill ins(%c0_acc_type : i32) outs(%init_acc : tensor<1x10xi32>) -> tensor<1x10xi32> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<1x10xi8>, tensor<10x10xi8>) outs(%acc: tensor<1x10xi32>) -> tensor<1x10xi32> + return %result: tensor<1x10xi32> +} + +func.func @matmul_accumulate_10x1xi8_times_10x1xi8_into_10x10xi32(%lhs: tensor<10x1xi8>, %rhs: tensor<10x1xi8>, %acc: tensor<10x10xi32>) -> tensor<10x10xi32> { + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<10x1xi8>, tensor<10x1xi8>) outs(%acc: tensor<10x10xi32>) -> tensor<10x10xi32> + return %result: tensor<10x10xi32> +} + +func.func @matmul_accumulate_10x10xi8_times_1x10xi8_into_10x1xi32(%lhs: tensor<10x10xi8>, %rhs: tensor<1x10xi8>, %acc: tensor<10x1xi32>) -> tensor<10x1xi32> { + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<10x10xi8>, tensor<1x10xi8>) outs(%acc: tensor<10x1xi32>) -> tensor<10x1xi32> + return %result: tensor<10x1xi32> +} + +func.func @matmul_10x10xi8_times_1x10xi8_into_10x1xi32(%lhs: tensor<10x10xi8>, %rhs: tensor<1x10xi8>) -> tensor<10x1xi32> { + %init_acc = tensor.empty() : tensor<10x1xi32> + %c0_acc_type = arith.constant 0: i32 + %acc = linalg.fill ins(%c0_acc_type : i32) outs(%init_acc : tensor<10x1xi32>) -> tensor<10x1xi32> + %result = linalg.matmul_transpose_b ins(%lhs, %rhs: tensor<10x10xi8>, tensor<1x10xi8>) outs(%acc: tensor<10x1xi32>) -> tensor<10x1xi32> + return %result: tensor<10x1xi32> +} + diff --git a/linalg_ops/matmul/generated/i8_into_i32/matmul_transpose_b_i8_into_i32_small_calls.mlir b/linalg_ops/matmul/generated/i8_into_i32/matmul_transpose_b_i8_into_i32_small_calls.mlir new file mode 100644 index 0000000..e3407bd --- /dev/null +++ b/linalg_ops/matmul/generated/i8_into_i32/matmul_transpose_b_i8_into_i32_small_calls.mlir @@ -0,0 +1,906 @@ +builtin.module @calls attributes { + +} { + +func.func private @matmul_test.generate_random_matrix(%device: !hal.device, %dim0: i64, %dim1: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @matmul_test.check_matmul_results(%device: !hal.device, %m: i64, %k: i64, %n: i64, %transpose_rhs: i32, %lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) + +func.func private @module.matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_1x1xi8_times_1x1xi8_into_1x1xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_1x1xi8_times_1x1xi8_into_1x1xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_2x2xi8_times_2x2xi8_into_2x2xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: 
!hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_4x4xi8_times_4x4xi8_into_4x4xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_8x8xi8_times_8x8xi8_into_8x8xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_9x9xi8_times_9x9xi8_into_9x9xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_6x13xi8_times_3x13xi8_into_6x3xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_15x37xi8_times_7x37xi8_into_15x7xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_81x19xi8_times_41x19xi8_into_81x41xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_1x10xi8_times_10x10xi8_into_1x10xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_1x10xi8_times_10x10xi8_into_1x10xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_10x1xi8_times_10x1xi8_into_10x10xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_accumulate_10x10xi8_times_1x10xi8_into_10x1xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.matmul_10x10xi8_times_1x10xi8_into_10x1xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view + +func.func @matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_1_1_1_acc_0() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 2 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 3 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs, 
%acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_1x1xi8_times_1x1xi8_into_1x1xi32_1_1_1_acc_1() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 5 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 6 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_1x1xi8_times_1x1xi8_into_1x1xi32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_1_1_1_2() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 8 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 9 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call 
@module.matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_1x1xi8_times_1x1xi8_into_1x1xi32_1_1_1_3() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 10 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 1 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 11 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_1x1xi8_times_1x1xi8_into_1x1xi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 1 : i64 + %n = arith.constant 1 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_2_2_2_acc_4() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 2x2x2"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 2 : i64 + %lhs_dim1 = arith.constant 2 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 12 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 2 : i64 + %rhs_dim1 = arith.constant 2 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 13 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 2 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 14 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 2 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 14 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = 
call @module.matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 2 : i64 + %k = arith.constant 2 : i64 + %n = arith.constant 2 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_2x2xi8_times_2x2xi8_into_2x2xi32_2_2_2_acc_5() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 2x2x2"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 2 : i64 + %lhs_dim1 = arith.constant 2 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 15 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 2 : i64 + %rhs_dim1 = arith.constant 2 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 16 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 2 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 17 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 2 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 17 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_2x2xi8_times_2x2xi8_into_2x2xi32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 2 : i64 + %k = arith.constant 2 : i64 + %n = arith.constant 2 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_4_4_4_acc_6() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 4x4x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 4 : i64 + %lhs_dim1 = arith.constant 4 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 18 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 19 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : 
(!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 4 : i64 + %acc_dim1 = arith.constant 4 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 20 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 4 : i64 + %acc_copy_dim1 = arith.constant 4 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 20 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_4x4xi8_times_4x4xi8_into_4x4xi32_4_4_4_acc_7() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 4x4x4"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 4 : i64 + %lhs_dim1 = arith.constant 4 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 21 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 4 : i64 + %rhs_dim1 = arith.constant 4 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 22 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 4 : i64 + %acc_dim1 = arith.constant 4 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 23 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 4 : i64 + %acc_copy_dim1 = arith.constant 4 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 23 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_4x4xi8_times_4x4xi8_into_4x4xi32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 4 : i64 + %k = arith.constant 4 : i64 + %n = arith.constant 4 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_8_8_8_acc_8() attributes { + 
iree.reflection = {description = "Matmul shape (MxKxN): 8x8x8"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 8 : i64 + %lhs_dim1 = arith.constant 8 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 24 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 8 : i64 + %rhs_dim1 = arith.constant 8 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 25 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 8 : i64 + %acc_dim1 = arith.constant 8 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 26 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 8 : i64 + %acc_copy_dim1 = arith.constant 8 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 26 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 8 : i64 + %k = arith.constant 8 : i64 + %n = arith.constant 8 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_8x8xi8_times_8x8xi8_into_8x8xi32_8_8_8_acc_9() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 8x8x8"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 8 : i64 + %lhs_dim1 = arith.constant 8 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 27 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 8 : i64 + %rhs_dim1 = arith.constant 8 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 28 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 8 : i64 + %acc_dim1 = arith.constant 8 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 29 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 8 : i64 + %acc_copy_dim1 = arith.constant 8 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 29 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, 
%acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_8x8xi8_times_8x8xi8_into_8x8xi32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 8 : i64 + %k = arith.constant 8 : i64 + %n = arith.constant 8 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_9_9_9_acc_10() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 9x9x9"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 9 : i64 + %lhs_dim1 = arith.constant 9 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 30 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 9 : i64 + %rhs_dim1 = arith.constant 9 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 31 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 9 : i64 + %acc_dim1 = arith.constant 9 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 32 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 9 : i64 + %acc_copy_dim1 = arith.constant 9 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 32 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 9 : i64 + %k = arith.constant 9 : i64 + %n = arith.constant 9 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_9x9xi8_times_9x9xi8_into_9x9xi32_9_9_9_acc_11() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 9x9x9"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 9 : i64 + %lhs_dim1 = arith.constant 9 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 33 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 9 : i64 + %rhs_dim1 = arith.constant 9 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 
34 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 9 : i64 + %acc_dim1 = arith.constant 9 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 35 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 9 : i64 + %acc_copy_dim1 = arith.constant 9 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 35 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_9x9xi8_times_9x9xi8_into_9x9xi32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 9 : i64 + %k = arith.constant 9 : i64 + %n = arith.constant 9 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_6_13_3_acc_12() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 6x13x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 6 : i64 + %lhs_dim1 = arith.constant 13 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 36 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 3 : i64 + %rhs_dim1 = arith.constant 13 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 37 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 6 : i64 + %acc_dim1 = arith.constant 3 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 38 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 6 : i64 + %acc_copy_dim1 = arith.constant 3 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 38 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 6 : i64 + %k = arith.constant 13 : i64 + %n = arith.constant 3 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, 
!hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_6x13xi8_times_3x13xi8_into_6x3xi32_6_13_3_acc_13() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 6x13x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 6 : i64 + %lhs_dim1 = arith.constant 13 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 39 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 3 : i64 + %rhs_dim1 = arith.constant 13 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 40 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 6 : i64 + %acc_dim1 = arith.constant 3 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 41 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 6 : i64 + %acc_copy_dim1 = arith.constant 3 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 41 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_6x13xi8_times_3x13xi8_into_6x3xi32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 6 : i64 + %k = arith.constant 13 : i64 + %n = arith.constant 3 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_15_37_7_14() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 15x37x7"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 15 : i64 + %lhs_dim1 = arith.constant 37 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 42 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 7 : i64 + %rhs_dim1 = arith.constant 37 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 43 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 15 : i64 + %k = arith.constant 37 : i64 + %n = arith.constant 7 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, 
i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_15x37xi8_times_7x37xi8_into_15x7xi32_15_37_7_15() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 15x37x7"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 15 : i64 + %lhs_dim1 = arith.constant 37 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 44 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 7 : i64 + %rhs_dim1 = arith.constant 37 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 45 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_15x37xi8_times_7x37xi8_into_15x7xi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 15 : i64 + %k = arith.constant 37 : i64 + %n = arith.constant 7 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_81_19_41_acc_16() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 81x19x41"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 81 : i64 + %lhs_dim1 = arith.constant 19 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 46 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 41 : i64 + %rhs_dim1 = arith.constant 19 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 47 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 81 : i64 + %acc_dim1 = arith.constant 41 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 48 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 81 : i64 + %acc_copy_dim1 = arith.constant 41 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 48 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 81 : i64 + %k = arith.constant 19 : i64 + %n = arith.constant 41 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, 
%m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_81x19xi8_times_41x19xi8_into_81x41xi32_81_19_41_acc_17() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 81x19x41"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 81 : i64 + %lhs_dim1 = arith.constant 19 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 49 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 41 : i64 + %rhs_dim1 = arith.constant 19 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 50 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 81 : i64 + %acc_dim1 = arith.constant 41 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 51 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 81 : i64 + %acc_copy_dim1 = arith.constant 41 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 51 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_81x19xi8_times_41x19xi8_into_81x41xi32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 81 : i64 + %k = arith.constant 19 : i64 + %n = arith.constant 41 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_1_10_10_acc_18() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 52 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 53 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 10 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 54 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : 
(!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 10 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 54 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_1x10xi8_times_10x10xi8_into_1x10xi32_1_10_10_acc_19() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 55 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 56 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 10 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 57 : i32 + %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 10 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 57 : i32 + %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.matmul_accumulate_1x10xi8_times_10x10xi8_into_1x10xi32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_1_10_10_20() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 58 
: i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 59 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_1x10xi8_times_10x10xi8_into_1x10xi32_1_10_10_21() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 1x10x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 1 : i64 + %lhs_dim1 = arith.constant 10 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 60 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 10 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 61 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc = util.null : !hal.buffer_view + %result = call @module.matmul_1x10xi8_times_10x10xi8_into_1x10xi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %m = arith.constant 1 : i64 + %k = arith.constant 10 : i64 + %n = arith.constant 10 : i64 + %transpose_rhs = arith.constant 1 : i32 + call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_10_1_10_acc_22() attributes { + iree.reflection = {description = "Matmul shape (MxKxN): 10x1x10"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %lhs_dim0 = arith.constant 10 : i64 + %lhs_dim1 = arith.constant 1 : i64 + %lhs_element_type = hal.element_type : i32 + %lhs_seed = arith.constant 62 : i32 + %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %rhs_dim0 = arith.constant 10 : i64 + %rhs_dim1 = arith.constant 1 : i64 + %rhs_element_type = hal.element_type : i32 + %rhs_seed = arith.constant 63 : i32 + %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 10 : i64 + %acc_dim1 = arith.constant 10 : i64 + %acc_element_type = hal.element_type : i32 + 
%acc_seed = arith.constant 64 : i32
+  %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+  %acc_copy_dim0 = arith.constant 10 : i64
+  %acc_copy_dim1 = arith.constant 10 : i64
+  %acc_copy_element_type = hal.element_type<i32> : i32
+  %acc_copy_seed = arith.constant 64 : i32
+  %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+  %result = call @module.matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view
+  %m = arith.constant 10 : i64
+  %k = arith.constant 1 : i64
+  %n = arith.constant 10 : i64
+  %transpose_rhs = arith.constant 1 : i32
+  call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()
+  return
+}
+
+func.func @matmul_accumulate_10x1xi8_times_10x1xi8_into_10x10xi32_10_1_10_acc_23() attributes {
+  iree.reflection = {description = "Matmul shape (MxKxN): 10x1x10"}
+} {
+  %device_index = arith.constant 0 : index
+  %device = hal.devices.get %device_index : !hal.device
+  %lhs_dim0 = arith.constant 10 : i64
+  %lhs_dim1 = arith.constant 1 : i64
+  %lhs_element_type = hal.element_type<i8> : i32
+  %lhs_seed = arith.constant 65 : i32
+  %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+  %rhs_dim0 = arith.constant 10 : i64
+  %rhs_dim1 = arith.constant 1 : i64
+  %rhs_element_type = hal.element_type<i8> : i32
+  %rhs_seed = arith.constant 66 : i32
+  %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+  %acc_dim0 = arith.constant 10 : i64
+  %acc_dim1 = arith.constant 10 : i64
+  %acc_element_type = hal.element_type<i32> : i32
+  %acc_seed = arith.constant 67 : i32
+  %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+  %acc_copy_dim0 = arith.constant 10 : i64
+  %acc_copy_dim1 = arith.constant 10 : i64
+  %acc_copy_element_type = hal.element_type<i32> : i32
+  %acc_copy_seed = arith.constant 67 : i32
+  %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+  %result = call @module.matmul_accumulate_10x1xi8_times_10x1xi8_into_10x10xi32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view
+  %m = arith.constant 10 : i64
+  %k = arith.constant 1 : i64
+  %n = arith.constant 10 : i64
+  %transpose_rhs = arith.constant 1 : i32
+  call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()
+  return
+}
+
+func.func @matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_10_10_1_acc_24() attributes {
+  iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"}
+} {
+  %device_index = arith.constant 0 : index
+  %device = hal.devices.get %device_index : !hal.device
+  %lhs_dim0 = arith.constant 10 : i64
+  %lhs_dim1 = arith.constant 10 : i64
+  %lhs_element_type = hal.element_type<i8> : i32
+  %lhs_seed = arith.constant 68 : i32
+  %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+  %rhs_dim0 = arith.constant 1 : i64
+  %rhs_dim1 = arith.constant 10 : i64
+  %rhs_element_type = hal.element_type<i8> : i32
+  %rhs_seed = arith.constant 69 : i32
+  %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+  %acc_dim0 = arith.constant 10 : i64
+  %acc_dim1 = arith.constant 1 : i64
+  %acc_element_type = hal.element_type<i32> : i32
+  %acc_seed = arith.constant 70 : i32
+  %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+  %acc_copy_dim0 = arith.constant 10 : i64
+  %acc_copy_dim1 = arith.constant 1 : i64
+  %acc_copy_element_type = hal.element_type<i32> : i32
+  %acc_copy_seed = arith.constant 70 : i32
+  %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+  %result = call @module.matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view
+  %m = arith.constant 10 : i64
+  %k = arith.constant 10 : i64
+  %n = arith.constant 1 : i64
+  %transpose_rhs = arith.constant 1 : i32
+  call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()
+  return
+}
+
+func.func @matmul_accumulate_10x10xi8_times_1x10xi8_into_10x1xi32_10_10_1_acc_25() attributes {
+  iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"}
+} {
+  %device_index = arith.constant 0 : index
+  %device = hal.devices.get %device_index : !hal.device
+  %lhs_dim0 = arith.constant 10 : i64
+  %lhs_dim1 = arith.constant 10 : i64
+  %lhs_element_type = hal.element_type<i8> : i32
+  %lhs_seed = arith.constant 71 : i32
+  %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+  %rhs_dim0 = arith.constant 1 : i64
+  %rhs_dim1 = arith.constant 10 : i64
+  %rhs_element_type = hal.element_type<i8> : i32
+  %rhs_seed = arith.constant 72 : i32
+  %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+  %acc_dim0 = arith.constant 10 : i64
+  %acc_dim1 = arith.constant 1 : i64
+  %acc_element_type = hal.element_type<i32> : i32
+  %acc_seed = arith.constant 73 : i32
+  %acc = call @matmul_test.generate_random_matrix(%device, %acc_dim0, %acc_dim1, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+  %acc_copy_dim0 = arith.constant 10 : i64
+  %acc_copy_dim1 = arith.constant 1 : i64
+  %acc_copy_element_type = hal.element_type<i32> : i32
+  %acc_copy_seed = arith.constant 73 : i32
+  %acc_copy = call @matmul_test.generate_random_matrix(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+  %result = call @module.matmul_accumulate_10x10xi8_times_1x10xi8_into_10x1xi32(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view
+  %m = arith.constant 10 : i64
+  %k = arith.constant 10 : i64
+  %n = arith.constant 1 : i64
+  %transpose_rhs = arith.constant 1 : i32
+  call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()
+  return
+}
+
+func.func @matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32_10_10_1_26() attributes {
+  iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"}
+} {
+  %device_index = arith.constant 0 : index
+  %device = hal.devices.get %device_index : !hal.device
+  %lhs_dim0 = arith.constant 10 : i64
+  %lhs_dim1 = arith.constant 10 : i64
+  %lhs_element_type = hal.element_type<i8> : i32
+  %lhs_seed = arith.constant 74 : i32
+  %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+  %rhs_dim0 = arith.constant 1 : i64
+  %rhs_dim1 = arith.constant 10 : i64
+  %rhs_element_type = hal.element_type<i8> : i32
+  %rhs_seed = arith.constant 75 : i32
+  %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+  %acc = util.null : !hal.buffer_view
+  %result = call @module.matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view
+  %m = arith.constant 10 : i64
+  %k = arith.constant 10 : i64
+  %n = arith.constant 1 : i64
+  %transpose_rhs = arith.constant 1 : i32
+  call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()
+  return
+}
+
+func.func @matmul_10x10xi8_times_1x10xi8_into_10x1xi32_10_10_1_27() attributes {
+  iree.reflection = {description = "Matmul shape (MxKxN): 10x10x1"}
+} {
+  %device_index = arith.constant 0 : index
+  %device = hal.devices.get %device_index : !hal.device
+  %lhs_dim0 = arith.constant 10 : i64
+  %lhs_dim1 = arith.constant 10 : i64
+  %lhs_element_type = hal.element_type<i8> : i32
+  %lhs_seed = arith.constant 76 : i32
+  %lhs = call @matmul_test.generate_random_matrix(%device, %lhs_dim0, %lhs_dim1, %lhs_element_type, %lhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+  %rhs_dim0 = arith.constant 1 : i64
+  %rhs_dim1 = arith.constant 10 : i64
+  %rhs_element_type = hal.element_type<i8> : i32
+  %rhs_seed = arith.constant 77 : i32
+  %rhs = call @matmul_test.generate_random_matrix(%device, %rhs_dim0, %rhs_dim1, %rhs_element_type, %rhs_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view
+  %acc = util.null : !hal.buffer_view
+  %result = call @module.matmul_10x10xi8_times_1x10xi8_into_10x1xi32(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view
+  %m = arith.constant 10 : i64
+  %k = arith.constant 10 : i64
+  %n = arith.constant 1 : i64
+  %transpose_rhs = arith.constant 1 : i32
+  call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()
+  return
+}
+
+
+}