| # RUN: env SUPPORT_LIB=%mlir_cuda_runtime \ |
| # RUN: %PYTHON %s | FileCheck %s |
| |
| # ===----------------------------------------------------------------------===// |
| # Chapter 1 : 2D Saxpy |
| # ===----------------------------------------------------------------------===// |
| # |
| # This program demonstrates 2D Saxpy: |
| # 1. Use the GPU dialect to allocate memory and copy it between host and GPU |
| # 2. Compute a 2D SAXPY kernel using operator overloading |
| # 3. Pass numpy arrays to MLIR as memref arguments |
| # 4. Verify MLIR program with reference computation in python |
| # |
| # ===----------------------------------------------------------------------===// |
| |
| |
| from mlir import ir |
| from mlir.dialects import gpu, memref |
| from tools.nvdsl import * |
| import numpy as np |
| |
| |
@NVDSL.mlir_func
def saxpy(x, y, alpha):
    """Build the IR for a 2D SAXPY, ``y += alpha * x``, executed on the GPU.

    ``x`` and ``y`` are copied into freshly allocated device buffers of the
    same type, updated element-wise by a GPU kernel, and the result is copied
    back into ``y``. The launch shape is taken from the module-level ``M``
    (grid x) and ``N`` (block x).
    """
    # 1. Use MLIR GPU dialect to allocate and copy memory.
    # Every async op consumes the token produced by the previous one, so the
    # allocations and copies form a single dependency chain.
    async_ty = gpu.AsyncTokenType.get()
    start_tok = gpu.wait(async_ty, [])
    dev_x, x_alloc_tok = gpu.alloc(x.type, async_ty, [start_tok], [], [])
    dev_y, y_alloc_tok = gpu.alloc(y.type, async_ty, [x_alloc_tok], [], [])
    x_copied_tok = gpu.memcpy(async_ty, [y_alloc_tok], dev_x, x)
    y_copied_tok = gpu.memcpy(async_ty, [x_copied_tok], dev_y, y)
    inputs_ready_tok = gpu.wait(async_ty, [y_copied_tok])

    # 2. Compute 2D SAXPY kernel: one block per index along the first
    # dimension, one thread per index along the second.
    @NVDSL.mlir_gpu_launch(grid=(M, 1, 1), block=(N, 1, 1))
    def saxpy_kernel():
        row = gpu.block_id(gpu.Dimension.x)
        col = gpu.thread_id(gpu.Dimension.x)
        idx = [row, col]
        xv = memref.load(dev_x, idx)
        acc = memref.load(dev_y, idx)

        # SAXPY: y[i] += a * x[i];
        acc += xv * alpha

        memref.store(acc, dev_y, idx)

    saxpy_kernel()

    # Copy the updated device buffer back into the host-side `y` and wait
    # for that copy to complete.
    result_tok = gpu.memcpy(async_ty, [inputs_ready_tok], y, dev_y)
    gpu.wait(async_ty, [result_tok])
| |
| |
# 3. Pass numpy arrays to MLIR
# M and N are also read by `saxpy` as the kernel's grid and block sizes.
M = 256
N = 32
alpha = 2.0
# Use a fixed seed so that a failing run can be reproduced with the exact
# same input data.
rng = np.random.default_rng(0)
x = rng.standard_normal((M, N)).astype(np.float32)
y = np.ones((M, N), np.float32)
saxpy(x, y, alpha)

# 4. Verify MLIR with reference computation
# `y` started as all ones, so the expected result is 1 + alpha * x.
ref = np.ones((M, N), np.float32)
ref += x * alpha
np.testing.assert_allclose(y, ref, rtol=5e-03, atol=1e-01)
print("PASS")
# CHECK-NOT: Mismatched elements
# CHECK: PASS