From 71c2fa4eafe1e8af49d5809a704836a2621163b5 Mon Sep 17 00:00:00 2001 From: "Jiang, Zhiwei" Date: Mon, 24 Jun 2024 11:32:47 +0800 Subject: [PATCH 1/4] New test WIP Signed-off-by: Jiang, Zhiwei --- features/feature_case/cublasLt/matmul.cu | 754 ++++++++++++++++++++ features/feature_case/cublasLt/transform.cu | 600 ++++++++++++++++ 2 files changed, 1354 insertions(+) create mode 100644 features/feature_case/cublasLt/matmul.cu create mode 100644 features/feature_case/cublasLt/transform.cu diff --git a/features/feature_case/cublasLt/matmul.cu b/features/feature_case/cublasLt/matmul.cu new file mode 100644 index 00000000..0cf382f7 --- /dev/null +++ b/features/feature_case/cublasLt/matmul.cu @@ -0,0 +1,754 @@ +// ===------------ matmul.cu ----------------------------- *- CUDA -* ----=== // +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// ===--------------------------------------------------------------------=== // + +#include +#include +#include + +const constexpr int COL_TURING = 0; +const constexpr int COL_AMPERE = 1; + +// The original source of below two functions was under the license below: +// Copyright (c) Facebook, Inc. and its affiliates. +// +// This source code is licensed under the MIT license found in the +// LICENSE file in the root directory of this source tree. 
//
// Repo: https://github.com/TimDettmers/bitsandbytes.git

// Reports a failed cuBLAS / cuBLASLt call on stdout.
//
// Returns 1 when `status` is not CUBLAS_STATUS_SUCCESS and 0 otherwise, so
// callers can OR the results of several calls into a single error flag.
inline int checkCublasStatus(cublasStatus_t status) {
  if (status != CUBLAS_STATUS_SUCCESS) {
    printf("cuBLAS API failed with status %d\n", static_cast<int>(status));
    //throw std::logic_error("cuBLAS API failed");
    return 1;
  }
  return 0;
}

// Int8 matrix multiply C = A * B^T through cublasLtMatmul.
//
// Template parameters:
//   FORMATB    - COL_TURING selects CUBLASLT_ORDER_COL4_4R2_8C for B, any
//                other value selects CUBLASLT_ORDER_COL32_2R_4R4.
//   DTYPE_OUT  - 32 writes C as int32; any other value writes C as int8.
//   SCALE_ROWS - only read on the int8 output path. Non-zero switches the
//                matmul to CUBLASLT_POINTER_MODE_ALPHA_DEVICE_VECTOR_BETA_ZERO
//                and passes `row_scale` as alpha.
//
// Parameters:
//   A, lda - m x k int8 matrix described with CUBLASLT_ORDER_COL32.
//   B, ldb - n x k int8 matrix described with the order chosen by FORMATB.
//   C, ldc - m x n output described with CUBLASLT_ORDER_COL32.
//   row_scale - alpha vector; must be non-NULL when it is used (int8 output
//               with SCALE_ROWS != 0), ignored otherwise.
//
// Synchronizes stream 0 before returning.
// Returns 0 on success and 1 if any call reported a failure.
template <int FORMATB, int DTYPE_OUT, int SCALE_ROWS>
int igemmlt(cublasLtHandle_t ltHandle, int m, int n, int k, const int8_t *A,
            const int8_t *B, void *C, float *row_scale, int lda, int ldb,
            int ldc) {
  int has_error = 0;
  cublasLtMatmulDesc_t matmulDesc = NULL;
  cublasLtMatrixLayout_t Adesc = NULL, Bdesc = NULL, Cdesc = NULL;
  cublasOperation_t opT = CUBLAS_OP_T;
  cublasLtPointerMode_t alphaVec =
      CUBLASLT_POINTER_MODE_ALPHA_DEVICE_VECTOR_BETA_ZERO;
  cublasLtOrder_t col32 = CUBLASLT_ORDER_COL32;
  cublasLtOrder_t col_turing = CUBLASLT_ORDER_COL4_4R2_8C;
  cublasLtOrder_t col_ampere = CUBLASLT_ORDER_COL32_2R_4R4;

  has_error |= checkCublasStatus(
      cublasLtMatrixLayoutCreate(&Adesc, CUDA_R_8I, m, k, lda));
  has_error |= checkCublasStatus(
      cublasLtMatrixLayoutCreate(&Bdesc, CUDA_R_8I, n, k, ldb));

  has_error |= checkCublasStatus(cublasLtMatrixLayoutSetAttribute(
      Adesc, CUBLASLT_MATRIX_LAYOUT_ORDER, &col32, sizeof(col32)));
  if (FORMATB == COL_TURING)
    has_error |= checkCublasStatus(cublasLtMatrixLayoutSetAttribute(
        Bdesc, CUBLASLT_MATRIX_LAYOUT_ORDER, &col_turing, sizeof(col_turing)));
  else
    has_error |= checkCublasStatus(cublasLtMatrixLayoutSetAttribute(
        Bdesc, CUBLASLT_MATRIX_LAYOUT_ORDER, &col_ampere, sizeof(col_ampere)));

  if (DTYPE_OUT == 32) {
    // int32 output: integer scale type, so alpha/beta are ints.
    has_error |= checkCublasStatus(
        cublasLtMatmulDescCreate(&matmulDesc, CUBLAS_COMPUTE_32I, CUDA_R_32I));
    has_error |= checkCublasStatus(cublasLtMatmulDescSetAttribute(
        matmulDesc, CUBLASLT_MATMUL_DESC_TRANSB, &opT, sizeof(opT)));
    has_error |= checkCublasStatus(
        cublasLtMatrixLayoutCreate(&Cdesc, CUDA_R_32I, m, n, ldc));
    has_error |= checkCublasStatus(cublasLtMatrixLayoutSetAttribute(
        Cdesc, CUBLASLT_MATRIX_LAYOUT_ORDER, &col32, sizeof(col32)));
    int alpha = 1, beta = 0;
    has_error |= checkCublasStatus(cublasLtMatmul(
        ltHandle, matmulDesc, &alpha, A, Adesc, B, Bdesc, &beta, (int32_t *)C,
        Cdesc, (int32_t *)C, Cdesc, NULL, NULL, 0, 0));
  } else {
    // int8 output: float scale type, so alpha/beta are floats.
    has_error |= checkCublasStatus(
        cublasLtMatmulDescCreate(&matmulDesc, CUBLAS_COMPUTE_32I, CUDA_R_32F));
    has_error |= checkCublasStatus(cublasLtMatmulDescSetAttribute(
        matmulDesc, CUBLASLT_MATMUL_DESC_TRANSB, &opT, sizeof(opT)));
    has_error |= checkCublasStatus(
        cublasLtMatrixLayoutCreate(&Cdesc, CUDA_R_8I, m, n, ldc));
    has_error |= checkCublasStatus(cublasLtMatrixLayoutSetAttribute(
        Cdesc, CUBLASLT_MATRIX_LAYOUT_ORDER, &col32, sizeof(col32)));
    if (!SCALE_ROWS) {
      float alpha = 1.0f, beta = 0.0f;
      has_error |= checkCublasStatus(cublasLtMatmul(
          ltHandle, matmulDesc, &alpha, A, Adesc, B, Bdesc, &beta, (int8_t *)C,
          Cdesc, (int8_t *)C, Cdesc, NULL, NULL, 0, 0));
    } else if (row_scale == NULL) {
      // In device-vector pointer mode alpha is dereferenced, so a NULL
      // vector must be rejected here instead of being handed to the library.
      printf("row_scale must not be NULL when SCALE_ROWS is set\n");
      has_error |= 1;
    } else {
      has_error |= checkCublasStatus(cublasLtMatmulDescSetAttribute(
          matmulDesc, CUBLASLT_MATMUL_DESC_POINTER_MODE, &alphaVec,
          sizeof(alphaVec)));
      has_error |= checkCublasStatus(cublasLtMatmul(
          ltHandle, matmulDesc, row_scale, A, Adesc, B, Bdesc, NULL,
          (int8_t *)C, Cdesc, (int8_t *)C, Cdesc, NULL, NULL, 0, 0));
    }
  }

  // The matmul was enqueued on stream 0; wait for it and surface any
  // asynchronous failure instead of dropping the return code.
  cudaError_t sync_status = cudaStreamSynchronize(0);
  if (sync_status != cudaSuccess) {
    printf("CUDA API failed with status %d: %s\n",
           static_cast<int>(sync_status), cudaGetErrorString(sync_status));
    has_error |= 1;
  }

  if (Cdesc) has_error |= checkCublasStatus(cublasLtMatrixLayoutDestroy(Cdesc));
  if (Bdesc) has_error |= checkCublasStatus(cublasLtMatrixLayoutDestroy(Bdesc));
  if (Adesc) has_error |= checkCublasStatus(cublasLtMatrixLayoutDestroy(Adesc));
  if (matmulDesc)
    has_error |= checkCublasStatus(cublasLtMatmulDescDestroy(matmulDesc));
  if (has_error)
    printf("error detected");

  return has_error;
}

// Reports a failed CUDA runtime call on stdout.
// Returns 1 when `status` is not cudaSuccess and 0 otherwise.
static int checkCudaStatus(cudaError_t status) {
  if (status != cudaSuccess) {
    printf("CUDA API failed with status %d: %s\n", static_cast<int>(status),
           cudaGetErrorString(status));
    return 1;
  }
  return 0;
}

// Copies `in` (described by `layout_in`) to `out` (described by `layout_out`)
// with cublasLtMatrixTransform on stream 0, using alpha = 1 and beta = 0.
// Returns 0 on success and 1 if any cuBLASLt call failed.
static int transformChecked(cublasLtHandle_t ltHandle, const void *in,
                            cublasLtMatrixLayout_t layout_in, void *out,
                            cublasLtMatrixLayout_t layout_out) {
  cublasLtMatrixTransformDesc_t transform_desc = NULL;
  if (checkCublasStatus(
          cublasLtMatrixTransformDescCreate(&transform_desc, CUDA_R_32F)))
    return 1;

  int has_error = 0;
  float alpha = 1.0f, beta = 0.0f;
  has_error |= checkCublasStatus(
      cublasLtMatrixTransform(ltHandle, transform_desc, &alpha, in, layout_in,
                              &beta, NULL, NULL, out, layout_out, 0));
  has_error |=
      checkCublasStatus(cublasLtMatrixTransformDescDestroy(transform_desc));
  return has_error;
}

// Re-orders a matrix from `layout_in` to `layout_out`.
//
// `ld_in` and `ld_out` are accepted for interface compatibility only: this
// function never reads them, and the layouts passed in are the ones handed to
// cuBLASLt. Failures are printed by checkCublasStatus; nothing is returned.
void transform(cublasLtHandle_t ltHandle, const void *in, int ld_in,
               cublasLtMatrixLayout_t layout_in, void *out, int ld_out,
               cublasLtMatrixLayout_t layout_out) {
  (void)ld_in;
  (void)ld_out;
  (void)transformChecked(ltHandle, in, layout_in, out, layout_out);
}

// Shared body of test1..test6.
//
// Builds the fixed 4x3 matrix A and 2x3 matrix B in column-major order,
// re-orders them into the layouts igemmlt expects, runs
// igemmlt<FORMATB, DTYPE_OUT, SCALE_ROWS>, converts the result back to
// column-major and compares it element-wise with `C_ref`.
//
// OutT is the element type of C (deduced from `C_ref`) and has to agree with
// DTYPE_OUT. When SCALE_ROWS is non-zero, row i of the result is scaled by
// alpha[i] = i.
//
// Prints the result followed by "success" or "error". Returns true only when
// every API call succeeded and the result matches the reference.
template <int FORMATB, int DTYPE_OUT, int SCALE_ROWS, typename OutT,
          int REF_LEN>
static bool runIgemmltCase(const OutT (&C_ref)[REF_LEN]) {
  const constexpr int m = 4;
  const constexpr int n = 2;
  const constexpr int k = 3;
  static_assert(REF_LEN == m * n, "C_ref must hold m * n elements");
  static_assert((DTYPE_OUT == 32) == (sizeof(OutT) == sizeof(int32_t)),
                "OutT must be int32_t exactly when DTYPE_OUT is 32");

  const int lda = m;
  const int ldb = n;
  const int ldc = m;

  // Leading dimensions of the re-ordered buffers. COL32 uses 32 * rows; the
  // B orders pad the row count (n) to a multiple of 8 (COL4_4R2_8C) or of 32
  // (COL32_2R_4R4) first. The same value is used for the allocation, the
  // layout and the igemmlt call so the three can never disagree.
  const int b_row_pad = (FORMATB == COL_TURING) ? 8 : 32;
  const int lda_col32 = m * 32;
  const int ldb_ordered = ((n + b_row_pad - 1) / b_row_pad) * b_row_pad * 32;
  const int ldc_col32 = m * 32;

  const cudaDataType_t c_type = (DTYPE_OUT == 32) ? CUDA_R_32I : CUDA_R_8I;
  const cublasLtOrder_t order_col32 = CUBLASLT_ORDER_COL32;
  const cublasLtOrder_t order_b = (FORMATB == COL_TURING)
                                      ? CUBLASLT_ORDER_COL4_4R2_8C
                                      : CUBLASLT_ORDER_COL32_2R_4R4;

  int failures = 0;

  cublasLtHandle_t ltHandle = NULL;
  failures |= checkCublasStatus(cublasLtCreate(&ltHandle));

  // Column-major inputs and output.
  void *Adev = NULL;
  void *Bdev = NULL;
  void *Cdev = NULL;
  failures |= checkCudaStatus(cudaMalloc(&Adev, m * k * sizeof(int8_t)));
  failures |= checkCudaStatus(cudaMalloc(&Bdev, n * k * sizeof(int8_t)));
  failures |= checkCudaStatus(cudaMalloc(&Cdev, m * n * sizeof(OutT)));

  const int8_t Ahost[m * k] = {6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17};
  const int8_t Bhost[n * k] = {5, 4, -3, -2, 1, 0};
  failures |= checkCudaStatus(
      cudaMemcpy(Adev, Ahost, sizeof(Ahost), cudaMemcpyHostToDevice));
  failures |= checkCudaStatus(
      cudaMemcpy(Bdev, Bhost, sizeof(Bhost), cudaMemcpyHostToDevice));

  // Per-row alpha. It is allocated and written before any GPU work is queued
  // so the host never touches managed memory while transforms are in flight.
  float *row_scale = NULL;
  if (SCALE_ROWS) {
    failures |= checkCudaStatus(
        cudaMallocManaged((void **)&row_scale, m * sizeof(float)));
    if (row_scale != NULL) {
      for (int i = 0; i < m; i++)
        row_scale[i] = static_cast<float>(i);
    }
  }

  cublasLtMatrixLayout_t Adesc_col_major = NULL, Bdesc_col_major = NULL,
                         Cdesc_col_major = NULL;
  failures |= checkCublasStatus(
      cublasLtMatrixLayoutCreate(&Adesc_col_major, CUDA_R_8I, m, k, lda));
  failures |= checkCublasStatus(
      cublasLtMatrixLayoutCreate(&Bdesc_col_major, CUDA_R_8I, n, k, ldb));
  failures |= checkCublasStatus(
      cublasLtMatrixLayoutCreate(&Cdesc_col_major, c_type, m, n, ldc));

  // Re-ordered buffers and their layouts.
  int8_t *A_col32 = NULL;
  int8_t *B_ordered = NULL;
  OutT *C_col32 = NULL;
  failures |= checkCudaStatus(
      cudaMalloc((void **)&A_col32, lda_col32 * sizeof(int8_t)));
  failures |= checkCudaStatus(
      cudaMalloc((void **)&B_ordered, ldb_ordered * sizeof(int8_t)));
  failures |=
      checkCudaStatus(cudaMalloc((void **)&C_col32, ldc_col32 * sizeof(OutT)));

  cublasLtMatrixLayout_t Adesc_col32 = NULL, Bdesc_ordered = NULL,
                         Cdesc_col32 = NULL;
  failures |= checkCublasStatus(
      cublasLtMatrixLayoutCreate(&Adesc_col32, CUDA_R_8I, m, k, lda_col32));
  // B stays n x k, the same shape as its column-major source layout and as
  // the B layout igemmlt builds; only the memory order changes.
  failures |= checkCublasStatus(
      cublasLtMatrixLayoutCreate(&Bdesc_ordered, CUDA_R_8I, n, k, ldb_ordered));
  failures |= checkCublasStatus(
      cublasLtMatrixLayoutCreate(&Cdesc_col32, c_type, m, n, ldc_col32));
  failures |= checkCublasStatus(cublasLtMatrixLayoutSetAttribute(
      Adesc_col32, CUBLASLT_MATRIX_LAYOUT_ORDER, &order_col32,
      sizeof(order_col32)));
  failures |= checkCublasStatus(cublasLtMatrixLayoutSetAttribute(
      Bdesc_ordered, CUBLASLT_MATRIX_LAYOUT_ORDER, &order_b, sizeof(order_b)));
  failures |= checkCublasStatus(cublasLtMatrixLayoutSetAttribute(
      Cdesc_col32, CUBLASLT_MATRIX_LAYOUT_ORDER, &order_col32,
      sizeof(order_col32)));

  // Convert A and B
  failures |=
      transformChecked(ltHandle, Adev, Adesc_col_major, A_col32, Adesc_col32);
  failures |= transformChecked(ltHandle, Bdev, Bdesc_col_major, B_ordered,
                               Bdesc_ordered);

  // Matmul
  failures |= igemmlt<FORMATB, DTYPE_OUT, SCALE_ROWS>(
      ltHandle, m, n, k, A_col32, B_ordered, C_col32, row_scale, lda_col32,
      ldb_ordered, ldc_col32);

  // Convert C
  failures |=
      transformChecked(ltHandle, C_col32, Cdesc_col32, Cdev, Cdesc_col_major);
  failures |= checkCudaStatus(cudaStreamSynchronize(0));

  // Check result
  OutT Chost[m * n] = {};
  failures |= checkCudaStatus(
      cudaMemcpy(Chost, Cdev, sizeof(Chost), cudaMemcpyDeviceToHost));

  bool error = (failures != 0);
  for (int i = 0; i < m * n; i++) {
    if (Chost[i] != C_ref[i]) {
      error = true;
      break;
    }
  }
  printf("c:\n");
  for (int i = 0; i < m * n; i++)
    printf("%d, ", static_cast<int>(Chost[i]));
  printf("\n");

  if (error) {
    printf("error\n");
  } else {
    printf("success\n");
  }

  // Release everything this case created, including the re-ordered buffers.
  cublasLtMatrixLayout_t layouts[] = {Adesc_col32,     Bdesc_ordered,
                                      Cdesc_col32,     Adesc_col_major,
                                      Bdesc_col_major, Cdesc_col_major};
  for (cublasLtMatrixLayout_t layout : layouts) {
    if (layout)
      cublasLtMatrixLayoutDestroy(layout);
  }
  cudaFree(A_col32);
  cudaFree(B_ordered);
  cudaFree(C_col32);
  cudaFree(Adev);
  cudaFree(Bdev);
  cudaFree(Cdev);
  cudaFree(row_scale);
  if (ltHandle)
    cublasLtDestroy(ltHandle);

  return !error;
}

// igemmlt: B in COL4_4R2_8C, int32 output
bool test1() {
  const int32_t C_ref[] = {14, 17, 20, 23, 4, 6, 8, 10};
  return runIgemmltCase<COL_TURING, 32, 0>(C_ref);
}

// igemmlt: B in COL4_4R2_8C, int8 output
bool test2() {
  const int8_t C_ref[] = {14, 17, 20, 23, 4, 6, 8, 10};
  return runIgemmltCase<COL_TURING, 8, 0>(C_ref);
}

// igemmlt: B in COL4_4R2_8C, int8 output, per-row alpha
bool test3() {
  const int8_t C_ref[] = {0, 17, 40, 69, 0, 6, 16, 30};
  return runIgemmltCase<COL_TURING, 8, 1>(C_ref);
}

// igemmlt: B in COL32_2R_4R4, int32 output
bool test4() {
  const int32_t C_ref[] = {14, 17, 20, 23, 4, 6, 8, 10};
  return runIgemmltCase<COL_AMPERE, 32, 0>(C_ref);
}

// igemmlt: B in COL32_2R_4R4, int8 output
bool test5() {
  const int8_t C_ref[] = {14, 17, 20, 23, 4, 6, 8, 10};
  return runIgemmltCase<COL_AMPERE, 8, 0>(C_ref);
}

// igemmlt: B in COL32_2R_4R4, int8 output, per-row alpha
bool test6() {
  const int8_t C_ref[] = {0, 17, 40, 69, 0, 6, 16, 30};
  return runIgemmltCase<COL_AMPERE, 8, 1>(C_ref);
}

// clang-format off
// A (4*3)        B (2*3)
//  6 10 14        5 -3  1
//  7 11 15        4 -2  0
//  8 12 16
//  9 13 17
//
// alpha  *  A         *  op(B)  =  alpha  *  C      =  C
//   0       6 10 14       5  4       0      14  4       0  0
//   1       7 11 15      -3 -2       1      17  6      17  6
//   2       8 12 16       1  0       2      20  8      40 16
//   3       9 13 17                  3      23 10      69 30
//
// alpha  *  A         *  op(B)  =  alpha  *  C      =  C
//   1       6 10 14       5  4       1      14  4      14  4
//           7 11 15      -3 -2              17  6      17  6
//           8 12 16       1  0              20  8      20  8
//           9 13 17                         23 10      23 10
// clang-format on

int main() {
  bool pass = true;
  pass = test1() && pass;
  pass = test2() && pass;
  pass = test3() && pass;
  pass = test4() && pass;
  pass = test5() && pass;
  pass = test6() && pass;
  return pass ? 0 : 1;
}

diff --git a/features/feature_case/cublasLt/transform.cu b/features/feature_case/cublasLt/transform.cu
new file mode 100644
index 00000000..3a1205ad
--- /dev/null
+++ b/features/feature_case/cublasLt/transform.cu
@@ -0,0 +1,600 @@
+// ===------------ transform.cu -------------------------- *- CUDA -* ----=== //
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// ===--------------------------------------------------------------------=== // + +#include "cublasLt.h" +#include + +void transform(cublasLtHandle_t ltHandle, void *in, int ld_in, + cublasLtOrder_t order_in, void *out, int ld_out, + cublasLtOrder_t order_out, int dim1, int dim2) { + cublasLtMatrixLayout_t in_desc = NULL, out_desc = NULL; + cublasLtMatrixTransformDesc_t transform_desc = NULL; + + cublasLtMatrixLayoutCreate(&in_desc, CUDA_R_8I, dim1, dim2, ld_in); + cublasLtMatrixLayoutCreate(&out_desc, CUDA_R_8I, dim1, dim2, ld_out); + + cublasLtMatrixLayoutSetAttribute(in_desc, CUBLASLT_MATRIX_LAYOUT_ORDER, + &order_in, sizeof(order_in)); + cublasLtMatrixLayoutSetAttribute(out_desc, CUBLASLT_MATRIX_LAYOUT_ORDER, + &order_out, sizeof(order_out)); + + cublasLtMatrixTransformDescCreate(&transform_desc, CUDA_R_32F); + + float alpha = 1.0f, beta = 0.0f; + cublasLtMatrixTransform(ltHandle, transform_desc, &alpha, in, in_desc, &beta, + NULL, NULL, out, out_desc, 0); + + cublasLtMatrixLayoutDestroy(in_desc); + cublasLtMatrixLayoutDestroy(out_desc); + cublasLtMatrixTransformDescDestroy(transform_desc); +} + +bool test_ROW() { + const constexpr int m = 2; + const constexpr int n = 33; + const constexpr int in_ld = 4; + void *in_dev; + cudaMalloc(&in_dev, n * in_ld * sizeof(int8_t)); + + int8_t in_host[n * in_ld]; + int8_t value = 0; + for (int i = 0; i < n * in_ld; i++) { + if (i % 4 < 2) { + in_host[i] = value; + value++; + } else + in_host[i] = 99; + } + int8_t ref_2nd[n * in_ld]; + std::memcpy(ref_2nd, in_host, n * in_ld * sizeof(int8_t)); + + cudaMemcpy(in_dev, in_host, n * in_ld * sizeof(int8_t), + cudaMemcpyHostToDevice); + + cublasLtHandle_t ltHandle; + cublasLtCreate(<Handle); + + void *out_dev; + const constexpr int out_ld = 36; + cudaMalloc(&out_dev, out_ld * m * sizeof(int8_t)); + cudaMemset(out_dev, 0, out_ld * m * sizeof(int8_t)); + transform(ltHandle, in_dev, in_ld, CUBLASLT_ORDER_COL, out_dev, 
out_ld, + CUBLASLT_ORDER_ROW, m, n); + + int8_t out_host[out_ld * m]; + cudaMemcpy(out_host, out_dev, out_ld * m * sizeof(int8_t), + cudaMemcpyDeviceToHost); + + bool pass_1st = true; + int8_t ref_1st[out_ld * m] = + {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 0, 0, 0, + 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 0, 0, 0}; + for (int i = 0; i < out_ld * m; i++) { + if (i % out_ld < n) { + if (out_host[i] != ref_1st[i]) { + pass_1st = false; + break; + } + } + } + + for (int i = 0; i < out_ld * m; i++) { + printf("%d, ", out_host[i]); + } + printf("\n"); + if (pass_1st) { + printf("ROW 1st pass\n"); + } else { + printf("ROW 1st fail\n"); + } + + cudaMemset(in_dev, 0, n * in_ld * sizeof(int8_t)); + std::memset(in_host, 0, n * in_ld * sizeof(int8_t)); + transform(ltHandle, out_dev, out_ld, CUBLASLT_ORDER_ROW, in_dev, in_ld, + CUBLASLT_ORDER_COL, m, n); + cudaMemcpy(in_host, in_dev, n * in_ld * sizeof(int8_t), + cudaMemcpyDeviceToHost); + + bool pass_2nd = true; + for (int i = 0; i < n * in_ld; i++) { + if (i % in_ld < m) { + if (in_host[i] != ref_2nd[i]) { + pass_2nd = false; + break; + } + } + } + + for (int i = 0; i < n * in_ld; i++) { + printf("%d, ", in_host[i]); + } + printf("\n"); + if (pass_2nd) { + printf("ROW 2nd pass\n"); + } else { + printf("ROW 2nd fail\n"); + } + + cublasLtDestroy(ltHandle); + + return pass_1st && pass_2nd; +} + +bool test_COL32() { + const constexpr int m = 2; + const constexpr int n = 33; + const constexpr int in_ld = 4; + void *in_dev; + cudaMalloc(&in_dev, n * in_ld * sizeof(int8_t)); + + int8_t in_host[n * in_ld]; + int8_t value = 0; + for (int i = 0; i < n * in_ld; i++) { + if (i % 4 < 2) { + in_host[i] = value; + value++; + } else + in_host[i] = 99; + } + int8_t ref_2nd[n * in_ld]; + std::memcpy(ref_2nd, in_host, n * in_ld * sizeof(int8_t)); + + cudaMemcpy(in_dev, 
in_host, n * in_ld * sizeof(int8_t), + cudaMemcpyHostToDevice); + + cublasLtHandle_t ltHandle; + cublasLtCreate(<Handle); + + void *out_dev; + const constexpr int out_ld = 64; + cudaMalloc(&out_dev, out_ld * m * sizeof(int8_t)); + cudaMemset(out_dev, 0, out_ld * m * sizeof(int8_t)); + transform(ltHandle, in_dev, in_ld, CUBLASLT_ORDER_COL, out_dev, out_ld, + CUBLASLT_ORDER_COL32, m, n); + + int8_t out_host[out_ld * m]; + cudaMemcpy(out_host, out_dev, out_ld * m * sizeof(int8_t), + cudaMemcpyDeviceToHost); + + bool pass_1st = true; + int8_t ref_1st[out_ld * m] = + {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, + 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, + 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 65, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + for (int i = 0; i < out_ld * m; i++) { + if (i % out_ld < n) { + if (out_host[i] != ref_1st[i]) { + pass_1st = false; + break; + } + } + } + + for (int i = 0; i < out_ld * m; i++) { + printf("%d, ", out_host[i]); + } + printf("\n"); + if (pass_1st) { + printf("COL32 1st pass\n"); + } else { + printf("COL32 1st fail\n"); + } + + cudaMemset(in_dev, 0, n * in_ld * sizeof(int8_t)); + std::memset(in_host, 0, n * in_ld * sizeof(int8_t)); + transform(ltHandle, out_dev, out_ld, CUBLASLT_ORDER_COL32, in_dev, in_ld, + CUBLASLT_ORDER_COL, m, n); + cudaMemcpy(in_host, in_dev, n * in_ld * sizeof(int8_t), + cudaMemcpyDeviceToHost); + + bool pass_2nd = true; + for (int i = 0; i < n * in_ld; i++) { + if (i % in_ld < m) { + if (in_host[i] != ref_2nd[i]) { + pass_2nd = false; + break; + } + } + } + + for (int i = 0; i < n * in_ld; i++) { + printf("%d, ", in_host[i]); + } + printf("\n"); + if (pass_2nd) { + printf("COL32 2nd pass\n"); + } else { + printf("COL32 2nd fail\n"); + } + + 
cublasLtDestroy(ltHandle); + + return pass_1st && pass_2nd; +} + +bool test_COL4_4R2_8C() { + const constexpr int m = 2; + const constexpr int n = 33; + const constexpr int in_ld = 4; + void *in_dev; + cudaMalloc(&in_dev, n * in_ld * sizeof(int8_t)); + + int8_t in_host[n * in_ld]; + int8_t value = 0; + for (int i = 0; i < n * in_ld; i++) { + if (i % 4 < 2) { + in_host[i] = value; + value++; + } else + in_host[i] = 99; + } + int8_t ref_2nd[n * in_ld]; + std::memcpy(ref_2nd, in_host, n * in_ld * sizeof(int8_t)); + + cudaMemcpy(in_dev, in_host, n * in_ld * sizeof(int8_t), + cudaMemcpyHostToDevice); + + cublasLtHandle_t ltHandle; + cublasLtCreate(<Handle); + + void *out_dev; + const constexpr int out_ld = (32 * 8) * 2; + cudaMalloc(&out_dev, out_ld * m * sizeof(int8_t)); + cudaMemset(out_dev, 0, out_ld * m * sizeof(int8_t)); + transform(ltHandle, in_dev, in_ld, CUBLASLT_ORDER_COL, out_dev, out_ld, + CUBLASLT_ORDER_COL4_4R2_8C, m, n); + + int8_t out_host[out_ld * m]; + cudaMemcpy(out_host, out_dev, out_ld * m * sizeof(int8_t), + cudaMemcpyDeviceToHost); + + bool pass_1st = true; + int8_t ref_1st[out_ld * m] = + {0, 2, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 8, 10, 12, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 16, 18, 20, 22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 24, 26, 28, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 32, 34, 36, 38, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 40, 42, 44, 46, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 48, 50, 52, 54, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 56, 58, 60, 62, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 3, 5, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 9, 11, 13, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 17, 19, 21, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 25, 27, 29, 31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 33, 35, 37, 39, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 41, 43, 45, 47, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 49, 51, 53, 55, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 57, 59, 61, 63, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 65, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + for (int i = 0; i < out_ld * m; i++) { + if (i % out_ld < n) { + if (out_host[i] != ref_1st[i]) { + pass_1st = false; + break; + } + } + } + + for (int i = 0; i < out_ld * m; i++) { + printf("%d, ", out_host[i]); + } + printf("\n"); + if (pass_1st) { + printf("COL4_4R2_8C 1st pass\n"); + } else { + printf("COL4_4R2_8C 1st fail\n"); + } + + cudaMemset(in_dev, 0, n * in_ld * sizeof(int8_t)); + std::memset(in_host, 0, n * in_ld * sizeof(int8_t)); + transform(ltHandle, out_dev, out_ld, CUBLASLT_ORDER_COL4_4R2_8C, in_dev, + in_ld, CUBLASLT_ORDER_COL, m, n); + cudaMemcpy(in_host, in_dev, n * in_ld * sizeof(int8_t), + cudaMemcpyDeviceToHost); + + bool pass_2nd = true; + for (int i = 0; i < n * in_ld; i++) { + if (i % in_ld < m) { + if (in_host[i] != ref_2nd[i]) { + pass_2nd = false; + break; + } + } + } + + for (int i = 0; i < n * in_ld; i++) { + printf("%d, ", in_host[i]); + } + printf("\n"); + if (pass_2nd) { + printf("COL4_4R2_8C 2nd pass\n"); + } else { + printf("COL4_4R2_8C 2nd fail\n"); + } + + cublasLtDestroy(ltHandle); + + return pass_1st && pass_2nd; +} + +bool test_COL32_2R_4R4() { + const constexpr int m = 2; + const constexpr int n = 33; + const constexpr int in_ld = 4; + void *in_dev; + cudaMalloc(&in_dev, n * in_ld * sizeof(int8_t)); + + int8_t in_host[n * in_ld]; + int8_t value = 0; + for (int i = 0; i < n * in_ld; i++) { + if (i % 4 < 2) { + in_host[i] = value; + value++; + } else + in_host[i] = 99; + } + int8_t ref_2nd[n * in_ld]; + std::memcpy(ref_2nd, in_host, n * in_ld * sizeof(int8_t)); + + cudaMemcpy(in_dev, in_host, n * in_ld * sizeof(int8_t), + cudaMemcpyHostToDevice); + + 
cublasLtHandle_t ltHandle; + cublasLtCreate(<Handle); + + void *out_dev; + const constexpr int out_ld = (32 * 32) * 2; + cudaMalloc(&out_dev, out_ld * m * sizeof(int8_t)); + cudaMemset(out_dev, 0, out_ld * m * sizeof(int8_t)); + transform(ltHandle, in_dev, in_ld, CUBLASLT_ORDER_COL, out_dev, out_ld, + CUBLASLT_ORDER_COL32_2R_4R4, m, n); + + int8_t out_host[out_ld * m]; + cudaMemcpy(out_host, out_dev, out_ld * m * sizeof(int8_t), + cudaMemcpyDeviceToHost); + + bool pass_1st = true; + int8_t ref_1st[out_ld * m] = + {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, + 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 65, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + for (int i = 0; i < out_ld * m; i++) { + if (i % out_ld < n) { + if (out_host[i] != ref_1st[i]) { + pass_1st = false; + break; + } + } + } + + for (int i = 0; i < out_ld * m; i++) { + printf("%d, ", out_host[i]); + } + printf("\n"); + if (pass_1st) { + printf("COL32_2R_4R4 1st pass\n"); + } else { + printf("COL32_2R_4R4 1st fail\n"); + } + + cudaMemset(in_dev, 0, n * in_ld * sizeof(int8_t)); + std::memset(in_host, 0, n * in_ld * sizeof(int8_t)); + transform(ltHandle, out_dev, out_ld, CUBLASLT_ORDER_COL32_2R_4R4, in_dev, + in_ld, CUBLASLT_ORDER_COL, m, n); + cudaMemcpy(in_host, in_dev, n * in_ld * sizeof(int8_t), + cudaMemcpyDeviceToHost); + + bool pass_2nd = true; + for (int i = 0; i < n * in_ld; i++) { + if (i % in_ld < m) { + if (in_host[i] != ref_2nd[i]) { + pass_2nd = false; + break; + } + } + } + + for (int i = 0; i < n * in_ld; i++) { + printf("%d, ", 
in_host[i]); + } + printf("\n"); + if (pass_2nd) { + printf("COL32_2R_4R4 2nd pass\n"); + } else { + printf("COL32_2R_4R4 2nd fail\n"); + } + + cublasLtDestroy(ltHandle); + + return pass_1st && pass_2nd; +} + +// Input col_major matrix: +// 2 rows * 33 columns, ld is 4 +int main() { + bool pass = true; + pass = test_ROW() && pass; + pass = test_COL32() && pass; + pass = test_COL4_4R2_8C() && pass; + pass = test_COL32_2R_4R4() && pass; + return pass ? 0 : 1; +} From 2f0f6cee2ec6163e8dc8f2d7827fa819677931b5 Mon Sep 17 00:00:00 2001 From: "Jiang, Zhiwei" Date: Mon, 24 Jun 2024 16:11:26 +0800 Subject: [PATCH 2/4] [SYCLomatic #1993] Add test for cublasLt API migration Signed-off-by: Jiang, Zhiwei --- features/config/TEMPLATE_cublasLt.xml | 13 + .../cublasLt/out/MainSourceFiles.yaml | 3846 +++++++++++++++++ .../feature_case/cublasLt/out/matmul.dp.cpp | 742 ++++ .../cublasLt/out/transform.dp.cpp | 597 +++ features/features.xml | 2 + features/test_feature.py | 4 +- help_function/help_function.xml | 1 + .../src/blas_gemm_utils_interface.cpp | 152 + help_function/test_help.py | 2 +- 9 files changed, 5356 insertions(+), 3 deletions(-) create mode 100644 features/config/TEMPLATE_cublasLt.xml create mode 100644 features/feature_case/cublasLt/out/MainSourceFiles.yaml create mode 100644 features/feature_case/cublasLt/out/matmul.dp.cpp create mode 100644 features/feature_case/cublasLt/out/transform.dp.cpp create mode 100644 help_function/src/blas_gemm_utils_interface.cpp diff --git a/features/config/TEMPLATE_cublasLt.xml b/features/config/TEMPLATE_cublasLt.xml new file mode 100644 index 00000000..cb15eeed --- /dev/null +++ b/features/config/TEMPLATE_cublasLt.xml @@ -0,0 +1,13 @@ + + + + test + + + + + + + + + diff --git a/features/feature_case/cublasLt/out/MainSourceFiles.yaml b/features/feature_case/cublasLt/out/MainSourceFiles.yaml new file mode 100644 index 00000000..d8674ec9 --- /dev/null +++ b/features/feature_case/cublasLt/out/MainSourceFiles.yaml @@ -0,0 +1,3846 @@ +--- 
+MainSourceFile: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/MainSrcFiles_placehold' +Replacements: + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 370 + Length: 22 + ReplacementText: "#include \n#include \n#include \n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 431 + Length: 0 + ReplacementText: "\n#include \n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 851 + Length: 14 + ReplacementText: int + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 894 + Length: 21 + ReplacementText: '0' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 1143 + Length: 16 + ReplacementText: 'dpct::blas_gemm::experimental::descriptor_ptr' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 1279 + Length: 0 + ReplacementText: ' try ' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 1280 + Length: 0 + ReplacementText: "\n dpct::device_ext &dev_ct1 = dpct::get_current_device();\n sycl::queue &q_ct1 = dev_ct1.in_order_queue();" + ConstantFlag: '' + ConstantOffset: 0 + 
InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 1308 + Length: 20 + ReplacementText: 'dpct::blas_gemm::experimental::matmul_desc_ptr' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 1352 + Length: 22 + ReplacementText: 'dpct::blas_gemm::experimental::matrix_layout_ptr' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 1421 + Length: 17 + ReplacementText: 'oneapi::mkl::transpose' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 1445 + Length: 11 + ReplacementText: 'oneapi::mkl::transpose::trans' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 1462 + Length: 21 + ReplacementText: 'dpct::blas_gemm::experimental::pointer_mode_t' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 1495 + Length: 51 + ReplacementText: 'dpct::blas_gemm::experimental::pointer_mode_t::alpha_device_vector_beta_zero' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 1552 + Length: 15 + ReplacementText: 'dpct::blas_gemm::experimental::order_t' 
+ ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 1576 + Length: 20 + ReplacementText: 'dpct::blas_gemm::experimental::order_t::col32' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 1602 + Length: 15 + ReplacementText: 'dpct::blas_gemm::experimental::order_t' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 1631 + Length: 26 + ReplacementText: 'dpct::blas_gemm::experimental::order_t::col4_4r2_8c' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 1663 + Length: 15 + ReplacementText: 'dpct::blas_gemm::experimental::order_t' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 1692 + Length: 27 + ReplacementText: 'dpct::blas_gemm::experimental::order_t::col32_2r_4r4' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 1757 + Length: 56 + ReplacementText: 'DPCT_CHECK_ERROR(Adesc = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, k, lda))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 1851 + Length: 56 + ReplacementText: 'DPCT_CHECK_ERROR(Bdesc = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, n, k, ldb))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 1946 + Length: 92 + ReplacementText: 'DPCT_CHECK_ERROR(Adesc->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 2108 + Length: 102 + ReplacementText: 'DPCT_CHECK_ERROR(Bdesc->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col_turing))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 2259 + Length: 102 + ReplacementText: 'DPCT_CHECK_ERROR(Bdesc->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col_ampere))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 2432 + Length: 69 + ReplacementText: 'DPCT_CHECK_ERROR(matmulDesc = new dpct::blas_gemm::experimental::matmul_desc_t(dpct::compute_type::i32, dpct::library_data_t::real_int32))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 2541 + Length: 90 + ReplacementText: 
'DPCT_CHECK_ERROR(matmulDesc->set_attribute(dpct::blas_gemm::experimental::matmul_desc_t::attribute::trans_b, &opT))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 2671 + Length: 57 + ReplacementText: 'DPCT_CHECK_ERROR(Cdesc = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int32, m, n, ldc))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 2768 + Length: 92 + ReplacementText: 'DPCT_CHECK_ERROR(Cdesc->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 2931 + Length: 128 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::blas_gemm::experimental::matmul(ltHandle, matmulDesc, &alpha, A, Adesc, B, Bdesc, &beta, (int32_t*)C, Cdesc, (int32_t*)C, Cdesc, 0))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 3120 + Length: 69 + ReplacementText: 'DPCT_CHECK_ERROR(matmulDesc = new dpct::blas_gemm::experimental::matmul_desc_t(dpct::compute_type::i32, dpct::library_data_t::real_float))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 3229 + Length: 90 + ReplacementText: 'DPCT_CHECK_ERROR(matmulDesc->set_attribute(dpct::blas_gemm::experimental::matmul_desc_t::attribute::trans_b, &opT))' 
+ ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 3359 + Length: 56 + ReplacementText: 'DPCT_CHECK_ERROR(Cdesc = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, n, ldc))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 3455 + Length: 92 + ReplacementText: 'DPCT_CHECK_ERROR(Cdesc->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 3660 + Length: 126 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::blas_gemm::experimental::matmul(ltHandle, matmulDesc, &alpha, A, Adesc, B, Bdesc, &beta, (int8_t*)C, Cdesc, (int8_t*)C, Cdesc, 0))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 3855 + Length: 106 + ReplacementText: 'DPCT_CHECK_ERROR(matmulDesc->set_attribute(dpct::blas_gemm::experimental::matmul_desc_t::attribute::pointer_mode, &alphaVec))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 4003 + Length: 129 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::blas_gemm::experimental::matmul(ltHandle, matmulDesc, row_scale, A, Adesc, B, Bdesc, NULL, (int8_t*)C, Cdesc, (int8_t*)C, Cdesc, 0))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + 
BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 4154 + Length: 24 + ReplacementText: 'q_ct1.wait()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 4227 + Length: 34 + ReplacementText: 'DPCT_CHECK_ERROR(delete (Cdesc))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 4310 + Length: 34 + ReplacementText: 'DPCT_CHECK_ERROR(delete (Bdesc))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 4393 + Length: 34 + ReplacementText: 'DPCT_CHECK_ERROR(delete (Adesc))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 4481 + Length: 37 + ReplacementText: 'DPCT_CHECK_ERROR(delete (matmulDesc))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 4600 + Length: 0 + ReplacementText: "\ncatch (sycl::exception const &exc) {\n std::cerr << exc.what() << \"Exception caught at file:\" << __FILE__ << \", line:\" << __LINE__ << std::endl;\n std::exit(1);\n}" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 4617 + Length: 16 + ReplacementText: 
'dpct::blas_gemm::experimental::descriptor_ptr' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 4686 + Length: 22 + ReplacementText: 'dpct::blas_gemm::experimental::matrix_layout_ptr' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 4758 + Length: 22 + ReplacementText: 'dpct::blas_gemm::experimental::matrix_layout_ptr' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 4797 + Length: 29 + ReplacementText: 'dpct::blas_gemm::experimental::transform_desc_ptr' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 4852 + Length: 62 + ReplacementText: 'transform_desc = new dpct::blas_gemm::experimental::transform_desc_t(dpct::library_data_t::real_float)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 4953 + Length: 137 + ReplacementText: 'dpct::blas_gemm::experimental::matrix_transform(transform_desc, &alpha, in, layout_in, &beta, NULL, NULL, out, layout_out, 0)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 5094 + Length: 50 + ReplacementText: 'delete (transform_desc)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + 
NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 5193 + Length: 0 + ReplacementText: "\n dpct::device_ext &dev_ct1 = dpct::get_current_device();\n sycl::queue &q_ct1 = dev_ct1.in_order_queue();" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 5196 + Length: 16 + ReplacementText: 'dpct::blas_gemm::experimental::descriptor_ptr' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 5225 + Length: 25 + ReplacementText: 'ltHandle = new dpct::blas_gemm::experimental::descriptor()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 5428 + Length: 41 + ReplacementText: 'Adev = (void *)sycl::malloc_device(m * k * sizeof(int8_t), q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 5473 + Length: 41 + ReplacementText: 'Bdev = (void *)sycl::malloc_device(n * k * sizeof(int8_t), q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 5518 + Length: 42 + ReplacementText: 'Cdev = (void *)sycl::malloc_device(m * n * sizeof(int32_t), q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 5682 + Length: 10 + ReplacementText: q_ct1.memcpy + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 5728 + Length: 24 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 5757 + Length: 10 + ReplacementText: q_ct1.memcpy + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 5803 + Length: 24 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 5828 + Length: 0 + ReplacementText: '.wait()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 5833 + Length: 22 + ReplacementText: 'dpct::blas_gemm::experimental::matrix_layout_ptr' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 5955 + Length: 66 + ReplacementText: 'Adesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, k, lda)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 6025 + Length: 66 + ReplacementText: 'Bdesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, n, k, ldb)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 6095 + Length: 67 + ReplacementText: 'Cdesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int32, m, n, ldc)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 6188 + Length: 22 + ReplacementText: 'dpct::blas_gemm::experimental::matrix_layout_ptr' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 6359 + Length: 50 + ReplacementText: 'A_col32 = (int8_t *)sycl::malloc_device(m * 32 * sizeof(std::int8_t), q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 6413 + Length: 76 + ReplacementText: 'B_col4_4r2_8c = (int8_t *)sycl::malloc_device(((n + 8 - 1) / 8) * 8 * 32 * sizeof(std::int8_t), q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 6493 + Length: 51 + ReplacementText: 'C_col32 = (int32_t *)sycl::malloc_device(m * 32 * sizeof(std::int32_t), q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: 
false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 6548 + Length: 65 + ReplacementText: 'Adesc_col32 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, k, m * 32)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 6617 + Length: 120 + ReplacementText: 'Bdesc_col4_4r2_8c = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, k, n, ((n + 8 - 1) / 8) * 8 * 32)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 6741 + Length: 66 + ReplacementText: 'Cdesc_col32 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int32, m, n, m * 32)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 6811 + Length: 15 + ReplacementText: 'dpct::blas_gemm::experimental::order_t' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 6835 + Length: 20 + ReplacementText: 'dpct::blas_gemm::experimental::order_t::col32' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 6859 + Length: 15 + ReplacementText: 'dpct::blas_gemm::experimental::order_t' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 6889 + Length: 26 + ReplacementText: 'dpct::blas_gemm::experimental::order_t::col4_4r2_8c' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 6919 + Length: 133 + ReplacementText: 'Adesc_col32->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 7056 + Length: 186 + ReplacementText: 'Bdesc_col4_4r2_8c->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col4_4r2_8c)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 7246 + Length: 133 + ReplacementText: 'Cdesc_col32->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 7873 + Length: 24 + ReplacementText: 'q_ct1.wait()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 7944 + Length: 10 + ReplacementText: q_ct1.memcpy + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 7991 + Length: 24 + 
ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 8016 + Length: 0 + ReplacementText: '.wait()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 8388 + Length: 25 + ReplacementText: 'delete (ltHandle)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 8417 + Length: 40 + ReplacementText: 'delete (Adesc_col32)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 8461 + Length: 46 + ReplacementText: 'delete (Bdesc_col4_4r2_8c)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 8511 + Length: 40 + ReplacementText: 'delete (Cdesc_col32)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 8555 + Length: 44 + ReplacementText: 'delete (Adesc_col_major)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 8603 + Length: 44 + ReplacementText: 'delete (Bdesc_col_major)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + 
- FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 8651 + Length: 44 + ReplacementText: 'delete (Cdesc_col_major)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 8699 + Length: 14 + ReplacementText: 'dpct::dpct_free(Adev, q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 8717 + Length: 14 + ReplacementText: 'dpct::dpct_free(Bdev, q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 8735 + Length: 14 + ReplacementText: 'dpct::dpct_free(Cdev, q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 8815 + Length: 0 + ReplacementText: "\n dpct::device_ext &dev_ct1 = dpct::get_current_device();\n sycl::queue &q_ct1 = dev_ct1.in_order_queue();" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 8818 + Length: 16 + ReplacementText: 'dpct::blas_gemm::experimental::descriptor_ptr' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 8847 + Length: 25 + ReplacementText: 'ltHandle = new dpct::blas_gemm::experimental::descriptor()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' 
+ NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 9050 + Length: 41 + ReplacementText: 'Adev = (void *)sycl::malloc_device(m * k * sizeof(int8_t), q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 9095 + Length: 41 + ReplacementText: 'Bdev = (void *)sycl::malloc_device(n * k * sizeof(int8_t), q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 9140 + Length: 41 + ReplacementText: 'Cdev = (void *)sycl::malloc_device(m * n * sizeof(int8_t), q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 9303 + Length: 10 + ReplacementText: q_ct1.memcpy + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 9349 + Length: 24 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 9378 + Length: 10 + ReplacementText: q_ct1.memcpy + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 9424 + Length: 24 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: 
false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 9449 + Length: 0 + ReplacementText: '.wait()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 9454 + Length: 22 + ReplacementText: 'dpct::blas_gemm::experimental::matrix_layout_ptr' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 9576 + Length: 66 + ReplacementText: 'Adesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, k, lda)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 9646 + Length: 66 + ReplacementText: 'Bdesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, n, k, ldb)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 9716 + Length: 66 + ReplacementText: 'Cdesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, n, ldc)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 9808 + Length: 22 + ReplacementText: 'dpct::blas_gemm::experimental::matrix_layout_ptr' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 9978 + Length: 50 + ReplacementText: 'A_col32 = (int8_t *)sycl::malloc_device(m * 32 * sizeof(std::int8_t), q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 10032 + Length: 76 + ReplacementText: 'B_col4_4r2_8c = (int8_t *)sycl::malloc_device(((n + 8 - 1) / 8) * 8 * 32 * sizeof(std::int8_t), q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 10112 + Length: 50 + ReplacementText: 'C_col32 = (int8_t *)sycl::malloc_device(m * 32 * sizeof(std::int8_t), q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 10166 + Length: 65 + ReplacementText: 'Adesc_col32 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, k, m * 32)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 10235 + Length: 120 + ReplacementText: 'Bdesc_col4_4r2_8c = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, k, n, ((n + 8 - 1) / 8) * 8 * 32)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 10359 + Length: 65 + ReplacementText: 'Cdesc_col32 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, n, m * 32)' + 
ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 10428 + Length: 15 + ReplacementText: 'dpct::blas_gemm::experimental::order_t' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 10452 + Length: 20 + ReplacementText: 'dpct::blas_gemm::experimental::order_t::col32' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 10476 + Length: 15 + ReplacementText: 'dpct::blas_gemm::experimental::order_t' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 10506 + Length: 26 + ReplacementText: 'dpct::blas_gemm::experimental::order_t::col4_4r2_8c' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 10536 + Length: 133 + ReplacementText: 'Adesc_col32->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 10673 + Length: 186 + ReplacementText: 'Bdesc_col4_4r2_8c->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col4_4r2_8c)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 10863 + Length: 133 + ReplacementText: 'Cdesc_col32->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 11487 + Length: 24 + ReplacementText: 'q_ct1.wait()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 11557 + Length: 10 + ReplacementText: q_ct1.memcpy + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 11603 + Length: 24 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 11628 + Length: 0 + ReplacementText: '.wait()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 11999 + Length: 25 + ReplacementText: 'delete (ltHandle)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 12028 + Length: 40 + ReplacementText: 'delete (Adesc_col32)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 
12072 + Length: 46 + ReplacementText: 'delete (Bdesc_col4_4r2_8c)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 12122 + Length: 40 + ReplacementText: 'delete (Cdesc_col32)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 12166 + Length: 44 + ReplacementText: 'delete (Adesc_col_major)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 12214 + Length: 44 + ReplacementText: 'delete (Bdesc_col_major)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 12262 + Length: 44 + ReplacementText: 'delete (Cdesc_col_major)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 12310 + Length: 14 + ReplacementText: 'dpct::dpct_free(Adev, q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 12328 + Length: 14 + ReplacementText: 'dpct::dpct_free(Bdev, q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 12346 + Length: 14 + ReplacementText: 'dpct::dpct_free(Cdev, q_ct1)' + 
ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 12426 + Length: 0 + ReplacementText: "\n dpct::device_ext &dev_ct1 = dpct::get_current_device();\n sycl::queue &q_ct1 = dev_ct1.in_order_queue();" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 12429 + Length: 16 + ReplacementText: 'dpct::blas_gemm::experimental::descriptor_ptr' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 12458 + Length: 25 + ReplacementText: 'ltHandle = new dpct::blas_gemm::experimental::descriptor()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 12661 + Length: 41 + ReplacementText: 'Adev = (void *)sycl::malloc_device(m * k * sizeof(int8_t), q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 12706 + Length: 41 + ReplacementText: 'Bdev = (void *)sycl::malloc_device(n * k * sizeof(int8_t), q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 12751 + Length: 41 + ReplacementText: 'Cdev = (void *)sycl::malloc_device(m * n * sizeof(int8_t), q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false 
+ - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 12914 + Length: 10 + ReplacementText: q_ct1.memcpy + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 12960 + Length: 24 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 12989 + Length: 10 + ReplacementText: q_ct1.memcpy + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 13035 + Length: 24 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 13060 + Length: 0 + ReplacementText: '.wait()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 13065 + Length: 22 + ReplacementText: 'dpct::blas_gemm::experimental::matrix_layout_ptr' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 13187 + Length: 66 + ReplacementText: 'Adesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, k, lda)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 13257 + Length: 66 + ReplacementText: 'Bdesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, n, k, ldb)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 13327 + Length: 66 + ReplacementText: 'Cdesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, n, ldc)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 13419 + Length: 22 + ReplacementText: 'dpct::blas_gemm::experimental::matrix_layout_ptr' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 13589 + Length: 50 + ReplacementText: 'A_col32 = (int8_t *)sycl::malloc_device(m * 32 * sizeof(std::int8_t), q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 13643 + Length: 76 + ReplacementText: 'B_col4_4r2_8c = (int8_t *)sycl::malloc_device(((n + 8 - 1) / 8) * 8 * 32 * sizeof(std::int8_t), q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 13723 + Length: 50 + ReplacementText: 'C_col32 = (int8_t *)sycl::malloc_device(m * 32 * sizeof(std::int8_t), q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: 
false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 13777 + Length: 65 + ReplacementText: 'Adesc_col32 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, k, m * 32)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 13846 + Length: 120 + ReplacementText: 'Bdesc_col4_4r2_8c = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, k, n, ((n + 8 - 1) / 8) * 8 * 32)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 13970 + Length: 65 + ReplacementText: 'Cdesc_col32 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, n, m * 32)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 14039 + Length: 15 + ReplacementText: 'dpct::blas_gemm::experimental::order_t' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 14063 + Length: 20 + ReplacementText: 'dpct::blas_gemm::experimental::order_t::col32' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 14087 + Length: 15 + ReplacementText: 'dpct::blas_gemm::experimental::order_t' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 14117 + Length: 26 + ReplacementText: 'dpct::blas_gemm::experimental::order_t::col4_4r2_8c' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 14147 + Length: 133 + ReplacementText: 'Adesc_col32->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 14284 + Length: 186 + ReplacementText: 'Bdesc_col4_4r2_8c->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col4_4r2_8c)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 14474 + Length: 133 + ReplacementText: 'Cdesc_col32->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 14815 + Length: 44 + ReplacementText: 'alpha = sycl::malloc_shared(4, q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 15197 + Length: 24 + ReplacementText: 'q_ct1.wait()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 
15267 + Length: 10 + ReplacementText: q_ct1.memcpy + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 15313 + Length: 24 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 15338 + Length: 0 + ReplacementText: '.wait()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 15709 + Length: 25 + ReplacementText: 'delete (ltHandle)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 15738 + Length: 40 + ReplacementText: 'delete (Adesc_col32)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 15782 + Length: 46 + ReplacementText: 'delete (Bdesc_col4_4r2_8c)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 15832 + Length: 40 + ReplacementText: 'delete (Cdesc_col32)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 15876 + Length: 44 + ReplacementText: 'delete (Adesc_col_major)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + 
BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 15924 + Length: 44 + ReplacementText: 'delete (Bdesc_col_major)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 15972 + Length: 44 + ReplacementText: 'delete (Cdesc_col_major)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 16020 + Length: 14 + ReplacementText: 'dpct::dpct_free(Adev, q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 16038 + Length: 14 + ReplacementText: 'dpct::dpct_free(Bdev, q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 16056 + Length: 14 + ReplacementText: 'dpct::dpct_free(Cdev, q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 16074 + Length: 15 + ReplacementText: 'dpct::dpct_free(alpha, q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 16156 + Length: 0 + ReplacementText: "\n dpct::device_ext &dev_ct1 = dpct::get_current_device();\n sycl::queue &q_ct1 = dev_ct1.in_order_queue();" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + 
NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 16159 + Length: 16 + ReplacementText: 'dpct::blas_gemm::experimental::descriptor_ptr' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 16188 + Length: 25 + ReplacementText: 'ltHandle = new dpct::blas_gemm::experimental::descriptor()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 16391 + Length: 41 + ReplacementText: 'Adev = (void *)sycl::malloc_device(m * k * sizeof(int8_t), q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 16436 + Length: 41 + ReplacementText: 'Bdev = (void *)sycl::malloc_device(n * k * sizeof(int8_t), q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 16481 + Length: 42 + ReplacementText: 'Cdev = (void *)sycl::malloc_device(m * n * sizeof(int32_t), q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 16645 + Length: 10 + ReplacementText: q_ct1.memcpy + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 16691 + Length: 24 + ReplacementText: '' + 
ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 16720 + Length: 10 + ReplacementText: q_ct1.memcpy + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 16766 + Length: 24 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 16791 + Length: 0 + ReplacementText: '.wait()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 16796 + Length: 22 + ReplacementText: 'dpct::blas_gemm::experimental::matrix_layout_ptr' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 16918 + Length: 66 + ReplacementText: 'Adesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, k, lda)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 16988 + Length: 66 + ReplacementText: 'Bdesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, n, k, ldb)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 17058 + 
Length: 67 + ReplacementText: 'Cdesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int32, m, n, ldc)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 17151 + Length: 22 + ReplacementText: 'dpct::blas_gemm::experimental::matrix_layout_ptr' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 17324 + Length: 50 + ReplacementText: 'A_col32 = (int8_t *)sycl::malloc_device(m * 32 * sizeof(std::int8_t), q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 17378 + Length: 93 + ReplacementText: 'B_col32_2r_4r4 = (int8_t *)sycl::malloc_device(((n + 32 - 1) / 32) * 32 * 32 * sizeof(std::int8_t), q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 17475 + Length: 51 + ReplacementText: 'C_col32 = (int32_t *)sycl::malloc_device(m * 32 * sizeof(std::int32_t), q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 17530 + Length: 65 + ReplacementText: 'Adesc_col32 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, k, m * 32)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 17599 + Length: 124 + ReplacementText: 'Bdesc_col32_2r_4r4 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, k, n, ((n + 32 - 1) / 32) * 32 * 32)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 17727 + Length: 66 + ReplacementText: 'Cdesc_col32 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int32, m, n, m * 32)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 17797 + Length: 15 + ReplacementText: 'dpct::blas_gemm::experimental::order_t' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 17821 + Length: 20 + ReplacementText: 'dpct::blas_gemm::experimental::order_t::col32' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 17845 + Length: 15 + ReplacementText: 'dpct::blas_gemm::experimental::order_t' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 17876 + Length: 27 + ReplacementText: 'dpct::blas_gemm::experimental::order_t::col32_2r_4r4' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 17907 + Length: 133 + ReplacementText: 'Adesc_col32->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 18044 + Length: 189 + ReplacementText: 'Bdesc_col32_2r_4r4->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32_2r_4r4)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 18237 + Length: 133 + ReplacementText: 'Cdesc_col32->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 18867 + Length: 24 + ReplacementText: 'q_ct1.wait()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 18938 + Length: 10 + ReplacementText: q_ct1.memcpy + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 18985 + Length: 24 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 19010 + Length: 0 + ReplacementText: '.wait()' + ConstantFlag: '' + 
ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 19382 + Length: 25 + ReplacementText: 'delete (ltHandle)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 19411 + Length: 40 + ReplacementText: 'delete (Adesc_col32)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 19455 + Length: 47 + ReplacementText: 'delete (Bdesc_col32_2r_4r4)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 19506 + Length: 40 + ReplacementText: 'delete (Cdesc_col32)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 19550 + Length: 44 + ReplacementText: 'delete (Adesc_col_major)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 19598 + Length: 44 + ReplacementText: 'delete (Bdesc_col_major)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 19646 + Length: 44 + ReplacementText: 'delete (Cdesc_col_major)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 19694 + Length: 14 + ReplacementText: 'dpct::dpct_free(Adev, q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 19712 + Length: 14 + ReplacementText: 'dpct::dpct_free(Bdev, q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 19730 + Length: 14 + ReplacementText: 'dpct::dpct_free(Cdev, q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 19810 + Length: 0 + ReplacementText: "\n dpct::device_ext &dev_ct1 = dpct::get_current_device();\n sycl::queue &q_ct1 = dev_ct1.in_order_queue();" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 19813 + Length: 16 + ReplacementText: 'dpct::blas_gemm::experimental::descriptor_ptr' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 19842 + Length: 25 + ReplacementText: 'ltHandle = new dpct::blas_gemm::experimental::descriptor()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 20045 + Length: 41 + ReplacementText: 'Adev = (void *)sycl::malloc_device(m * k * sizeof(int8_t), q_ct1)' + ConstantFlag: 
'' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 20090 + Length: 41 + ReplacementText: 'Bdev = (void *)sycl::malloc_device(n * k * sizeof(int8_t), q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 20135 + Length: 41 + ReplacementText: 'Cdev = (void *)sycl::malloc_device(m * n * sizeof(int8_t), q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 20298 + Length: 10 + ReplacementText: q_ct1.memcpy + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 20344 + Length: 24 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 20373 + Length: 10 + ReplacementText: q_ct1.memcpy + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 20419 + Length: 24 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 20444 + Length: 0 + ReplacementText: '.wait()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - 
FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 20449 + Length: 22 + ReplacementText: 'dpct::blas_gemm::experimental::matrix_layout_ptr' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 20571 + Length: 66 + ReplacementText: 'Adesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, k, lda)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 20641 + Length: 66 + ReplacementText: 'Bdesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, n, k, ldb)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 20711 + Length: 66 + ReplacementText: 'Cdesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, n, ldc)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 20803 + Length: 22 + ReplacementText: 'dpct::blas_gemm::experimental::matrix_layout_ptr' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 20975 + Length: 50 + ReplacementText: 'A_col32 = (int8_t *)sycl::malloc_device(m * 32 * sizeof(std::int8_t), q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + 
- FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 21029 + Length: 93 + ReplacementText: 'B_col32_2r_4r4 = (int8_t *)sycl::malloc_device(((n + 32 - 1) / 32) * 32 * 32 * sizeof(std::int8_t), q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 21126 + Length: 50 + ReplacementText: 'C_col32 = (int8_t *)sycl::malloc_device(m * 32 * sizeof(std::int8_t), q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 21180 + Length: 65 + ReplacementText: 'Adesc_col32 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, k, m * 32)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 21249 + Length: 124 + ReplacementText: 'Bdesc_col32_2r_4r4 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, k, n, ((n + 32 - 1) / 32) * 32 * 32)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 21377 + Length: 65 + ReplacementText: 'Cdesc_col32 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, n, m * 32)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 21446 + Length: 15 + ReplacementText: 'dpct::blas_gemm::experimental::order_t' + ConstantFlag: '' + 
ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 21470 + Length: 20 + ReplacementText: 'dpct::blas_gemm::experimental::order_t::col32' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 21494 + Length: 15 + ReplacementText: 'dpct::blas_gemm::experimental::order_t' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 21525 + Length: 27 + ReplacementText: 'dpct::blas_gemm::experimental::order_t::col32_2r_4r4' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 21556 + Length: 133 + ReplacementText: 'Adesc_col32->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 21693 + Length: 189 + ReplacementText: 'Bdesc_col32_2r_4r4->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32_2r_4r4)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 21886 + Length: 133 + ReplacementText: 'Cdesc_col32->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' 
+ BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 22513 + Length: 24 + ReplacementText: 'q_ct1.wait()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 22583 + Length: 10 + ReplacementText: q_ct1.memcpy + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 22629 + Length: 24 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 22654 + Length: 0 + ReplacementText: '.wait()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 23025 + Length: 25 + ReplacementText: 'delete (ltHandle)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 23054 + Length: 40 + ReplacementText: 'delete (Adesc_col32)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 23098 + Length: 47 + ReplacementText: 'delete (Bdesc_col32_2r_4r4)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 23149 + Length: 40 + 
ReplacementText: 'delete (Cdesc_col32)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 23193 + Length: 44 + ReplacementText: 'delete (Adesc_col_major)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 23241 + Length: 44 + ReplacementText: 'delete (Bdesc_col_major)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 23289 + Length: 44 + ReplacementText: 'delete (Cdesc_col_major)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 23337 + Length: 14 + ReplacementText: 'dpct::dpct_free(Adev, q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 23355 + Length: 14 + ReplacementText: 'dpct::dpct_free(Bdev, q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 23373 + Length: 14 + ReplacementText: 'dpct::dpct_free(Cdev, q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 23453 + Length: 0 + ReplacementText: "\n dpct::device_ext &dev_ct1 = dpct::get_current_device();\n 
sycl::queue &q_ct1 = dev_ct1.in_order_queue();" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 23456 + Length: 16 + ReplacementText: 'dpct::blas_gemm::experimental::descriptor_ptr' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 23485 + Length: 25 + ReplacementText: 'ltHandle = new dpct::blas_gemm::experimental::descriptor()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 23688 + Length: 41 + ReplacementText: 'Adev = (void *)sycl::malloc_device(m * k * sizeof(int8_t), q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 23733 + Length: 41 + ReplacementText: 'Bdev = (void *)sycl::malloc_device(n * k * sizeof(int8_t), q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 23778 + Length: 41 + ReplacementText: 'Cdev = (void *)sycl::malloc_device(m * n * sizeof(int8_t), q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 23941 + Length: 10 + ReplacementText: q_ct1.memcpy + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 23987 + Length: 24 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 24016 + Length: 10 + ReplacementText: q_ct1.memcpy + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 24062 + Length: 24 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 24087 + Length: 0 + ReplacementText: '.wait()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 24092 + Length: 22 + ReplacementText: 'dpct::blas_gemm::experimental::matrix_layout_ptr' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 24214 + Length: 66 + ReplacementText: 'Adesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, k, lda)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 24284 + Length: 66 + ReplacementText: 'Bdesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, n, k, ldb)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + 
BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 24354 + Length: 66 + ReplacementText: 'Cdesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, n, ldc)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 24446 + Length: 22 + ReplacementText: 'dpct::blas_gemm::experimental::matrix_layout_ptr' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 24618 + Length: 50 + ReplacementText: 'A_col32 = (int8_t *)sycl::malloc_device(m * 32 * sizeof(std::int8_t), q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 24672 + Length: 93 + ReplacementText: 'B_col32_2r_4r4 = (int8_t *)sycl::malloc_device(((n + 32 - 1) / 32) * 32 * 32 * sizeof(std::int8_t), q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 24769 + Length: 50 + ReplacementText: 'C_col32 = (int8_t *)sycl::malloc_device(m * 32 * sizeof(std::int8_t), q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 24823 + Length: 65 + ReplacementText: 'Adesc_col32 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, k, m * 32)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' 
+ NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 24892 + Length: 124 + ReplacementText: 'Bdesc_col32_2r_4r4 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, k, n, ((n + 32 - 1) / 32) * 32 * 32)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 25020 + Length: 65 + ReplacementText: 'Cdesc_col32 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, n, m * 32)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 25089 + Length: 15 + ReplacementText: 'dpct::blas_gemm::experimental::order_t' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 25113 + Length: 20 + ReplacementText: 'dpct::blas_gemm::experimental::order_t::col32' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 25137 + Length: 15 + ReplacementText: 'dpct::blas_gemm::experimental::order_t' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 25168 + Length: 27 + ReplacementText: 'dpct::blas_gemm::experimental::order_t::col32_2r_4r4' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 25199 + Length: 133 + ReplacementText: 'Adesc_col32->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 25336 + Length: 189 + ReplacementText: 'Bdesc_col32_2r_4r4->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32_2r_4r4)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 25529 + Length: 133 + ReplacementText: 'Cdesc_col32->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 25872 + Length: 44 + ReplacementText: 'alpha = sycl::malloc_shared(4, q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 26255 + Length: 24 + ReplacementText: 'q_ct1.wait()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 26325 + Length: 10 + ReplacementText: q_ct1.memcpy + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 26371 + Length: 24 + ReplacementText: 
'' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 26396 + Length: 0 + ReplacementText: '.wait()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 26767 + Length: 25 + ReplacementText: 'delete (ltHandle)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 26796 + Length: 40 + ReplacementText: 'delete (Adesc_col32)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 26840 + Length: 47 + ReplacementText: 'delete (Bdesc_col32_2r_4r4)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 26891 + Length: 40 + ReplacementText: 'delete (Cdesc_col32)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 26935 + Length: 44 + ReplacementText: 'delete (Adesc_col_major)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 26983 + Length: 44 + ReplacementText: 'delete (Bdesc_col_major)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - 
FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 27031 + Length: 44 + ReplacementText: 'delete (Cdesc_col_major)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 27079 + Length: 14 + ReplacementText: 'dpct::dpct_free(Adev, q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 27097 + Length: 14 + ReplacementText: 'dpct::dpct_free(Bdev, q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 27115 + Length: 14 + ReplacementText: 'dpct::dpct_free(Cdev, q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Offset: 27133 + Length: 15 + ReplacementText: 'dpct::dpct_free(alpha, q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 370 + Length: 22 + ReplacementText: "#include \n#include \n#include \n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 410 + Length: 0 + ReplacementText: "\n#include \n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 427 + Length: 16 + ReplacementText: 'dpct::blas_gemm::experimental::descriptor_ptr' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 490 + Length: 15 + ReplacementText: 'dpct::blas_gemm::experimental::order_t' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 554 + Length: 15 + ReplacementText: 'dpct::blas_gemm::experimental::order_t' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 605 + Length: 22 + ReplacementText: 'dpct::blas_gemm::experimental::matrix_layout_ptr' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 663 + Length: 29 + ReplacementText: 'dpct::blas_gemm::experimental::transform_desc_ptr' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 719 + Length: 66 + ReplacementText: 'in_desc = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, dim1, dim2, ld_in)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 789 + Length: 68 + ReplacementText: 'out_desc = new 
dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, dim1, dim2, ld_out)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 862 + Length: 135 + ReplacementText: 'in_desc->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &order_in)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 1001 + Length: 138 + ReplacementText: 'out_desc->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &order_out)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 1144 + Length: 62 + ReplacementText: 'transform_desc = new dpct::blas_gemm::experimental::transform_desc_t(dpct::library_data_t::real_float)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 1246 + Length: 133 + ReplacementText: 'dpct::blas_gemm::experimental::matrix_transform(transform_desc, &alpha, in, in_desc, &beta, NULL, NULL, out, out_desc, 0)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 1384 + Length: 36 + ReplacementText: 'delete (in_desc)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + 
Offset: 1424 + Length: 37 + ReplacementText: 'delete (out_desc)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 1465 + Length: 50 + ReplacementText: 'delete (transform_desc)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 1537 + Length: 0 + ReplacementText: "\n dpct::device_ext &dev_ct1 = dpct::get_current_device();\n sycl::queue &q_ct1 = dev_ct1.in_order_queue();" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 1648 + Length: 47 + ReplacementText: 'in_dev = (void *)sycl::malloc_device(n * in_ld * sizeof(int8_t), q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 1980 + Length: 10 + ReplacementText: q_ct1.memcpy + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 2034 + Length: 37 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 2072 + Length: 0 + ReplacementText: '.wait()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 
2077 + Length: 16 + ReplacementText: 'dpct::blas_gemm::experimental::descriptor_ptr' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 2106 + Length: 25 + ReplacementText: 'ltHandle = new dpct::blas_gemm::experimental::descriptor()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 2188 + Length: 49 + ReplacementText: 'out_dev = (void *)sycl::malloc_device(out_ld * m * sizeof(int8_t), q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 2241 + Length: 10 + ReplacementText: q_ct1.memset + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 2292 + Length: 0 + ReplacementText: '.wait()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 2331 + Length: 18 + ReplacementText: 'dpct::blas_gemm::experimental::order_t::col' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 2380 + Length: 18 + ReplacementText: 'dpct::blas_gemm::experimental::order_t::row' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 2441 + Length: 10 + ReplacementText: q_ct1.memcpy + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 2498 + Length: 37 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 2536 + Length: 0 + ReplacementText: '.wait()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 3236 + Length: 10 + ReplacementText: q_ct1.memset + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 3285 + Length: 0 + ReplacementText: '.wait()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 3381 + Length: 18 + ReplacementText: 'dpct::blas_gemm::experimental::order_t::row' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 3428 + Length: 18 + ReplacementText: 'dpct::blas_gemm::experimental::order_t::col' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 3457 + Length: 10 + 
ReplacementText: q_ct1.memcpy + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 3511 + Length: 37 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 3549 + Length: 0 + ReplacementText: '.wait()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 3927 + Length: 25 + ReplacementText: 'delete (ltHandle)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 4008 + Length: 0 + ReplacementText: "\n dpct::device_ext &dev_ct1 = dpct::get_current_device();\n sycl::queue &q_ct1 = dev_ct1.in_order_queue();" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 4119 + Length: 47 + ReplacementText: 'in_dev = (void *)sycl::malloc_device(n * in_ld * sizeof(int8_t), q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 4451 + Length: 10 + ReplacementText: q_ct1.memcpy + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 4505 + Length: 37 + ReplacementText: '' + 
ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 4543 + Length: 0 + ReplacementText: '.wait()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 4548 + Length: 16 + ReplacementText: 'dpct::blas_gemm::experimental::descriptor_ptr' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 4577 + Length: 25 + ReplacementText: 'ltHandle = new dpct::blas_gemm::experimental::descriptor()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 4659 + Length: 49 + ReplacementText: 'out_dev = (void *)sycl::malloc_device(out_ld * m * sizeof(int8_t), q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 4712 + Length: 10 + ReplacementText: q_ct1.memset + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 4763 + Length: 0 + ReplacementText: '.wait()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 4802 + Length: 18 + ReplacementText: 'dpct::blas_gemm::experimental::order_t::col' + 
ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 4851 + Length: 20 + ReplacementText: 'dpct::blas_gemm::experimental::order_t::col32' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 4914 + Length: 10 + ReplacementText: q_ct1.memcpy + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 4971 + Length: 37 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 5009 + Length: 0 + ReplacementText: '.wait()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 5893 + Length: 10 + ReplacementText: q_ct1.memset + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 5942 + Length: 0 + ReplacementText: '.wait()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 6038 + Length: 20 + ReplacementText: 'dpct::blas_gemm::experimental::order_t::col32' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 6087 + Length: 18 + ReplacementText: 'dpct::blas_gemm::experimental::order_t::col' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 6116 + Length: 10 + ReplacementText: q_ct1.memcpy + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 6170 + Length: 37 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 6208 + Length: 0 + ReplacementText: '.wait()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 6590 + Length: 25 + ReplacementText: 'delete (ltHandle)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 6677 + Length: 0 + ReplacementText: "\n dpct::device_ext &dev_ct1 = dpct::get_current_device();\n sycl::queue &q_ct1 = dev_ct1.in_order_queue();" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 6788 + Length: 47 + ReplacementText: 'in_dev = (void *)sycl::malloc_device(n * in_ld * sizeof(int8_t), q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - 
FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 7120 + Length: 10 + ReplacementText: q_ct1.memcpy + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 7174 + Length: 37 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 7212 + Length: 0 + ReplacementText: '.wait()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 7217 + Length: 16 + ReplacementText: 'dpct::blas_gemm::experimental::descriptor_ptr' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 7246 + Length: 25 + ReplacementText: 'ltHandle = new dpct::blas_gemm::experimental::descriptor()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 7338 + Length: 49 + ReplacementText: 'out_dev = (void *)sycl::malloc_device(out_ld * m * sizeof(int8_t), q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 7391 + Length: 10 + ReplacementText: q_ct1.memset + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 7442 + Length: 0 + ReplacementText: '.wait()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 7481 + Length: 18 + ReplacementText: 'dpct::blas_gemm::experimental::order_t::col' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 7530 + Length: 26 + ReplacementText: 'dpct::blas_gemm::experimental::order_t::col4_4r2_8c' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 7599 + Length: 10 + ReplacementText: q_ct1.memcpy + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 7656 + Length: 37 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 7694 + Length: 0 + ReplacementText: '.wait()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 11838 + Length: 10 + ReplacementText: q_ct1.memset + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 11887 + Length: 0 + 
ReplacementText: '.wait()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 11983 + Length: 26 + ReplacementText: 'dpct::blas_gemm::experimental::order_t::col4_4r2_8c' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 12038 + Length: 18 + ReplacementText: 'dpct::blas_gemm::experimental::order_t::col' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 12067 + Length: 10 + ReplacementText: q_ct1.memcpy + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 12121 + Length: 37 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 12159 + Length: 0 + ReplacementText: '.wait()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 12553 + Length: 25 + ReplacementText: 'delete (ltHandle)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 12641 + Length: 0 + ReplacementText: "\n dpct::device_ext &dev_ct1 = dpct::get_current_device();\n sycl::queue &q_ct1 = 
dev_ct1.in_order_queue();" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 12752 + Length: 47 + ReplacementText: 'in_dev = (void *)sycl::malloc_device(n * in_ld * sizeof(int8_t), q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 13084 + Length: 10 + ReplacementText: q_ct1.memcpy + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 13138 + Length: 37 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 13176 + Length: 0 + ReplacementText: '.wait()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 13181 + Length: 16 + ReplacementText: 'dpct::blas_gemm::experimental::descriptor_ptr' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 13210 + Length: 25 + ReplacementText: 'ltHandle = new dpct::blas_gemm::experimental::descriptor()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 13303 + Length: 49 + ReplacementText: 'out_dev = (void 
*)sycl::malloc_device(out_ld * m * sizeof(int8_t), q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 13356 + Length: 10 + ReplacementText: q_ct1.memset + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 13407 + Length: 0 + ReplacementText: '.wait()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 13446 + Length: 18 + ReplacementText: 'dpct::blas_gemm::experimental::order_t::col' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 13495 + Length: 27 + ReplacementText: 'dpct::blas_gemm::experimental::order_t::col32_2r_4r4' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 13565 + Length: 10 + ReplacementText: q_ct1.memcpy + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 13622 + Length: 37 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 13660 + Length: 0 + ReplacementText: '.wait()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: 
'' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 27206 + Length: 10 + ReplacementText: q_ct1.memset + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 27255 + Length: 0 + ReplacementText: '.wait()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 27351 + Length: 27 + ReplacementText: 'dpct::blas_gemm::experimental::order_t::col32_2r_4r4' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 27407 + Length: 18 + ReplacementText: 'dpct::blas_gemm::experimental::order_t::col' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 27436 + Length: 10 + ReplacementText: q_ct1.memcpy + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 27490 + Length: 37 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 27528 + Length: 0 + ReplacementText: '.wait()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Offset: 27924 + Length: 25 + ReplacementText: 'delete (ltHandle)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false +MainSourceFilesDigest: + - MainSourceFile: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' + Digest: c3a2078551737823d86d0b1d17ca234a + - MainSourceFile: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' + Digest: 373b38f255915be55702128932b0b290 +DpctVersion: 19.0.0 +MainHelperFileName: '' +USMLevel: '' +FeatureMap: {} +CompileTargets: {} +OptionMap: + AnalysisScopePath: + Value: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt' + Specified: false + AsyncHandler: + Value: 'false' + Specified: false + BuildScript: + Value: '0' + Specified: false + CodePinEnabled: + Value: 'false' + Specified: false + CommentsEnabled: + Value: 'false' + Specified: false + CompilationsDir: + Value: '' + Specified: false + CtadEnabled: + Value: 'false' + Specified: false + EnablepProfiling: + Value: 'false' + Specified: false + ExperimentalFlag: + Value: '0' + Specified: false + ExplicitNamespace: + Value: '20' + Specified: false + ExtensionDDFlag: + Value: '0' + Specified: false + ExtensionDEFlag: + Value: '4294967295' + Specified: false + HelperFuncPreferenceFlag: + Value: '0' + Specified: false + NDRangeDim: + Value: '3' + Specified: false + NoDRYPattern: + Value: 'false' + Specified: false + OptimizeMigration: + Value: 'false' + Specified: false + ProcessAll: + Value: 'false' + Specified: false + RuleFile: + Value: '' + Specified: false + SyclNamedLambda: + Value: 'false' + Specified: false + UsmLevel: + Value: '1' + Specified: false +... 
diff --git a/features/feature_case/cublasLt/out/matmul.dp.cpp b/features/feature_case/cublasLt/out/matmul.dp.cpp new file mode 100644 index 00000000..cdb4049e --- /dev/null +++ b/features/feature_case/cublasLt/out/matmul.dp.cpp @@ -0,0 +1,742 @@ +// ===------------ matmul.cu ----------------------------- *- CUDA -* ----=== // +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// ===--------------------------------------------------------------------=== // + +#include +#include +#include +#include +#include +#include + + +const constexpr int COL_TURING = 0; +const constexpr int COL_AMPERE = 1; + +// The original source of below two functions was under the license below: +// Copyright (c) Facebook, Inc. and its affiliates. +// +// This source code is licensed under the MIT license found in the +// LICENSE file in the root directory of this source tree. 
+// +// Repo: https://github.com/TimDettmers/bitsandbytes.git +inline int checkCublasStatus(int status) { + if (status != 0) { + printf("cuBLAS API failed with status %d\n", status); + //throw std::logic_error("cuBLAS API failed"); + return 1; + } + return 0; +} + +template int igemmlt(dpct::blas_gemm::experimental::descriptor_ptr ltHandle, int m, int n, int k, const int8_t *A, const int8_t *B, void *C, float *row_scale, int lda, int ldb, int ldc) + try { + dpct::device_ext &dev_ct1 = dpct::get_current_device(); + sycl::queue &q_ct1 = dev_ct1.in_order_queue(); + int has_error = 0; + dpct::blas_gemm::experimental::matmul_desc_ptr matmulDesc = NULL; + dpct::blas_gemm::experimental::matrix_layout_ptr Adesc = NULL, Bdesc = NULL, Cdesc = NULL; + oneapi::mkl::transpose opT = oneapi::mkl::transpose::trans; + dpct::blas_gemm::experimental::pointer_mode_t alphaVec = dpct::blas_gemm::experimental::pointer_mode_t::alpha_device_vector_beta_zero; + dpct::blas_gemm::experimental::order_t col32 = dpct::blas_gemm::experimental::order_t::col32; + dpct::blas_gemm::experimental::order_t col_turing = dpct::blas_gemm::experimental::order_t::col4_4r2_8c; + dpct::blas_gemm::experimental::order_t col_ampere = dpct::blas_gemm::experimental::order_t::col32_2r_4r4; + + has_error |= checkCublasStatus(DPCT_CHECK_ERROR(Adesc = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, k, lda))); + has_error |= checkCublasStatus(DPCT_CHECK_ERROR(Bdesc = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, n, k, ldb))); + + has_error |= checkCublasStatus(DPCT_CHECK_ERROR(Adesc->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32))); + if(FORMATB == COL_TURING) + has_error |= checkCublasStatus(DPCT_CHECK_ERROR(Bdesc->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col_turing))); + else + has_error |= 
checkCublasStatus(DPCT_CHECK_ERROR(Bdesc->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col_ampere))); + + if(DTYPE_OUT == 32) + { + has_error |= checkCublasStatus(DPCT_CHECK_ERROR(matmulDesc = new dpct::blas_gemm::experimental::matmul_desc_t(dpct::compute_type::i32, dpct::library_data_t::real_int32))); + has_error |= checkCublasStatus(DPCT_CHECK_ERROR(matmulDesc->set_attribute(dpct::blas_gemm::experimental::matmul_desc_t::attribute::trans_b, &opT))); + has_error |= checkCublasStatus(DPCT_CHECK_ERROR(Cdesc = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int32, m, n, ldc))); + has_error |= checkCublasStatus(DPCT_CHECK_ERROR(Cdesc->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32))); + int alpha = 1, beta = 0; + has_error |= checkCublasStatus(DPCT_CHECK_ERROR(dpct::blas_gemm::experimental::matmul(ltHandle, matmulDesc, &alpha, A, Adesc, B, Bdesc, &beta, (int32_t*)C, Cdesc, (int32_t*)C, Cdesc, 0))); + } + else + { + has_error |= checkCublasStatus(DPCT_CHECK_ERROR(matmulDesc = new dpct::blas_gemm::experimental::matmul_desc_t(dpct::compute_type::i32, dpct::library_data_t::real_float))); + has_error |= checkCublasStatus(DPCT_CHECK_ERROR(matmulDesc->set_attribute(dpct::blas_gemm::experimental::matmul_desc_t::attribute::trans_b, &opT))); + has_error |= checkCublasStatus(DPCT_CHECK_ERROR(Cdesc = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, n, ldc))); + has_error |= checkCublasStatus(DPCT_CHECK_ERROR(Cdesc->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32))); + if(!SCALE_ROWS) + { + float alpha = 1.0f, beta = 0.0f; + has_error |= checkCublasStatus(DPCT_CHECK_ERROR(dpct::blas_gemm::experimental::matmul(ltHandle, matmulDesc, &alpha, A, Adesc, B, Bdesc, &beta, (int8_t*)C, Cdesc, (int8_t*)C, Cdesc, 0))); + } + else + { + has_error |= 
checkCublasStatus(DPCT_CHECK_ERROR(matmulDesc->set_attribute(dpct::blas_gemm::experimental::matmul_desc_t::attribute::pointer_mode, &alphaVec))); + has_error |= checkCublasStatus(DPCT_CHECK_ERROR(dpct::blas_gemm::experimental::matmul(ltHandle, matmulDesc, row_scale, A, Adesc, B, Bdesc, NULL, (int8_t*)C, Cdesc, (int8_t*)C, Cdesc, 0))); + } + } + + q_ct1.wait(); + + if (Cdesc) has_error |= checkCublasStatus(DPCT_CHECK_ERROR(delete (Cdesc))); + if (Bdesc) has_error |= checkCublasStatus(DPCT_CHECK_ERROR(delete (Bdesc))); + if (Adesc) has_error |= checkCublasStatus(DPCT_CHECK_ERROR(delete (Adesc))); + if (matmulDesc) has_error |= checkCublasStatus(DPCT_CHECK_ERROR(delete (matmulDesc))); + if(has_error == 1) + printf("error detected"); + + return has_error; +} +catch (sycl::exception const &exc) { + std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl; + std::exit(1); +} + +void transform(dpct::blas_gemm::experimental::descriptor_ptr ltHandle, const void *in, int ld_in, + dpct::blas_gemm::experimental::matrix_layout_ptr layout_in, void *out, int ld_out, + dpct::blas_gemm::experimental::matrix_layout_ptr layout_out) { + dpct::blas_gemm::experimental::transform_desc_ptr transform_desc = NULL; + transform_desc = new dpct::blas_gemm::experimental::transform_desc_t(dpct::library_data_t::real_float); + float alpha = 1.0f, beta = 0.0f; + dpct::blas_gemm::experimental::matrix_transform(transform_desc, &alpha, in, layout_in, &beta, NULL, NULL, out, layout_out, 0); + delete (transform_desc); +} + +// igemmlt +bool test1() { + dpct::device_ext &dev_ct1 = dpct::get_current_device(); + sycl::queue &q_ct1 = dev_ct1.in_order_queue(); + dpct::blas_gemm::experimental::descriptor_ptr ltHandle; + ltHandle = new dpct::blas_gemm::experimental::descriptor(); + const constexpr int m = 4; + const constexpr int n = 2; + const constexpr int k = 3; + int lda = m; + int ldb = n; + int ldc = m; + void *Adev; + void *Bdev; + void *Cdev; + Adev = 
(void *)sycl::malloc_device(m * k * sizeof(int8_t), q_ct1); + Bdev = (void *)sycl::malloc_device(n * k * sizeof(int8_t), q_ct1); + Cdev = (void *)sycl::malloc_device(m * n * sizeof(int32_t), q_ct1); + + int8_t Ahost[m * k] = {6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; + int8_t Bhost[n * k] = {5, 4, -3, -2, 1, 0}; + + q_ct1.memcpy(Adev, Ahost, m * k * sizeof(int8_t)); + q_ct1.memcpy(Bdev, Bhost, n * k * sizeof(int8_t)).wait(); + + dpct::blas_gemm::experimental::matrix_layout_ptr Adesc_col_major = NULL, Bdesc_col_major = NULL, + Cdesc_col_major = NULL; + Adesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, k, lda); + Bdesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, n, k, ldb); + Cdesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int32, m, n, ldc); + + // Convert A and B + dpct::blas_gemm::experimental::matrix_layout_ptr Adesc_col32 = NULL, Bdesc_col4_4r2_8c = NULL, + Cdesc_col32 = NULL; + int8_t *A_col32, *B_col4_4r2_8c; + int32_t *C_col32; + A_col32 = (int8_t *)sycl::malloc_device(m * 32 * sizeof(std::int8_t), q_ct1); + B_col4_4r2_8c = (int8_t *)sycl::malloc_device(((n + 8 - 1) / 8) * 8 * 32 * sizeof(std::int8_t), q_ct1); + C_col32 = (int32_t *)sycl::malloc_device(m * 32 * sizeof(std::int32_t), q_ct1); + Adesc_col32 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, k, m * 32); + Bdesc_col4_4r2_8c = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, k, n, ((n + 8 - 1) / 8) * 8 * 32); + Cdesc_col32 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int32, m, n, m * 32); + dpct::blas_gemm::experimental::order_t col32 = dpct::blas_gemm::experimental::order_t::col32; + dpct::blas_gemm::experimental::order_t col4_4r2_8c = dpct::blas_gemm::experimental::order_t::col4_4r2_8c; + 
Adesc_col32->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32); + Bdesc_col4_4r2_8c->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col4_4r2_8c); + Cdesc_col32->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32); + + transform(ltHandle, Adev, lda, Adesc_col_major, A_col32, m * 32, Adesc_col32); + transform(ltHandle, Bdev, ldb, Bdesc_col_major, B_col4_4r2_8c, 8 * 32, + Bdesc_col4_4r2_8c); + + // Matmul + igemmlt(ltHandle, m, n, k, A_col32, B_col4_4r2_8c, C_col32, + nullptr, m * 32, ((n + 8 - 1) / 8) * 8 * 32, + m * 32); + + // Convert C + transform(ltHandle, C_col32, m * 32, Cdesc_col32, Cdev, ldc, Cdesc_col_major); + q_ct1.wait(); + + // Check result + int32_t Chost[m * n]; + q_ct1.memcpy(Chost, Cdev, m * n * sizeof(int32_t)).wait(); + + bool error = false; + int32_t C_ref[m * n] = {14, 17, 20, 23, 4, 6, 8, 10}; + for (int i = 0; i < m * n; i++) { + if (Chost[i] != C_ref[i]) { + error = true; + break; + } + } + printf("c:\n"); + for (int i = 0; i < m * n; i++) + printf("%d, ", Chost[i]); + printf("\n"); + + if (error) { + printf("error\n"); + } else { + printf("success\n"); + } + + delete (ltHandle); + delete (Adesc_col32); + delete (Bdesc_col4_4r2_8c); + delete (Cdesc_col32); + delete (Adesc_col_major); + delete (Bdesc_col_major); + delete (Cdesc_col_major); + dpct::dpct_free(Adev, q_ct1); + dpct::dpct_free(Bdev, q_ct1); + dpct::dpct_free(Cdev, q_ct1); + + return !error; +} + +// igemmlt +bool test2() { + dpct::device_ext &dev_ct1 = dpct::get_current_device(); + sycl::queue &q_ct1 = dev_ct1.in_order_queue(); + dpct::blas_gemm::experimental::descriptor_ptr ltHandle; + ltHandle = new dpct::blas_gemm::experimental::descriptor(); + const constexpr int m = 4; + const constexpr int n = 2; + const constexpr int k = 3; + int lda = m; + int ldb = n; + int ldc = m; + void *Adev; + void *Bdev; + void *Cdev; + Adev = (void *)sycl::malloc_device(m * k * sizeof(int8_t), 
q_ct1); + Bdev = (void *)sycl::malloc_device(n * k * sizeof(int8_t), q_ct1); + Cdev = (void *)sycl::malloc_device(m * n * sizeof(int8_t), q_ct1); + + int8_t Ahost[m * k] = {6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; + int8_t Bhost[n * k] = {5, 4, -3, -2, 1, 0}; + + q_ct1.memcpy(Adev, Ahost, m * k * sizeof(int8_t)); + q_ct1.memcpy(Bdev, Bhost, n * k * sizeof(int8_t)).wait(); + + dpct::blas_gemm::experimental::matrix_layout_ptr Adesc_col_major = NULL, Bdesc_col_major = NULL, + Cdesc_col_major = NULL; + Adesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, k, lda); + Bdesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, n, k, ldb); + Cdesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, n, ldc); + + // Convert A and B + dpct::blas_gemm::experimental::matrix_layout_ptr Adesc_col32 = NULL, Bdesc_col4_4r2_8c = NULL, + Cdesc_col32 = NULL; + int8_t *A_col32, *B_col4_4r2_8c; + int8_t *C_col32; + A_col32 = (int8_t *)sycl::malloc_device(m * 32 * sizeof(std::int8_t), q_ct1); + B_col4_4r2_8c = (int8_t *)sycl::malloc_device(((n + 8 - 1) / 8) * 8 * 32 * sizeof(std::int8_t), q_ct1); + C_col32 = (int8_t *)sycl::malloc_device(m * 32 * sizeof(std::int8_t), q_ct1); + Adesc_col32 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, k, m * 32); + Bdesc_col4_4r2_8c = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, k, n, ((n + 8 - 1) / 8) * 8 * 32); + Cdesc_col32 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, n, m * 32); + dpct::blas_gemm::experimental::order_t col32 = dpct::blas_gemm::experimental::order_t::col32; + dpct::blas_gemm::experimental::order_t col4_4r2_8c = dpct::blas_gemm::experimental::order_t::col4_4r2_8c; + Adesc_col32->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32); + 
Bdesc_col4_4r2_8c->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col4_4r2_8c); + Cdesc_col32->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32); + + transform(ltHandle, Adev, lda, Adesc_col_major, A_col32, m * 32, Adesc_col32); + transform(ltHandle, Bdev, ldb, Bdesc_col_major, B_col4_4r2_8c, 8 * 32, + Bdesc_col4_4r2_8c); + + // Matmul + igemmlt(ltHandle, m, n, k, A_col32, B_col4_4r2_8c, C_col32, + nullptr, m * 32, ((n + 8 - 1) / 8) * 8 * 32, + m * 32); + + // Convert C + transform(ltHandle, C_col32, m * 32, Cdesc_col32, Cdev, ldc, Cdesc_col_major); + q_ct1.wait(); + + // Check result + int8_t Chost[m * n]; + q_ct1.memcpy(Chost, Cdev, m * n * sizeof(int8_t)).wait(); + + bool error = false; + int8_t C_ref[m * n] = {14, 17, 20, 23, 4, 6, 8, 10}; + for (int i = 0; i < m * n; i++) { + if (Chost[i] != C_ref[i]) { + error = true; + break; + } + } + printf("c:\n"); + for (int i = 0; i < m * n; i++) + printf("%d, ", Chost[i]); + printf("\n"); + + if (error) { + printf("error\n"); + } else { + printf("success\n"); + } + + delete (ltHandle); + delete (Adesc_col32); + delete (Bdesc_col4_4r2_8c); + delete (Cdesc_col32); + delete (Adesc_col_major); + delete (Bdesc_col_major); + delete (Cdesc_col_major); + dpct::dpct_free(Adev, q_ct1); + dpct::dpct_free(Bdev, q_ct1); + dpct::dpct_free(Cdev, q_ct1); + + return !error; +} + +// igemmlt +bool test3() { + dpct::device_ext &dev_ct1 = dpct::get_current_device(); + sycl::queue &q_ct1 = dev_ct1.in_order_queue(); + dpct::blas_gemm::experimental::descriptor_ptr ltHandle; + ltHandle = new dpct::blas_gemm::experimental::descriptor(); + const constexpr int m = 4; + const constexpr int n = 2; + const constexpr int k = 3; + int lda = m; + int ldb = n; + int ldc = m; + void *Adev; + void *Bdev; + void *Cdev; + Adev = (void *)sycl::malloc_device(m * k * sizeof(int8_t), q_ct1); + Bdev = (void *)sycl::malloc_device(n * k * sizeof(int8_t), q_ct1); + Cdev = (void 
*)sycl::malloc_device(m * n * sizeof(int8_t), q_ct1); + + int8_t Ahost[m * k] = {6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; + int8_t Bhost[n * k] = {5, 4, -3, -2, 1, 0}; + + q_ct1.memcpy(Adev, Ahost, m * k * sizeof(int8_t)); + q_ct1.memcpy(Bdev, Bhost, n * k * sizeof(int8_t)).wait(); + + dpct::blas_gemm::experimental::matrix_layout_ptr Adesc_col_major = NULL, Bdesc_col_major = NULL, + Cdesc_col_major = NULL; + Adesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, k, lda); + Bdesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, n, k, ldb); + Cdesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, n, ldc); + + // Convert A and B + dpct::blas_gemm::experimental::matrix_layout_ptr Adesc_col32 = NULL, Bdesc_col4_4r2_8c = NULL, + Cdesc_col32 = NULL; + int8_t *A_col32, *B_col4_4r2_8c; + int8_t *C_col32; + A_col32 = (int8_t *)sycl::malloc_device(m * 32 * sizeof(std::int8_t), q_ct1); + B_col4_4r2_8c = (int8_t *)sycl::malloc_device(((n + 8 - 1) / 8) * 8 * 32 * sizeof(std::int8_t), q_ct1); + C_col32 = (int8_t *)sycl::malloc_device(m * 32 * sizeof(std::int8_t), q_ct1); + Adesc_col32 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, k, m * 32); + Bdesc_col4_4r2_8c = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, k, n, ((n + 8 - 1) / 8) * 8 * 32); + Cdesc_col32 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, n, m * 32); + dpct::blas_gemm::experimental::order_t col32 = dpct::blas_gemm::experimental::order_t::col32; + dpct::blas_gemm::experimental::order_t col4_4r2_8c = dpct::blas_gemm::experimental::order_t::col4_4r2_8c; + Adesc_col32->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32); + 
Bdesc_col4_4r2_8c->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col4_4r2_8c); + Cdesc_col32->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32); + + transform(ltHandle, Adev, lda, Adesc_col_major, A_col32, m * 32, Adesc_col32); + transform(ltHandle, Bdev, ldb, Bdesc_col_major, B_col4_4r2_8c, 8 * 32, + Bdesc_col4_4r2_8c); + + float *alpha; + alpha = sycl::malloc_shared(4, q_ct1); + alpha[0] = 0; + alpha[1] = 1; + alpha[2] = 2; + alpha[3] = 3; + + // Matmul + igemmlt(ltHandle, m, n, k, A_col32, B_col4_4r2_8c, C_col32, + alpha, m * 32, ((n + 8 - 1) / 8) * 8 * 32, m * 32); + + // Convert C + transform(ltHandle, C_col32, m * 32, Cdesc_col32, Cdev, ldc, Cdesc_col_major); + q_ct1.wait(); + + // Check result + int8_t Chost[m * n]; + q_ct1.memcpy(Chost, Cdev, m * n * sizeof(int8_t)).wait(); + + bool error = false; + int8_t C_ref[m * n] = {0, 17, 40, 69, 0, 6, 16, 30}; + for (int i = 0; i < m * n; i++) { + if (Chost[i] != C_ref[i]) { + error = true; + break; + } + } + printf("c:\n"); + for (int i = 0; i < m * n; i++) + printf("%d, ", Chost[i]); + printf("\n"); + + if (error) { + printf("error\n"); + } else { + printf("success\n"); + } + + delete (ltHandle); + delete (Adesc_col32); + delete (Bdesc_col4_4r2_8c); + delete (Cdesc_col32); + delete (Adesc_col_major); + delete (Bdesc_col_major); + delete (Cdesc_col_major); + dpct::dpct_free(Adev, q_ct1); + dpct::dpct_free(Bdev, q_ct1); + dpct::dpct_free(Cdev, q_ct1); + dpct::dpct_free(alpha, q_ct1); + + return !error; +} + +// igemmlt +bool test4() { + dpct::device_ext &dev_ct1 = dpct::get_current_device(); + sycl::queue &q_ct1 = dev_ct1.in_order_queue(); + dpct::blas_gemm::experimental::descriptor_ptr ltHandle; + ltHandle = new dpct::blas_gemm::experimental::descriptor(); + const constexpr int m = 4; + const constexpr int n = 2; + const constexpr int k = 3; + int lda = m; + int ldb = n; + int ldc = m; + void *Adev; + void *Bdev; + void *Cdev; + Adev = (void 
*)sycl::malloc_device(m * k * sizeof(int8_t), q_ct1); + Bdev = (void *)sycl::malloc_device(n * k * sizeof(int8_t), q_ct1); + Cdev = (void *)sycl::malloc_device(m * n * sizeof(int32_t), q_ct1); + + int8_t Ahost[m * k] = {6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; + int8_t Bhost[n * k] = {5, 4, -3, -2, 1, 0}; + + q_ct1.memcpy(Adev, Ahost, m * k * sizeof(int8_t)); + q_ct1.memcpy(Bdev, Bhost, n * k * sizeof(int8_t)).wait(); + + dpct::blas_gemm::experimental::matrix_layout_ptr Adesc_col_major = NULL, Bdesc_col_major = NULL, + Cdesc_col_major = NULL; + Adesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, k, lda); + Bdesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, n, k, ldb); + Cdesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int32, m, n, ldc); + + // Convert A and B + dpct::blas_gemm::experimental::matrix_layout_ptr Adesc_col32 = NULL, Bdesc_col32_2r_4r4 = NULL, + Cdesc_col32 = NULL; + int8_t *A_col32, *B_col32_2r_4r4; + int32_t *C_col32; + A_col32 = (int8_t *)sycl::malloc_device(m * 32 * sizeof(std::int8_t), q_ct1); + B_col32_2r_4r4 = (int8_t *)sycl::malloc_device(((n + 32 - 1) / 32) * 32 * 32 * sizeof(std::int8_t), q_ct1); + C_col32 = (int32_t *)sycl::malloc_device(m * 32 * sizeof(std::int32_t), q_ct1); + Adesc_col32 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, k, m * 32); + Bdesc_col32_2r_4r4 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, k, n, ((n + 32 - 1) / 32) * 32 * 32); + Cdesc_col32 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int32, m, n, m * 32); + dpct::blas_gemm::experimental::order_t col32 = dpct::blas_gemm::experimental::order_t::col32; + dpct::blas_gemm::experimental::order_t col32_2r_4r4 = dpct::blas_gemm::experimental::order_t::col32_2r_4r4; + 
Adesc_col32->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32); + Bdesc_col32_2r_4r4->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32_2r_4r4); + Cdesc_col32->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32); + + transform(ltHandle, Adev, lda, Adesc_col_major, A_col32, m * 32, Adesc_col32); + transform(ltHandle, Bdev, ldb, Bdesc_col_major, B_col32_2r_4r4, 8 * 32, + Bdesc_col32_2r_4r4); + + // Matmul + igemmlt(ltHandle, m, n, k, A_col32, B_col32_2r_4r4, + C_col32, nullptr, m * 32, + ((n + 8 - 1) / 8) * 8 * 32, m * 32); + + // Convert C + transform(ltHandle, C_col32, m * 32, Cdesc_col32, Cdev, ldc, Cdesc_col_major); + q_ct1.wait(); + + // Check result + int32_t Chost[m * n]; + q_ct1.memcpy(Chost, Cdev, m * n * sizeof(int32_t)).wait(); + + bool error = false; + int32_t C_ref[m * n] = {14, 17, 20, 23, 4, 6, 8, 10}; + for (int i = 0; i < m * n; i++) { + if (Chost[i] != C_ref[i]) { + error = true; + break; + } + } + printf("c:\n"); + for (int i = 0; i < m * n; i++) + printf("%d, ", Chost[i]); + printf("\n"); + + if (error) { + printf("error\n"); + } else { + printf("success\n"); + } + + delete (ltHandle); + delete (Adesc_col32); + delete (Bdesc_col32_2r_4r4); + delete (Cdesc_col32); + delete (Adesc_col_major); + delete (Bdesc_col_major); + delete (Cdesc_col_major); + dpct::dpct_free(Adev, q_ct1); + dpct::dpct_free(Bdev, q_ct1); + dpct::dpct_free(Cdev, q_ct1); + + return !error; +} + +// igemmlt +bool test5() { + dpct::device_ext &dev_ct1 = dpct::get_current_device(); + sycl::queue &q_ct1 = dev_ct1.in_order_queue(); + dpct::blas_gemm::experimental::descriptor_ptr ltHandle; + ltHandle = new dpct::blas_gemm::experimental::descriptor(); + const constexpr int m = 4; + const constexpr int n = 2; + const constexpr int k = 3; + int lda = m; + int ldb = n; + int ldc = m; + void *Adev; + void *Bdev; + void *Cdev; + Adev = (void *)sycl::malloc_device(m * k * 
sizeof(int8_t), q_ct1); + Bdev = (void *)sycl::malloc_device(n * k * sizeof(int8_t), q_ct1); + Cdev = (void *)sycl::malloc_device(m * n * sizeof(int8_t), q_ct1); + + int8_t Ahost[m * k] = {6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; + int8_t Bhost[n * k] = {5, 4, -3, -2, 1, 0}; + + q_ct1.memcpy(Adev, Ahost, m * k * sizeof(int8_t)); + q_ct1.memcpy(Bdev, Bhost, n * k * sizeof(int8_t)).wait(); + + dpct::blas_gemm::experimental::matrix_layout_ptr Adesc_col_major = NULL, Bdesc_col_major = NULL, + Cdesc_col_major = NULL; + Adesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, k, lda); + Bdesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, n, k, ldb); + Cdesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, n, ldc); + + // Convert A and B + dpct::blas_gemm::experimental::matrix_layout_ptr Adesc_col32 = NULL, Bdesc_col32_2r_4r4 = NULL, + Cdesc_col32 = NULL; + int8_t *A_col32, *B_col32_2r_4r4; + int8_t *C_col32; + A_col32 = (int8_t *)sycl::malloc_device(m * 32 * sizeof(std::int8_t), q_ct1); + B_col32_2r_4r4 = (int8_t *)sycl::malloc_device(((n + 32 - 1) / 32) * 32 * 32 * sizeof(std::int8_t), q_ct1); + C_col32 = (int8_t *)sycl::malloc_device(m * 32 * sizeof(std::int8_t), q_ct1); + Adesc_col32 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, k, m * 32); + Bdesc_col32_2r_4r4 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, k, n, ((n + 32 - 1) / 32) * 32 * 32); + Cdesc_col32 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, n, m * 32); + dpct::blas_gemm::experimental::order_t col32 = dpct::blas_gemm::experimental::order_t::col32; + dpct::blas_gemm::experimental::order_t col32_2r_4r4 = dpct::blas_gemm::experimental::order_t::col32_2r_4r4; + 
Adesc_col32->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32); + Bdesc_col32_2r_4r4->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32_2r_4r4); + Cdesc_col32->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32); + + transform(ltHandle, Adev, lda, Adesc_col_major, A_col32, m * 32, Adesc_col32); + transform(ltHandle, Bdev, ldb, Bdesc_col_major, B_col32_2r_4r4, 8 * 32, + Bdesc_col32_2r_4r4); + + // Matmul + igemmlt(ltHandle, m, n, k, A_col32, B_col32_2r_4r4, C_col32, + nullptr, m * 32, ((n + 8 - 1) / 8) * 8 * 32, + m * 32); + + // Convert C + transform(ltHandle, C_col32, m * 32, Cdesc_col32, Cdev, ldc, Cdesc_col_major); + q_ct1.wait(); + + // Check result + int8_t Chost[m * n]; + q_ct1.memcpy(Chost, Cdev, m * n * sizeof(int8_t)).wait(); + + bool error = false; + int8_t C_ref[m * n] = {14, 17, 20, 23, 4, 6, 8, 10}; + for (int i = 0; i < m * n; i++) { + if (Chost[i] != C_ref[i]) { + error = true; + break; + } + } + printf("c:\n"); + for (int i = 0; i < m * n; i++) + printf("%d, ", Chost[i]); + printf("\n"); + + if (error) { + printf("error\n"); + } else { + printf("success\n"); + } + + delete (ltHandle); + delete (Adesc_col32); + delete (Bdesc_col32_2r_4r4); + delete (Cdesc_col32); + delete (Adesc_col_major); + delete (Bdesc_col_major); + delete (Cdesc_col_major); + dpct::dpct_free(Adev, q_ct1); + dpct::dpct_free(Bdev, q_ct1); + dpct::dpct_free(Cdev, q_ct1); + + return !error; +} + +// igemmlt +bool test6() { + dpct::device_ext &dev_ct1 = dpct::get_current_device(); + sycl::queue &q_ct1 = dev_ct1.in_order_queue(); + dpct::blas_gemm::experimental::descriptor_ptr ltHandle; + ltHandle = new dpct::blas_gemm::experimental::descriptor(); + const constexpr int m = 4; + const constexpr int n = 2; + const constexpr int k = 3; + int lda = m; + int ldb = n; + int ldc = m; + void *Adev; + void *Bdev; + void *Cdev; + Adev = (void *)sycl::malloc_device(m * k * 
sizeof(int8_t), q_ct1); + Bdev = (void *)sycl::malloc_device(n * k * sizeof(int8_t), q_ct1); + Cdev = (void *)sycl::malloc_device(m * n * sizeof(int8_t), q_ct1); + + int8_t Ahost[m * k] = {6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; + int8_t Bhost[n * k] = {5, 4, -3, -2, 1, 0}; + + q_ct1.memcpy(Adev, Ahost, m * k * sizeof(int8_t)); + q_ct1.memcpy(Bdev, Bhost, n * k * sizeof(int8_t)).wait(); + + dpct::blas_gemm::experimental::matrix_layout_ptr Adesc_col_major = NULL, Bdesc_col_major = NULL, + Cdesc_col_major = NULL; + Adesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, k, lda); + Bdesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, n, k, ldb); + Cdesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, n, ldc); + + // Convert A and B + dpct::blas_gemm::experimental::matrix_layout_ptr Adesc_col32 = NULL, Bdesc_col32_2r_4r4 = NULL, + Cdesc_col32 = NULL; + int8_t *A_col32, *B_col32_2r_4r4; + int8_t *C_col32; + A_col32 = (int8_t *)sycl::malloc_device(m * 32 * sizeof(std::int8_t), q_ct1); + B_col32_2r_4r4 = (int8_t *)sycl::malloc_device(((n + 32 - 1) / 32) * 32 * 32 * sizeof(std::int8_t), q_ct1); + C_col32 = (int8_t *)sycl::malloc_device(m * 32 * sizeof(std::int8_t), q_ct1); + Adesc_col32 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, k, m * 32); + Bdesc_col32_2r_4r4 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, k, n, ((n + 32 - 1) / 32) * 32 * 32); + Cdesc_col32 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, n, m * 32); + dpct::blas_gemm::experimental::order_t col32 = dpct::blas_gemm::experimental::order_t::col32; + dpct::blas_gemm::experimental::order_t col32_2r_4r4 = dpct::blas_gemm::experimental::order_t::col32_2r_4r4; + 
Adesc_col32->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32); + Bdesc_col32_2r_4r4->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32_2r_4r4); + Cdesc_col32->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32); + + transform(ltHandle, Adev, lda, Adesc_col_major, A_col32, m * 32, Adesc_col32); + transform(ltHandle, Bdev, ldb, Bdesc_col_major, B_col32_2r_4r4, 8 * 32, + Bdesc_col32_2r_4r4); + + float *alpha; + alpha = sycl::malloc_shared(4, q_ct1); + alpha[0] = 0; + alpha[1] = 1; + alpha[2] = 2; + alpha[3] = 3; + + // Matmul + igemmlt(ltHandle, m, n, k, A_col32, B_col32_2r_4r4, C_col32, + alpha, m * 32, ((n + 8 - 1) / 8) * 8 * 32, m * 32); + + // Convert C + transform(ltHandle, C_col32, m * 32, Cdesc_col32, Cdev, ldc, Cdesc_col_major); + q_ct1.wait(); + + // Check result + int8_t Chost[m * n]; + q_ct1.memcpy(Chost, Cdev, m * n * sizeof(int8_t)).wait(); + + bool error = false; + int8_t C_ref[m * n] = {0, 17, 40, 69, 0, 6, 16, 30}; + for (int i = 0; i < m * n; i++) { + if (Chost[i] != C_ref[i]) { + error = true; + break; + } + } + printf("c:\n"); + for (int i = 0; i < m * n; i++) + printf("%d, ", Chost[i]); + printf("\n"); + + if (error) { + printf("error\n"); + } else { + printf("success\n"); + } + + delete (ltHandle); + delete (Adesc_col32); + delete (Bdesc_col32_2r_4r4); + delete (Cdesc_col32); + delete (Adesc_col_major); + delete (Bdesc_col_major); + delete (Cdesc_col_major); + dpct::dpct_free(Adev, q_ct1); + dpct::dpct_free(Bdev, q_ct1); + dpct::dpct_free(Cdev, q_ct1); + dpct::dpct_free(alpha, q_ct1); + + return !error; +} + +// clang-format off +// A (4*3) B (2*3) +// 6 10 14 5 -3 1 +// 7 11 15 4 -2 0 +// 8 12 16 +// 9 13 17 +// +// alpha * A * op(B) = alpha * C = C +// 0 6 10 14 5 4 0 14 4 0 0 +// 1 7 11 15 -3 -2 1 17 6 17 6 +// 2 8 12 16 1 0 2 20 8 40 16 +// 3 9 13 17 3 23 10 69 30 +// +// alpha * A * op(B) = alpha * C = C +// 1 6 10 14 5 4 1 14 4 14 
4 +// 7 11 15 -3 -2 17 6 17 6 +// 8 12 16 1 0 20 8 20 8 +// 9 13 17 23 10 23 10 +// clang-format on + +int main() { + bool pass = true; + pass = test1() && pass; + pass = test2() && pass; + pass = test3() && pass; + pass = test4() && pass; + pass = test5() && pass; + pass = test6() && pass; + return pass ? 0 : 1; +} diff --git a/features/feature_case/cublasLt/out/transform.dp.cpp b/features/feature_case/cublasLt/out/transform.dp.cpp new file mode 100644 index 00000000..9d9a0717 --- /dev/null +++ b/features/feature_case/cublasLt/out/transform.dp.cpp @@ -0,0 +1,597 @@ +// ===------------ transform.cu -------------------------- *- CUDA -* ----=== // +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// ===--------------------------------------------------------------------=== // + +#include +#include +#include +#include +#include + + +void transform(dpct::blas_gemm::experimental::descriptor_ptr ltHandle, void *in, int ld_in, + dpct::blas_gemm::experimental::order_t order_in, void *out, int ld_out, + dpct::blas_gemm::experimental::order_t order_out, int dim1, int dim2) { + dpct::blas_gemm::experimental::matrix_layout_ptr in_desc = NULL, out_desc = NULL; + dpct::blas_gemm::experimental::transform_desc_ptr transform_desc = NULL; + + in_desc = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, dim1, dim2, ld_in); + out_desc = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, dim1, dim2, ld_out); + + in_desc->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &order_in); + out_desc->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &order_out); + + transform_desc = new dpct::blas_gemm::experimental::transform_desc_t(dpct::library_data_t::real_float); + + float alpha = 1.0f, beta = 0.0f; + 
dpct::blas_gemm::experimental::matrix_transform(transform_desc, &alpha, in, in_desc, &beta, NULL, NULL, out, out_desc, 0); + + delete (in_desc); + delete (out_desc); + delete (transform_desc); +} + +bool test_ROW() { + dpct::device_ext &dev_ct1 = dpct::get_current_device(); + sycl::queue &q_ct1 = dev_ct1.in_order_queue(); + const constexpr int m = 2; + const constexpr int n = 33; + const constexpr int in_ld = 4; + void *in_dev; + in_dev = (void *)sycl::malloc_device(n * in_ld * sizeof(int8_t), q_ct1); + + int8_t in_host[n * in_ld]; + int8_t value = 0; + for (int i = 0; i < n * in_ld; i++) { + if (i % 4 < 2) { + in_host[i] = value; + value++; + } else + in_host[i] = 99; + } + int8_t ref_2nd[n * in_ld]; + std::memcpy(ref_2nd, in_host, n * in_ld * sizeof(int8_t)); + + q_ct1.memcpy(in_dev, in_host, n * in_ld * sizeof(int8_t)).wait(); + + dpct::blas_gemm::experimental::descriptor_ptr ltHandle; + ltHandle = new dpct::blas_gemm::experimental::descriptor(); + + void *out_dev; + const constexpr int out_ld = 36; + out_dev = (void *)sycl::malloc_device(out_ld * m * sizeof(int8_t), q_ct1); + q_ct1.memset(out_dev, 0, out_ld * m * sizeof(int8_t)).wait(); + transform(ltHandle, in_dev, in_ld, dpct::blas_gemm::experimental::order_t::col, out_dev, out_ld, + dpct::blas_gemm::experimental::order_t::row, m, n); + + int8_t out_host[out_ld * m]; + q_ct1.memcpy(out_host, out_dev, out_ld * m * sizeof(int8_t)).wait(); + + bool pass_1st = true; + int8_t ref_1st[out_ld * m] = + {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 0, 0, 0, + 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 0, 0, 0}; + for (int i = 0; i < out_ld * m; i++) { + if (i % out_ld < n) { + if (out_host[i] != ref_1st[i]) { + pass_1st = false; + break; + } + } + } + + for (int i = 0; i < out_ld * m; i++) { + printf("%d, ", out_host[i]); + } + printf("\n"); + if 
(pass_1st) { + printf("ROW 1st pass\n"); + } else { + printf("ROW 1st fail\n"); + } + + q_ct1.memset(in_dev, 0, n * in_ld * sizeof(int8_t)).wait(); + std::memset(in_host, 0, n * in_ld * sizeof(int8_t)); + transform(ltHandle, out_dev, out_ld, dpct::blas_gemm::experimental::order_t::row, in_dev, in_ld, + dpct::blas_gemm::experimental::order_t::col, m, n); + q_ct1.memcpy(in_host, in_dev, n * in_ld * sizeof(int8_t)).wait(); + + bool pass_2nd = true; + for (int i = 0; i < n * in_ld; i++) { + if (i % in_ld < m) { + if (in_host[i] != ref_2nd[i]) { + pass_2nd = false; + break; + } + } + } + + for (int i = 0; i < n * in_ld; i++) { + printf("%d, ", in_host[i]); + } + printf("\n"); + if (pass_2nd) { + printf("ROW 2nd pass\n"); + } else { + printf("ROW 2nd fail\n"); + } + + delete (ltHandle); + + return pass_1st && pass_2nd; +} + +bool test_COL32() { + dpct::device_ext &dev_ct1 = dpct::get_current_device(); + sycl::queue &q_ct1 = dev_ct1.in_order_queue(); + const constexpr int m = 2; + const constexpr int n = 33; + const constexpr int in_ld = 4; + void *in_dev; + in_dev = (void *)sycl::malloc_device(n * in_ld * sizeof(int8_t), q_ct1); + + int8_t in_host[n * in_ld]; + int8_t value = 0; + for (int i = 0; i < n * in_ld; i++) { + if (i % 4 < 2) { + in_host[i] = value; + value++; + } else + in_host[i] = 99; + } + int8_t ref_2nd[n * in_ld]; + std::memcpy(ref_2nd, in_host, n * in_ld * sizeof(int8_t)); + + q_ct1.memcpy(in_dev, in_host, n * in_ld * sizeof(int8_t)).wait(); + + dpct::blas_gemm::experimental::descriptor_ptr ltHandle; + ltHandle = new dpct::blas_gemm::experimental::descriptor(); + + void *out_dev; + const constexpr int out_ld = 64; + out_dev = (void *)sycl::malloc_device(out_ld * m * sizeof(int8_t), q_ct1); + q_ct1.memset(out_dev, 0, out_ld * m * sizeof(int8_t)).wait(); + transform(ltHandle, in_dev, in_ld, dpct::blas_gemm::experimental::order_t::col, out_dev, out_ld, + dpct::blas_gemm::experimental::order_t::col32, m, n); + + int8_t out_host[out_ld * m]; + 
q_ct1.memcpy(out_host, out_dev, out_ld * m * sizeof(int8_t)).wait(); + + bool pass_1st = true; + int8_t ref_1st[out_ld * m] = + {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, + 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, + 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 65, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + for (int i = 0; i < out_ld * m; i++) { + if (i % out_ld < n) { + if (out_host[i] != ref_1st[i]) { + pass_1st = false; + break; + } + } + } + + for (int i = 0; i < out_ld * m; i++) { + printf("%d, ", out_host[i]); + } + printf("\n"); + if (pass_1st) { + printf("COL32 1st pass\n"); + } else { + printf("COL32 1st fail\n"); + } + + q_ct1.memset(in_dev, 0, n * in_ld * sizeof(int8_t)).wait(); + std::memset(in_host, 0, n * in_ld * sizeof(int8_t)); + transform(ltHandle, out_dev, out_ld, dpct::blas_gemm::experimental::order_t::col32, in_dev, in_ld, + dpct::blas_gemm::experimental::order_t::col, m, n); + q_ct1.memcpy(in_host, in_dev, n * in_ld * sizeof(int8_t)).wait(); + + bool pass_2nd = true; + for (int i = 0; i < n * in_ld; i++) { + if (i % in_ld < m) { + if (in_host[i] != ref_2nd[i]) { + pass_2nd = false; + break; + } + } + } + + for (int i = 0; i < n * in_ld; i++) { + printf("%d, ", in_host[i]); + } + printf("\n"); + if (pass_2nd) { + printf("COL32 2nd pass\n"); + } else { + printf("COL32 2nd fail\n"); + } + + delete (ltHandle); + + return pass_1st && pass_2nd; +} + +bool test_COL4_4R2_8C() { + dpct::device_ext &dev_ct1 = dpct::get_current_device(); + sycl::queue &q_ct1 = dev_ct1.in_order_queue(); + const constexpr int m = 2; + const constexpr int n = 33; + const constexpr int in_ld = 4; + void *in_dev; + in_dev = (void *)sycl::malloc_device(n * in_ld * sizeof(int8_t), q_ct1); + + int8_t in_host[n * 
in_ld]; + int8_t value = 0; + for (int i = 0; i < n * in_ld; i++) { + if (i % 4 < 2) { + in_host[i] = value; + value++; + } else + in_host[i] = 99; + } + int8_t ref_2nd[n * in_ld]; + std::memcpy(ref_2nd, in_host, n * in_ld * sizeof(int8_t)); + + q_ct1.memcpy(in_dev, in_host, n * in_ld * sizeof(int8_t)).wait(); + + dpct::blas_gemm::experimental::descriptor_ptr ltHandle; + ltHandle = new dpct::blas_gemm::experimental::descriptor(); + + void *out_dev; + const constexpr int out_ld = (32 * 8) * 2; + out_dev = (void *)sycl::malloc_device(out_ld * m * sizeof(int8_t), q_ct1); + q_ct1.memset(out_dev, 0, out_ld * m * sizeof(int8_t)).wait(); + transform(ltHandle, in_dev, in_ld, dpct::blas_gemm::experimental::order_t::col, out_dev, out_ld, + dpct::blas_gemm::experimental::order_t::col4_4r2_8c, m, n); + + int8_t out_host[out_ld * m]; + q_ct1.memcpy(out_host, out_dev, out_ld * m * sizeof(int8_t)).wait(); + + bool pass_1st = true; + int8_t ref_1st[out_ld * m] = + {0, 2, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 8, 10, 12, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 16, 18, 20, 22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 24, 26, 28, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 32, 34, 36, 38, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 40, 42, 44, 46, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 48, 50, 52, 54, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 56, 58, 60, 62, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 3, 5, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 9, 11, 13, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 17, 19, 21, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 25, 27, 29, 31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 33, 35, 37, 39, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 41, 43, 45, 47, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 49, 51, 53, 55, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 57, 59, 61, 63, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 65, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + for (int i = 0; i < out_ld * m; i++) { + if (i % out_ld < n) { + if (out_host[i] != ref_1st[i]) { + pass_1st = false; + break; + } + } + } + + for (int i = 0; i < out_ld * m; i++) { + printf("%d, ", out_host[i]); + } + printf("\n"); + if (pass_1st) { + printf("COL4_4R2_8C 1st pass\n"); + } else { + printf("COL4_4R2_8C 1st fail\n"); + } + + q_ct1.memset(in_dev, 0, n * in_ld * sizeof(int8_t)).wait(); + std::memset(in_host, 0, n * in_ld * sizeof(int8_t)); + transform(ltHandle, out_dev, out_ld, dpct::blas_gemm::experimental::order_t::col4_4r2_8c, in_dev, + in_ld, dpct::blas_gemm::experimental::order_t::col, m, n); + q_ct1.memcpy(in_host, in_dev, n * in_ld * sizeof(int8_t)).wait(); + + bool pass_2nd = true; + for (int i = 0; i < n * in_ld; i++) { + if (i % in_ld < m) { + if (in_host[i] != ref_2nd[i]) { + pass_2nd = false; + break; + } + } + } + + for (int i = 0; i < n * in_ld; i++) { + printf("%d, ", in_host[i]); + } + printf("\n"); + if (pass_2nd) { + printf("COL4_4R2_8C 2nd pass\n"); + } else { + printf("COL4_4R2_8C 2nd fail\n"); + } + + delete (ltHandle); + + return pass_1st && pass_2nd; +} + +bool test_COL32_2R_4R4() { + dpct::device_ext &dev_ct1 = dpct::get_current_device(); + sycl::queue &q_ct1 = dev_ct1.in_order_queue(); + const constexpr int m = 2; + const constexpr int n = 33; + const constexpr int in_ld = 4; + void *in_dev; + in_dev = (void *)sycl::malloc_device(n * in_ld * sizeof(int8_t), q_ct1); + + int8_t in_host[n * in_ld]; + int8_t value = 0; + for (int i = 0; i < n * in_ld; i++) { + if (i % 4 < 2) { + in_host[i] = value; + value++; + } else + in_host[i] = 99; + } + int8_t ref_2nd[n * in_ld]; + std::memcpy(ref_2nd, in_host, n * in_ld * sizeof(int8_t)); + + q_ct1.memcpy(in_dev, in_host, n * in_ld * sizeof(int8_t)).wait(); + + 
dpct::blas_gemm::experimental::descriptor_ptr ltHandle; + ltHandle = new dpct::blas_gemm::experimental::descriptor(); + + void *out_dev; + const constexpr int out_ld = (32 * 32) * 2; + out_dev = (void *)sycl::malloc_device(out_ld * m * sizeof(int8_t), q_ct1); + q_ct1.memset(out_dev, 0, out_ld * m * sizeof(int8_t)).wait(); + transform(ltHandle, in_dev, in_ld, dpct::blas_gemm::experimental::order_t::col, out_dev, out_ld, + dpct::blas_gemm::experimental::order_t::col32_2r_4r4, m, n); + + int8_t out_host[out_ld * m]; + q_ct1.memcpy(out_host, out_dev, out_ld * m * sizeof(int8_t)).wait(); + + bool pass_1st = true; + int8_t ref_1st[out_ld * m] = + {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, + 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 65, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + for (int i = 0; i < out_ld * m; i++) { + if (i % out_ld < n) { + if (out_host[i] != ref_1st[i]) { + pass_1st = false; + break; + } + } + } + + for (int i = 0; i < out_ld * m; i++) { + printf("%d, ", out_host[i]); + } + printf("\n"); + if (pass_1st) { + printf("COL32_2R_4R4 1st pass\n"); + } else { + printf("COL32_2R_4R4 1st fail\n"); + } + + q_ct1.memset(in_dev, 0, n * in_ld * sizeof(int8_t)).wait(); + std::memset(in_host, 0, n * in_ld * sizeof(int8_t)); + transform(ltHandle, out_dev, out_ld, dpct::blas_gemm::experimental::order_t::col32_2r_4r4, in_dev, + in_ld, dpct::blas_gemm::experimental::order_t::col, m, n); + q_ct1.memcpy(in_host, in_dev, n * in_ld * sizeof(int8_t)).wait(); + + bool pass_2nd = true; + for (int i = 0; i < n 
* in_ld; i++) { + if (i % in_ld < m) { + if (in_host[i] != ref_2nd[i]) { + pass_2nd = false; + break; + } + } + } + + for (int i = 0; i < n * in_ld; i++) { + printf("%d, ", in_host[i]); + } + printf("\n"); + if (pass_2nd) { + printf("COL32_2R_4R4 2nd pass\n"); + } else { + printf("COL32_2R_4R4 2nd fail\n"); + } + + delete (ltHandle); + + return pass_1st && pass_2nd; +} + +// Input col_major matrix: +// 2 rows * 33 columns, ld is 4 +int main() { + bool pass = true; + pass = test_ROW() && pass; + pass = test_COL32() && pass; + pass = test_COL4_4R2_8C() && pass; + pass = test_COL32_2R_4R4() && pass; + return pass ? 0 : 1; +} diff --git a/features/features.xml b/features/features.xml index 51de73ac..2afeea76 100644 --- a/features/features.xml +++ b/features/features.xml @@ -341,5 +341,7 @@ + + diff --git a/features/test_feature.py b/features/test_feature.py index e65c3f99..bed077ab 100644 --- a/features/test_feature.py +++ b/features/test_feature.py @@ -60,7 +60,7 @@ 'thrust_swap_ranges', 'thrust_uninitialized_fill_n', 'thrust_equal', 'system_atomic', 'thrust_detail_types', 'operator_eq', 'operator_neq', 'operator_lege', 'thrust_system', 'thrust_reverse_copy', 'thrust_device_new_delete', 'thrust_temporary_buffer', 'thrust_malloc_free', 'codepin', 'thrust_unique_count', - 'thrust_advance_trans_op_itr', 'cuda_stream_query'] + 'thrust_advance_trans_op_itr', 'cuda_stream_query', "matmul", "transform"] occupancy_calculation_exper = ['occupancy_calculation'] @@ -166,7 +166,7 @@ def build_test(): 'cudnn-binary', 'cudnn-bnp1', 'cudnn-bnp2', 'cudnn-bnp3', 'cudnn-normp1', 'cudnn-normp2', 'cudnn-normp3', 'cudnn-convp1', 'cudnn-convp2', 'cudnn-convp3', 'cudnn-convp4', 'cudnn-convp5', 'cudnn-convp6', 'cudnn-rnn', 'cudnn-GetErrorString', 'cudnn-convp7', - 'cudnn-types', 'cudnn-version', 'cudnn-dropout' + 'cudnn-types', 'cudnn-version', 'cudnn-dropout', 'matmul' ] no_fast_math_tests = ['math-emu-half-after11', 'math-emu-half2-after11', 'math-ext-half-after11', 
'math-ext-half2-after11', diff --git a/help_function/help_function.xml b/help_function/help_function.xml index 3bdbd26e..86373b76 100644 --- a/help_function/help_function.xml +++ b/help_function/help_function.xml @@ -215,5 +215,6 @@ + diff --git a/help_function/src/blas_gemm_utils_interface.cpp b/help_function/src/blas_gemm_utils_interface.cpp new file mode 100644 index 00000000..551021b3 --- /dev/null +++ b/help_function/src/blas_gemm_utils_interface.cpp @@ -0,0 +1,152 @@ +// ===------ blas_gemm_utils_interface.cpp ----------------- *- C++ -* ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// ===--------------------------------------------------------------------=== // + +#include +#include +#include +#include + +void foo1 () { + dpct::blas_gemm::experimental::descriptor_ptr ltHandle; + ltHandle = new dpct::blas_gemm::experimental::descriptor(); + delete (ltHandle); + + dpct::blas_gemm::experimental::matrix_layout_ptr matLayout; + dpct::library_data_t type; + uint64_t rows; + uint64_t cols; + int64_t ld; + matLayout = + new dpct::blas_gemm::experimental::matrix_layout_t(type, rows, cols, ld); + + dpct::blas_gemm::experimental::matrix_layout_t::attribute attr1; + void *buf1; + size_t sizeInBytes1; + size_t *sizeWritten1; + matLayout->get_attribute(attr1, buf1); + matLayout->set_attribute(attr1, buf1); + delete (matLayout); + + dpct::blas_gemm::experimental::matmul_desc_ptr matmulDesc; + dpct::compute_type computeType; + dpct::library_data_t scaleType; + matmulDesc = + new dpct::blas_gemm::experimental::matmul_desc_t(computeType, scaleType); + + dpct::blas_gemm::experimental::matmul_desc_t::attribute attr2; + void *buf2; + size_t sizeInBytes2; + size_t *sizeWritten2; + matmulDesc->get_attribute(attr2, buf2); + matmulDesc->set_attribute(attr2, buf2); + delete (matmulDesc); + + int 
matmulPreference; + void *buf3; + size_t sizeInBytes3; + size_t *sizeWritten3; + + dpct::blas_gemm::experimental::matrix_layout_ptr Adesc; + dpct::blas_gemm::experimental::matrix_layout_ptr Bdesc; + dpct::blas_gemm::experimental::matrix_layout_ptr Cdesc; + dpct::blas_gemm::experimental::matrix_layout_ptr Ddesc; + + int requestedAlgoCount = 1; + int heuristicResultsArray; + int returnAlgoCount; + returnAlgoCount = 1; +} + +void foo2() { + dpct::blas_gemm::experimental::descriptor_ptr lightHandle; + dpct::blas_gemm::experimental::matmul_desc_ptr computeDesc; + const void *alpha; + const void *A; + dpct::blas_gemm::experimental::matrix_layout_ptr Adesc; + const void *B; + dpct::blas_gemm::experimental::matrix_layout_ptr Bdesc; + const void *beta; + const void *C; + dpct::blas_gemm::experimental::matrix_layout_ptr Cdesc; + void *D; + dpct::blas_gemm::experimental::matrix_layout_ptr Ddesc; + const int *algo; + void *workspace; + size_t workspaceSizeInBytes; + dpct::queue_ptr stream; + dpct::blas_gemm::experimental::matmul(lightHandle, computeDesc, alpha, A, + Adesc, B, Bdesc, beta, C, Cdesc, D, + Ddesc, stream); +} + +void foo3() { + dpct::blas_gemm::experimental::order_t a; + a = dpct::blas_gemm::experimental::order_t::col; + a = dpct::blas_gemm::experimental::order_t::row; + a = dpct::blas_gemm::experimental::order_t::col32; + a = dpct::blas_gemm::experimental::order_t::col4_4r2_8c; + a = dpct::blas_gemm::experimental::order_t::col32_2r_4r4; + + dpct::blas_gemm::experimental::pointer_mode_t b; + b = dpct::blas_gemm::experimental::pointer_mode_t::host; + b = dpct::blas_gemm::experimental::pointer_mode_t::device; + b = dpct::blas_gemm::experimental::pointer_mode_t::device_vector; + b = dpct::blas_gemm::experimental::pointer_mode_t:: + alpha_device_vector_beta_zero; + b = dpct::blas_gemm::experimental::pointer_mode_t:: + alpha_device_vector_beta_host; + + dpct::blas_gemm::experimental::matrix_layout_t::attribute c; + c = 
dpct::blas_gemm::experimental::matrix_layout_t::attribute::type; + c = dpct::blas_gemm::experimental::matrix_layout_t::attribute::order; + c = dpct::blas_gemm::experimental::matrix_layout_t::attribute::rows; + c = dpct::blas_gemm::experimental::matrix_layout_t::attribute::cols; + c = dpct::blas_gemm::experimental::matrix_layout_t::attribute::ld; + + dpct::blas_gemm::experimental::matmul_desc_t::attribute d; + d = dpct::blas_gemm::experimental::matmul_desc_t::attribute::compute_type; + d = dpct::blas_gemm::experimental::matmul_desc_t::attribute::scale_type; + d = dpct::blas_gemm::experimental::matmul_desc_t::attribute::pointer_mode; + d = dpct::blas_gemm::experimental::matmul_desc_t::attribute::trans_a; + d = dpct::blas_gemm::experimental::matmul_desc_t::attribute::trans_b; + d = dpct::blas_gemm::experimental::matmul_desc_t::attribute::trans_c; + d = dpct::blas_gemm::experimental::matmul_desc_t::attribute::epilogue; +} + +void foo4() { + dpct::blas_gemm::experimental::transform_desc_ptr transformDesc; + dpct::library_data_t scaleType; + transformDesc = + new dpct::blas_gemm::experimental::transform_desc_t(scaleType); + oneapi::mkl::transpose opT = oneapi::mkl::transpose::trans; + size_t sizeWritten; + transformDesc->set_attribute( + dpct::blas_gemm::experimental::transform_desc_t::attribute::trans_a, + &opT); + transformDesc->get_attribute( + dpct::blas_gemm::experimental::transform_desc_t::attribute::trans_a, + &opT); + delete (transformDesc); + + dpct::blas_gemm::experimental::descriptor_ptr lightHandle; + const void *alpha; + const void *A; + dpct::blas_gemm::experimental::matrix_layout_ptr Adesc; + const void *beta; + const void *B; + dpct::blas_gemm::experimental::matrix_layout_ptr Bdesc; + void *C; + dpct::blas_gemm::experimental::matrix_layout_ptr Cdesc; + dpct::queue_ptr stream; + dpct::blas_gemm::experimental::matrix_transform( + transformDesc, alpha, A, Adesc, beta, B, Bdesc, C, Cdesc, stream); +} + +int main() { + return 0; +} diff --git 
a/help_function/test_help.py b/help_function/test_help.py index b6c58527..52588464 100644 --- a/help_function/test_help.py +++ b/help_function/test_help.py @@ -45,7 +45,7 @@ def build_test(): "dnnl_utils_batch_normalization_2", "dnnl_utils_batch_normalization_3", "dnnl_utils_convolution_1", "dnnl_utils_convolution_2", "dnnl_utils_convolution_3", "dnnl_utils_convolution_4", "dnnl_utils_convolution_5", "dnnl_utils_normalization_1", "dnnl_utils_normalization_2", "dnnl_utils_normalization_3", "dnnl_utils_rnn", - "dnnl_utils_version", "dnnl_utils_dropout"] + "dnnl_utils_version", "dnnl_utils_dropout", "blas_gemm_utils_interface"] fft_cases = ["fft_utils_engine_buffer", "fft_utils_engine_usm", "fft_workspace_interface", "fft_set_workspace"] lapack_cases = ["lapack_utils_buffer", "lapack_utils_usm"] rng_cases = ["rng_generator", "rng_generator_vec_size_1", "rng_host"] From 7432afafce5bdf34a3b10a5df3d6bde5becb79e6 Mon Sep 17 00:00:00 2001 From: "Jiang, Zhiwei" Date: Thu, 27 Jun 2024 09:13:38 +0800 Subject: [PATCH 3/4] Skip cuda backend Signed-off-by: Jiang, Zhiwei --- help_function/help_function.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/help_function/help_function.xml b/help_function/help_function.xml index 86373b76..fc047e53 100644 --- a/help_function/help_function.xml +++ b/help_function/help_function.xml @@ -215,6 +215,6 @@ - + From b49b36f532d39ad43f57f7ed760d6004cb97f002 Mon Sep 17 00:00:00 2001 From: "Jiang, Zhiwei" Date: Thu, 27 Jun 2024 11:26:32 +0800 Subject: [PATCH 4/4] Remove test code Signed-off-by: Jiang, Zhiwei --- .../cublasLt/out/MainSourceFiles.yaml | 3846 ----------------- .../feature_case/cublasLt/out/matmul.dp.cpp | 742 ---- .../cublasLt/out/transform.dp.cpp | 597 --- 3 files changed, 5185 deletions(-) delete mode 100644 features/feature_case/cublasLt/out/MainSourceFiles.yaml delete mode 100644 features/feature_case/cublasLt/out/matmul.dp.cpp delete mode 100644 features/feature_case/cublasLt/out/transform.dp.cpp diff --git 
a/features/feature_case/cublasLt/out/MainSourceFiles.yaml b/features/feature_case/cublasLt/out/MainSourceFiles.yaml deleted file mode 100644 index d8674ec9..00000000 --- a/features/feature_case/cublasLt/out/MainSourceFiles.yaml +++ /dev/null @@ -1,3846 +0,0 @@ ---- -MainSourceFile: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/MainSrcFiles_placehold' -Replacements: - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 370 - Length: 22 - ReplacementText: "#include \n#include \n#include \n" - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 431 - Length: 0 - ReplacementText: "\n#include \n" - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 851 - Length: 14 - ReplacementText: int - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 894 - Length: 21 - ReplacementText: '0' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 1143 - Length: 16 - ReplacementText: 'dpct::blas_gemm::experimental::descriptor_ptr' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 1279 - Length: 0 - ReplacementText: ' try ' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: 
'/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 1280 - Length: 0 - ReplacementText: "\n dpct::device_ext &dev_ct1 = dpct::get_current_device();\n sycl::queue &q_ct1 = dev_ct1.in_order_queue();" - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 1308 - Length: 20 - ReplacementText: 'dpct::blas_gemm::experimental::matmul_desc_ptr' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 1352 - Length: 22 - ReplacementText: 'dpct::blas_gemm::experimental::matrix_layout_ptr' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 1421 - Length: 17 - ReplacementText: 'oneapi::mkl::transpose' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 1445 - Length: 11 - ReplacementText: 'oneapi::mkl::transpose::trans' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 1462 - Length: 21 - ReplacementText: 'dpct::blas_gemm::experimental::pointer_mode_t' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 1495 - Length: 51 - ReplacementText: 'dpct::blas_gemm::experimental::pointer_mode_t::alpha_device_vector_beta_zero' - 
ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 1552 - Length: 15 - ReplacementText: 'dpct::blas_gemm::experimental::order_t' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 1576 - Length: 20 - ReplacementText: 'dpct::blas_gemm::experimental::order_t::col32' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 1602 - Length: 15 - ReplacementText: 'dpct::blas_gemm::experimental::order_t' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 1631 - Length: 26 - ReplacementText: 'dpct::blas_gemm::experimental::order_t::col4_4r2_8c' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 1663 - Length: 15 - ReplacementText: 'dpct::blas_gemm::experimental::order_t' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 1692 - Length: 27 - ReplacementText: 'dpct::blas_gemm::experimental::order_t::col32_2r_4r4' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 1757 - Length: 56 - ReplacementText: 
'DPCT_CHECK_ERROR(Adesc = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, k, lda))' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 1851 - Length: 56 - ReplacementText: 'DPCT_CHECK_ERROR(Bdesc = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, n, k, ldb))' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 1946 - Length: 92 - ReplacementText: 'DPCT_CHECK_ERROR(Adesc->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32))' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 2108 - Length: 102 - ReplacementText: 'DPCT_CHECK_ERROR(Bdesc->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col_turing))' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 2259 - Length: 102 - ReplacementText: 'DPCT_CHECK_ERROR(Bdesc->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col_ampere))' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 2432 - Length: 69 - ReplacementText: 'DPCT_CHECK_ERROR(matmulDesc = new dpct::blas_gemm::experimental::matmul_desc_t(dpct::compute_type::i32, dpct::library_data_t::real_int32))' - ConstantFlag: '' - 
ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 2541 - Length: 90 - ReplacementText: 'DPCT_CHECK_ERROR(matmulDesc->set_attribute(dpct::blas_gemm::experimental::matmul_desc_t::attribute::trans_b, &opT))' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 2671 - Length: 57 - ReplacementText: 'DPCT_CHECK_ERROR(Cdesc = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int32, m, n, ldc))' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 2768 - Length: 92 - ReplacementText: 'DPCT_CHECK_ERROR(Cdesc->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32))' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 2931 - Length: 128 - ReplacementText: 'DPCT_CHECK_ERROR(dpct::blas_gemm::experimental::matmul(ltHandle, matmulDesc, &alpha, A, Adesc, B, Bdesc, &beta, (int32_t*)C, Cdesc, (int32_t*)C, Cdesc, 0))' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 3120 - Length: 69 - ReplacementText: 'DPCT_CHECK_ERROR(matmulDesc = new dpct::blas_gemm::experimental::matmul_desc_t(dpct::compute_type::i32, dpct::library_data_t::real_float))' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: 
'/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 3229 - Length: 90 - ReplacementText: 'DPCT_CHECK_ERROR(matmulDesc->set_attribute(dpct::blas_gemm::experimental::matmul_desc_t::attribute::trans_b, &opT))' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 3359 - Length: 56 - ReplacementText: 'DPCT_CHECK_ERROR(Cdesc = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, n, ldc))' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 3455 - Length: 92 - ReplacementText: 'DPCT_CHECK_ERROR(Cdesc->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32))' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 3660 - Length: 126 - ReplacementText: 'DPCT_CHECK_ERROR(dpct::blas_gemm::experimental::matmul(ltHandle, matmulDesc, &alpha, A, Adesc, B, Bdesc, &beta, (int8_t*)C, Cdesc, (int8_t*)C, Cdesc, 0))' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 3855 - Length: 106 - ReplacementText: 'DPCT_CHECK_ERROR(matmulDesc->set_attribute(dpct::blas_gemm::experimental::matmul_desc_t::attribute::pointer_mode, &alphaVec))' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 4003 - Length: 129 - ReplacementText: 
'DPCT_CHECK_ERROR(dpct::blas_gemm::experimental::matmul(ltHandle, matmulDesc, row_scale, A, Adesc, B, Bdesc, NULL, (int8_t*)C, Cdesc, (int8_t*)C, Cdesc, 0))' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 4154 - Length: 24 - ReplacementText: 'q_ct1.wait()' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 4227 - Length: 34 - ReplacementText: 'DPCT_CHECK_ERROR(delete (Cdesc))' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 4310 - Length: 34 - ReplacementText: 'DPCT_CHECK_ERROR(delete (Bdesc))' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 4393 - Length: 34 - ReplacementText: 'DPCT_CHECK_ERROR(delete (Adesc))' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 4481 - Length: 37 - ReplacementText: 'DPCT_CHECK_ERROR(delete (matmulDesc))' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 4600 - Length: 0 - ReplacementText: "\ncatch (sycl::exception const &exc) {\n std::cerr << exc.what() << \"Exception caught at file:\" << __FILE__ << \", line:\" << __LINE__ << std::endl;\n std::exit(1);\n}" - ConstantFlag: '' - ConstantOffset: 0 - 
InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 4617 - Length: 16 - ReplacementText: 'dpct::blas_gemm::experimental::descriptor_ptr' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 4686 - Length: 22 - ReplacementText: 'dpct::blas_gemm::experimental::matrix_layout_ptr' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 4758 - Length: 22 - ReplacementText: 'dpct::blas_gemm::experimental::matrix_layout_ptr' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 4797 - Length: 29 - ReplacementText: 'dpct::blas_gemm::experimental::transform_desc_ptr' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 4852 - Length: 62 - ReplacementText: 'transform_desc = new dpct::blas_gemm::experimental::transform_desc_t(dpct::library_data_t::real_float)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 4953 - Length: 137 - ReplacementText: 'dpct::blas_gemm::experimental::matrix_transform(transform_desc, &alpha, in, layout_in, &beta, NULL, NULL, out, layout_out, 0)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: 
'/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 5094 - Length: 50 - ReplacementText: 'delete (transform_desc)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 5193 - Length: 0 - ReplacementText: "\n dpct::device_ext &dev_ct1 = dpct::get_current_device();\n sycl::queue &q_ct1 = dev_ct1.in_order_queue();" - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 5196 - Length: 16 - ReplacementText: 'dpct::blas_gemm::experimental::descriptor_ptr' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 5225 - Length: 25 - ReplacementText: 'ltHandle = new dpct::blas_gemm::experimental::descriptor()' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 5428 - Length: 41 - ReplacementText: 'Adev = (void *)sycl::malloc_device(m * k * sizeof(int8_t), q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 5473 - Length: 41 - ReplacementText: 'Bdev = (void *)sycl::malloc_device(n * k * sizeof(int8_t), q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 5518 - Length: 42 - ReplacementText: 'Cdev = (void 
*)sycl::malloc_device(m * n * sizeof(int32_t), q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 5682 - Length: 10 - ReplacementText: q_ct1.memcpy - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 5728 - Length: 24 - ReplacementText: '' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 5757 - Length: 10 - ReplacementText: q_ct1.memcpy - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 5803 - Length: 24 - ReplacementText: '' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 5828 - Length: 0 - ReplacementText: '.wait()' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 5833 - Length: 22 - ReplacementText: 'dpct::blas_gemm::experimental::matrix_layout_ptr' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 5955 - Length: 66 - ReplacementText: 'Adesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, k, lda)' - ConstantFlag: '' - 
ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 6025 - Length: 66 - ReplacementText: 'Bdesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, n, k, ldb)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 6095 - Length: 67 - ReplacementText: 'Cdesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int32, m, n, ldc)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 6188 - Length: 22 - ReplacementText: 'dpct::blas_gemm::experimental::matrix_layout_ptr' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 6359 - Length: 50 - ReplacementText: 'A_col32 = (int8_t *)sycl::malloc_device(m * 32 * sizeof(std::int8_t), q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 6413 - Length: 76 - ReplacementText: 'B_col4_4r2_8c = (int8_t *)sycl::malloc_device(((n + 8 - 1) / 8) * 8 * 32 * sizeof(std::int8_t), q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 6493 - Length: 51 - ReplacementText: 'C_col32 = (int32_t *)sycl::malloc_device(m * 32 * sizeof(std::int32_t), q_ct1)' - 
ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 6548 - Length: 65 - ReplacementText: 'Adesc_col32 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, k, m * 32)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 6617 - Length: 120 - ReplacementText: 'Bdesc_col4_4r2_8c = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, k, n, ((n + 8 - 1) / 8) * 8 * 32)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 6741 - Length: 66 - ReplacementText: 'Cdesc_col32 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int32, m, n, m * 32)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 6811 - Length: 15 - ReplacementText: 'dpct::blas_gemm::experimental::order_t' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 6835 - Length: 20 - ReplacementText: 'dpct::blas_gemm::experimental::order_t::col32' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 6859 - Length: 15 - ReplacementText: 'dpct::blas_gemm::experimental::order_t' - ConstantFlag: '' - 
ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 6889 - Length: 26 - ReplacementText: 'dpct::blas_gemm::experimental::order_t::col4_4r2_8c' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 6919 - Length: 133 - ReplacementText: 'Adesc_col32->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 7056 - Length: 186 - ReplacementText: 'Bdesc_col4_4r2_8c->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col4_4r2_8c)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 7246 - Length: 133 - ReplacementText: 'Cdesc_col32->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 7873 - Length: 24 - ReplacementText: 'q_ct1.wait()' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 7944 - Length: 10 - ReplacementText: q_ct1.memcpy - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: 
'/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 7991 - Length: 24 - ReplacementText: '' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 8016 - Length: 0 - ReplacementText: '.wait()' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 8388 - Length: 25 - ReplacementText: 'delete (ltHandle)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 8417 - Length: 40 - ReplacementText: 'delete (Adesc_col32)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 8461 - Length: 46 - ReplacementText: 'delete (Bdesc_col4_4r2_8c)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 8511 - Length: 40 - ReplacementText: 'delete (Cdesc_col32)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 8555 - Length: 44 - ReplacementText: 'delete (Adesc_col_major)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 8603 - Length: 44 - ReplacementText: 'delete 
(Bdesc_col_major)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 8651 - Length: 44 - ReplacementText: 'delete (Cdesc_col_major)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 8699 - Length: 14 - ReplacementText: 'dpct::dpct_free(Adev, q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 8717 - Length: 14 - ReplacementText: 'dpct::dpct_free(Bdev, q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 8735 - Length: 14 - ReplacementText: 'dpct::dpct_free(Cdev, q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 8815 - Length: 0 - ReplacementText: "\n dpct::device_ext &dev_ct1 = dpct::get_current_device();\n sycl::queue &q_ct1 = dev_ct1.in_order_queue();" - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 8818 - Length: 16 - ReplacementText: 'dpct::blas_gemm::experimental::descriptor_ptr' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 8847 - Length: 25 - 
ReplacementText: 'ltHandle = new dpct::blas_gemm::experimental::descriptor()' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 9050 - Length: 41 - ReplacementText: 'Adev = (void *)sycl::malloc_device(m * k * sizeof(int8_t), q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 9095 - Length: 41 - ReplacementText: 'Bdev = (void *)sycl::malloc_device(n * k * sizeof(int8_t), q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 9140 - Length: 41 - ReplacementText: 'Cdev = (void *)sycl::malloc_device(m * n * sizeof(int8_t), q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 9303 - Length: 10 - ReplacementText: q_ct1.memcpy - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 9349 - Length: 24 - ReplacementText: '' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 9378 - Length: 10 - ReplacementText: q_ct1.memcpy - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 9424 - 
Length: 24 - ReplacementText: '' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 9449 - Length: 0 - ReplacementText: '.wait()' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 9454 - Length: 22 - ReplacementText: 'dpct::blas_gemm::experimental::matrix_layout_ptr' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 9576 - Length: 66 - ReplacementText: 'Adesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, k, lda)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 9646 - Length: 66 - ReplacementText: 'Bdesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, n, k, ldb)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 9716 - Length: 66 - ReplacementText: 'Cdesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, n, ldc)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 9808 - Length: 22 - ReplacementText: 'dpct::blas_gemm::experimental::matrix_layout_ptr' - ConstantFlag: '' - ConstantOffset: 0 - 
InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 9978 - Length: 50 - ReplacementText: 'A_col32 = (int8_t *)sycl::malloc_device(m * 32 * sizeof(std::int8_t), q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 10032 - Length: 76 - ReplacementText: 'B_col4_4r2_8c = (int8_t *)sycl::malloc_device(((n + 8 - 1) / 8) * 8 * 32 * sizeof(std::int8_t), q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 10112 - Length: 50 - ReplacementText: 'C_col32 = (int8_t *)sycl::malloc_device(m * 32 * sizeof(std::int8_t), q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 10166 - Length: 65 - ReplacementText: 'Adesc_col32 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, k, m * 32)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 10235 - Length: 120 - ReplacementText: 'Bdesc_col4_4r2_8c = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, k, n, ((n + 8 - 1) / 8) * 8 * 32)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 10359 - Length: 65 - ReplacementText: 'Cdesc_col32 = new 
dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, n, m * 32)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 10428 - Length: 15 - ReplacementText: 'dpct::blas_gemm::experimental::order_t' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 10452 - Length: 20 - ReplacementText: 'dpct::blas_gemm::experimental::order_t::col32' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 10476 - Length: 15 - ReplacementText: 'dpct::blas_gemm::experimental::order_t' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 10506 - Length: 26 - ReplacementText: 'dpct::blas_gemm::experimental::order_t::col4_4r2_8c' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 10536 - Length: 133 - ReplacementText: 'Adesc_col32->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 10673 - Length: 186 - ReplacementText: 'Bdesc_col4_4r2_8c->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col4_4r2_8c)' - ConstantFlag: '' - 
ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 10863 - Length: 133 - ReplacementText: 'Cdesc_col32->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 11487 - Length: 24 - ReplacementText: 'q_ct1.wait()' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 11557 - Length: 10 - ReplacementText: q_ct1.memcpy - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 11603 - Length: 24 - ReplacementText: '' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 11628 - Length: 0 - ReplacementText: '.wait()' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 11999 - Length: 25 - ReplacementText: 'delete (ltHandle)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 12028 - Length: 40 - ReplacementText: 'delete (Adesc_col32)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - 
FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 12072 - Length: 46 - ReplacementText: 'delete (Bdesc_col4_4r2_8c)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 12122 - Length: 40 - ReplacementText: 'delete (Cdesc_col32)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 12166 - Length: 44 - ReplacementText: 'delete (Adesc_col_major)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 12214 - Length: 44 - ReplacementText: 'delete (Bdesc_col_major)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 12262 - Length: 44 - ReplacementText: 'delete (Cdesc_col_major)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 12310 - Length: 14 - ReplacementText: 'dpct::dpct_free(Adev, q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 12328 - Length: 14 - ReplacementText: 'dpct::dpct_free(Bdev, q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: 
'/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 12346 - Length: 14 - ReplacementText: 'dpct::dpct_free(Cdev, q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 12426 - Length: 0 - ReplacementText: "\n dpct::device_ext &dev_ct1 = dpct::get_current_device();\n sycl::queue &q_ct1 = dev_ct1.in_order_queue();" - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 12429 - Length: 16 - ReplacementText: 'dpct::blas_gemm::experimental::descriptor_ptr' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 12458 - Length: 25 - ReplacementText: 'ltHandle = new dpct::blas_gemm::experimental::descriptor()' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 12661 - Length: 41 - ReplacementText: 'Adev = (void *)sycl::malloc_device(m * k * sizeof(int8_t), q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 12706 - Length: 41 - ReplacementText: 'Bdev = (void *)sycl::malloc_device(n * k * sizeof(int8_t), q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 12751 - Length: 41 - ReplacementText: 'Cdev = 
(void *)sycl::malloc_device(m * n * sizeof(int8_t), q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 12914 - Length: 10 - ReplacementText: q_ct1.memcpy - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 12960 - Length: 24 - ReplacementText: '' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 12989 - Length: 10 - ReplacementText: q_ct1.memcpy - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 13035 - Length: 24 - ReplacementText: '' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 13060 - Length: 0 - ReplacementText: '.wait()' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 13065 - Length: 22 - ReplacementText: 'dpct::blas_gemm::experimental::matrix_layout_ptr' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 13187 - Length: 66 - ReplacementText: 'Adesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, k, lda)' - 
ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 13257 - Length: 66 - ReplacementText: 'Bdesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, n, k, ldb)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 13327 - Length: 66 - ReplacementText: 'Cdesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, n, ldc)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 13419 - Length: 22 - ReplacementText: 'dpct::blas_gemm::experimental::matrix_layout_ptr' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 13589 - Length: 50 - ReplacementText: 'A_col32 = (int8_t *)sycl::malloc_device(m * 32 * sizeof(std::int8_t), q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 13643 - Length: 76 - ReplacementText: 'B_col4_4r2_8c = (int8_t *)sycl::malloc_device(((n + 8 - 1) / 8) * 8 * 32 * sizeof(std::int8_t), q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 13723 - Length: 50 - ReplacementText: 'C_col32 = (int8_t *)sycl::malloc_device(m * 32 * 
sizeof(std::int8_t), q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 13777 - Length: 65 - ReplacementText: 'Adesc_col32 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, k, m * 32)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 13846 - Length: 120 - ReplacementText: 'Bdesc_col4_4r2_8c = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, k, n, ((n + 8 - 1) / 8) * 8 * 32)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 13970 - Length: 65 - ReplacementText: 'Cdesc_col32 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, n, m * 32)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 14039 - Length: 15 - ReplacementText: 'dpct::blas_gemm::experimental::order_t' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 14063 - Length: 20 - ReplacementText: 'dpct::blas_gemm::experimental::order_t::col32' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 14087 - Length: 15 - ReplacementText: 
'dpct::blas_gemm::experimental::order_t' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 14117 - Length: 26 - ReplacementText: 'dpct::blas_gemm::experimental::order_t::col4_4r2_8c' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 14147 - Length: 133 - ReplacementText: 'Adesc_col32->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 14284 - Length: 186 - ReplacementText: 'Bdesc_col4_4r2_8c->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col4_4r2_8c)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 14474 - Length: 133 - ReplacementText: 'Cdesc_col32->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 14815 - Length: 44 - ReplacementText: 'alpha = sycl::malloc_shared(4, q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 15197 - Length: 24 - ReplacementText: 'q_ct1.wait()' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: 
'' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 15267 - Length: 10 - ReplacementText: q_ct1.memcpy - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 15313 - Length: 24 - ReplacementText: '' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 15338 - Length: 0 - ReplacementText: '.wait()' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 15709 - Length: 25 - ReplacementText: 'delete (ltHandle)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 15738 - Length: 40 - ReplacementText: 'delete (Adesc_col32)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 15782 - Length: 46 - ReplacementText: 'delete (Bdesc_col4_4r2_8c)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 15832 - Length: 40 - ReplacementText: 'delete (Cdesc_col32)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 
15876 - Length: 44 - ReplacementText: 'delete (Adesc_col_major)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 15924 - Length: 44 - ReplacementText: 'delete (Bdesc_col_major)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 15972 - Length: 44 - ReplacementText: 'delete (Cdesc_col_major)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 16020 - Length: 14 - ReplacementText: 'dpct::dpct_free(Adev, q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 16038 - Length: 14 - ReplacementText: 'dpct::dpct_free(Bdev, q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 16056 - Length: 14 - ReplacementText: 'dpct::dpct_free(Cdev, q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 16074 - Length: 15 - ReplacementText: 'dpct::dpct_free(alpha, q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 16156 - Length: 0 - ReplacementText: "\n dpct::device_ext &dev_ct1 = 
dpct::get_current_device();\n sycl::queue &q_ct1 = dev_ct1.in_order_queue();" - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 16159 - Length: 16 - ReplacementText: 'dpct::blas_gemm::experimental::descriptor_ptr' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 16188 - Length: 25 - ReplacementText: 'ltHandle = new dpct::blas_gemm::experimental::descriptor()' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 16391 - Length: 41 - ReplacementText: 'Adev = (void *)sycl::malloc_device(m * k * sizeof(int8_t), q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 16436 - Length: 41 - ReplacementText: 'Bdev = (void *)sycl::malloc_device(n * k * sizeof(int8_t), q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 16481 - Length: 42 - ReplacementText: 'Cdev = (void *)sycl::malloc_device(m * n * sizeof(int32_t), q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 16645 - Length: 10 - ReplacementText: q_ct1.memcpy - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: 
'/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 16691 - Length: 24 - ReplacementText: '' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 16720 - Length: 10 - ReplacementText: q_ct1.memcpy - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 16766 - Length: 24 - ReplacementText: '' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 16791 - Length: 0 - ReplacementText: '.wait()' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 16796 - Length: 22 - ReplacementText: 'dpct::blas_gemm::experimental::matrix_layout_ptr' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 16918 - Length: 66 - ReplacementText: 'Adesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, k, lda)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 16988 - Length: 66 - ReplacementText: 'Bdesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, n, k, ldb)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - 
BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 17058 - Length: 67 - ReplacementText: 'Cdesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int32, m, n, ldc)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 17151 - Length: 22 - ReplacementText: 'dpct::blas_gemm::experimental::matrix_layout_ptr' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 17324 - Length: 50 - ReplacementText: 'A_col32 = (int8_t *)sycl::malloc_device(m * 32 * sizeof(std::int8_t), q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 17378 - Length: 93 - ReplacementText: 'B_col32_2r_4r4 = (int8_t *)sycl::malloc_device(((n + 32 - 1) / 32) * 32 * 32 * sizeof(std::int8_t), q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 17475 - Length: 51 - ReplacementText: 'C_col32 = (int32_t *)sycl::malloc_device(m * 32 * sizeof(std::int32_t), q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 17530 - Length: 65 - ReplacementText: 'Adesc_col32 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, k, m * 32)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: 
'' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 17599 - Length: 124 - ReplacementText: 'Bdesc_col32_2r_4r4 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, k, n, ((n + 32 - 1) / 32) * 32 * 32)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 17727 - Length: 66 - ReplacementText: 'Cdesc_col32 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int32, m, n, m * 32)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 17797 - Length: 15 - ReplacementText: 'dpct::blas_gemm::experimental::order_t' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 17821 - Length: 20 - ReplacementText: 'dpct::blas_gemm::experimental::order_t::col32' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 17845 - Length: 15 - ReplacementText: 'dpct::blas_gemm::experimental::order_t' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 17876 - Length: 27 - ReplacementText: 'dpct::blas_gemm::experimental::order_t::col32_2r_4r4' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: 
'/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 17907 - Length: 133 - ReplacementText: 'Adesc_col32->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 18044 - Length: 189 - ReplacementText: 'Bdesc_col32_2r_4r4->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32_2r_4r4)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 18237 - Length: 133 - ReplacementText: 'Cdesc_col32->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 18867 - Length: 24 - ReplacementText: 'q_ct1.wait()' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 18938 - Length: 10 - ReplacementText: q_ct1.memcpy - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 18985 - Length: 24 - ReplacementText: '' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 19010 - Length: 0 - ReplacementText: '.wait()' - ConstantFlag: '' - 
ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 19382 - Length: 25 - ReplacementText: 'delete (ltHandle)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 19411 - Length: 40 - ReplacementText: 'delete (Adesc_col32)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 19455 - Length: 47 - ReplacementText: 'delete (Bdesc_col32_2r_4r4)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 19506 - Length: 40 - ReplacementText: 'delete (Cdesc_col32)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 19550 - Length: 44 - ReplacementText: 'delete (Adesc_col_major)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 19598 - Length: 44 - ReplacementText: 'delete (Bdesc_col_major)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 19646 - Length: 44 - ReplacementText: 'delete (Cdesc_col_major)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: 
'/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 19694 - Length: 14 - ReplacementText: 'dpct::dpct_free(Adev, q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 19712 - Length: 14 - ReplacementText: 'dpct::dpct_free(Bdev, q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 19730 - Length: 14 - ReplacementText: 'dpct::dpct_free(Cdev, q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 19810 - Length: 0 - ReplacementText: "\n dpct::device_ext &dev_ct1 = dpct::get_current_device();\n sycl::queue &q_ct1 = dev_ct1.in_order_queue();" - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 19813 - Length: 16 - ReplacementText: 'dpct::blas_gemm::experimental::descriptor_ptr' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 19842 - Length: 25 - ReplacementText: 'ltHandle = new dpct::blas_gemm::experimental::descriptor()' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 20045 - Length: 41 - ReplacementText: 'Adev = (void *)sycl::malloc_device(m * k * sizeof(int8_t), q_ct1)' - ConstantFlag: 
'' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 20090 - Length: 41 - ReplacementText: 'Bdev = (void *)sycl::malloc_device(n * k * sizeof(int8_t), q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 20135 - Length: 41 - ReplacementText: 'Cdev = (void *)sycl::malloc_device(m * n * sizeof(int8_t), q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 20298 - Length: 10 - ReplacementText: q_ct1.memcpy - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 20344 - Length: 24 - ReplacementText: '' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 20373 - Length: 10 - ReplacementText: q_ct1.memcpy - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 20419 - Length: 24 - ReplacementText: '' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 20444 - Length: 0 - ReplacementText: '.wait()' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - 
FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 20449 - Length: 22 - ReplacementText: 'dpct::blas_gemm::experimental::matrix_layout_ptr' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 20571 - Length: 66 - ReplacementText: 'Adesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, k, lda)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 20641 - Length: 66 - ReplacementText: 'Bdesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, n, k, ldb)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 20711 - Length: 66 - ReplacementText: 'Cdesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, n, ldc)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 20803 - Length: 22 - ReplacementText: 'dpct::blas_gemm::experimental::matrix_layout_ptr' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 20975 - Length: 50 - ReplacementText: 'A_col32 = (int8_t *)sycl::malloc_device(m * 32 * sizeof(std::int8_t), q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - 
- FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 21029 - Length: 93 - ReplacementText: 'B_col32_2r_4r4 = (int8_t *)sycl::malloc_device(((n + 32 - 1) / 32) * 32 * 32 * sizeof(std::int8_t), q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 21126 - Length: 50 - ReplacementText: 'C_col32 = (int8_t *)sycl::malloc_device(m * 32 * sizeof(std::int8_t), q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 21180 - Length: 65 - ReplacementText: 'Adesc_col32 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, k, m * 32)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 21249 - Length: 124 - ReplacementText: 'Bdesc_col32_2r_4r4 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, k, n, ((n + 32 - 1) / 32) * 32 * 32)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 21377 - Length: 65 - ReplacementText: 'Cdesc_col32 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, n, m * 32)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 21446 - Length: 15 - ReplacementText: 'dpct::blas_gemm::experimental::order_t' - ConstantFlag: '' - 
ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 21470 - Length: 20 - ReplacementText: 'dpct::blas_gemm::experimental::order_t::col32' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 21494 - Length: 15 - ReplacementText: 'dpct::blas_gemm::experimental::order_t' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 21525 - Length: 27 - ReplacementText: 'dpct::blas_gemm::experimental::order_t::col32_2r_4r4' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 21556 - Length: 133 - ReplacementText: 'Adesc_col32->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 21693 - Length: 189 - ReplacementText: 'Bdesc_col32_2r_4r4->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32_2r_4r4)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 21886 - Length: 133 - ReplacementText: 'Cdesc_col32->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' 
- BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 22513 - Length: 24 - ReplacementText: 'q_ct1.wait()' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 22583 - Length: 10 - ReplacementText: q_ct1.memcpy - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 22629 - Length: 24 - ReplacementText: '' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 22654 - Length: 0 - ReplacementText: '.wait()' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 23025 - Length: 25 - ReplacementText: 'delete (ltHandle)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 23054 - Length: 40 - ReplacementText: 'delete (Adesc_col32)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 23098 - Length: 47 - ReplacementText: 'delete (Bdesc_col32_2r_4r4)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 23149 - Length: 40 - 
ReplacementText: 'delete (Cdesc_col32)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 23193 - Length: 44 - ReplacementText: 'delete (Adesc_col_major)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 23241 - Length: 44 - ReplacementText: 'delete (Bdesc_col_major)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 23289 - Length: 44 - ReplacementText: 'delete (Cdesc_col_major)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 23337 - Length: 14 - ReplacementText: 'dpct::dpct_free(Adev, q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 23355 - Length: 14 - ReplacementText: 'dpct::dpct_free(Bdev, q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 23373 - Length: 14 - ReplacementText: 'dpct::dpct_free(Cdev, q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 23453 - Length: 0 - ReplacementText: "\n dpct::device_ext &dev_ct1 = dpct::get_current_device();\n 
sycl::queue &q_ct1 = dev_ct1.in_order_queue();" - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 23456 - Length: 16 - ReplacementText: 'dpct::blas_gemm::experimental::descriptor_ptr' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 23485 - Length: 25 - ReplacementText: 'ltHandle = new dpct::blas_gemm::experimental::descriptor()' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 23688 - Length: 41 - ReplacementText: 'Adev = (void *)sycl::malloc_device(m * k * sizeof(int8_t), q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 23733 - Length: 41 - ReplacementText: 'Bdev = (void *)sycl::malloc_device(n * k * sizeof(int8_t), q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 23778 - Length: 41 - ReplacementText: 'Cdev = (void *)sycl::malloc_device(m * n * sizeof(int8_t), q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 23941 - Length: 10 - ReplacementText: q_ct1.memcpy - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: 
'/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 23987 - Length: 24 - ReplacementText: '' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 24016 - Length: 10 - ReplacementText: q_ct1.memcpy - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 24062 - Length: 24 - ReplacementText: '' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 24087 - Length: 0 - ReplacementText: '.wait()' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 24092 - Length: 22 - ReplacementText: 'dpct::blas_gemm::experimental::matrix_layout_ptr' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 24214 - Length: 66 - ReplacementText: 'Adesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, k, lda)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 24284 - Length: 66 - ReplacementText: 'Bdesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, n, k, ldb)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - 
BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 24354 - Length: 66 - ReplacementText: 'Cdesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, n, ldc)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 24446 - Length: 22 - ReplacementText: 'dpct::blas_gemm::experimental::matrix_layout_ptr' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 24618 - Length: 50 - ReplacementText: 'A_col32 = (int8_t *)sycl::malloc_device(m * 32 * sizeof(std::int8_t), q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 24672 - Length: 93 - ReplacementText: 'B_col32_2r_4r4 = (int8_t *)sycl::malloc_device(((n + 32 - 1) / 32) * 32 * 32 * sizeof(std::int8_t), q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 24769 - Length: 50 - ReplacementText: 'C_col32 = (int8_t *)sycl::malloc_device(m * 32 * sizeof(std::int8_t), q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 24823 - Length: 65 - ReplacementText: 'Adesc_col32 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, k, m * 32)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' 
- NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 24892 - Length: 124 - ReplacementText: 'Bdesc_col32_2r_4r4 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, k, n, ((n + 32 - 1) / 32) * 32 * 32)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 25020 - Length: 65 - ReplacementText: 'Cdesc_col32 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, n, m * 32)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 25089 - Length: 15 - ReplacementText: 'dpct::blas_gemm::experimental::order_t' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 25113 - Length: 20 - ReplacementText: 'dpct::blas_gemm::experimental::order_t::col32' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 25137 - Length: 15 - ReplacementText: 'dpct::blas_gemm::experimental::order_t' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 25168 - Length: 27 - ReplacementText: 'dpct::blas_gemm::experimental::order_t::col32_2r_4r4' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: 
'/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 25199 - Length: 133 - ReplacementText: 'Adesc_col32->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 25336 - Length: 189 - ReplacementText: 'Bdesc_col32_2r_4r4->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32_2r_4r4)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 25529 - Length: 133 - ReplacementText: 'Cdesc_col32->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 25872 - Length: 44 - ReplacementText: 'alpha = sycl::malloc_shared(4, q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 26255 - Length: 24 - ReplacementText: 'q_ct1.wait()' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 26325 - Length: 10 - ReplacementText: q_ct1.memcpy - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 26371 - Length: 24 - ReplacementText: 
'' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 26396 - Length: 0 - ReplacementText: '.wait()' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 26767 - Length: 25 - ReplacementText: 'delete (ltHandle)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 26796 - Length: 40 - ReplacementText: 'delete (Adesc_col32)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 26840 - Length: 47 - ReplacementText: 'delete (Bdesc_col32_2r_4r4)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 26891 - Length: 40 - ReplacementText: 'delete (Cdesc_col32)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 26935 - Length: 44 - ReplacementText: 'delete (Adesc_col_major)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 26983 - Length: 44 - ReplacementText: 'delete (Bdesc_col_major)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - 
FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 27031 - Length: 44 - ReplacementText: 'delete (Cdesc_col_major)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 27079 - Length: 14 - ReplacementText: 'dpct::dpct_free(Adev, q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 27097 - Length: 14 - ReplacementText: 'dpct::dpct_free(Bdev, q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 27115 - Length: 14 - ReplacementText: 'dpct::dpct_free(Cdev, q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Offset: 27133 - Length: 15 - ReplacementText: 'dpct::dpct_free(alpha, q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 370 - Length: 22 - ReplacementText: "#include \n#include \n#include \n" - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 410 - Length: 0 - ReplacementText: "\n#include \n" - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: 
'/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 427 - Length: 16 - ReplacementText: 'dpct::blas_gemm::experimental::descriptor_ptr' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 490 - Length: 15 - ReplacementText: 'dpct::blas_gemm::experimental::order_t' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 554 - Length: 15 - ReplacementText: 'dpct::blas_gemm::experimental::order_t' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 605 - Length: 22 - ReplacementText: 'dpct::blas_gemm::experimental::matrix_layout_ptr' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 663 - Length: 29 - ReplacementText: 'dpct::blas_gemm::experimental::transform_desc_ptr' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 719 - Length: 66 - ReplacementText: 'in_desc = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, dim1, dim2, ld_in)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 789 - Length: 68 - ReplacementText: 'out_desc = new 
dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, dim1, dim2, ld_out)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 862 - Length: 135 - ReplacementText: 'in_desc->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &order_in)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 1001 - Length: 138 - ReplacementText: 'out_desc->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &order_out)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 1144 - Length: 62 - ReplacementText: 'transform_desc = new dpct::blas_gemm::experimental::transform_desc_t(dpct::library_data_t::real_float)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 1246 - Length: 133 - ReplacementText: 'dpct::blas_gemm::experimental::matrix_transform(transform_desc, &alpha, in, in_desc, &beta, NULL, NULL, out, out_desc, 0)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 1384 - Length: 36 - ReplacementText: 'delete (in_desc)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - 
Offset: 1424 - Length: 37 - ReplacementText: 'delete (out_desc)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 1465 - Length: 50 - ReplacementText: 'delete (transform_desc)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 1537 - Length: 0 - ReplacementText: "\n dpct::device_ext &dev_ct1 = dpct::get_current_device();\n sycl::queue &q_ct1 = dev_ct1.in_order_queue();" - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 1648 - Length: 47 - ReplacementText: 'in_dev = (void *)sycl::malloc_device(n * in_ld * sizeof(int8_t), q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 1980 - Length: 10 - ReplacementText: q_ct1.memcpy - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 2034 - Length: 37 - ReplacementText: '' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 2072 - Length: 0 - ReplacementText: '.wait()' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 
2077 - Length: 16 - ReplacementText: 'dpct::blas_gemm::experimental::descriptor_ptr' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 2106 - Length: 25 - ReplacementText: 'ltHandle = new dpct::blas_gemm::experimental::descriptor()' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 2188 - Length: 49 - ReplacementText: 'out_dev = (void *)sycl::malloc_device(out_ld * m * sizeof(int8_t), q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 2241 - Length: 10 - ReplacementText: q_ct1.memset - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 2292 - Length: 0 - ReplacementText: '.wait()' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 2331 - Length: 18 - ReplacementText: 'dpct::blas_gemm::experimental::order_t::col' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 2380 - Length: 18 - ReplacementText: 'dpct::blas_gemm::experimental::order_t::row' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: 
'/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 2441 - Length: 10 - ReplacementText: q_ct1.memcpy - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 2498 - Length: 37 - ReplacementText: '' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 2536 - Length: 0 - ReplacementText: '.wait()' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 3236 - Length: 10 - ReplacementText: q_ct1.memset - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 3285 - Length: 0 - ReplacementText: '.wait()' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 3381 - Length: 18 - ReplacementText: 'dpct::blas_gemm::experimental::order_t::row' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 3428 - Length: 18 - ReplacementText: 'dpct::blas_gemm::experimental::order_t::col' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 3457 - Length: 10 - 
ReplacementText: q_ct1.memcpy - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 3511 - Length: 37 - ReplacementText: '' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 3549 - Length: 0 - ReplacementText: '.wait()' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 3927 - Length: 25 - ReplacementText: 'delete (ltHandle)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 4008 - Length: 0 - ReplacementText: "\n dpct::device_ext &dev_ct1 = dpct::get_current_device();\n sycl::queue &q_ct1 = dev_ct1.in_order_queue();" - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 4119 - Length: 47 - ReplacementText: 'in_dev = (void *)sycl::malloc_device(n * in_ld * sizeof(int8_t), q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 4451 - Length: 10 - ReplacementText: q_ct1.memcpy - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 4505 - Length: 37 - ReplacementText: '' - 
ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 4543 - Length: 0 - ReplacementText: '.wait()' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 4548 - Length: 16 - ReplacementText: 'dpct::blas_gemm::experimental::descriptor_ptr' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 4577 - Length: 25 - ReplacementText: 'ltHandle = new dpct::blas_gemm::experimental::descriptor()' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 4659 - Length: 49 - ReplacementText: 'out_dev = (void *)sycl::malloc_device(out_ld * m * sizeof(int8_t), q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 4712 - Length: 10 - ReplacementText: q_ct1.memset - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 4763 - Length: 0 - ReplacementText: '.wait()' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 4802 - Length: 18 - ReplacementText: 'dpct::blas_gemm::experimental::order_t::col' - 
ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 4851 - Length: 20 - ReplacementText: 'dpct::blas_gemm::experimental::order_t::col32' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 4914 - Length: 10 - ReplacementText: q_ct1.memcpy - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 4971 - Length: 37 - ReplacementText: '' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 5009 - Length: 0 - ReplacementText: '.wait()' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 5893 - Length: 10 - ReplacementText: q_ct1.memset - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 5942 - Length: 0 - ReplacementText: '.wait()' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 6038 - Length: 20 - ReplacementText: 'dpct::blas_gemm::experimental::order_t::col32' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: 
'/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 6087 - Length: 18 - ReplacementText: 'dpct::blas_gemm::experimental::order_t::col' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 6116 - Length: 10 - ReplacementText: q_ct1.memcpy - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 6170 - Length: 37 - ReplacementText: '' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 6208 - Length: 0 - ReplacementText: '.wait()' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 6590 - Length: 25 - ReplacementText: 'delete (ltHandle)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 6677 - Length: 0 - ReplacementText: "\n dpct::device_ext &dev_ct1 = dpct::get_current_device();\n sycl::queue &q_ct1 = dev_ct1.in_order_queue();" - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 6788 - Length: 47 - ReplacementText: 'in_dev = (void *)sycl::malloc_device(n * in_ld * sizeof(int8_t), q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - 
FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 7120 - Length: 10 - ReplacementText: q_ct1.memcpy - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 7174 - Length: 37 - ReplacementText: '' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 7212 - Length: 0 - ReplacementText: '.wait()' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 7217 - Length: 16 - ReplacementText: 'dpct::blas_gemm::experimental::descriptor_ptr' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 7246 - Length: 25 - ReplacementText: 'ltHandle = new dpct::blas_gemm::experimental::descriptor()' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 7338 - Length: 49 - ReplacementText: 'out_dev = (void *)sycl::malloc_device(out_ld * m * sizeof(int8_t), q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 7391 - Length: 10 - ReplacementText: q_ct1.memset - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: 
'/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 7442 - Length: 0 - ReplacementText: '.wait()' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 7481 - Length: 18 - ReplacementText: 'dpct::blas_gemm::experimental::order_t::col' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 7530 - Length: 26 - ReplacementText: 'dpct::blas_gemm::experimental::order_t::col4_4r2_8c' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 7599 - Length: 10 - ReplacementText: q_ct1.memcpy - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 7656 - Length: 37 - ReplacementText: '' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 7694 - Length: 0 - ReplacementText: '.wait()' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 11838 - Length: 10 - ReplacementText: q_ct1.memset - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 11887 - Length: 0 - 
ReplacementText: '.wait()' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 11983 - Length: 26 - ReplacementText: 'dpct::blas_gemm::experimental::order_t::col4_4r2_8c' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 12038 - Length: 18 - ReplacementText: 'dpct::blas_gemm::experimental::order_t::col' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 12067 - Length: 10 - ReplacementText: q_ct1.memcpy - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 12121 - Length: 37 - ReplacementText: '' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 12159 - Length: 0 - ReplacementText: '.wait()' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 12553 - Length: 25 - ReplacementText: 'delete (ltHandle)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 12641 - Length: 0 - ReplacementText: "\n dpct::device_ext &dev_ct1 = dpct::get_current_device();\n sycl::queue &q_ct1 = 
dev_ct1.in_order_queue();" - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 12752 - Length: 47 - ReplacementText: 'in_dev = (void *)sycl::malloc_device(n * in_ld * sizeof(int8_t), q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 13084 - Length: 10 - ReplacementText: q_ct1.memcpy - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 13138 - Length: 37 - ReplacementText: '' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 13176 - Length: 0 - ReplacementText: '.wait()' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 13181 - Length: 16 - ReplacementText: 'dpct::blas_gemm::experimental::descriptor_ptr' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 13210 - Length: 25 - ReplacementText: 'ltHandle = new dpct::blas_gemm::experimental::descriptor()' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 13303 - Length: 49 - ReplacementText: 'out_dev = (void 
*)sycl::malloc_device(out_ld * m * sizeof(int8_t), q_ct1)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 13356 - Length: 10 - ReplacementText: q_ct1.memset - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 13407 - Length: 0 - ReplacementText: '.wait()' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 13446 - Length: 18 - ReplacementText: 'dpct::blas_gemm::experimental::order_t::col' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 13495 - Length: 27 - ReplacementText: 'dpct::blas_gemm::experimental::order_t::col32_2r_4r4' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 13565 - Length: 10 - ReplacementText: q_ct1.memcpy - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 13622 - Length: 37 - ReplacementText: '' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 13660 - Length: 0 - ReplacementText: '.wait()' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: 
'' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 27206 - Length: 10 - ReplacementText: q_ct1.memset - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 27255 - Length: 0 - ReplacementText: '.wait()' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 27351 - Length: 27 - ReplacementText: 'dpct::blas_gemm::experimental::order_t::col32_2r_4r4' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 27407 - Length: 18 - ReplacementText: 'dpct::blas_gemm::experimental::order_t::col' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 27436 - Length: 10 - ReplacementText: q_ct1.memcpy - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 27490 - Length: 37 - ReplacementText: '' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 27528 - Length: 0 - ReplacementText: '.wait()' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false - - FilePath: 
'/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Offset: 27924 - Length: 25 - ReplacementText: 'delete (ltHandle)' - ConstantFlag: '' - ConstantOffset: 0 - InitStr: '' - NewHostVarName: '' - BlockLevelFormatFlag: false -MainSourceFilesDigest: - - MainSourceFile: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/matmul.cu' - Digest: c3a2078551737823d86d0b1d17ca234a - - MainSourceFile: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt/transform.cu' - Digest: 373b38f255915be55702128932b0b290 -DpctVersion: 19.0.0 -MainHelperFileName: '' -USMLevel: '' -FeatureMap: {} -CompileTargets: {} -OptionMap: - AnalysisScopePath: - Value: '/home/zhiwei/newdisk/SYCLomatic-test/features/feature_case/cublasLt' - Specified: false - AsyncHandler: - Value: 'false' - Specified: false - BuildScript: - Value: '0' - Specified: false - CodePinEnabled: - Value: 'false' - Specified: false - CommentsEnabled: - Value: 'false' - Specified: false - CompilationsDir: - Value: '' - Specified: false - CtadEnabled: - Value: 'false' - Specified: false - EnablepProfiling: - Value: 'false' - Specified: false - ExperimentalFlag: - Value: '0' - Specified: false - ExplicitNamespace: - Value: '20' - Specified: false - ExtensionDDFlag: - Value: '0' - Specified: false - ExtensionDEFlag: - Value: '4294967295' - Specified: false - HelperFuncPreferenceFlag: - Value: '0' - Specified: false - NDRangeDim: - Value: '3' - Specified: false - NoDRYPattern: - Value: 'false' - Specified: false - OptimizeMigration: - Value: 'false' - Specified: false - ProcessAll: - Value: 'false' - Specified: false - RuleFile: - Value: '' - Specified: false - SyclNamedLambda: - Value: 'false' - Specified: false - UsmLevel: - Value: '1' - Specified: false -... 
diff --git a/features/feature_case/cublasLt/out/matmul.dp.cpp b/features/feature_case/cublasLt/out/matmul.dp.cpp deleted file mode 100644 index cdb4049e..00000000 --- a/features/feature_case/cublasLt/out/matmul.dp.cpp +++ /dev/null @@ -1,742 +0,0 @@ -// ===------------ matmul.cu ----------------------------- *- CUDA -* ----=== // -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -// ===--------------------------------------------------------------------=== // - -#include -#include -#include -#include -#include -#include - - -const constexpr int COL_TURING = 0; -const constexpr int COL_AMPERE = 1; - -// The original source of below two functions was under the license below: -// Copyright (c) Facebook, Inc. and its affiliates. -// -// This source code is licensed under the MIT license found in the -// LICENSE file in the root directory of this source tree. 
-// -// Repo: https://github.com/TimDettmers/bitsandbytes.git -inline int checkCublasStatus(int status) { - if (status != 0) { - printf("cuBLAS API failed with status %d\n", status); - //throw std::logic_error("cuBLAS API failed"); - return 1; - } - return 0; -} - -template int igemmlt(dpct::blas_gemm::experimental::descriptor_ptr ltHandle, int m, int n, int k, const int8_t *A, const int8_t *B, void *C, float *row_scale, int lda, int ldb, int ldc) - try { - dpct::device_ext &dev_ct1 = dpct::get_current_device(); - sycl::queue &q_ct1 = dev_ct1.in_order_queue(); - int has_error = 0; - dpct::blas_gemm::experimental::matmul_desc_ptr matmulDesc = NULL; - dpct::blas_gemm::experimental::matrix_layout_ptr Adesc = NULL, Bdesc = NULL, Cdesc = NULL; - oneapi::mkl::transpose opT = oneapi::mkl::transpose::trans; - dpct::blas_gemm::experimental::pointer_mode_t alphaVec = dpct::blas_gemm::experimental::pointer_mode_t::alpha_device_vector_beta_zero; - dpct::blas_gemm::experimental::order_t col32 = dpct::blas_gemm::experimental::order_t::col32; - dpct::blas_gemm::experimental::order_t col_turing = dpct::blas_gemm::experimental::order_t::col4_4r2_8c; - dpct::blas_gemm::experimental::order_t col_ampere = dpct::blas_gemm::experimental::order_t::col32_2r_4r4; - - has_error |= checkCublasStatus(DPCT_CHECK_ERROR(Adesc = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, k, lda))); - has_error |= checkCublasStatus(DPCT_CHECK_ERROR(Bdesc = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, n, k, ldb))); - - has_error |= checkCublasStatus(DPCT_CHECK_ERROR(Adesc->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32))); - if(FORMATB == COL_TURING) - has_error |= checkCublasStatus(DPCT_CHECK_ERROR(Bdesc->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col_turing))); - else - has_error |= 
checkCublasStatus(DPCT_CHECK_ERROR(Bdesc->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col_ampere))); - - if(DTYPE_OUT == 32) - { - has_error |= checkCublasStatus(DPCT_CHECK_ERROR(matmulDesc = new dpct::blas_gemm::experimental::matmul_desc_t(dpct::compute_type::i32, dpct::library_data_t::real_int32))); - has_error |= checkCublasStatus(DPCT_CHECK_ERROR(matmulDesc->set_attribute(dpct::blas_gemm::experimental::matmul_desc_t::attribute::trans_b, &opT))); - has_error |= checkCublasStatus(DPCT_CHECK_ERROR(Cdesc = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int32, m, n, ldc))); - has_error |= checkCublasStatus(DPCT_CHECK_ERROR(Cdesc->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32))); - int alpha = 1, beta = 0; - has_error |= checkCublasStatus(DPCT_CHECK_ERROR(dpct::blas_gemm::experimental::matmul(ltHandle, matmulDesc, &alpha, A, Adesc, B, Bdesc, &beta, (int32_t*)C, Cdesc, (int32_t*)C, Cdesc, 0))); - } - else - { - has_error |= checkCublasStatus(DPCT_CHECK_ERROR(matmulDesc = new dpct::blas_gemm::experimental::matmul_desc_t(dpct::compute_type::i32, dpct::library_data_t::real_float))); - has_error |= checkCublasStatus(DPCT_CHECK_ERROR(matmulDesc->set_attribute(dpct::blas_gemm::experimental::matmul_desc_t::attribute::trans_b, &opT))); - has_error |= checkCublasStatus(DPCT_CHECK_ERROR(Cdesc = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, n, ldc))); - has_error |= checkCublasStatus(DPCT_CHECK_ERROR(Cdesc->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32))); - if(!SCALE_ROWS) - { - float alpha = 1.0f, beta = 0.0f; - has_error |= checkCublasStatus(DPCT_CHECK_ERROR(dpct::blas_gemm::experimental::matmul(ltHandle, matmulDesc, &alpha, A, Adesc, B, Bdesc, &beta, (int8_t*)C, Cdesc, (int8_t*)C, Cdesc, 0))); - } - else - { - has_error |= 
checkCublasStatus(DPCT_CHECK_ERROR(matmulDesc->set_attribute(dpct::blas_gemm::experimental::matmul_desc_t::attribute::pointer_mode, &alphaVec))); - has_error |= checkCublasStatus(DPCT_CHECK_ERROR(dpct::blas_gemm::experimental::matmul(ltHandle, matmulDesc, row_scale, A, Adesc, B, Bdesc, NULL, (int8_t*)C, Cdesc, (int8_t*)C, Cdesc, 0))); - } - } - - q_ct1.wait(); - - if (Cdesc) has_error |= checkCublasStatus(DPCT_CHECK_ERROR(delete (Cdesc))); - if (Bdesc) has_error |= checkCublasStatus(DPCT_CHECK_ERROR(delete (Bdesc))); - if (Adesc) has_error |= checkCublasStatus(DPCT_CHECK_ERROR(delete (Adesc))); - if (matmulDesc) has_error |= checkCublasStatus(DPCT_CHECK_ERROR(delete (matmulDesc))); - if(has_error == 1) - printf("error detected"); - - return has_error; -} -catch (sycl::exception const &exc) { - std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl; - std::exit(1); -} - -void transform(dpct::blas_gemm::experimental::descriptor_ptr ltHandle, const void *in, int ld_in, - dpct::blas_gemm::experimental::matrix_layout_ptr layout_in, void *out, int ld_out, - dpct::blas_gemm::experimental::matrix_layout_ptr layout_out) { - dpct::blas_gemm::experimental::transform_desc_ptr transform_desc = NULL; - transform_desc = new dpct::blas_gemm::experimental::transform_desc_t(dpct::library_data_t::real_float); - float alpha = 1.0f, beta = 0.0f; - dpct::blas_gemm::experimental::matrix_transform(transform_desc, &alpha, in, layout_in, &beta, NULL, NULL, out, layout_out, 0); - delete (transform_desc); -} - -// igemmlt -bool test1() { - dpct::device_ext &dev_ct1 = dpct::get_current_device(); - sycl::queue &q_ct1 = dev_ct1.in_order_queue(); - dpct::blas_gemm::experimental::descriptor_ptr ltHandle; - ltHandle = new dpct::blas_gemm::experimental::descriptor(); - const constexpr int m = 4; - const constexpr int n = 2; - const constexpr int k = 3; - int lda = m; - int ldb = n; - int ldc = m; - void *Adev; - void *Bdev; - void *Cdev; - Adev = 
(void *)sycl::malloc_device(m * k * sizeof(int8_t), q_ct1); - Bdev = (void *)sycl::malloc_device(n * k * sizeof(int8_t), q_ct1); - Cdev = (void *)sycl::malloc_device(m * n * sizeof(int32_t), q_ct1); - - int8_t Ahost[m * k] = {6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; - int8_t Bhost[n * k] = {5, 4, -3, -2, 1, 0}; - - q_ct1.memcpy(Adev, Ahost, m * k * sizeof(int8_t)); - q_ct1.memcpy(Bdev, Bhost, n * k * sizeof(int8_t)).wait(); - - dpct::blas_gemm::experimental::matrix_layout_ptr Adesc_col_major = NULL, Bdesc_col_major = NULL, - Cdesc_col_major = NULL; - Adesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, k, lda); - Bdesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, n, k, ldb); - Cdesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int32, m, n, ldc); - - // Convert A and B - dpct::blas_gemm::experimental::matrix_layout_ptr Adesc_col32 = NULL, Bdesc_col4_4r2_8c = NULL, - Cdesc_col32 = NULL; - int8_t *A_col32, *B_col4_4r2_8c; - int32_t *C_col32; - A_col32 = (int8_t *)sycl::malloc_device(m * 32 * sizeof(std::int8_t), q_ct1); - B_col4_4r2_8c = (int8_t *)sycl::malloc_device(((n + 8 - 1) / 8) * 8 * 32 * sizeof(std::int8_t), q_ct1); - C_col32 = (int32_t *)sycl::malloc_device(m * 32 * sizeof(std::int32_t), q_ct1); - Adesc_col32 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, k, m * 32); - Bdesc_col4_4r2_8c = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, k, n, ((n + 8 - 1) / 8) * 8 * 32); - Cdesc_col32 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int32, m, n, m * 32); - dpct::blas_gemm::experimental::order_t col32 = dpct::blas_gemm::experimental::order_t::col32; - dpct::blas_gemm::experimental::order_t col4_4r2_8c = dpct::blas_gemm::experimental::order_t::col4_4r2_8c; - 
Adesc_col32->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32); - Bdesc_col4_4r2_8c->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col4_4r2_8c); - Cdesc_col32->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32); - - transform(ltHandle, Adev, lda, Adesc_col_major, A_col32, m * 32, Adesc_col32); - transform(ltHandle, Bdev, ldb, Bdesc_col_major, B_col4_4r2_8c, 8 * 32, - Bdesc_col4_4r2_8c); - - // Matmul - igemmlt(ltHandle, m, n, k, A_col32, B_col4_4r2_8c, C_col32, - nullptr, m * 32, ((n + 8 - 1) / 8) * 8 * 32, - m * 32); - - // Convert C - transform(ltHandle, C_col32, m * 32, Cdesc_col32, Cdev, ldc, Cdesc_col_major); - q_ct1.wait(); - - // Check result - int32_t Chost[m * n]; - q_ct1.memcpy(Chost, Cdev, m * n * sizeof(int32_t)).wait(); - - bool error = false; - int32_t C_ref[m * n] = {14, 17, 20, 23, 4, 6, 8, 10}; - for (int i = 0; i < m * n; i++) { - if (Chost[i] != C_ref[i]) { - error = true; - break; - } - } - printf("c:\n"); - for (int i = 0; i < m * n; i++) - printf("%d, ", Chost[i]); - printf("\n"); - - if (error) { - printf("error\n"); - } else { - printf("success\n"); - } - - delete (ltHandle); - delete (Adesc_col32); - delete (Bdesc_col4_4r2_8c); - delete (Cdesc_col32); - delete (Adesc_col_major); - delete (Bdesc_col_major); - delete (Cdesc_col_major); - dpct::dpct_free(Adev, q_ct1); - dpct::dpct_free(Bdev, q_ct1); - dpct::dpct_free(Cdev, q_ct1); - - return !error; -} - -// igemmlt -bool test2() { - dpct::device_ext &dev_ct1 = dpct::get_current_device(); - sycl::queue &q_ct1 = dev_ct1.in_order_queue(); - dpct::blas_gemm::experimental::descriptor_ptr ltHandle; - ltHandle = new dpct::blas_gemm::experimental::descriptor(); - const constexpr int m = 4; - const constexpr int n = 2; - const constexpr int k = 3; - int lda = m; - int ldb = n; - int ldc = m; - void *Adev; - void *Bdev; - void *Cdev; - Adev = (void *)sycl::malloc_device(m * k * sizeof(int8_t), 
q_ct1); - Bdev = (void *)sycl::malloc_device(n * k * sizeof(int8_t), q_ct1); - Cdev = (void *)sycl::malloc_device(m * n * sizeof(int8_t), q_ct1); - - int8_t Ahost[m * k] = {6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; - int8_t Bhost[n * k] = {5, 4, -3, -2, 1, 0}; - - q_ct1.memcpy(Adev, Ahost, m * k * sizeof(int8_t)); - q_ct1.memcpy(Bdev, Bhost, n * k * sizeof(int8_t)).wait(); - - dpct::blas_gemm::experimental::matrix_layout_ptr Adesc_col_major = NULL, Bdesc_col_major = NULL, - Cdesc_col_major = NULL; - Adesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, k, lda); - Bdesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, n, k, ldb); - Cdesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, n, ldc); - - // Convert A and B - dpct::blas_gemm::experimental::matrix_layout_ptr Adesc_col32 = NULL, Bdesc_col4_4r2_8c = NULL, - Cdesc_col32 = NULL; - int8_t *A_col32, *B_col4_4r2_8c; - int8_t *C_col32; - A_col32 = (int8_t *)sycl::malloc_device(m * 32 * sizeof(std::int8_t), q_ct1); - B_col4_4r2_8c = (int8_t *)sycl::malloc_device(((n + 8 - 1) / 8) * 8 * 32 * sizeof(std::int8_t), q_ct1); - C_col32 = (int8_t *)sycl::malloc_device(m * 32 * sizeof(std::int8_t), q_ct1); - Adesc_col32 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, k, m * 32); - Bdesc_col4_4r2_8c = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, k, n, ((n + 8 - 1) / 8) * 8 * 32); - Cdesc_col32 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, n, m * 32); - dpct::blas_gemm::experimental::order_t col32 = dpct::blas_gemm::experimental::order_t::col32; - dpct::blas_gemm::experimental::order_t col4_4r2_8c = dpct::blas_gemm::experimental::order_t::col4_4r2_8c; - Adesc_col32->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32); - 
Bdesc_col4_4r2_8c->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col4_4r2_8c); - Cdesc_col32->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32); - - transform(ltHandle, Adev, lda, Adesc_col_major, A_col32, m * 32, Adesc_col32); - transform(ltHandle, Bdev, ldb, Bdesc_col_major, B_col4_4r2_8c, 8 * 32, - Bdesc_col4_4r2_8c); - - // Matmul - igemmlt(ltHandle, m, n, k, A_col32, B_col4_4r2_8c, C_col32, - nullptr, m * 32, ((n + 8 - 1) / 8) * 8 * 32, - m * 32); - - // Convert C - transform(ltHandle, C_col32, m * 32, Cdesc_col32, Cdev, ldc, Cdesc_col_major); - q_ct1.wait(); - - // Check result - int8_t Chost[m * n]; - q_ct1.memcpy(Chost, Cdev, m * n * sizeof(int8_t)).wait(); - - bool error = false; - int8_t C_ref[m * n] = {14, 17, 20, 23, 4, 6, 8, 10}; - for (int i = 0; i < m * n; i++) { - if (Chost[i] != C_ref[i]) { - error = true; - break; - } - } - printf("c:\n"); - for (int i = 0; i < m * n; i++) - printf("%d, ", Chost[i]); - printf("\n"); - - if (error) { - printf("error\n"); - } else { - printf("success\n"); - } - - delete (ltHandle); - delete (Adesc_col32); - delete (Bdesc_col4_4r2_8c); - delete (Cdesc_col32); - delete (Adesc_col_major); - delete (Bdesc_col_major); - delete (Cdesc_col_major); - dpct::dpct_free(Adev, q_ct1); - dpct::dpct_free(Bdev, q_ct1); - dpct::dpct_free(Cdev, q_ct1); - - return !error; -} - -// igemmlt -bool test3() { - dpct::device_ext &dev_ct1 = dpct::get_current_device(); - sycl::queue &q_ct1 = dev_ct1.in_order_queue(); - dpct::blas_gemm::experimental::descriptor_ptr ltHandle; - ltHandle = new dpct::blas_gemm::experimental::descriptor(); - const constexpr int m = 4; - const constexpr int n = 2; - const constexpr int k = 3; - int lda = m; - int ldb = n; - int ldc = m; - void *Adev; - void *Bdev; - void *Cdev; - Adev = (void *)sycl::malloc_device(m * k * sizeof(int8_t), q_ct1); - Bdev = (void *)sycl::malloc_device(n * k * sizeof(int8_t), q_ct1); - Cdev = (void 
*)sycl::malloc_device(m * n * sizeof(int8_t), q_ct1); - - int8_t Ahost[m * k] = {6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; - int8_t Bhost[n * k] = {5, 4, -3, -2, 1, 0}; - - q_ct1.memcpy(Adev, Ahost, m * k * sizeof(int8_t)); - q_ct1.memcpy(Bdev, Bhost, n * k * sizeof(int8_t)).wait(); - - dpct::blas_gemm::experimental::matrix_layout_ptr Adesc_col_major = NULL, Bdesc_col_major = NULL, - Cdesc_col_major = NULL; - Adesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, k, lda); - Bdesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, n, k, ldb); - Cdesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, n, ldc); - - // Convert A and B - dpct::blas_gemm::experimental::matrix_layout_ptr Adesc_col32 = NULL, Bdesc_col4_4r2_8c = NULL, - Cdesc_col32 = NULL; - int8_t *A_col32, *B_col4_4r2_8c; - int8_t *C_col32; - A_col32 = (int8_t *)sycl::malloc_device(m * 32 * sizeof(std::int8_t), q_ct1); - B_col4_4r2_8c = (int8_t *)sycl::malloc_device(((n + 8 - 1) / 8) * 8 * 32 * sizeof(std::int8_t), q_ct1); - C_col32 = (int8_t *)sycl::malloc_device(m * 32 * sizeof(std::int8_t), q_ct1); - Adesc_col32 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, k, m * 32); - Bdesc_col4_4r2_8c = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, k, n, ((n + 8 - 1) / 8) * 8 * 32); - Cdesc_col32 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, n, m * 32); - dpct::blas_gemm::experimental::order_t col32 = dpct::blas_gemm::experimental::order_t::col32; - dpct::blas_gemm::experimental::order_t col4_4r2_8c = dpct::blas_gemm::experimental::order_t::col4_4r2_8c; - Adesc_col32->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32); - 
Bdesc_col4_4r2_8c->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col4_4r2_8c); - Cdesc_col32->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32); - - transform(ltHandle, Adev, lda, Adesc_col_major, A_col32, m * 32, Adesc_col32); - transform(ltHandle, Bdev, ldb, Bdesc_col_major, B_col4_4r2_8c, 8 * 32, - Bdesc_col4_4r2_8c); - - float *alpha; - alpha = sycl::malloc_shared(4, q_ct1); - alpha[0] = 0; - alpha[1] = 1; - alpha[2] = 2; - alpha[3] = 3; - - // Matmul - igemmlt(ltHandle, m, n, k, A_col32, B_col4_4r2_8c, C_col32, - alpha, m * 32, ((n + 8 - 1) / 8) * 8 * 32, m * 32); - - // Convert C - transform(ltHandle, C_col32, m * 32, Cdesc_col32, Cdev, ldc, Cdesc_col_major); - q_ct1.wait(); - - // Check result - int8_t Chost[m * n]; - q_ct1.memcpy(Chost, Cdev, m * n * sizeof(int8_t)).wait(); - - bool error = false; - int8_t C_ref[m * n] = {0, 17, 40, 69, 0, 6, 16, 30}; - for (int i = 0; i < m * n; i++) { - if (Chost[i] != C_ref[i]) { - error = true; - break; - } - } - printf("c:\n"); - for (int i = 0; i < m * n; i++) - printf("%d, ", Chost[i]); - printf("\n"); - - if (error) { - printf("error\n"); - } else { - printf("success\n"); - } - - delete (ltHandle); - delete (Adesc_col32); - delete (Bdesc_col4_4r2_8c); - delete (Cdesc_col32); - delete (Adesc_col_major); - delete (Bdesc_col_major); - delete (Cdesc_col_major); - dpct::dpct_free(Adev, q_ct1); - dpct::dpct_free(Bdev, q_ct1); - dpct::dpct_free(Cdev, q_ct1); - dpct::dpct_free(alpha, q_ct1); - - return !error; -} - -// igemmlt -bool test4() { - dpct::device_ext &dev_ct1 = dpct::get_current_device(); - sycl::queue &q_ct1 = dev_ct1.in_order_queue(); - dpct::blas_gemm::experimental::descriptor_ptr ltHandle; - ltHandle = new dpct::blas_gemm::experimental::descriptor(); - const constexpr int m = 4; - const constexpr int n = 2; - const constexpr int k = 3; - int lda = m; - int ldb = n; - int ldc = m; - void *Adev; - void *Bdev; - void *Cdev; - Adev = (void 
*)sycl::malloc_device(m * k * sizeof(int8_t), q_ct1); - Bdev = (void *)sycl::malloc_device(n * k * sizeof(int8_t), q_ct1); - Cdev = (void *)sycl::malloc_device(m * n * sizeof(int32_t), q_ct1); - - int8_t Ahost[m * k] = {6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; - int8_t Bhost[n * k] = {5, 4, -3, -2, 1, 0}; - - q_ct1.memcpy(Adev, Ahost, m * k * sizeof(int8_t)); - q_ct1.memcpy(Bdev, Bhost, n * k * sizeof(int8_t)).wait(); - - dpct::blas_gemm::experimental::matrix_layout_ptr Adesc_col_major = NULL, Bdesc_col_major = NULL, - Cdesc_col_major = NULL; - Adesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, k, lda); - Bdesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, n, k, ldb); - Cdesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int32, m, n, ldc); - - // Convert A and B - dpct::blas_gemm::experimental::matrix_layout_ptr Adesc_col32 = NULL, Bdesc_col32_2r_4r4 = NULL, - Cdesc_col32 = NULL; - int8_t *A_col32, *B_col32_2r_4r4; - int32_t *C_col32; - A_col32 = (int8_t *)sycl::malloc_device(m * 32 * sizeof(std::int8_t), q_ct1); - B_col32_2r_4r4 = (int8_t *)sycl::malloc_device(((n + 32 - 1) / 32) * 32 * 32 * sizeof(std::int8_t), q_ct1); - C_col32 = (int32_t *)sycl::malloc_device(m * 32 * sizeof(std::int32_t), q_ct1); - Adesc_col32 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, k, m * 32); - Bdesc_col32_2r_4r4 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, k, n, ((n + 32 - 1) / 32) * 32 * 32); - Cdesc_col32 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int32, m, n, m * 32); - dpct::blas_gemm::experimental::order_t col32 = dpct::blas_gemm::experimental::order_t::col32; - dpct::blas_gemm::experimental::order_t col32_2r_4r4 = dpct::blas_gemm::experimental::order_t::col32_2r_4r4; - 
Adesc_col32->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32); - Bdesc_col32_2r_4r4->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32_2r_4r4); - Cdesc_col32->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32); - - transform(ltHandle, Adev, lda, Adesc_col_major, A_col32, m * 32, Adesc_col32); - transform(ltHandle, Bdev, ldb, Bdesc_col_major, B_col32_2r_4r4, 8 * 32, - Bdesc_col32_2r_4r4); - - // Matmul - igemmlt(ltHandle, m, n, k, A_col32, B_col32_2r_4r4, - C_col32, nullptr, m * 32, - ((n + 8 - 1) / 8) * 8 * 32, m * 32); - - // Convert C - transform(ltHandle, C_col32, m * 32, Cdesc_col32, Cdev, ldc, Cdesc_col_major); - q_ct1.wait(); - - // Check result - int32_t Chost[m * n]; - q_ct1.memcpy(Chost, Cdev, m * n * sizeof(int32_t)).wait(); - - bool error = false; - int32_t C_ref[m * n] = {14, 17, 20, 23, 4, 6, 8, 10}; - for (int i = 0; i < m * n; i++) { - if (Chost[i] != C_ref[i]) { - error = true; - break; - } - } - printf("c:\n"); - for (int i = 0; i < m * n; i++) - printf("%d, ", Chost[i]); - printf("\n"); - - if (error) { - printf("error\n"); - } else { - printf("success\n"); - } - - delete (ltHandle); - delete (Adesc_col32); - delete (Bdesc_col32_2r_4r4); - delete (Cdesc_col32); - delete (Adesc_col_major); - delete (Bdesc_col_major); - delete (Cdesc_col_major); - dpct::dpct_free(Adev, q_ct1); - dpct::dpct_free(Bdev, q_ct1); - dpct::dpct_free(Cdev, q_ct1); - - return !error; -} - -// igemmlt -bool test5() { - dpct::device_ext &dev_ct1 = dpct::get_current_device(); - sycl::queue &q_ct1 = dev_ct1.in_order_queue(); - dpct::blas_gemm::experimental::descriptor_ptr ltHandle; - ltHandle = new dpct::blas_gemm::experimental::descriptor(); - const constexpr int m = 4; - const constexpr int n = 2; - const constexpr int k = 3; - int lda = m; - int ldb = n; - int ldc = m; - void *Adev; - void *Bdev; - void *Cdev; - Adev = (void *)sycl::malloc_device(m * k * 
sizeof(int8_t), q_ct1); - Bdev = (void *)sycl::malloc_device(n * k * sizeof(int8_t), q_ct1); - Cdev = (void *)sycl::malloc_device(m * n * sizeof(int8_t), q_ct1); - - int8_t Ahost[m * k] = {6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; - int8_t Bhost[n * k] = {5, 4, -3, -2, 1, 0}; - - q_ct1.memcpy(Adev, Ahost, m * k * sizeof(int8_t)); - q_ct1.memcpy(Bdev, Bhost, n * k * sizeof(int8_t)).wait(); - - dpct::blas_gemm::experimental::matrix_layout_ptr Adesc_col_major = NULL, Bdesc_col_major = NULL, - Cdesc_col_major = NULL; - Adesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, k, lda); - Bdesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, n, k, ldb); - Cdesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, n, ldc); - - // Convert A and B - dpct::blas_gemm::experimental::matrix_layout_ptr Adesc_col32 = NULL, Bdesc_col32_2r_4r4 = NULL, - Cdesc_col32 = NULL; - int8_t *A_col32, *B_col32_2r_4r4; - int8_t *C_col32; - A_col32 = (int8_t *)sycl::malloc_device(m * 32 * sizeof(std::int8_t), q_ct1); - B_col32_2r_4r4 = (int8_t *)sycl::malloc_device(((n + 32 - 1) / 32) * 32 * 32 * sizeof(std::int8_t), q_ct1); - C_col32 = (int8_t *)sycl::malloc_device(m * 32 * sizeof(std::int8_t), q_ct1); - Adesc_col32 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, k, m * 32); - Bdesc_col32_2r_4r4 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, k, n, ((n + 32 - 1) / 32) * 32 * 32); - Cdesc_col32 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, n, m * 32); - dpct::blas_gemm::experimental::order_t col32 = dpct::blas_gemm::experimental::order_t::col32; - dpct::blas_gemm::experimental::order_t col32_2r_4r4 = dpct::blas_gemm::experimental::order_t::col32_2r_4r4; - 
Adesc_col32->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32); - Bdesc_col32_2r_4r4->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32_2r_4r4); - Cdesc_col32->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32); - - transform(ltHandle, Adev, lda, Adesc_col_major, A_col32, m * 32, Adesc_col32); - transform(ltHandle, Bdev, ldb, Bdesc_col_major, B_col32_2r_4r4, 8 * 32, - Bdesc_col32_2r_4r4); - - // Matmul - igemmlt(ltHandle, m, n, k, A_col32, B_col32_2r_4r4, C_col32, - nullptr, m * 32, ((n + 8 - 1) / 8) * 8 * 32, - m * 32); - - // Convert C - transform(ltHandle, C_col32, m * 32, Cdesc_col32, Cdev, ldc, Cdesc_col_major); - q_ct1.wait(); - - // Check result - int8_t Chost[m * n]; - q_ct1.memcpy(Chost, Cdev, m * n * sizeof(int8_t)).wait(); - - bool error = false; - int8_t C_ref[m * n] = {14, 17, 20, 23, 4, 6, 8, 10}; - for (int i = 0; i < m * n; i++) { - if (Chost[i] != C_ref[i]) { - error = true; - break; - } - } - printf("c:\n"); - for (int i = 0; i < m * n; i++) - printf("%d, ", Chost[i]); - printf("\n"); - - if (error) { - printf("error\n"); - } else { - printf("success\n"); - } - - delete (ltHandle); - delete (Adesc_col32); - delete (Bdesc_col32_2r_4r4); - delete (Cdesc_col32); - delete (Adesc_col_major); - delete (Bdesc_col_major); - delete (Cdesc_col_major); - dpct::dpct_free(Adev, q_ct1); - dpct::dpct_free(Bdev, q_ct1); - dpct::dpct_free(Cdev, q_ct1); - - return !error; -} - -// igemmlt -bool test6() { - dpct::device_ext &dev_ct1 = dpct::get_current_device(); - sycl::queue &q_ct1 = dev_ct1.in_order_queue(); - dpct::blas_gemm::experimental::descriptor_ptr ltHandle; - ltHandle = new dpct::blas_gemm::experimental::descriptor(); - const constexpr int m = 4; - const constexpr int n = 2; - const constexpr int k = 3; - int lda = m; - int ldb = n; - int ldc = m; - void *Adev; - void *Bdev; - void *Cdev; - Adev = (void *)sycl::malloc_device(m * k * 
sizeof(int8_t), q_ct1); - Bdev = (void *)sycl::malloc_device(n * k * sizeof(int8_t), q_ct1); - Cdev = (void *)sycl::malloc_device(m * n * sizeof(int8_t), q_ct1); - - int8_t Ahost[m * k] = {6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; - int8_t Bhost[n * k] = {5, 4, -3, -2, 1, 0}; - - q_ct1.memcpy(Adev, Ahost, m * k * sizeof(int8_t)); - q_ct1.memcpy(Bdev, Bhost, n * k * sizeof(int8_t)).wait(); - - dpct::blas_gemm::experimental::matrix_layout_ptr Adesc_col_major = NULL, Bdesc_col_major = NULL, - Cdesc_col_major = NULL; - Adesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, k, lda); - Bdesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, n, k, ldb); - Cdesc_col_major = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, n, ldc); - - // Convert A and B - dpct::blas_gemm::experimental::matrix_layout_ptr Adesc_col32 = NULL, Bdesc_col32_2r_4r4 = NULL, - Cdesc_col32 = NULL; - int8_t *A_col32, *B_col32_2r_4r4; - int8_t *C_col32; - A_col32 = (int8_t *)sycl::malloc_device(m * 32 * sizeof(std::int8_t), q_ct1); - B_col32_2r_4r4 = (int8_t *)sycl::malloc_device(((n + 32 - 1) / 32) * 32 * 32 * sizeof(std::int8_t), q_ct1); - C_col32 = (int8_t *)sycl::malloc_device(m * 32 * sizeof(std::int8_t), q_ct1); - Adesc_col32 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, k, m * 32); - Bdesc_col32_2r_4r4 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, k, n, ((n + 32 - 1) / 32) * 32 * 32); - Cdesc_col32 = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, m, n, m * 32); - dpct::blas_gemm::experimental::order_t col32 = dpct::blas_gemm::experimental::order_t::col32; - dpct::blas_gemm::experimental::order_t col32_2r_4r4 = dpct::blas_gemm::experimental::order_t::col32_2r_4r4; - 
Adesc_col32->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32); - Bdesc_col32_2r_4r4->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32_2r_4r4); - Cdesc_col32->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &col32); - - transform(ltHandle, Adev, lda, Adesc_col_major, A_col32, m * 32, Adesc_col32); - transform(ltHandle, Bdev, ldb, Bdesc_col_major, B_col32_2r_4r4, 8 * 32, - Bdesc_col32_2r_4r4); - - float *alpha; - alpha = sycl::malloc_shared(4, q_ct1); - alpha[0] = 0; - alpha[1] = 1; - alpha[2] = 2; - alpha[3] = 3; - - // Matmul - igemmlt(ltHandle, m, n, k, A_col32, B_col32_2r_4r4, C_col32, - alpha, m * 32, ((n + 8 - 1) / 8) * 8 * 32, m * 32); - - // Convert C - transform(ltHandle, C_col32, m * 32, Cdesc_col32, Cdev, ldc, Cdesc_col_major); - q_ct1.wait(); - - // Check result - int8_t Chost[m * n]; - q_ct1.memcpy(Chost, Cdev, m * n * sizeof(int8_t)).wait(); - - bool error = false; - int8_t C_ref[m * n] = {0, 17, 40, 69, 0, 6, 16, 30}; - for (int i = 0; i < m * n; i++) { - if (Chost[i] != C_ref[i]) { - error = true; - break; - } - } - printf("c:\n"); - for (int i = 0; i < m * n; i++) - printf("%d, ", Chost[i]); - printf("\n"); - - if (error) { - printf("error\n"); - } else { - printf("success\n"); - } - - delete (ltHandle); - delete (Adesc_col32); - delete (Bdesc_col32_2r_4r4); - delete (Cdesc_col32); - delete (Adesc_col_major); - delete (Bdesc_col_major); - delete (Cdesc_col_major); - dpct::dpct_free(Adev, q_ct1); - dpct::dpct_free(Bdev, q_ct1); - dpct::dpct_free(Cdev, q_ct1); - dpct::dpct_free(alpha, q_ct1); - - return !error; -} - -// clang-format off -// A (4*3) B (2*3) -// 6 10 14 5 -3 1 -// 7 11 15 4 -2 0 -// 8 12 16 -// 9 13 17 -// -// alpha * A * op(B) = alpha * C = C -// 0 6 10 14 5 4 0 14 4 0 0 -// 1 7 11 15 -3 -2 1 17 6 17 6 -// 2 8 12 16 1 0 2 20 8 40 16 -// 3 9 13 17 3 23 10 69 30 -// -// alpha * A * op(B) = alpha * C = C -// 1 6 10 14 5 4 1 14 4 14 
4 -// 7 11 15 -3 -2 17 6 17 6 -// 8 12 16 1 0 20 8 20 8 -// 9 13 17 23 10 23 10 -// clang-format on - -int main() { - bool pass = true; - pass = test1() && pass; - pass = test2() && pass; - pass = test3() && pass; - pass = test4() && pass; - pass = test5() && pass; - pass = test6() && pass; - return pass ? 0 : 1; -} diff --git a/features/feature_case/cublasLt/out/transform.dp.cpp b/features/feature_case/cublasLt/out/transform.dp.cpp deleted file mode 100644 index 9d9a0717..00000000 --- a/features/feature_case/cublasLt/out/transform.dp.cpp +++ /dev/null @@ -1,597 +0,0 @@ -// ===------------ transform.cu -------------------------- *- CUDA -* ----=== // -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -// ===--------------------------------------------------------------------=== // - -#include -#include -#include -#include -#include - - -void transform(dpct::blas_gemm::experimental::descriptor_ptr ltHandle, void *in, int ld_in, - dpct::blas_gemm::experimental::order_t order_in, void *out, int ld_out, - dpct::blas_gemm::experimental::order_t order_out, int dim1, int dim2) { - dpct::blas_gemm::experimental::matrix_layout_ptr in_desc = NULL, out_desc = NULL; - dpct::blas_gemm::experimental::transform_desc_ptr transform_desc = NULL; - - in_desc = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, dim1, dim2, ld_in); - out_desc = new dpct::blas_gemm::experimental::matrix_layout_t(dpct::library_data_t::real_int8, dim1, dim2, ld_out); - - in_desc->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &order_in); - out_desc->set_attribute(dpct::blas_gemm::experimental::matrix_layout_t::attribute::order, &order_out); - - transform_desc = new dpct::blas_gemm::experimental::transform_desc_t(dpct::library_data_t::real_float); - - float alpha = 1.0f, beta = 0.0f; 
- dpct::blas_gemm::experimental::matrix_transform(transform_desc, &alpha, in, in_desc, &beta, NULL, NULL, out, out_desc, 0); - - delete (in_desc); - delete (out_desc); - delete (transform_desc); -} - -bool test_ROW() { - dpct::device_ext &dev_ct1 = dpct::get_current_device(); - sycl::queue &q_ct1 = dev_ct1.in_order_queue(); - const constexpr int m = 2; - const constexpr int n = 33; - const constexpr int in_ld = 4; - void *in_dev; - in_dev = (void *)sycl::malloc_device(n * in_ld * sizeof(int8_t), q_ct1); - - int8_t in_host[n * in_ld]; - int8_t value = 0; - for (int i = 0; i < n * in_ld; i++) { - if (i % 4 < 2) { - in_host[i] = value; - value++; - } else - in_host[i] = 99; - } - int8_t ref_2nd[n * in_ld]; - std::memcpy(ref_2nd, in_host, n * in_ld * sizeof(int8_t)); - - q_ct1.memcpy(in_dev, in_host, n * in_ld * sizeof(int8_t)).wait(); - - dpct::blas_gemm::experimental::descriptor_ptr ltHandle; - ltHandle = new dpct::blas_gemm::experimental::descriptor(); - - void *out_dev; - const constexpr int out_ld = 36; - out_dev = (void *)sycl::malloc_device(out_ld * m * sizeof(int8_t), q_ct1); - q_ct1.memset(out_dev, 0, out_ld * m * sizeof(int8_t)).wait(); - transform(ltHandle, in_dev, in_ld, dpct::blas_gemm::experimental::order_t::col, out_dev, out_ld, - dpct::blas_gemm::experimental::order_t::row, m, n); - - int8_t out_host[out_ld * m]; - q_ct1.memcpy(out_host, out_dev, out_ld * m * sizeof(int8_t)).wait(); - - bool pass_1st = true; - int8_t ref_1st[out_ld * m] = - {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 0, 0, 0, - 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 0, 0, 0}; - for (int i = 0; i < out_ld * m; i++) { - if (i % out_ld < n) { - if (out_host[i] != ref_1st[i]) { - pass_1st = false; - break; - } - } - } - - for (int i = 0; i < out_ld * m; i++) { - printf("%d, ", out_host[i]); - } - printf("\n"); - if 
(pass_1st) { - printf("ROW 1st pass\n"); - } else { - printf("ROW 1st fail\n"); - } - - q_ct1.memset(in_dev, 0, n * in_ld * sizeof(int8_t)).wait(); - std::memset(in_host, 0, n * in_ld * sizeof(int8_t)); - transform(ltHandle, out_dev, out_ld, dpct::blas_gemm::experimental::order_t::row, in_dev, in_ld, - dpct::blas_gemm::experimental::order_t::col, m, n); - q_ct1.memcpy(in_host, in_dev, n * in_ld * sizeof(int8_t)).wait(); - - bool pass_2nd = true; - for (int i = 0; i < n * in_ld; i++) { - if (i % in_ld < m) { - if (in_host[i] != ref_2nd[i]) { - pass_2nd = false; - break; - } - } - } - - for (int i = 0; i < n * in_ld; i++) { - printf("%d, ", in_host[i]); - } - printf("\n"); - if (pass_2nd) { - printf("ROW 2nd pass\n"); - } else { - printf("ROW 2nd fail\n"); - } - - delete (ltHandle); - - return pass_1st && pass_2nd; -} - -bool test_COL32() { - dpct::device_ext &dev_ct1 = dpct::get_current_device(); - sycl::queue &q_ct1 = dev_ct1.in_order_queue(); - const constexpr int m = 2; - const constexpr int n = 33; - const constexpr int in_ld = 4; - void *in_dev; - in_dev = (void *)sycl::malloc_device(n * in_ld * sizeof(int8_t), q_ct1); - - int8_t in_host[n * in_ld]; - int8_t value = 0; - for (int i = 0; i < n * in_ld; i++) { - if (i % 4 < 2) { - in_host[i] = value; - value++; - } else - in_host[i] = 99; - } - int8_t ref_2nd[n * in_ld]; - std::memcpy(ref_2nd, in_host, n * in_ld * sizeof(int8_t)); - - q_ct1.memcpy(in_dev, in_host, n * in_ld * sizeof(int8_t)).wait(); - - dpct::blas_gemm::experimental::descriptor_ptr ltHandle; - ltHandle = new dpct::blas_gemm::experimental::descriptor(); - - void *out_dev; - const constexpr int out_ld = 64; - out_dev = (void *)sycl::malloc_device(out_ld * m * sizeof(int8_t), q_ct1); - q_ct1.memset(out_dev, 0, out_ld * m * sizeof(int8_t)).wait(); - transform(ltHandle, in_dev, in_ld, dpct::blas_gemm::experimental::order_t::col, out_dev, out_ld, - dpct::blas_gemm::experimental::order_t::col32, m, n); - - int8_t out_host[out_ld * m]; - 
q_ct1.memcpy(out_host, out_dev, out_ld * m * sizeof(int8_t)).wait(); - - bool pass_1st = true; - int8_t ref_1st[out_ld * m] = - {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, - 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, - 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 65, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - for (int i = 0; i < out_ld * m; i++) { - if (i % out_ld < n) { - if (out_host[i] != ref_1st[i]) { - pass_1st = false; - break; - } - } - } - - for (int i = 0; i < out_ld * m; i++) { - printf("%d, ", out_host[i]); - } - printf("\n"); - if (pass_1st) { - printf("COL32 1st pass\n"); - } else { - printf("COL32 1st fail\n"); - } - - q_ct1.memset(in_dev, 0, n * in_ld * sizeof(int8_t)).wait(); - std::memset(in_host, 0, n * in_ld * sizeof(int8_t)); - transform(ltHandle, out_dev, out_ld, dpct::blas_gemm::experimental::order_t::col32, in_dev, in_ld, - dpct::blas_gemm::experimental::order_t::col, m, n); - q_ct1.memcpy(in_host, in_dev, n * in_ld * sizeof(int8_t)).wait(); - - bool pass_2nd = true; - for (int i = 0; i < n * in_ld; i++) { - if (i % in_ld < m) { - if (in_host[i] != ref_2nd[i]) { - pass_2nd = false; - break; - } - } - } - - for (int i = 0; i < n * in_ld; i++) { - printf("%d, ", in_host[i]); - } - printf("\n"); - if (pass_2nd) { - printf("COL32 2nd pass\n"); - } else { - printf("COL32 2nd fail\n"); - } - - delete (ltHandle); - - return pass_1st && pass_2nd; -} - -bool test_COL4_4R2_8C() { - dpct::device_ext &dev_ct1 = dpct::get_current_device(); - sycl::queue &q_ct1 = dev_ct1.in_order_queue(); - const constexpr int m = 2; - const constexpr int n = 33; - const constexpr int in_ld = 4; - void *in_dev; - in_dev = (void *)sycl::malloc_device(n * in_ld * sizeof(int8_t), q_ct1); - - int8_t in_host[n * 
in_ld]; - int8_t value = 0; - for (int i = 0; i < n * in_ld; i++) { - if (i % 4 < 2) { - in_host[i] = value; - value++; - } else - in_host[i] = 99; - } - int8_t ref_2nd[n * in_ld]; - std::memcpy(ref_2nd, in_host, n * in_ld * sizeof(int8_t)); - - q_ct1.memcpy(in_dev, in_host, n * in_ld * sizeof(int8_t)).wait(); - - dpct::blas_gemm::experimental::descriptor_ptr ltHandle; - ltHandle = new dpct::blas_gemm::experimental::descriptor(); - - void *out_dev; - const constexpr int out_ld = (32 * 8) * 2; - out_dev = (void *)sycl::malloc_device(out_ld * m * sizeof(int8_t), q_ct1); - q_ct1.memset(out_dev, 0, out_ld * m * sizeof(int8_t)).wait(); - transform(ltHandle, in_dev, in_ld, dpct::blas_gemm::experimental::order_t::col, out_dev, out_ld, - dpct::blas_gemm::experimental::order_t::col4_4r2_8c, m, n); - - int8_t out_host[out_ld * m]; - q_ct1.memcpy(out_host, out_dev, out_ld * m * sizeof(int8_t)).wait(); - - bool pass_1st = true; - int8_t ref_1st[out_ld * m] = - {0, 2, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 8, 10, 12, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 16, 18, 20, 22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 24, 26, 28, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 32, 34, 36, 38, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 40, 42, 44, 46, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 48, 50, 52, 54, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 56, 58, 60, 62, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 3, 5, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 9, 11, 13, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 17, 19, 21, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 25, 27, 29, 31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 33, 35, 37, 39, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 41, 43, 45, 47, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 49, 51, 53, 55, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 57, 59, 61, 63, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 65, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - for (int i = 0; i < out_ld * m; i++) { - if (i % out_ld < n) { - if (out_host[i] != ref_1st[i]) { - pass_1st = false; - break; - } - } - } - - for (int i = 0; i < out_ld * m; i++) { - printf("%d, ", out_host[i]); - } - printf("\n"); - if (pass_1st) { - printf("COL4_4R2_8C 1st pass\n"); - } else { - printf("COL4_4R2_8C 1st fail\n"); - } - - q_ct1.memset(in_dev, 0, n * in_ld * sizeof(int8_t)).wait(); - std::memset(in_host, 0, n * in_ld * sizeof(int8_t)); - transform(ltHandle, out_dev, out_ld, dpct::blas_gemm::experimental::order_t::col4_4r2_8c, in_dev, - in_ld, dpct::blas_gemm::experimental::order_t::col, m, n); - q_ct1.memcpy(in_host, in_dev, n * in_ld * sizeof(int8_t)).wait(); - - bool pass_2nd = true; - for (int i = 0; i < n * in_ld; i++) { - if (i % in_ld < m) { - if (in_host[i] != ref_2nd[i]) { - pass_2nd = false; - break; - } - } - } - - for (int i = 0; i < n * in_ld; i++) { - printf("%d, ", in_host[i]); - } - printf("\n"); - if (pass_2nd) { - printf("COL4_4R2_8C 2nd pass\n"); - } else { - printf("COL4_4R2_8C 2nd fail\n"); - } - - delete (ltHandle); - - return pass_1st && pass_2nd; -} - -bool test_COL32_2R_4R4() { - dpct::device_ext &dev_ct1 = dpct::get_current_device(); - sycl::queue &q_ct1 = dev_ct1.in_order_queue(); - const constexpr int m = 2; - const constexpr int n = 33; - const constexpr int in_ld = 4; - void *in_dev; - in_dev = (void *)sycl::malloc_device(n * in_ld * sizeof(int8_t), q_ct1); - - int8_t in_host[n * in_ld]; - int8_t value = 0; - for (int i = 0; i < n * in_ld; i++) { - if (i % 4 < 2) { - in_host[i] = value; - value++; - } else - in_host[i] = 99; - } - int8_t ref_2nd[n * in_ld]; - std::memcpy(ref_2nd, in_host, n * in_ld * sizeof(int8_t)); - - q_ct1.memcpy(in_dev, in_host, n * in_ld * sizeof(int8_t)).wait(); - - 
dpct::blas_gemm::experimental::descriptor_ptr ltHandle; - ltHandle = new dpct::blas_gemm::experimental::descriptor(); - - void *out_dev; - const constexpr int out_ld = (32 * 32) * 2; - out_dev = (void *)sycl::malloc_device(out_ld * m * sizeof(int8_t), q_ct1); - q_ct1.memset(out_dev, 0, out_ld * m * sizeof(int8_t)).wait(); - transform(ltHandle, in_dev, in_ld, dpct::blas_gemm::experimental::order_t::col, out_dev, out_ld, - dpct::blas_gemm::experimental::order_t::col32_2r_4r4, m, n); - - int8_t out_host[out_ld * m]; - q_ct1.memcpy(out_host, out_dev, out_ld * m * sizeof(int8_t)).wait(); - - bool pass_1st = true; - int8_t ref_1st[out_ld * m] = - {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, - 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 65, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - for (int i = 0; i < out_ld * m; i++) { - if (i % out_ld < n) { - if (out_host[i] != ref_1st[i]) { - pass_1st = false; - break; - } - } - } - - for (int i = 0; i < out_ld * m; i++) { - printf("%d, ", out_host[i]); - } - printf("\n"); - if (pass_1st) { - printf("COL32_2R_4R4 1st pass\n"); - } else { - printf("COL32_2R_4R4 1st fail\n"); - } - - q_ct1.memset(in_dev, 0, n * in_ld * sizeof(int8_t)).wait(); - std::memset(in_host, 0, n * in_ld * sizeof(int8_t)); - transform(ltHandle, out_dev, out_ld, dpct::blas_gemm::experimental::order_t::col32_2r_4r4, in_dev, - in_ld, dpct::blas_gemm::experimental::order_t::col, m, n); - q_ct1.memcpy(in_host, in_dev, n * in_ld * sizeof(int8_t)).wait(); - - bool pass_2nd = true; - for (int i = 0; i < n 
* in_ld; i++) { - if (i % in_ld < m) { - if (in_host[i] != ref_2nd[i]) { - pass_2nd = false; - break; - } - } - } - - for (int i = 0; i < n * in_ld; i++) { - printf("%d, ", in_host[i]); - } - printf("\n"); - if (pass_2nd) { - printf("COL32_2R_4R4 2nd pass\n"); - } else { - printf("COL32_2R_4R4 2nd fail\n"); - } - - delete (ltHandle); - - return pass_1st && pass_2nd; -} - -// Input col_major matrix: -// 2 rows * 33 columns, ld is 4 -int main() { - bool pass = true; - pass = test_ROW() && pass; - pass = test_COL32() && pass; - pass = test_COL4_4R2_8C() && pass; - pass = test_COL32_2R_4R4() && pass; - return pass ? 0 : 1; -}