
Commit f1380d7

test-backend-ops : add cpy from f32 -> all types test
1 parent 54d254b commit f1380d7

2 files changed (+40 -9)

ggml-cuda.cu (+24 -1)
@@ -9316,6 +9316,30 @@ static bool ggml_backend_cuda_supports_op(ggml_backend_t backend, const ggml_ten
                     return false;
                 }
             } break;
+        case GGML_OP_CPY:
+            {
+                ggml_type src0_type = op->src[0]->type;
+                ggml_type src1_type = op->src[1]->type;
+                if (src0_type == GGML_TYPE_F32 && src1_type == GGML_TYPE_F32) {
+                    return true;
+                }
+                if (src0_type == GGML_TYPE_F32 && src1_type == GGML_TYPE_F16) {
+                    return true;
+                }
+                if (src0_type == GGML_TYPE_F32 && src1_type == GGML_TYPE_Q8_0) {
+                    return true;
+                }
+                if (src0_type == GGML_TYPE_F32 && src1_type == GGML_TYPE_Q4_0) {
+                    return true;
+                }
+                if (src0_type == GGML_TYPE_F32 && src1_type == GGML_TYPE_Q4_1) {
+                    return true;
+                }
+                if (src0_type == GGML_TYPE_F16 && src1_type == GGML_TYPE_F16) {
+                    return true;
+                }
+                return false;
+            } break;
         case GGML_OP_NONE:
         case GGML_OP_RESHAPE:
         case GGML_OP_VIEW:
@@ -9331,7 +9355,6 @@ static bool ggml_backend_cuda_supports_op(ggml_backend_t backend, const ggml_ten
         case GGML_OP_SCALE:
         case GGML_OP_SQR:
         case GGML_OP_CLAMP:
-        case GGML_OP_CPY:
         case GGML_OP_CONT:
         case GGML_OP_DIAG_MASK_INF:
         case GGML_OP_SOFT_MAX:
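
The net effect in ggml-cuda.cu: GGML_OP_CPY is removed from the blanket-supported list and is instead gated on an explicit whitelist of source/destination type pairs the CUDA backend can copy. As an illustration only (cuda_supports_cpy is a hypothetical helper, not part of the commit), the six if-statements reduce to:

    // Hypothetical condensed form of the whitelist added above.
    static bool cuda_supports_cpy(ggml_type src0_type, ggml_type src1_type) {
        if (src0_type == GGML_TYPE_F16) {
            return src1_type == GGML_TYPE_F16; // F16 -> F16 only
        }
        if (src0_type != GGML_TYPE_F32) {
            return false; // no other source types are accepted
        }
        switch (src1_type) { // F32 -> {F32, F16, Q8_0, Q4_0, Q4_1}
            case GGML_TYPE_F32:
            case GGML_TYPE_F16:
            case GGML_TYPE_Q8_0:
            case GGML_TYPE_Q4_0:
            case GGML_TYPE_Q4_1:
                return true;
            default:
                return false;
        }
    }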

tests/test-backend-ops.cpp (+16 -8)
@@ -70,23 +70,27 @@ static std::vector<float> tensor_to_float(const ggml_tensor * t) {
     std::vector<uint8_t> buf(ggml_nbytes(t));
     ggml_backend_tensor_get(t, buf.data(), 0, ggml_nbytes(t));
 
+    ggml_type_traits_t tt = ggml_internal_get_type_traits(t->type);
+
     // access elements by index to avoid gaps in views
     for (int64_t i3 = 0; i3 < t->ne[3]; i3++) {
         for (int64_t i2 = 0; i2 < t->ne[2]; i2++) {
             for (int64_t i1 = 0; i1 < t->ne[1]; i1++) {
-                for (int64_t i0 = 0; i0 < t->ne[0]; i0++) {
+                for (int64_t i0 = 0; i0 < t->ne[0]; i0 += ggml_blck_size(t->type)) {
                     size_t i = i3*t->nb[3] + i2*t->nb[2] + i1*t->nb[1] + i0*t->nb[0];
-                    float v;
                     if (t->type == GGML_TYPE_F16) {
-                        v = (float) ggml_fp16_to_fp32(*(ggml_fp16_t*)&buf[i]);
+                        tv.push_back(ggml_fp16_to_fp32(*(ggml_fp16_t*)&buf[i]));
                     } else if (t->type == GGML_TYPE_F32) {
-                        v = *(float *) &buf[i];
+                        tv.push_back(*(float *) &buf[i]);
                     } else if (t->type == GGML_TYPE_I32) {
-                        v = *(int32_t *) &buf[i];
+                        tv.push_back((float)*(int32_t *) &buf[i]);
+                    } else if (ggml_is_quantized(t->type)) {
+                        std::vector<float> vq(ggml_blck_size(t->type));
+                        tt.to_float(&buf[i], vq.data(), ggml_blck_size(t->type));
+                        tv.insert(tv.end(), vq.begin(), vq.end());
                     } else {
                         GGML_ASSERT(false);
                     }
-                    tv.push_back(v);
                 }
             }
         }
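
This change makes tensor_to_float quantization-aware: the inner loop now advances one block at a time along ne[0], and quantized blocks are expanded through the type traits' to_float hook. A minimal sketch of that hook in isolation, assuming only the ggml APIs already used in the diff (ggml_internal_get_type_traits, ggml_blck_size):

    // Sketch: dequantize one Q8_0 block the same way the test helper does.
    // out must have room for ggml_blck_size(GGML_TYPE_Q8_0) floats (32).
    #include "ggml.h"

    static void dequantize_block_q8_0(const void * block, float * out) {
        const ggml_type_traits_t tt = ggml_internal_get_type_traits(GGML_TYPE_Q8_0);
        tt.to_float(block, out, ggml_blck_size(GGML_TYPE_Q8_0));
    }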
@@ -320,7 +324,7 @@ struct test_case {
             for (size_t i = 0; i < f1.size(); i++) {
                 // check for nans
                 if (std::isnan(f1[i]) || std::isnan(f2[i])) {
-                    printf("[%s] NaN at index %zu ", ggml_op_desc(t1), i);
+                    printf("[%s] NaN at index %zu (%f %f) ", ggml_op_desc(t1), i, f1[i], f2[i]);
                     ud->ok = false;
                     return true;
                 }
@@ -1253,7 +1257,11 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
     test_cases.emplace_back(new test_repeat(GGML_TYPE_F32, {10, 10, 10, 10}, {1, 1, 1, 2}));
 
     test_cases.emplace_back(new test_dup());
-    test_cases.emplace_back(new test_cpy());
+
+    for (ggml_type type : all_types) {
+        test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, type, {256, 100, 100, 1}));
+    }
+
     test_cases.emplace_back(new test_cont());
 
     auto add_test_bin_bcast = [&](ggml_type type, std::array<int64_t, 4> ne, std::array<int, 4> nr) {
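
The single default test_cpy case becomes one F32 -> type copy test per entry of all_types, matching the commit title. Assuming all_types enumerates the types under test (its exact contents live elsewhere in tests/test-backend-ops.cpp), the loop expands to roughly:

    // Illustration only: the registration loop unrolled for a few entries.
    test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, GGML_TYPE_F32,  {256, 100, 100, 1}));
    test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, GGML_TYPE_F16,  {256, 100, 100, 1}));
    test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, GGML_TYPE_Q8_0, {256, 100, 100, 1}));
    // ... one case per remaining entry of all_types

Pairs a backend cannot copy are expected to be rejected by its supports_op, which is exactly what the ggml-cuda.cu half of the commit implements.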
