
Commit 5f66ebc

gwenzek and ggerganov committed
ggml : extend ggml_get_rows, ggml_repeat, ggml_concat (ggml/639)
* add more int ops
* ggml_compute_forward_dup_bytes
* add tests
* PR comments
* tests : minor indentations

---------

Co-authored-by: Georgi Gerganov <[email protected]>
1 parent f2eb19b commit 5f66ebc

File tree: 2 files changed (+198, −10)

* ggml.c
* tests/test-backend-ops.cpp

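For context, a minimal sketch (not part of this commit) of what the ggml_get_rows change below enables: when the source tensor is GGML_TYPE_I32, the gathered rows now keep that type instead of being upcast to GGML_TYPE_F32. The context size and tensor shapes are arbitrary choices for illustration.

// Minimal sketch (not from the commit): the row-gather result keeps the
// integer type of the source tensor instead of being upcast to F32.
#include <assert.h>
#include "ggml.h"

int main(void) {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16*1024*1024,   // arbitrary scratch size
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    // 4 rows of 8 int32 values; the row-index tensor must be I32 in ggml
    struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_I32, 8, 4);
    struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 2);

    struct ggml_tensor * rows = ggml_get_rows(ctx, a, b);

    // before this commit the result type was always GGML_TYPE_F32
    assert(rows->type == GGML_TYPE_I32);

    ggml_free(ctx);
    return 0;
}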

ggml.c

Lines changed: 162 additions & 4 deletions
@@ -4766,8 +4766,11 @@ struct ggml_tensor * ggml_get_rows(
     }

     // TODO: implement non F32 return
-    //struct ggml_tensor * result = ggml_new_tensor_2d(ctx, a->type, a->ne[0], b->ne[0]);
-    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, a->ne[0], b->ne[0], b->ne[1], b->ne[2]);
+    enum ggml_type type = GGML_TYPE_F32;
+    if (a->type == GGML_TYPE_I32) {
+        type = a->type;
+    }
+    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, type, a->ne[0], b->ne[0], b->ne[1], b->ne[2]);

     result->op   = GGML_OP_GET_ROWS;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -6938,14 +6941,165 @@ static void ggml_compute_forward_dup_f32(
     }
 }

-static void ggml_compute_forward_dup(
+// A simplified version of ggml_compute_forward_dup that doesn't do float upcasting, and just plain old memcpy.
+static void ggml_compute_forward_dup_bytes(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
-    if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst) && src0->type == dst->type) {
+    GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));
+    GGML_ASSERT(src0->type == dst->type);
+
+    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+        return;
+    }
+
+    if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst)) {
         ggml_compute_forward_dup_same_cont(params, src0, dst);
         return;
     }
+
+    GGML_TENSOR_UNARY_OP_LOCALS;
+
+    const size_t type_size = ggml_type_size(src0->type);
+    const int ith = params->ith; // thread index
+    const int nth = params->nth; // number of threads
+
+
+    // parallelize by rows
+    const int nr = ne01;
+    // number of rows per thread
+    const int dr = (nr + nth - 1) / nth;
+    // row range for this thread
+    const int ir0 = dr * ith;
+    const int ir1 = MIN(ir0 + dr, nr);
+
+    if (src0->type == dst->type &&
+        ne00 == ne0 &&
+        nb00 == type_size && nb0 == type_size) {
+        // copy by rows
+        const size_t rs = ne00 * type_size;
+        for (int64_t i03 = 0; i03 < ne03; i03++) {
+            for (int64_t i02 = 0; i02 < ne02; i02++) {
+                for (int64_t i01 = ir0; i01 < ir1; i01++) {
+                    memcpy(
+                        ((char *)  dst->data + i01*nb1  + i02*nb2  + i03*nb3),
+                        ((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03),
+                        rs);
+                }
+            }
+        }
+        return;
+    }
+
+    if (ggml_is_contiguous(dst)) {
+        size_t id = 0;
+        char * dst_ptr = (char *) dst->data;
+        const size_t rs = ne00 * type_size;
+
+        if (nb00 == type_size) {
+            // src0 is contigous on first dimension, copy by rows
+            for (int64_t i03 = 0; i03 < ne03; i03++) {
+                for (int64_t i02 = 0; i02 < ne02; i02++) {
+                    id += rs * ir0;
+                    for (int64_t i01 = ir0; i01 < ir1; i01++) {
+                        const char * src0_ptr = (char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03;
+                        memcpy(dst_ptr + id, src0_ptr, rs);
+                        id += rs;
+                    }
+                    id += rs * (ne01 - ir1);
+                }
+            }
+        } else {
+            //printf("%s: this is not optimal - fix me\n", __func__);
+
+            for (int64_t i03 = 0; i03 < ne03; i03++) {
+                for (int64_t i02 = 0; i02 < ne02; i02++) {
+                    id += rs * ir0;
+                    for (int64_t i01 = ir0; i01 < ir1; i01++) {
+                        for (int64_t i00 = 0; i00 < ne00; i00++) {
+                            const char * src0_ptr = (char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03;
+                            memcpy(dst_ptr + id, src0_ptr, type_size);
+
+                            id += type_size;
+                        }
+                    }
+                    id += rs * (ne01 - ir1);
+                }
+            }
+        }
+
+        return;
+    }
+
+    // dst counters
+
+    int64_t i10 = 0;
+    int64_t i11 = 0;
+    int64_t i12 = 0;
+    int64_t i13 = 0;
+
+    for (int64_t i03 = 0; i03 < ne03; i03++) {
+        for (int64_t i02 = 0; i02 < ne02; i02++) {
+            i10 += ne00 * ir0;
+            while (i10 >= ne0) {
+                i10 -= ne0;
+                if (++i11 == ne1) {
+                    i11 = 0;
+                    if (++i12 == ne2) {
+                        i12 = 0;
+                        if (++i13 == ne3) {
+                            i13 = 0;
+                        }
+                    }
+                }
+            }
+            for (int64_t i01 = ir0; i01 < ir1; i01++) {
+                for (int64_t i00 = 0; i00 < ne00; i00++) {
+                    const char * src0_ptr = ((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03);
+                          char * dst_ptr  = ((char *)  dst->data + i10*nb0  + i11*nb1  + i12*nb2  + i13*nb3);
+
+                    memcpy(dst_ptr, src0_ptr, type_size);
+
+                    if (++i10 == ne0) {
+                        i10 = 0;
+                        if (++i11 == ne1) {
+                            i11 = 0;
+                            if (++i12 == ne2) {
+                                i12 = 0;
+                                if (++i13 == ne3) {
+                                    i13 = 0;
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+            i10 += ne00 * (ne01 - ir1);
+            while (i10 >= ne0) {
+                i10 -= ne0;
+                if (++i11 == ne1) {
+                    i11 = 0;
+                    if (++i12 == ne2) {
+                        i12 = 0;
+                        if (++i13 == ne3) {
+                            i13 = 0;
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+static void ggml_compute_forward_dup(
+        const struct ggml_compute_params * params,
+        const struct ggml_tensor * src0,
+        struct ggml_tensor * dst) {
+    if (src0->type == dst->type) {
+        ggml_compute_forward_dup_bytes(params, src0, dst);
+        return;
+    }
+
     switch (src0->type) {
         case GGML_TYPE_F16:
             {
@@ -8404,10 +8558,12 @@ static void ggml_compute_forward_repeat(
         struct ggml_tensor * dst) {
     switch (src0->type) {
         case GGML_TYPE_F16:
+        case GGML_TYPE_I16:
             {
                 ggml_compute_forward_repeat_f16(params, src0, dst);
             } break;
         case GGML_TYPE_F32:
+        case GGML_TYPE_I32:
             {
                 ggml_compute_forward_repeat_f32(params, src0, dst);
             } break;
@@ -8550,6 +8706,7 @@ static void ggml_compute_forward_concat(
     struct ggml_tensor* dst) {
     switch (src0->type) {
         case GGML_TYPE_F32:
+        case GGML_TYPE_I32:
             {
                 ggml_compute_forward_concat_f32(params, src0, src1, dst);
             } break;
@@ -10674,6 +10831,7 @@ static void ggml_compute_forward_get_rows(
                 ggml_compute_forward_get_rows_f16(params, src0, src1, dst);
             } break;
         case GGML_TYPE_F32:
+        case GGML_TYPE_I32:
             {
                 ggml_compute_forward_get_rows_f32(params, src0, src1, dst);
             } break;
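
As an illustration of the new byte-copy path, here is a minimal sketch (not from this commit) that duplicates a permuted I16 tensor on the CPU backend; because src and dst share a type, ggml_compute_forward_dup now routes through ggml_compute_forward_dup_bytes and copies raw bytes with no float round trip. The context size, shapes, and thread count are arbitrary.

// Minimal sketch (not from the commit): dup of a non-contiguous I16 tensor.
#include "ggml.h"

int main(void) {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 64*1024*1024,   // arbitrary
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    // fill a small I16 tensor, then make a non-contiguous view of it
    struct ggml_tensor * src = ggml_new_tensor_3d(ctx, GGML_TYPE_I16, 10, 8, 3);
    for (int64_t i = 0; i < ggml_nelements(src); ++i) {
        ggml_set_i32_1d(src, i, (int32_t) i);
    }
    struct ggml_tensor * perm = ggml_permute(ctx, src, 1, 0, 2, 3);

    // dup of a same-type tensor now goes through ggml_compute_forward_dup_bytes
    struct ggml_tensor * out = ggml_dup(ctx, perm);

    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, out);
    ggml_graph_compute_with_ctx(ctx, gf, /*n_threads=*/ 1);

    ggml_free(ctx);
    return 0;
}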

tests/test-backend-ops.cpp

Lines changed: 36 additions & 6 deletions
@@ -58,6 +58,9 @@ static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float m
         int64_t hist[16];
         ggml_quantize_chunk(tensor->type, data.data(), dataq.data(), 0, size, hist);
         ggml_backend_tensor_set(tensor, dataq.data(), 0, dataq.size());
+    } else if (tensor->type == GGML_TYPE_I8 || tensor->type == GGML_TYPE_I16 || tensor->type == GGML_TYPE_I32) {
+        // This is going to create some weird integers though.
+        ggml_backend_tensor_set(tensor, data.data(), 0, ggml_nbytes(tensor));
     } else {
         GGML_ASSERT(false);
     }
@@ -87,8 +90,13 @@ static std::vector<float> tensor_to_float(const ggml_tensor * t) {
                         tv.push_back(*(float *) &buf[i]);
                     } else if (t->type == GGML_TYPE_I32) {
                         tv.push_back((float)*(int32_t *) &buf[i]);
+                    } else if (t->type == GGML_TYPE_I16) {
+                        tv.push_back((float)*(int16_t *) &buf[i]);
+                    } else if (t->type == GGML_TYPE_I8) {
+                        tv.push_back((float)*(int8_t *) &buf[i]);
                     } else if (quantized) {
-                        tt.to_float(&buf[i], vq.data(), bs);
+                        std::vector<float> vq(ggml_blck_size(t->type));
+                        tt.to_float(&buf[i], vq.data(), ggml_blck_size(t->type));
                         tv.insert(tv.end(), vq.begin(), vq.end());
                     } else {
                         GGML_ASSERT(false);
@@ -661,17 +669,26 @@ struct test_repeat : public test_case {
 struct test_dup : public test_case {
     const ggml_type type;
     const std::array<int64_t, 4> ne;
+    const std::array<int64_t, 4> permute;
+    bool _use_permute;

     std::string vars() override {
-        return VARS_TO_STR2(type, ne);
+        std::string v = VARS_TO_STR2(type, ne);
+        if (_use_permute) v += "," + VAR_TO_STR(permute);
+        return v;
     }

     test_dup(ggml_type type = GGML_TYPE_F32,
-            std::array<int64_t, 4> ne = {10, 10, 10, 1})
-        : type(type), ne(ne) {}
+            std::array<int64_t, 4> ne = {10, 10, 10, 1},
+            std::array<int64_t, 4> permute = {0, 0, 0, 0})
+        : type(type), ne(ne), permute(permute),
+            _use_permute(permute[0] + permute[1] + permute[2] + permute[3] > 0) {}

     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * src = ggml_new_tensor(ctx, type, 4, ne.data());
+        if (_use_permute) {
+            src = ggml_permute(ctx, src, permute[0], permute[1], permute[2], permute[3]);
+        }
         ggml_tensor * out = ggml_dup(ctx, src);
         return out;
     }
@@ -1450,14 +1467,26 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
             }
         }
     }
+    for (int b : {1, 7}) {
+        for (bool v : {false, true}) {
+            test_cases.emplace_back(new test_get_rows(GGML_TYPE_I32, 256, 5, 4, b, v));
+        }
+    }

     test_cases.emplace_back(new test_repeat(GGML_TYPE_F32, {10, 10, 10, 10}, {1, 1, 1, 1}));
     test_cases.emplace_back(new test_repeat(GGML_TYPE_F32, {10, 10, 10, 10}, {2, 1, 1, 1}));
     test_cases.emplace_back(new test_repeat(GGML_TYPE_F32, {10, 10, 10, 10}, {1, 2, 1, 1}));
     test_cases.emplace_back(new test_repeat(GGML_TYPE_F32, {10, 10, 10, 10}, {1, 1, 2, 1}));
     test_cases.emplace_back(new test_repeat(GGML_TYPE_F32, {10, 10, 10, 10}, {1, 1, 1, 2}));
+    test_cases.emplace_back(new test_repeat(GGML_TYPE_I32, {10, 10, 10, 10}, {2, 1, 1, 1}));
+    test_cases.emplace_back(new test_repeat(GGML_TYPE_I16, {10, 10, 10, 10}, {1, 1, 1, 2}));

-    test_cases.emplace_back(new test_dup());
+    test_cases.emplace_back(new test_dup(GGML_TYPE_F32));
+    test_cases.emplace_back(new test_dup(GGML_TYPE_F16));
+    test_cases.emplace_back(new test_dup(GGML_TYPE_I32));
+    test_cases.emplace_back(new test_dup(GGML_TYPE_I16));
+    test_cases.emplace_back(new test_dup(GGML_TYPE_I16, {10, 8, 3, 1}, {0, 2, 1, 3}));
+    test_cases.emplace_back(new test_dup(GGML_TYPE_I16, {10, 8, 3, 1}, {1, 2, 0, 3}));

     for (ggml_type type : all_types) {
         test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, type, {256, 10, 10, 1}));
@@ -1565,7 +1594,8 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op

     test_cases.emplace_back(new test_alibi());
     test_cases.emplace_back(new test_im2col());
-    test_cases.emplace_back(new test_concat());
+    test_cases.emplace_back(new test_concat(GGML_TYPE_F32));
+    test_cases.emplace_back(new test_concat(GGML_TYPE_I32));

     for (ggml_sort_order order : {GGML_SORT_ASC, GGML_SORT_DESC}) {
         test_cases.emplace_back(new test_argsort(GGML_TYPE_F32, {8, 1, 1, 1}, order));
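
The integer test cases added above correspond to usage along these lines; here is a minimal sketch (not part of the commit) against the public ggml C API, with arbitrary shapes and context size: ggml_repeat on an I16 tensor and ggml_concat on I32 tensors now have CPU dispatch cases.

// Minimal sketch (not from the commit): integer repeat and concat on the CPU backend.
#include "ggml.h"

int main(void) {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 64*1024*1024,   // arbitrary
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    // repeat an I16 tensor to twice its height (dispatches to the f16/i16 case)
    struct ggml_tensor * a     = ggml_new_tensor_2d(ctx, GGML_TYPE_I16, 10, 10);
    struct ggml_tensor * shape = ggml_new_tensor_2d(ctx, GGML_TYPE_I16, 10, 20);
    for (int64_t i = 0; i < ggml_nelements(a); ++i) {
        ggml_set_i32_1d(a, i, (int32_t) i);
    }
    struct ggml_tensor * rep = ggml_repeat(ctx, a, shape);

    // concatenate two I32 tensors along dim 2 (dispatches to the f32/i32 case)
    struct ggml_tensor * x = ggml_new_tensor_3d(ctx, GGML_TYPE_I32, 4, 4, 2);
    struct ggml_tensor * y = ggml_new_tensor_3d(ctx, GGML_TYPE_I32, 4, 4, 3);
    for (int64_t i = 0; i < ggml_nelements(x); ++i) { ggml_set_i32_1d(x, i, 1); }
    for (int64_t i = 0; i < ggml_nelements(y); ++i) { ggml_set_i32_1d(y, i, 2); }
    struct ggml_tensor * cat = ggml_concat(ctx, x, y);

    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, rep);
    ggml_build_forward_expand(gf, cat);
    ggml_graph_compute_with_ctx(ctx, gf, /*n_threads=*/ 1);

    ggml_free(ctx);
    return 0;
}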
