Skip to content

Commit 704f15d

Browse files
jessegross authored and torvalds committed
flex_array: avoid divisions when accessing elements
On most architectures division is an expensive operation and accessing an element currently requires four of them. This performance penalty effectively precludes flex arrays from being used on any kind of fast path. However, two of these divisions can be handled at creation time and the others can be replaced by a reciprocal divide, completely avoiding real divisions on access.

[[email protected]: rebase on top of changes to support 0 len elements]
[[email protected]: initialize part_nr when array fits entirely in base]
Signed-off-by: Jesse Gross <[email protected]>
Signed-off-by: Eric Paris <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: David Rientjes <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
1 parent 5bf54a9 commit 704f15d

File tree

2 files changed

+31
-22
lines changed

2 files changed

+31
-22
lines changed

include/linux/flex_array.h

+2
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ struct flex_array {
2121
struct {
2222
int element_size;
2323
int total_nr_elements;
24+
int elems_per_part;
25+
u32 reciprocal_elems;
2426
struct flex_array_part *parts[];
2527
};
2628
/*

lib/flex_array.c

+29-22
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include <linux/slab.h>
2525
#include <linux/stddef.h>
2626
#include <linux/module.h>
27+
#include <linux/reciprocal_div.h>
2728

2829
struct flex_array_part {
2930
char elements[FLEX_ARRAY_PART_SIZE];
@@ -70,15 +71,15 @@ static inline int elements_fit_in_base(struct flex_array *fa)
7071
* Element size | Objects | Objects |
7172
* PAGE_SIZE=4k | 32-bit | 64-bit |
7273
* ---------------------------------|
73-
* 1 bytes | 4186112 | 2093056 |
74-
* 2 bytes | 2093056 | 1046528 |
75-
* 3 bytes | 1395030 | 697515 |
76-
* 4 bytes | 1046528 | 523264 |
77-
* 32 bytes | 130816 | 65408 |
78-
* 33 bytes | 126728 | 63364 |
79-
* 2048 bytes | 2044 | 1022 |
80-
* 2049 bytes | 1022 | 511 |
81-
* void * | 1046528 | 261632 |
74+
* 1 bytes | 4177920 | 2088960 |
75+
* 2 bytes | 2088960 | 1044480 |
76+
* 3 bytes | 1392300 | 696150 |
77+
* 4 bytes | 1044480 | 522240 |
78+
* 32 bytes | 130560 | 65280 |
79+
* 33 bytes | 126480 | 63240 |
80+
* 2048 bytes | 2040 | 1020 |
81+
* 2049 bytes | 1020 | 510 |
82+
* void * | 1044480 | 261120 |
8283
*
8384
* Since 64-bit pointers are twice the size, we lose half the
8485
* capacity in the base structure. Also note that no effort is made
@@ -88,11 +89,15 @@ struct flex_array *flex_array_alloc(int element_size, unsigned int total,
8889
gfp_t flags)
8990
{
9091
struct flex_array *ret;
92+
int elems_per_part = 0;
93+
int reciprocal_elems = 0;
9194
int max_size = 0;
9295

93-
if (element_size)
94-
max_size = FLEX_ARRAY_NR_BASE_PTRS *
95-
FLEX_ARRAY_ELEMENTS_PER_PART(element_size);
96+
if (element_size) {
97+
elems_per_part = FLEX_ARRAY_ELEMENTS_PER_PART(element_size);
98+
reciprocal_elems = reciprocal_value(elems_per_part);
99+
max_size = FLEX_ARRAY_NR_BASE_PTRS * elems_per_part;
100+
}
96101

97102
/* max_size will end up 0 if element_size > PAGE_SIZE */
98103
if (total > max_size)
@@ -102,6 +107,8 @@ struct flex_array *flex_array_alloc(int element_size, unsigned int total,
102107
return NULL;
103108
ret->element_size = element_size;
104109
ret->total_nr_elements = total;
110+
ret->elems_per_part = elems_per_part;
111+
ret->reciprocal_elems = reciprocal_elems;
105112
if (elements_fit_in_base(ret) && !(flags & __GFP_ZERO))
106113
memset(&ret->parts[0], FLEX_ARRAY_FREE,
107114
FLEX_ARRAY_BASE_BYTES_LEFT);
@@ -112,7 +119,7 @@ EXPORT_SYMBOL(flex_array_alloc);
112119
static int fa_element_to_part_nr(struct flex_array *fa,
113120
unsigned int element_nr)
114121
{
115-
return element_nr / FLEX_ARRAY_ELEMENTS_PER_PART(fa->element_size);
122+
return reciprocal_divide(element_nr, fa->reciprocal_elems);
116123
}
117124

118125
/**
@@ -141,12 +148,12 @@ void flex_array_free(struct flex_array *fa)
141148
EXPORT_SYMBOL(flex_array_free);
142149

143150
static unsigned int index_inside_part(struct flex_array *fa,
144-
unsigned int element_nr)
151+
unsigned int element_nr,
152+
unsigned int part_nr)
145153
{
146154
unsigned int part_offset;
147155

148-
part_offset = element_nr %
149-
FLEX_ARRAY_ELEMENTS_PER_PART(fa->element_size);
156+
part_offset = element_nr - part_nr * fa->elems_per_part;
150157
return part_offset * fa->element_size;
151158
}
152159

@@ -186,7 +193,7 @@ __fa_get_part(struct flex_array *fa, int part_nr, gfp_t flags)
186193
int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src,
187194
gfp_t flags)
188195
{
189-
int part_nr;
196+
int part_nr = 0;
190197
struct flex_array_part *part;
191198
void *dst;
192199

@@ -202,7 +209,7 @@ int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src,
202209
if (!part)
203210
return -ENOMEM;
204211
}
205-
dst = &part->elements[index_inside_part(fa, element_nr)];
212+
dst = &part->elements[index_inside_part(fa, element_nr, part_nr)];
206213
memcpy(dst, src, fa->element_size);
207214
return 0;
208215
}
@@ -217,7 +224,7 @@ EXPORT_SYMBOL(flex_array_put);
217224
*/
218225
int flex_array_clear(struct flex_array *fa, unsigned int element_nr)
219226
{
220-
int part_nr;
227+
int part_nr = 0;
221228
struct flex_array_part *part;
222229
void *dst;
223230

@@ -233,7 +240,7 @@ int flex_array_clear(struct flex_array *fa, unsigned int element_nr)
233240
if (!part)
234241
return -EINVAL;
235242
}
236-
dst = &part->elements[index_inside_part(fa, element_nr)];
243+
dst = &part->elements[index_inside_part(fa, element_nr, part_nr)];
237244
memset(dst, FLEX_ARRAY_FREE, fa->element_size);
238245
return 0;
239246
}
@@ -302,7 +309,7 @@ EXPORT_SYMBOL(flex_array_prealloc);
302309
*/
303310
void *flex_array_get(struct flex_array *fa, unsigned int element_nr)
304311
{
305-
int part_nr;
312+
int part_nr = 0;
306313
struct flex_array_part *part;
307314

308315
if (!fa->element_size)
@@ -317,7 +324,7 @@ void *flex_array_get(struct flex_array *fa, unsigned int element_nr)
317324
if (!part)
318325
return NULL;
319326
}
320-
return &part->elements[index_inside_part(fa, element_nr)];
327+
return &part->elements[index_inside_part(fa, element_nr, part_nr)];
321328
}
322329
EXPORT_SYMBOL(flex_array_get);
323330

0 commit comments

Comments
 (0)