
Commit 8478e9f

Tvrtko Ursulin authored and popcornmix committed
mm/mempolicy: Add MPOL_RANDOM
To help work around certain memory controller limitations or similar, a
random NUMA allocation memory policy is added.

Signed-off-by: Tvrtko Ursulin <[email protected]>
1 parent 93fee7a commit 8478e9f
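
Not part of the commit, but for context: with the new mode exported through the uapi header, a task can opt into the policy via set_mempolicy(2). A minimal sketch, assuming a kernel carrying this patch and the rebuilt <linux/mempolicy.h>; the raw syscall is used because glibc has no set_mempolicy wrapper, and error handling is kept minimal:

/*
 * Hypothetical usage sketch: set the calling task's policy to
 * MPOL_RANDOM over nodes 0 and 1. Illustrative only.
 */
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/mempolicy.h>    /* MPOL_RANDOM, after this commit */

int main(void)
{
        unsigned long nodemask = (1UL << 0) | (1UL << 1);

        /* glibc does not wrap set_mempolicy(); invoke the syscall directly. */
        if (syscall(SYS_set_mempolicy, MPOL_RANDOM, &nodemask,
                    8 * sizeof(nodemask)) != 0) {
                perror("set_mempolicy");
                return 1;
        }

        puts("future allocations will come from a random node in {0, 1}");
        return 0;
}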

File tree

include/uapi/linux/mempolicy.h
mm/mempolicy.c

2 files changed: +72 -0 lines changed

include/uapi/linux/mempolicy.h (+1)

@@ -24,6 +24,7 @@ enum {
         MPOL_LOCAL,
         MPOL_PREFERRED_MANY,
         MPOL_WEIGHTED_INTERLEAVE,
+        MPOL_RANDOM,
         MPOL_MAX,        /* always last member of enum */
 };
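
The same mode value can also be applied to a single mapping with mbind(2). Another illustrative sketch, not from the commit, assuming the same rebuilt header; pages are placed on first touch:

/*
 * Hypothetical per-VMA usage: back a 16 MiB anonymous mapping with
 * pages drawn from a random allowed node. Illustrative only.
 */
#include <stdio.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <linux/mempolicy.h>

int main(void)
{
        size_t len = 16UL << 20;
        unsigned long nodemask = (1UL << 0) | (1UL << 1);
        void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

        if (p == MAP_FAILED) {
                perror("mmap");
                return 1;
        }

        /* Apply MPOL_RANDOM to just this range; placement happens on fault. */
        if (syscall(SYS_mbind, p, len, MPOL_RANDOM, &nodemask,
                    8 * sizeof(nodemask), 0) != 0) {
                perror("mbind");
                return 1;
        }

        ((char *)p)[0] = 1;     /* first fault allocates on some random node */
        return 0;
}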

mm/mempolicy.c (+71)

@@ -41,6 +41,9 @@
  * preferred many Try a set of nodes first before normal fallback. This is
  *                similar to preferred without the special case.
  *
+ * random         Allocate memory from a random node out of allowed set of
+ *                nodes.
+ *
  * default        Allocate on the local node first, or when on a VMA
  *                use the process policy. This is what Linux always did
  *                in a NUMA aware kernel and still does by, ahem, default.
@@ -452,6 +455,10 @@ static const struct mempolicy_operations mpol_ops[MPOL_MAX] = {
                 .create = mpol_new_nodemask,
                 .rebind = mpol_rebind_nodemask,
         },
+        [MPOL_RANDOM] = {
+                .create = mpol_new_nodemask,
+                .rebind = mpol_rebind_nodemask,
+        },
 };
 
 static bool migrate_folio_add(struct folio *folio, struct list_head *foliolist,
@@ -900,6 +907,7 @@ static void get_policy_nodemask(struct mempolicy *pol, nodemask_t *nodes)
         case MPOL_PREFERRED:
         case MPOL_PREFERRED_MANY:
         case MPOL_WEIGHTED_INTERLEAVE:
+        case MPOL_RANDOM:
                 *nodes = pol->nodes;
                 break;
         case MPOL_LOCAL:
@@ -1917,6 +1925,27 @@ static unsigned int interleave_nodes(struct mempolicy *policy)
         return nid;
 }
 
+static unsigned int read_once_policy_nodemask(struct mempolicy *pol, nodemask_t *mask);
+
+static unsigned int random_nodes(struct mempolicy *policy)
+{
+        unsigned int nid = first_node(policy->nodes);
+        unsigned int cpuset_mems_cookie;
+        nodemask_t nodemask;
+        unsigned int r;
+
+        r = get_random_u32_below(read_once_policy_nodemask(policy, &nodemask));
+
+        /* to prevent miscount, use tsk->mems_allowed_seq to detect rebind */
+        do {
+                cpuset_mems_cookie = read_mems_allowed_begin();
+                while (r--)
+                        nid = next_node_in(nid, policy->nodes);
+        } while (read_mems_allowed_retry(cpuset_mems_cookie));
+
+        return nid;
+}
+
 /*
  * Depending on the memory policy provide a node from which to allocate the
  * next slab entry.
@@ -1962,6 +1991,9 @@ unsigned int mempolicy_slab_node(void)
         case MPOL_LOCAL:
                 return node;
 
+        case MPOL_RANDOM:
+                return random_nodes(policy);
+
         default:
                 BUG();
         }
@@ -2042,6 +2074,33 @@ static unsigned int interleave_nid(struct mempolicy *pol, pgoff_t ilx)
         return nid;
 }
 
+static unsigned int random_nid(struct mempolicy *pol,
+                               struct vm_area_struct *vma,
+                               pgoff_t ilx)
+{
+        nodemask_t nodemask;
+        unsigned int r, nnodes;
+        int i, nid;
+
+        nnodes = read_once_policy_nodemask(pol, &nodemask);
+        if (!nnodes)
+                return numa_node_id();
+
+        /*
+         * QQQ
+         * Can we say hash of vma+ilx is sufficiently random but still
+         * stable in case of reliance on stable, as it appears is with
+         * mpol_misplaced and interleaving?
+         */
+        r = hash_long((unsigned long)vma + ilx,
+                      ilog2(roundup_pow_of_two(nnodes)));
+
+        nid = first_node(nodemask);
+        for (i = 0; i < r; i++)
+                nid = next_node(nid, nodemask);
+        return nid;
+}
+
 /*
  * Return a nodemask representing a mempolicy for filtering nodes for
  * page allocation, together with preferred node id (or the input node id).
@@ -2085,6 +2144,9 @@ static nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *pol,
                         weighted_interleave_nodes(pol) :
                         weighted_interleave_nid(pol, ilx);
                 break;
+        case MPOL_RANDOM:
+                *nid = random_nodes(pol);
+                break;
         }
 
         return nodemask;
@@ -2153,6 +2215,7 @@ bool init_nodemask_of_mempolicy(nodemask_t *mask)
         case MPOL_BIND:
         case MPOL_INTERLEAVE:
         case MPOL_WEIGHTED_INTERLEAVE:
+        case MPOL_RANDOM:
                 *mask = mempolicy->nodes;
                 break;
 
@@ -2633,6 +2696,7 @@ bool __mpol_equal(struct mempolicy *a, struct mempolicy *b)
         case MPOL_PREFERRED:
         case MPOL_PREFERRED_MANY:
         case MPOL_WEIGHTED_INTERLEAVE:
+        case MPOL_RANDOM:
                 return !!nodes_equal(a->nodes, b->nodes);
         case MPOL_LOCAL:
                 return true;
@@ -2824,6 +2888,10 @@ int mpol_misplaced(struct folio *folio, struct vm_fault *vmf,
                 polnid = zonelist_node_idx(z);
                 break;
 
+        case MPOL_RANDOM:
+                polnid = random_nid(pol, vma, ilx);
+                break;
+
         default:
                 BUG();
         }
@@ -3169,6 +3237,7 @@ static const char * const policy_modes[] =
         [MPOL_WEIGHTED_INTERLEAVE] = "weighted interleave",
         [MPOL_LOCAL] = "local",
         [MPOL_PREFERRED_MANY] = "prefer (many)",
+        [MPOL_RANDOM] = "random",
 };
 
 /**
@@ -3231,6 +3300,7 @@ int mpol_parse_str(char *str, struct mempolicy **mpol)
                 break;
         case MPOL_INTERLEAVE:
         case MPOL_WEIGHTED_INTERLEAVE:
+        case MPOL_RANDOM:
                 /*
                  * Default to online nodes with memory if no nodelist
                  */
@@ -3375,6 +3445,7 @@ void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
         case MPOL_BIND:
         case MPOL_INTERLEAVE:
         case MPOL_WEIGHTED_INTERLEAVE:
+        case MPOL_RANDOM:
                 nodes = pol->nodes;
                 break;
         default:
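
To make the random_nodes() selection above concrete, here is a small userspace simulation, illustrative only: a plain bitmask and rand() stand in for nodemask_t and get_random_u32_below(), next_node_in() is reimplemented as a wrapping bit scan, and the resulting histogram should come out roughly uniform across the allowed nodes:

/* Userspace simulation of the random_nodes() pick; illustrative only. */
#include <stdio.h>
#include <stdlib.h>

#define MAX_NODES 64

/* Next set bit after nid, wrapping around: a stand-in for next_node_in(). */
static int next_node_in(int nid, unsigned long mask)
{
        for (int i = 1; i <= MAX_NODES; i++) {
                int cand = (nid + i) % MAX_NODES;

                if (mask & (1UL << cand))
                        return cand;
        }
        return nid;
}

int main(void)
{
        unsigned long allowed = (1UL << 0) | (1UL << 2) | (1UL << 5);
        int nnodes = __builtin_popcountl(allowed);      /* GCC/Clang builtin */
        int hist[MAX_NODES] = { 0 };

        srand(42);
        for (int i = 0; i < 300000; i++) {
                int nid = __builtin_ctzl(allowed);      /* first_node() */
                int r = rand() % nnodes;                /* get_random_u32_below() */

                while (r--)
                        nid = next_node_in(nid, allowed);
                hist[nid]++;
        }

        /* Expect roughly 100000 picks for each of nodes 0, 2 and 5. */
        for (int n = 0; n < MAX_NODES; n++)
                if (hist[n])
                        printf("node %2d: %d picks\n", n, hist[n]);
        return 0;
}

Since mpol_parse_str() now recognizes "random", the policy should also be reachable wherever policy strings are parsed, such as the tmpfs mpol= mount option (assuming the usual mpol=<mode>[:<nodelist>] form).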
