  * preferred many Try a set of nodes first before normal fallback. This is
  *                similar to preferred without the special case.
  *
+ * random         Allocate memory from a random node out of the allowed
+ *                set of nodes.
+ *
  * default        Allocate on the local node first, or when on a VMA
  *                use the process policy. This is what Linux always did
  *                in a NUMA aware kernel and still does by, ahem, default.
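A minimal userspace sketch (not part of the patch) of how a task would opt into the proposed policy via set_mempolicy(2). The MPOL_RANDOM constant is assumed here, since its value comes from the patched uapi <linux/mempolicy.h>, and the call only succeeds on a kernel carrying this change:

#include <numaif.h>
#include <stdio.h>

#ifndef MPOL_RANDOM
#define MPOL_RANDOM 7           /* assumed value, for illustration only */
#endif

int main(void)
{
        unsigned long nodemask = (1UL << 0) | (1UL << 1);   /* allow nodes 0-1 */

        /* future anonymous allocations of this task land on a random allowed node */
        if (set_mempolicy(MPOL_RANDOM, &nodemask, sizeof(nodemask) * 8)) {
                perror("set_mempolicy");
                return 1;
        }
        return 0;
}

Build with -lnuma for the set_mempolicy() wrapper; on an unpatched kernel the call fails with EINVAL.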
@@ -452,6 +455,10 @@ static const struct mempolicy_operations mpol_ops[MPOL_MAX] = {
                 .create = mpol_new_nodemask,
                 .rebind = mpol_rebind_nodemask,
         },
+        [MPOL_RANDOM] = {
+                .create = mpol_new_nodemask,
+                .rebind = mpol_rebind_nodemask,
+        },
 };
 
 static bool migrate_folio_add(struct folio *folio, struct list_head *foliolist,
@@ -900,6 +907,7 @@ static void get_policy_nodemask(struct mempolicy *pol, nodemask_t *nodes)
         case MPOL_PREFERRED:
         case MPOL_PREFERRED_MANY:
         case MPOL_WEIGHTED_INTERLEAVE:
+        case MPOL_RANDOM:
                 *nodes = pol->nodes;
                 break;
         case MPOL_LOCAL:
@@ -1917,6 +1925,27 @@ static unsigned int interleave_nodes(struct mempolicy *policy)
         return nid;
 }
 
+static unsigned int read_once_policy_nodemask(struct mempolicy *pol, nodemask_t *mask);
+
+static unsigned int random_nodes(struct mempolicy *policy)
+{
+        unsigned int cpuset_mems_cookie;
+        nodemask_t nodemask;
+        unsigned int nid;
+        unsigned int r;
+
+        /* use tsk->mems_allowed_seq to detect a concurrent cpuset rebind and retry */
+        do {
+                cpuset_mems_cookie = read_mems_allowed_begin();
+                r = get_random_u32_below(read_once_policy_nodemask(policy, &nodemask));
+                nid = first_node(nodemask);
+                while (r--)
+                        nid = next_node_in(nid, nodemask);
+        } while (read_mems_allowed_retry(cpuset_mems_cookie));
+
+        return nid;
+}
+
 /*
  * Depending on the memory policy provide a node from which to allocate the
  * next slab entry.
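To make the walk in random_nodes() above concrete, here is an illustrative userspace analogue (an assumption for explanation only, not kernel code): draw r below the number of allowed nodes, then advance r set bits from the first one, wrapping like next_node_in(). Each allowed node comes out with probability 1/weight.

#include <stdio.h>
#include <stdlib.h>

/* pick_random_node(): userspace analogue of random_nodes() for a 64-bit mask */
static int pick_random_node(unsigned long mask)
{
        int weight = __builtin_popcountl(mask);     /* nodes_weight() analogue */
        int r, nid;
        unsigned long higher;

        if (!weight)
                return -1;
        r = rand() % weight;                        /* kernel uses get_random_u32_below() */
        nid = __builtin_ctzl(mask);                 /* first_node() analogue */
        while (r--) {
                higher = mask & ~((2UL << nid) - 1);            /* bits strictly above nid */
                nid = higher ? __builtin_ctzl(higher)           /* next set bit ... */
                             : __builtin_ctzl(mask);            /* ... or wrap around */
        }
        return nid;
}

int main(void)
{
        for (int i = 0; i < 8; i++)
                printf("%d ", pick_random_node(0x2c));  /* mask with nodes 2, 3, 5 */
        printf("\n");
        return 0;
}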
@@ -1962,6 +1991,9 @@ unsigned int mempolicy_slab_node(void)
         case MPOL_LOCAL:
                 return node;
 
+        case MPOL_RANDOM:
+                return random_nodes(policy);
+
         default:
                 BUG();
         }
@@ -2042,6 +2074,33 @@ static unsigned int interleave_nid(struct mempolicy *pol, pgoff_t ilx)
         return nid;
 }
 
+static unsigned int random_nid(struct mempolicy *pol,
+                               struct vm_area_struct *vma,
+                               pgoff_t ilx)
+{
+        nodemask_t nodemask;
+        unsigned int r, nnodes;
+        int i, nid;
+
+        nnodes = read_once_policy_nodemask(pol, &nodemask);
+        if (!nnodes)
+                return numa_node_id();
+
+        /*
+         * QQQ
+         * Can we say a hash of vma+ilx is sufficiently random, yet still
+         * stable where stability is relied upon, as it appears to be for
+         * mpol_misplaced() and interleaving?
+         */
+        r = hash_long((unsigned long)vma + ilx,
+                      ilog2(roundup_pow_of_two(nnodes)));
+
+        nid = first_node(nodemask);
+        for (i = 0; i < r; i++)
+                nid = next_node_in(nid, nodemask);
+        return nid;
+}
+
 /*
  * Return a nodemask representing a mempolicy for filtering nodes for
  * page allocation, together with preferred node id (or the input node id).
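On the QQQ above: a hash of (vma, ilx) is deterministic, so repeated lookups for the same mapping and index yield the same node, which is the stability mpol_misplaced() and interleaving appear to rely on. The userspace sketch below illustrates this, assuming 64-bit hash_long() reduces to a multiply by the kernel's GOLDEN_RATIO_64 followed by a right shift (the constant is copied here on that assumption):

#include <stdio.h>
#include <stdint.h>

#define GOLDEN_RATIO_64 0x61C8864680B583EBull   /* assumed to match include/linux/hash.h */

/* userspace stand-in for hash_long()/hash_64() on a 64-bit kernel */
static unsigned int hash64(uint64_t val, unsigned int bits)
{
        return (unsigned int)((val * GOLDEN_RATIO_64) >> (64 - bits));
}

int main(void)
{
        uint64_t vma = 0x7f12a4c00000ull;       /* made-up VMA address */
        unsigned int bits = 2;                  /* ilog2(roundup_pow_of_two(nnodes)) for 4 nodes */

        /* same (vma, ilx) hashes to the same bucket every time: stable */
        printf("ilx 0 -> %u, again -> %u\n",
               hash64(vma + 0, bits), hash64(vma + 0, bits));

        /* neighbouring indices spread across the 2^bits buckets */
        for (uint64_t ilx = 0; ilx < 8; ilx++)
                printf("ilx %llu -> bucket %u\n",
                       (unsigned long long)ilx, hash64(vma + ilx, bits));
        return 0;
}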
@@ -2085,6 +2144,9 @@ static nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *pol,
                         weighted_interleave_nodes(pol) :
                         weighted_interleave_nid(pol, ilx);
                 break;
+        case MPOL_RANDOM:
+                *nid = random_nodes(pol);
+                break;
         }
 
         return nodemask;
@@ -2153,6 +2215,7 @@ bool init_nodemask_of_mempolicy(nodemask_t *mask)
         case MPOL_BIND:
         case MPOL_INTERLEAVE:
         case MPOL_WEIGHTED_INTERLEAVE:
+        case MPOL_RANDOM:
                 *mask = mempolicy->nodes;
                 break;
 
@@ -2648,6 +2711,7 @@ bool __mpol_equal(struct mempolicy *a, struct mempolicy *b)
         case MPOL_PREFERRED:
         case MPOL_PREFERRED_MANY:
         case MPOL_WEIGHTED_INTERLEAVE:
+        case MPOL_RANDOM:
                 return !!nodes_equal(a->nodes, b->nodes);
         case MPOL_LOCAL:
                 return true;
@@ -2839,6 +2903,10 @@ int mpol_misplaced(struct folio *folio, struct vm_fault *vmf,
                 polnid = zonelist_node_idx(z);
                 break;
 
+        case MPOL_RANDOM:
+                polnid = random_nid(pol, vma, ilx);
+                break;
+
         default:
                 BUG();
         }
@@ -3184,6 +3252,7 @@ static const char * const policy_modes[] =
         [MPOL_WEIGHTED_INTERLEAVE] = "weighted interleave",
         [MPOL_LOCAL] = "local",
         [MPOL_PREFERRED_MANY] = "prefer (many)",
+        [MPOL_RANDOM] = "random",
 };
 
 /**
@@ -3246,6 +3315,7 @@ int mpol_parse_str(char *str, struct mempolicy **mpol)
                 break;
         case MPOL_INTERLEAVE:
         case MPOL_WEIGHTED_INTERLEAVE:
+        case MPOL_RANDOM:
                 /*
                  * Default to online nodes with memory if no nodelist
                  */
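Since mpol_parse_str() handles the tmpfs mpol= mount option and policy_modes[] names the new mode "random", usage on a patched kernel would presumably look like the sketch below; the nodelist syntax is assumed to follow the existing "interleave:0-3" convention, and /mnt/rand is just an example mount point:

#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
        /* assumed string form: same "<mode>:<nodelist>" convention as "interleave:0-3" */
        if (mount("tmpfs", "/mnt/rand", "tmpfs", 0, "mpol=random:0-3")) {
                perror("mount");
                return 1;
        }
        return 0;
}

Equivalent to mount -t tmpfs -o mpol=random:0-3 tmpfs /mnt/rand; with no nodelist given, the parser falls back to all online nodes with memory, as the comment above states.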
@@ -3390,6 +3460,7 @@ void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
         case MPOL_BIND:
         case MPOL_INTERLEAVE:
         case MPOL_WEIGHTED_INTERLEAVE:
+        case MPOL_RANDOM:
                 nodes = pol->nodes;
                 break;
         default: