|
41 | 41 | * preferred many Try a set of nodes first before normal fallback. This is
|
42 | 42 | * similar to preferred without the special case.
|
43 | 43 | *
|
| 44 | + * random Allocate memory from a node picked at random out of the |
| 45 | + * allowed set of nodes. |
| 46 | + * |
44 | 47 | * default Allocate on the local node first, or when on a VMA
|
45 | 48 | * use the process policy. This is what Linux always did
|
46 | 49 | * in a NUMA aware kernel and still does by, ahem, default.
|
@@ -452,6 +455,10 @@ static const struct mempolicy_operations mpol_ops[MPOL_MAX] = {
|
452 | 455 | .create = mpol_new_nodemask,
|
453 | 456 | .rebind = mpol_rebind_nodemask,
|
454 | 457 | },
|
| 458 | + [MPOL_RANDOM] = { |
| 459 | + .create = mpol_new_nodemask, |
| 460 | + .rebind = mpol_rebind_nodemask, |
| 461 | + }, |
455 | 462 | };
|
456 | 463 |
|
457 | 464 | static bool migrate_folio_add(struct folio *folio, struct list_head *foliolist,
|
@@ -900,6 +907,7 @@ static void get_policy_nodemask(struct mempolicy *pol, nodemask_t *nodes)
|
900 | 907 | case MPOL_PREFERRED:
|
901 | 908 | case MPOL_PREFERRED_MANY:
|
902 | 909 | case MPOL_WEIGHTED_INTERLEAVE:
|
| 910 | + case MPOL_RANDOM: |
903 | 911 | *nodes = pol->nodes;
|
904 | 912 | break;
|
905 | 913 | case MPOL_LOCAL:
|
@@ -1917,6 +1925,27 @@ static unsigned int interleave_nodes(struct mempolicy *policy)
|
1917 | 1925 | return nid;
|
1918 | 1926 | }
|
1919 | 1927 |
|
| 1928 | +static unsigned int read_once_policy_nodemask(struct mempolicy *pol, nodemask_t *mask); |
| 1929 | + |
| 1930 | +static unsigned int random_nodes(struct mempolicy *policy) |
| 1931 | +{ |
| 1932 | + unsigned int nid = first_node(policy->nodes); |
| 1933 | + unsigned int cpuset_mems_cookie; |
| 1934 | + nodemask_t nodemask; |
| 1935 | + unsigned int r; |
| 1936 | + |
| 1937 | + r = get_random_u32_below(read_once_policy_nodemask(policy, &nodemask)); |
| 1938 | + |
| 1939 | + /* to prevent miscount, use tsk->mems_allowed_seq to detect rebind */ |
| 1940 | + do { |
| 1941 | + cpuset_mems_cookie = read_mems_allowed_begin(); |
| 1942 | + while (r--) |
| 1943 | + nid = next_node_in(nid, policy->nodes); |
| 1944 | + } while (read_mems_allowed_retry(cpuset_mems_cookie)); |
| 1945 | + |
| 1946 | + return nid; |
| 1947 | +} |
| 1948 | + |
1920 | 1949 | /*
|
1921 | 1950 | * Depending on the memory policy provide a node from which to allocate the
|
1922 | 1951 | * next slab entry.
|
@@ -1962,6 +1991,9 @@ unsigned int mempolicy_slab_node(void)
|
1962 | 1991 | case MPOL_LOCAL:
|
1963 | 1992 | return node;
|
1964 | 1993 |
|
| 1994 | + case MPOL_RANDOM: |
| 1995 | + return random_nodes(policy); |
| 1996 | + |
1965 | 1997 | default:
|
1966 | 1998 | BUG();
|
1967 | 1999 | }
|
@@ -2042,6 +2074,33 @@ static unsigned int interleave_nid(struct mempolicy *pol, pgoff_t ilx)
|
2042 | 2074 | return nid;
|
2043 | 2075 | }
|
2044 | 2076 |
|
| 2077 | +static unsigned int random_nid(struct mempolicy *pol, |
| 2078 | + struct vm_area_struct *vma, |
| 2079 | + pgoff_t ilx) |
| 2080 | +{ |
| 2081 | + nodemask_t nodemask; |
| 2082 | + unsigned int r, nnodes; |
| 2083 | + int i, nid; |
| 2084 | + |
| 2085 | + nnodes = read_once_policy_nodemask(pol, &nodemask); |
| 2086 | + if (!nnodes) |
| 2087 | + return numa_node_id(); |
| 2088 | + |
| 2089 | + /* |
| 2090 | + * QQQ |
| 2091 | + * Can we say hash of vma+ilx is sufficiently random but still |
| 2092 | + * stable in case of reliance on stable, as it appears is with |
| 2093 | + * mpol_misplaced and interleaving? |
| 2094 | + */ |
| 2095 | + r = hash_long((unsigned long)vma + ilx, |
| 2096 | + ilog2(roundup_pow_of_two(nnodes))); |
| 2097 | + |
| 2098 | + nid = first_node(nodemask); |
| 2099 | + for (i = 0; i < r; i++) |
| 2100 | + nid = next_node(nid, nodemask); |
| 2101 | + return nid; |
| 2102 | +} |
| 2103 | + |
2045 | 2104 | /*
|
2046 | 2105 | * Return a nodemask representing a mempolicy for filtering nodes for
|
2047 | 2106 | * page allocation, together with preferred node id (or the input node id).
|
@@ -2085,6 +2144,9 @@ static nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *pol,
|
2085 | 2144 | weighted_interleave_nodes(pol) :
|
2086 | 2145 | weighted_interleave_nid(pol, ilx);
|
2087 | 2146 | break;
|
| 2147 | + case MPOL_RANDOM: |
| 2148 | + *nid = random_nodes(pol); |
| 2149 | + break; |
2088 | 2150 | }
|
2089 | 2151 |
|
2090 | 2152 | return nodemask;
|
@@ -2153,6 +2215,7 @@ bool init_nodemask_of_mempolicy(nodemask_t *mask)
|
2153 | 2215 | case MPOL_BIND:
|
2154 | 2216 | case MPOL_INTERLEAVE:
|
2155 | 2217 | case MPOL_WEIGHTED_INTERLEAVE:
|
| 2218 | + case MPOL_RANDOM: |
2156 | 2219 | *mask = mempolicy->nodes;
|
2157 | 2220 | break;
|
2158 | 2221 |
|
@@ -2633,6 +2696,7 @@ bool __mpol_equal(struct mempolicy *a, struct mempolicy *b)
|
2633 | 2696 | case MPOL_PREFERRED:
|
2634 | 2697 | case MPOL_PREFERRED_MANY:
|
2635 | 2698 | case MPOL_WEIGHTED_INTERLEAVE:
|
| 2699 | + case MPOL_RANDOM: |
2636 | 2700 | return !!nodes_equal(a->nodes, b->nodes);
|
2637 | 2701 | case MPOL_LOCAL:
|
2638 | 2702 | return true;
|
@@ -2824,6 +2888,10 @@ int mpol_misplaced(struct folio *folio, struct vm_fault *vmf,
|
2824 | 2888 | polnid = zonelist_node_idx(z);
|
2825 | 2889 | break;
|
2826 | 2890 |
|
| 2891 | + case MPOL_RANDOM: |
| 2892 | + polnid = random_nid(pol, vma, ilx); |
| 2893 | + break; |
| 2894 | + |
2827 | 2895 | default:
|
2828 | 2896 | BUG();
|
2829 | 2897 | }
|
@@ -3169,6 +3237,7 @@ static const char * const policy_modes[] =
|
3169 | 3237 | [MPOL_WEIGHTED_INTERLEAVE] = "weighted interleave",
|
3170 | 3238 | [MPOL_LOCAL] = "local",
|
3171 | 3239 | [MPOL_PREFERRED_MANY] = "prefer (many)",
|
| 3240 | + [MPOL_RANDOM] = "random", |
3172 | 3241 | };
|
3173 | 3242 |
|
3174 | 3243 | /**
|
@@ -3231,6 +3300,7 @@ int mpol_parse_str(char *str, struct mempolicy **mpol)
|
3231 | 3300 | break;
|
3232 | 3301 | case MPOL_INTERLEAVE:
|
3233 | 3302 | case MPOL_WEIGHTED_INTERLEAVE:
|
| 3303 | + case MPOL_RANDOM: |
3234 | 3304 | /*
|
3235 | 3305 | * Default to online nodes with memory if no nodelist
|
3236 | 3306 | */
|
@@ -3375,6 +3445,7 @@ void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
|
3375 | 3445 | case MPOL_BIND:
|
3376 | 3446 | case MPOL_INTERLEAVE:
|
3377 | 3447 | case MPOL_WEIGHTED_INTERLEAVE:
|
| 3448 | + case MPOL_RANDOM: |
3378 | 3449 | nodes = pol->nodes;
|
3379 | 3450 | break;
|
3380 | 3451 | default:
|
|
0 commit comments