|
10 | 10 | * See the COPYING file in the top-level directory.
|
11 | 11 | */
|
12 | 12 | #include "sysemu/hostmem.h"
|
13 |
| -#include "sysemu/sysemu.h" |
14 | 13 | #include "qapi/visitor.h"
|
| 14 | +#include "qapi-types.h" |
| 15 | +#include "qapi-visit.h" |
15 | 16 | #include "qapi/qmp/qerror.h"
|
16 | 17 | #include "qemu/config-file.h"
|
17 | 18 | #include "qom/object_interfaces.h"
|
18 | 19 |
|
| 20 | +#ifdef CONFIG_NUMA |
| 21 | +#include <numaif.h> |
| 22 | +QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_DEFAULT != MPOL_DEFAULT); |
| 23 | +QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_PREFERRED != MPOL_PREFERRED); |
| 24 | +QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_BIND != MPOL_BIND); |
| 25 | +QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_INTERLEAVE != MPOL_INTERLEAVE); |
| 26 | +#endif |
| 27 | + |
19 | 28 | static void
|
20 | 29 | host_memory_backend_get_size(Object *obj, Visitor *v, void *opaque,
|
21 | 30 | const char *name, Error **errp)
|
@@ -53,6 +62,84 @@ host_memory_backend_set_size(Object *obj, Visitor *v, void *opaque,
|
53 | 62 | error_propagate(errp, local_err);
|
54 | 63 | }
|
55 | 64 |
|
| 65 | +static void |
| 66 | +host_memory_backend_get_host_nodes(Object *obj, Visitor *v, void *opaque, |
| 67 | + const char *name, Error **errp) |
| 68 | +{ |
| 69 | + HostMemoryBackend *backend = MEMORY_BACKEND(obj); |
| 70 | + uint16List *host_nodes = NULL; |
| 71 | + uint16List **node = &host_nodes; |
| 72 | + unsigned long value; |
| 73 | + |
| 74 | + value = find_first_bit(backend->host_nodes, MAX_NODES); |
| 75 | + if (value == MAX_NODES) { |
| 76 | + return; |
| 77 | + } |
| 78 | + |
| 79 | + *node = g_malloc0(sizeof(**node)); |
| 80 | + (*node)->value = value; |
| 81 | + node = &(*node)->next; |
| 82 | + |
| 83 | + do { |
| 84 | + value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1); |
| 85 | + if (value == MAX_NODES) { |
| 86 | + break; |
| 87 | + } |
| 88 | + |
| 89 | + *node = g_malloc0(sizeof(**node)); |
| 90 | + (*node)->value = value; |
| 91 | + node = &(*node)->next; |
| 92 | + } while (true); |
| 93 | + |
| 94 | + visit_type_uint16List(v, &host_nodes, name, errp); |
| 95 | +} |
| 96 | + |
| 97 | +static void |
| 98 | +host_memory_backend_set_host_nodes(Object *obj, Visitor *v, void *opaque, |
| 99 | + const char *name, Error **errp) |
| 100 | +{ |
| 101 | +#ifdef CONFIG_NUMA |
| 102 | + HostMemoryBackend *backend = MEMORY_BACKEND(obj); |
| 103 | + uint16List *l = NULL; |
| 104 | + |
| 105 | + visit_type_uint16List(v, &l, name, errp); |
| 106 | + |
| 107 | + while (l) { |
| 108 | + bitmap_set(backend->host_nodes, l->value, 1); |
| 109 | + l = l->next; |
| 110 | + } |
| 111 | +#else |
| 112 | + error_setg(errp, "NUMA node binding are not supported by this QEMU"); |
| 113 | +#endif |
| 114 | +} |
| 115 | + |
| 116 | +static void |
| 117 | +host_memory_backend_get_policy(Object *obj, Visitor *v, void *opaque, |
| 118 | + const char *name, Error **errp) |
| 119 | +{ |
| 120 | + HostMemoryBackend *backend = MEMORY_BACKEND(obj); |
| 121 | + int policy = backend->policy; |
| 122 | + |
| 123 | + visit_type_enum(v, &policy, HostMemPolicy_lookup, NULL, name, errp); |
| 124 | +} |
| 125 | + |
| 126 | +static void |
| 127 | +host_memory_backend_set_policy(Object *obj, Visitor *v, void *opaque, |
| 128 | + const char *name, Error **errp) |
| 129 | +{ |
| 130 | + HostMemoryBackend *backend = MEMORY_BACKEND(obj); |
| 131 | + int policy; |
| 132 | + |
| 133 | + visit_type_enum(v, &policy, HostMemPolicy_lookup, NULL, name, errp); |
| 134 | + backend->policy = policy; |
| 135 | + |
| 136 | +#ifndef CONFIG_NUMA |
| 137 | + if (policy != HOST_MEM_POLICY_DEFAULT) { |
| 138 | + error_setg(errp, "NUMA policies are not supported by this QEMU"); |
| 139 | + } |
| 140 | +#endif |
| 141 | +} |
| 142 | + |
56 | 143 | static bool host_memory_backend_get_merge(Object *obj, Error **errp)
|
57 | 144 | {
|
58 | 145 | HostMemoryBackend *backend = MEMORY_BACKEND(obj);
|
@@ -162,6 +249,12 @@ static void host_memory_backend_init(Object *obj)
|
162 | 249 | object_property_add(obj, "size", "int",
|
163 | 250 | host_memory_backend_get_size,
|
164 | 251 | host_memory_backend_set_size, NULL, NULL, NULL);
|
| 252 | + object_property_add(obj, "host-nodes", "int", |
| 253 | + host_memory_backend_get_host_nodes, |
| 254 | + host_memory_backend_set_host_nodes, NULL, NULL, NULL); |
| 255 | + object_property_add(obj, "policy", "str", |
| 256 | + host_memory_backend_get_policy, |
| 257 | + host_memory_backend_set_policy, NULL, NULL, NULL); |
165 | 258 | }
|
166 | 259 |
|
167 | 260 | static void host_memory_backend_finalize(Object *obj)
|
@@ -204,6 +297,47 @@ host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
|
204 | 297 | if (!backend->dump) {
|
205 | 298 | qemu_madvise(ptr, sz, QEMU_MADV_DONTDUMP);
|
206 | 299 | }
|
| 300 | +#ifdef CONFIG_NUMA |
| 301 | + unsigned long lastbit = find_last_bit(backend->host_nodes, MAX_NODES); |
| 302 | + /* lastbit == MAX_NODES means maxnode = 0 */ |
| 303 | + unsigned long maxnode = (lastbit + 1) % (MAX_NODES + 1); |
| 304 | + /* ensure policy won't be ignored in case memory is preallocated |
| 305 | + * before mbind(). note: MPOL_MF_STRICT is ignored on hugepages so |
| 306 | + * this doesn't catch hugepage case. */ |
| 307 | + unsigned flags = MPOL_MF_STRICT; |
| 308 | + |
| 309 | + /* check for invalid host-nodes and policies and give more verbose |
| 310 | + * error messages than mbind(). */ |
| 311 | + if (maxnode && backend->policy == MPOL_DEFAULT) { |
| 312 | + error_setg(errp, "host-nodes must be empty for policy default," |
| 313 | + " or you should explicitly specify a policy other" |
| 314 | + " than default"); |
| 315 | + return; |
| 316 | + } else if (maxnode == 0 && backend->policy != MPOL_DEFAULT) { |
| 317 | + error_setg(errp, "host-nodes must be set for policy %s", |
| 318 | + HostMemPolicy_lookup[backend->policy]); |
| 319 | + return; |
| 320 | + } |
| 321 | + |
| 322 | + /* We can have up to MAX_NODES nodes, but we need to pass maxnode+1 |
| 323 | + * as argument to mbind() due to an old Linux bug (feature?) which |
| 324 | + * cuts off the last specified node. This means backend->host_nodes |
| 325 | + * must have MAX_NODES+1 bits available. |
| 326 | + */ |
| 327 | + assert(sizeof(backend->host_nodes) >= |
| 328 | + BITS_TO_LONGS(MAX_NODES + 1) * sizeof(unsigned long)); |
| 329 | + assert(maxnode <= MAX_NODES); |
| 330 | + if (mbind(ptr, sz, backend->policy, |
| 331 | + maxnode ? backend->host_nodes : NULL, maxnode + 1, flags)) { |
| 332 | + error_setg_errno(errp, errno, |
| 333 | + "cannot bind memory to host NUMA nodes"); |
| 334 | + return; |
| 335 | + } |
| 336 | +#endif |
| 337 | + /* Preallocate memory after the NUMA policy has been instantiated. |
| 338 | + * This is necessary to guarantee memory is allocated with |
| 339 | + * specified NUMA policy in place. |
| 340 | + */ |
207 | 341 | if (backend->prealloc) {
|
208 | 342 | os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz);
|
209 | 343 | }
|
|
0 commit comments