Skip to content

Commit 4cf1b76

Browse files
Hu Tao, mstsirkin
Hu Tao
authored and committed
hostmem: add properties for NUMA memory policy
Signed-off-by: Hu Tao <[email protected]> [Raise errors on setting properties if !CONFIG_NUMA. Add BUILD_BUG_ON checks. - Paolo] Signed-off-by: Paolo Bonzini <[email protected]> Signed-off-by: Marcelo Tosatti <[email protected]> Signed-off-by: Hu Tao <[email protected]> Acked-by: Michael S. Tsirkin <[email protected]> Signed-off-by: Michael S. Tsirkin <[email protected]>
1 parent dbcb898 commit 4cf1b76

File tree

3 files changed

+159
-1
lines changed

3 files changed

+159
-1
lines changed

backends/hostmem.c

+135-1
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,21 @@
1010
* See the COPYING file in the top-level directory.
1111
*/
1212
#include "sysemu/hostmem.h"
13-
#include "sysemu/sysemu.h"
1413
#include "qapi/visitor.h"
14+
#include "qapi-types.h"
15+
#include "qapi-visit.h"
1516
#include "qapi/qmp/qerror.h"
1617
#include "qemu/config-file.h"
1718
#include "qom/object_interfaces.h"
1819

20+
#ifdef CONFIG_NUMA
21+
#include <numaif.h>
22+
/* backend->policy (a HostMemPolicy value) is handed to mbind() unchanged
 * and compared against MPOL_DEFAULT, so each QAPI enum value must equal
 * the corresponding MPOL_* constant from <numaif.h>. */
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_DEFAULT != MPOL_DEFAULT);
23+
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_PREFERRED != MPOL_PREFERRED);
24+
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_BIND != MPOL_BIND);
25+
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_INTERLEAVE != MPOL_INTERLEAVE);
26+
#endif
27+
1928
static void
2029
host_memory_backend_get_size(Object *obj, Visitor *v, void *opaque,
2130
const char *name, Error **errp)
@@ -53,6 +62,84 @@ host_memory_backend_set_size(Object *obj, Visitor *v, void *opaque,
5362
error_propagate(errp, local_err);
5463
}
5564

65+
static void
66+
host_memory_backend_get_host_nodes(Object *obj, Visitor *v, void *opaque,
67+
const char *name, Error **errp)
68+
{
69+
HostMemoryBackend *backend = MEMORY_BACKEND(obj);
70+
uint16List *host_nodes = NULL;
71+
uint16List **node = &host_nodes;
72+
unsigned long value;
73+
74+
value = find_first_bit(backend->host_nodes, MAX_NODES);
75+
if (value == MAX_NODES) {
76+
return;
77+
}
78+
79+
*node = g_malloc0(sizeof(**node));
80+
(*node)->value = value;
81+
node = &(*node)->next;
82+
83+
do {
84+
value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1);
85+
if (value == MAX_NODES) {
86+
break;
87+
}
88+
89+
*node = g_malloc0(sizeof(**node));
90+
(*node)->value = value;
91+
node = &(*node)->next;
92+
} while (true);
93+
94+
visit_type_uint16List(v, &host_nodes, name, errp);
95+
}
96+
97+
static void
98+
host_memory_backend_set_host_nodes(Object *obj, Visitor *v, void *opaque,
99+
const char *name, Error **errp)
100+
{
101+
#ifdef CONFIG_NUMA
102+
HostMemoryBackend *backend = MEMORY_BACKEND(obj);
103+
uint16List *l = NULL;
104+
105+
visit_type_uint16List(v, &l, name, errp);
106+
107+
while (l) {
108+
bitmap_set(backend->host_nodes, l->value, 1);
109+
l = l->next;
110+
}
111+
#else
112+
error_setg(errp, "NUMA node binding are not supported by this QEMU");
113+
#endif
114+
}
115+
116+
static void
117+
host_memory_backend_get_policy(Object *obj, Visitor *v, void *opaque,
118+
const char *name, Error **errp)
119+
{
120+
HostMemoryBackend *backend = MEMORY_BACKEND(obj);
121+
int policy = backend->policy;
122+
123+
visit_type_enum(v, &policy, HostMemPolicy_lookup, NULL, name, errp);
124+
}
125+
126+
static void
127+
host_memory_backend_set_policy(Object *obj, Visitor *v, void *opaque,
128+
const char *name, Error **errp)
129+
{
130+
HostMemoryBackend *backend = MEMORY_BACKEND(obj);
131+
int policy;
132+
133+
visit_type_enum(v, &policy, HostMemPolicy_lookup, NULL, name, errp);
134+
backend->policy = policy;
135+
136+
#ifndef CONFIG_NUMA
137+
if (policy != HOST_MEM_POLICY_DEFAULT) {
138+
error_setg(errp, "NUMA policies are not supported by this QEMU");
139+
}
140+
#endif
141+
}
142+
56143
static bool host_memory_backend_get_merge(Object *obj, Error **errp)
57144
{
58145
HostMemoryBackend *backend = MEMORY_BACKEND(obj);
@@ -162,6 +249,12 @@ static void host_memory_backend_init(Object *obj)
162249
object_property_add(obj, "size", "int",
163250
host_memory_backend_get_size,
164251
host_memory_backend_set_size, NULL, NULL, NULL);
252+
object_property_add(obj, "host-nodes", "int",
253+
host_memory_backend_get_host_nodes,
254+
host_memory_backend_set_host_nodes, NULL, NULL, NULL);
255+
object_property_add(obj, "policy", "str",
256+
host_memory_backend_get_policy,
257+
host_memory_backend_set_policy, NULL, NULL, NULL);
165258
}
166259

167260
static void host_memory_backend_finalize(Object *obj)
@@ -204,6 +297,47 @@ host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
204297
if (!backend->dump) {
205298
qemu_madvise(ptr, sz, QEMU_MADV_DONTDUMP);
206299
}
300+
#ifdef CONFIG_NUMA
301+
unsigned long lastbit = find_last_bit(backend->host_nodes, MAX_NODES);
302+
/* lastbit == MAX_NODES means maxnode = 0 */
303+
unsigned long maxnode = (lastbit + 1) % (MAX_NODES + 1);
304+
/* ensure policy won't be ignored in case memory is preallocated
305+
* before mbind(). note: MPOL_MF_STRICT is ignored on hugepages so
306+
* this doesn't catch hugepage case. */
307+
unsigned flags = MPOL_MF_STRICT;
308+
309+
/* check for invalid host-nodes and policies and give more verbose
310+
* error messages than mbind(). */
311+
if (maxnode && backend->policy == MPOL_DEFAULT) {
312+
error_setg(errp, "host-nodes must be empty for policy default,"
313+
" or you should explicitly specify a policy other"
314+
" than default");
315+
return;
316+
} else if (maxnode == 0 && backend->policy != MPOL_DEFAULT) {
317+
error_setg(errp, "host-nodes must be set for policy %s",
318+
HostMemPolicy_lookup[backend->policy]);
319+
return;
320+
}
321+
322+
/* We can have up to MAX_NODES nodes, but we need to pass maxnode+1
323+
* as argument to mbind() due to an old Linux bug (feature?) which
324+
* cuts off the last specified node. This means backend->host_nodes
325+
* must have MAX_NODES+1 bits available.
326+
*/
327+
assert(sizeof(backend->host_nodes) >=
328+
BITS_TO_LONGS(MAX_NODES + 1) * sizeof(unsigned long));
329+
assert(maxnode <= MAX_NODES);
330+
if (mbind(ptr, sz, backend->policy,
331+
maxnode ? backend->host_nodes : NULL, maxnode + 1, flags)) {
332+
error_setg_errno(errp, errno,
333+
"cannot bind memory to host NUMA nodes");
334+
return;
335+
}
336+
#endif
337+
/* Preallocate memory after the NUMA policy has been instantiated.
338+
* This is necessary to guarantee memory is allocated with
339+
* specified NUMA policy in place.
340+
*/
207341
if (backend->prealloc) {
208342
os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz);
209343
}

include/sysemu/hostmem.h

+4
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,12 @@
1212
#ifndef QEMU_RAM_H
1313
#define QEMU_RAM_H
1414

15+
#include "sysemu/sysemu.h" /* for MAX_NODES */
1516
#include "qom/object.h"
1617
#include "qapi/error.h"
1718
#include "exec/memory.h"
1819
#include "qemu/option.h"
20+
#include "qemu/bitmap.h"
1921

2022
#define TYPE_MEMORY_BACKEND "memory-backend"
2123
#define MEMORY_BACKEND(obj) \
@@ -54,6 +56,8 @@ struct HostMemoryBackend {
5456
uint64_t size;
5557
bool merge, dump;
5658
bool prealloc, force_prealloc;
59+
DECLARE_BITMAP(host_nodes, MAX_NODES + 1);
60+
HostMemPolicy policy;
5761

5862
MemoryRegion mr;
5963
};

qapi-schema.json

+20
Original file line numberDiff line numberDiff line change
@@ -3134,3 +3134,23 @@
31343134
'*cpus': ['uint16'],
31353135
'*mem': 'size',
31363136
'*memdev': 'str' }}
3137+
3138+
##
3139+
# @HostMemPolicy
3140+
#
3141+
# Host memory policy types
3142+
#
3143+
# @default: restore default policy, remove any nondefault policy
3144+
#
3145+
# @preferred: set the preferred host nodes for allocation
3146+
#
3147+
# @bind: a strict policy that restricts memory allocation to the
3148+
# host nodes specified
3149+
#
3150+
# @interleave: memory allocations are interleaved across the set
3151+
# of host nodes specified
3152+
#
3153+
# Since 2.1
3154+
##
3155+
{ 'enum': 'HostMemPolicy',
3156+
'data': [ 'default', 'preferred', 'bind', 'interleave' ] }

0 commit comments

Comments
 (0)