* [PATCH v6 5/7] xen/mm: Introduce NUMA-aware memory claim sets
2026-04-14 13:15 [PATCH v6 0/7] xen/mm: Introduce NUMA-aware claim sets for domains Bernhard Kaindl
` (3 preceding siblings ...)
2026-04-14 13:22 ` [PATCH v6 4/7] xen/mm: Split outstanding claims into global and node totals Bernhard Kaindl
@ 2026-04-14 13:22 ` Bernhard Kaindl
2026-04-14 13:22 ` [PATCH v6 6/7] tools/ocaml/libs/xc: Add an OCaml binding for NUMA-aware claims Bernhard Kaindl
2026-04-14 13:22 ` [PATCH v6 7/7] tools/tests/mem-claim: Add a test suite for the memory claim API Bernhard Kaindl
6 siblings, 0 replies; 15+ messages in thread
From: Bernhard Kaindl @ 2026-04-14 13:22 UTC (permalink / raw)
To: xen-devel
Cc: Bernhard Kaindl, Anthony PERARD, Juergen Gross, Andrew Cooper,
Michal Orzel, Jan Beulich, Julien Grall, Roger Pau Monné,
Stefano Stabellini, Daniel P. Smith, Marcus Granado,
Alejandro Vallejo
Extend Xen's memory claim handling to support claim sets spanning
multiple NUMA nodes.
Earlier review raised the concern that claims would need to evolve from a
single claim into a multi-node model. Roger Pau Monné described the core
requirement well:
> Ideally, we would need to introduce a new hypercall that allows
> making claims from multiple nodes in a single locked region,
> as to ensure success or failure in an atomic way.
-- Roger Pau Monné
Implement that model.
A claim set can contain multiple node-specific claims and an optional
global claim for memory that may come from any NUMA node. The new domctl
installs the full claim set atomically, and the allocator is updated so
that claim checks and claim consumption follow the new semantics.
This adds:
1. Installing multi-node claim sets atomically,
2. protecting claimed pages from other claim requests and allocations,
3. redeeming claims for allocations with claims,
4. while keeping the legacy claim interface without functional changes.
Suggested-by: Jan Beulich <jbeulich@suse.com>
Suggested-by: Marcus Granado <marcus.granado@citrix.com>
Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
Signed-off-by: Bernhard Kaindl <bernhard.kaindl@citrix.com>
---
The v2 design document submitted ahead of this series may help with review.
It explains the background, design rationale, and implementation details.
Rendered version:
https://bernhard-xen.readthedocs.io/en/claim-sets-v2-design/designs/claims
Many thanks to everyone who contributed to the earlier work and review,
especially Alejandro Vallejo, Jan Beulich, Andrew Cooper, Roger Pau
Monné, Marcus Granado, and Edwin Török.
Thanks,
Bernhard
---
tools/include/xenctrl.h | 4 +
tools/libs/ctrl/xc_domain.c | 38 ++++
xen/common/domctl.c | 54 ++++++
xen/common/page_alloc.c | 277 +++++++++++++++++++++++++++-
xen/include/public/domctl.h | 32 ++++
xen/include/public/memory.h | 9 +
xen/include/xen/mm.h | 3 +
xen/include/xen/sched.h | 6 +
xen/xsm/flask/hooks.c | 1 +
xen/xsm/flask/policy/access_vectors | 1 +
10 files changed, 420 insertions(+), 5 deletions(-)
diff --git a/tools/include/xenctrl.h b/tools/include/xenctrl.h
index d5dbf69c8968..a0a9f2143b32 100644
--- a/tools/include/xenctrl.h
+++ b/tools/include/xenctrl.h
@@ -2659,6 +2659,10 @@ int xc_domain_set_llc_colors(xc_interface *xch, uint32_t domid,
const uint32_t *llc_colors,
uint32_t num_llc_colors);
+int xc_domain_claim_memory(xc_interface *xch, uint32_t domid,
+ uint32_t nr_claims,
+ memory_claim_t *claims);
+
#if defined(__arm__) || defined(__aarch64__)
int xc_dt_overlay(xc_interface *xch, void *overlay_fdt,
uint32_t overlay_fdt_size, uint8_t overlay_op);
diff --git a/tools/libs/ctrl/xc_domain.c b/tools/libs/ctrl/xc_domain.c
index 01c0669c8863..32cd91187d44 100644
--- a/tools/libs/ctrl/xc_domain.c
+++ b/tools/libs/ctrl/xc_domain.c
@@ -1070,6 +1070,44 @@ int xc_domain_remove_from_physmap(xc_interface *xch,
return xc_memory_op(xch, XENMEM_remove_from_physmap, &xrfp, sizeof(xrfp));
}
+/* Claim the guest memory for a domain before starting the domain build */
+int xc_domain_claim_memory(xc_interface *xch,
+ uint32_t domid,
+ uint32_t nr_claims,
+ memory_claim_t *claims)
+{
+ struct xen_domctl domctl = {};
+ DECLARE_HYPERCALL_BOUNCE(claims, sizeof(*claims) * nr_claims,
+ XC_HYPERCALL_BUFFER_BOUNCE_IN);
+ int ret;
+
+ if ( xc_hypercall_bounce_pre(xch, claims) )
+ return -1;
+
+ domctl.cmd = XEN_DOMCTL_claim_memory;
+ domctl.domain = domid;
+ domctl.u.claim_memory.nr_claims = nr_claims;
+ set_xen_guest_handle(domctl.u.claim_memory.claims, claims);
+
+ ret = do_domctl(xch, &domctl);
+
+ xc_hypercall_bounce_post(xch, claims);
+
+ return ret;
+}
+
+/*
+ * Legacy API for claiming pages, replaced by xc_domain_claim_memory()
+ *
+ * Note: This function is deprecated in favour of xc_domain_claim_memory(),
+ * which provides the same claim semantics and can therefore be used as a
+ * drop-in replacement; it also adds NUMA-node-specific claims.
+ * This function should not be used by new code.
+ *
+ * See the following documentation pages for more information:
+ * docs/guest-guide/dom/DOMCTL_claim_memory.rst
+ * docs/guest-guide/mem/XENMEM_claim_pages.rst
+ */
int xc_domain_claim_pages(xc_interface *xch,
uint32_t domid,
unsigned long nr_pages)
diff --git a/xen/common/domctl.c b/xen/common/domctl.c
index 2cc5a4ff32fd..16d239c75bde 100644
--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -51,6 +51,51 @@ static int xenctl_bitmap_to_nodemask(nodemask_t *nodemask,
MAX_NUMNODES);
}
+/* Claim memory for a domain (or if a claim exists, release the claim) */
+static int claim_memory(struct domain *d,
+ const struct xen_domctl_claim_memory *uinfo)
+{
+ memory_claim_t *claims;
+ int rc = -EFAULT;
+
+ /* alloc_color_heap_page() does not handle claims, reject LLC coloring. */
+ if ( llc_coloring_enabled )
+ return -EOPNOTSUPP;
+
+ if ( !uinfo->nr_claims || uinfo->pad )
+ return -EINVAL;
+
+ /* Only calls for the supported number of nodes + a global claim can pass */
+ if ( uinfo->nr_claims > MAX_NUMNODES + 1 )
+ return -E2BIG;
+
+ /*
+ * Under domctl_lock, domain_kill() sets d->is_dying and retires claims.
+ * If it is set, that has already happened, so reject the claim request.
+ */
+ if ( d->is_dying )
+ return -ESRCH;
+
+ claims = xmalloc_array(memory_claim_t, uinfo->nr_claims);
+ if ( claims == NULL )
+ return -ENOMEM;
+
+ if ( copy_from_guest(claims, uinfo->claims, uinfo->nr_claims) )
+ goto out;
+
+ rc = -EINVAL; /* Default error code for invalid claim args */
+ if ( claims[0].target == XEN_DOMCTL_CLAIM_MEMORY_LEGACY &&
+ uinfo->nr_claims == 1 )
+ /* Implement installing a legacy claim for backwards compatibility */
+ rc = domain_set_outstanding_pages(d, claims[0].pages);
+ else
+ /* domain_install_claim_set() performs validation of the claim set */
+ rc = domain_install_claim_set(d, uinfo->nr_claims, claims);
+ out:
+ xfree(claims);
+ return rc;
+}
+
void getdomaininfo(struct domain *d, struct xen_domctl_getdomaininfo *info)
{
struct vcpu *v;
@@ -865,6 +910,15 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
copyback = true;
break;
+ case XEN_DOMCTL_claim_memory:
+ /* Use the same XSM hook as XENMEM_claim_pages */
+ ret = xsm_claim_pages(XSM_PRIV, d);
+ if ( ret )
+ break;
+
+ ret = claim_memory(d, &op->u.claim_memory);
+ break;
+
default:
ret = arch_do_domctl(op, d, u_domctl);
break;
diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index ee4942f93373..6101bd6be9a9 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -502,6 +502,9 @@ static long node_avail_pages[MAX_NUMNODES];
static DEFINE_SPINLOCK(heap_lock);
static long outstanding_claims; /* total outstanding claims by all domains */
+/* Sum of the outstanding claims of all domains on that node. */
+static long node_outstanding_claims[MAX_NUMNODES];
+
static unsigned long avail_heap_pages(
unsigned int zone_lo, unsigned int zone_hi, unsigned int node)
{
@@ -544,6 +547,164 @@ static unsigned long deduct_global_claims(struct domain *d,
return reduction;
}
+/* Release outstanding claims on a specific node */
+static unsigned long deduct_node_claims(struct domain *d, nodeid_t node,
+ unsigned long release)
+{
+ unsigned long consumed = 0;
+
+ /* If the allocation was larger than the claims, do not release beyond it */
+ if ( d->claims[node] ) /* Release the claims for this node */
+ {
+ /* Use min_t for clarity to make the comparison type explicit */
+ consumed = min_t(unsigned long, release, d->claims[node]);
+ d->claims[node] -= consumed;
+
+ ASSERT(consumed <= outstanding_claims);
+ outstanding_claims -= consumed;
+
+ ASSERT(consumed <= node_outstanding_claims[node]);
+ node_outstanding_claims[node] -= consumed;
+
+ ASSERT(consumed <= d->node_claims);
+ d->node_claims -= consumed;
+ }
+ return consumed;
+}
+
+/* Release all outstanding claims on all online nodes */
+static void cancel_all_node_claims(struct domain *d)
+{
+ nodeid_t node;
+
+ for_each_online_node ( node )
+ {
+ if ( !d->node_claims )
+ return;
+ deduct_node_claims(d, node, d->claims[node]);
+ }
+}
+
+/* Validate a claim set and install it if valid. */
+int domain_install_claim_set(struct domain *d, unsigned int nr_claims,
+ memory_claim_t *claim_set)
+{
+ nodemask_t nodes;
+ uint32_t target;
+ uint64_t pages, tot_pages, max_pages, node_requests = 0, global_request = 0;
+ bool global_target_seen = false;
+ int ret = -EINVAL;
+
+ nrspin_lock(&d->page_alloc_lock);
+ spin_lock(&heap_lock);
+
+ /*
+ * Phase 1: Check and process the individual claim entries.
+ * Result: Extracted global claims and the sum of the node-local claims.
+ */
+ nodes_clear(nodes);
+ for ( int i = 0; i < nr_claims; i++ )
+ {
+ target = claim_set[i].target;
+ pages = claim_set[i].pages;
+ if ( claim_set[i].pad ) /* Check before handling global claims */
+ goto out;
+
+ if ( target == XEN_DOMCTL_CLAIM_MEMORY_GLOBAL )
+ {
+ if ( global_target_seen ) /* Only one global target allowed */
+ goto out;
+
+ global_request = pages; /* Will be checked with the total claims */
+ global_target_seen = true;
+ continue;
+ }
+
+ if ( target >= MAX_NUMNODES || !node_online(target) )
+ {
+ ret = -ENOENT; /* Node not valid or not online */
+ goto out;
+ }
+ if ( nodemask_test(target, &nodes) || /* Duplicate node in claim set */
+ /* Check for overflow before we add the pages to node_requests */
+ pages > ((typeof(d->node_claims))-1) - node_requests )
+ goto out;
+ node_set(target, nodes);
+
+ ASSERT(node_outstanding_claims[target] >= 0 &&
+ node_avail_pages[target] >= node_outstanding_claims[target]);
+ if ( pages > (uint64_t)(node_avail_pages[target] -
+ node_outstanding_claims[target]) +
+ d->claims[target] ) /* current claim deduction */
+ {
+ ret = -ENOMEM; /* Claim exceeds available memory on node */
+ goto out;
+ }
+ node_requests += pages; /* Will be checked with the total claims */
+ }
+
+ max_pages = d->max_pages;
+ tot_pages = domain_tot_pages(d);
+ /*
+ * Phase 2: Check that claims plus current pages fit within d->max_pages.
+ * Subtract sequentially from max_pages instead of adding the claims:
+ * each subtraction is safe because the prior comparison guarantees
+ * the subtrahend doesn't exceed the remaining budget. This avoids
+ * overflow regardless of the type widths of the claims and max_pages,
+ * as long as none of the operands is negative, which holds here because
+ * the claims, tot_pages and max_pages are all held in uint64_t variables.
+ */
+ if ( global_request > max_pages ||
+ node_requests > max_pages - global_request ||
+ tot_pages > max_pages - global_request - node_requests )
+ goto out; /* claims + domain_tot_pages exceeds max_pages: EINVAL */
+
+ /* States the invariants we are committed to for the next check */
+ ASSERT(outstanding_claims >= 0 && total_avail_pages >= outstanding_claims);
+ /*
+ * Ensure swapping claims fits into the global unclaimed pages:
+ *
+ * Compare the new claims (global_request + node_requests) against the
+ * available unclaimed memory (total_avail_pages - outstanding_claims)
+ * plus the domain's current claims (d->global_claims + d->node_claims)
+ * because they will be deducted when installing the new claim state.
+ */
+ if ( global_request + node_requests >
+ (uint64_t)(total_avail_pages - outstanding_claims) + d->global_claims +
+ d->node_claims )
+ {
+ ret = -ENOMEM;
+ goto out; /* Claim update exceeds available unclaimed memory: ENOMEM */
+ }
+
+ /* Phase 3: Remove the old claims before installing the new claims */
+ deduct_global_claims(d, d->global_claims);
+ cancel_all_node_claims(d);
+
+ /* Phase 4: Install new claims: Global claims and overall node claims */
+ outstanding_claims += global_request + node_requests;
+ d->global_claims = global_request;
+ d->node_claims = node_requests;
+
+ /* Phase 5: Install the individual node claims */
+ for ( int i = 0; i < nr_claims; i++ )
+ {
+ target = claim_set[i].target;
+ pages = claim_set[i].pages;
+ if ( target >= MAX_NUMNODES ) /* Skip non-node claim targets */
+ continue;
+
+ /* Install the node claim and update the node's total claims. */
+ d->claims[target] = pages;
+ node_outstanding_claims[target] += pages;
+ }
+ ret = 0;
+out:
+ spin_unlock(&heap_lock);
+ nrspin_unlock(&d->page_alloc_lock);
+ return ret;
+}
+
int domain_set_outstanding_pages(struct domain *d, unsigned long pages)
{
int ret = -ENOMEM;
@@ -560,6 +721,7 @@ int domain_set_outstanding_pages(struct domain *d, unsigned long pages)
/* pages==0 means "unset" the claim. */
if ( pages == 0 )
{
+ cancel_all_node_claims(d);
deduct_global_claims(d, d->global_claims);
ret = 0;
goto out;
@@ -603,6 +765,85 @@ out:
return ret;
}
+/*
+ * Redeem the claims to cover a successful allocation.
+ *
+ * The claims for an allocation are redeemed in order of specificity:
+ *
+ * 1.) Firstly, the claim for the node the allocation was from (if any),
+ * 2.) Secondly, followed by global claims as a fallback (if any), and
+ * 3.) Finally, other remaining node claims from other nodes (if any).
+ *
+ * This order ensures that the most specific claims are redeemed first,
+ * and that the global claim is only used as a fallback if the node-specific
+ * claim is insufficient to cover the allocation or non-existent in case
+ * the domain doesn't have a claim on the alloc_node. This preserves the
+ * intent of the claims system to allow domains to reserve memory on
+ * specific nodes and only use global claims as a fallback when necessary.
+ *
+ * Finally, if the domain doesn't have sufficient claims on the alloc_node
+ * and the global fallback claim doesn't cover the remains of the allocation,
+ * but the domain still has other node claims, redeem the remaining portion
+ * of the allocation from those claims.
+ *
+ * This final step ensures that the domain's total claims (global + node
+ * claims) are always redeemed to cover the allocation, which in turn
+ * prevents domain_tot_pages(d) + d->global_claims + d->node_claims
+ * from exceeding the domain's d->max_pages limit.
+ *
+ * If this final step was not taken, then it would be possible for a domain to
+ * block off more than its share of system memory as defined by d->max_pages.
+ *
+ * Note: We can't reliably check d->max_pages here, at least not under the
+ * d->page_alloc_lock as taking it here would violate the locking order,
+ * but we can ensure that the domain's claims are always redeemed to cover
+ * the allocation, which is sufficient to preserve the intent of d->max_pages
+ * to cap the domain's memory usage and claims at d->max_pages. While we could
+ * add checking it without lock as a best effort check, a domain builder is
+ * not supposed to claim memory on one set of nodes and then allocate from
+ * another set of nodes, so it would not be meaningful to add such a check,
+ * and the final check covers our bases to handle such misbehaviour anyway.
+ */
+static void redeem_claims_for_allocation(struct domain *d,
+ unsigned long pages_to_redeem,
+ nodeid_t alloc_node)
+{
+ ASSERT(spin_is_locked(&heap_lock));
+
+ /* 1.) Redeem the allocation from the alloc_node's claim insofar possible */
+ if ( d->claims[alloc_node] )
+ {
+ pages_to_redeem -= deduct_node_claims(d, alloc_node, pages_to_redeem);
+ if ( pages_to_redeem == 0 )
+ return; /* The allocation is fully consumed by alloc_node claim */
+ }
+
+ /* 2.) Redeem the unredeemed allocation from the global / fallback claims */
+ pages_to_redeem -= deduct_global_claims(d, pages_to_redeem);
+
+ if ( pages_to_redeem == 0 || !d->node_claims )
+ return; /* Allocation fully consumed or the domain has no claims left */
+
+ /*
+ * 3.) Finally, if the domain doesn't have sufficient claims on alloc_node
+ * and the global fallback claim didn't cover the remains of the allocation,
+ * consume the remaining portion of the allocation from the domain's other
+ * node claims. See the function comment for the details behind this step.
+ */
+ pages_to_redeem = min(pages_to_redeem, d->node_claims + 0UL);
+ if ( pages_to_redeem )
+ {
+ nodeid_t node;
+
+ for_each_online_node ( node )
+ {
+ pages_to_redeem -= deduct_node_claims(d, node, pages_to_redeem);
+ if ( pages_to_redeem == 0 )
+ break;
+ }
+ }
+}
+
#ifdef CONFIG_SYSCTL
void get_outstanding_claims(uint64_t *free_pages, uint64_t *outstanding_pages)
{
@@ -888,7 +1129,7 @@ static bool claims_permit_request(const struct domain *d,
unsigned long avail_pages,
unsigned long competing_claims,
unsigned int memflags,
- unsigned long requested_pages)
+ unsigned long requested_pages, nodeid_t node)
{
unsigned long unclaimed_pages, applicable_claims;
@@ -915,6 +1156,24 @@ static bool claims_permit_request(const struct domain *d,
* Allow the request to proceed when combination of unclaimed pages and the
* claims held by the domain cover the shortfall for the requested_pages.
*/
+ if ( node == NUMA_NO_NODE )
+ /*
+ * We are checking global claims: For the global claims check,
+ * we apply the domain's global claims and node claims: The domain
+ * can use any of its claims to allow the global check to proceed.
+ *
+ * get_free_buddy() will repeat this check using a node which
+ * will check if either the node has enough unclaimed pages
+ * or the domain enough claims for the node (see below).
+ */
+ applicable_claims = d->node_claims + d->global_claims;
+ else
+ /*
+ * We are checking node-specific claims: For the node-specific claims
+ * check, we may only apply the domain's claim on that specific node.
+ */
+ applicable_claims = d->claims[node];
+
return requested_pages <= unclaimed_pages + applicable_claims;
}
@@ -967,9 +1226,15 @@ static struct page_info *get_free_buddy(unsigned int zone_lo,
*/
for ( ; ; )
{
+ /* Ensure the target node and the claims permit this allocation */
+ if ( !claims_permit_request(d, node_avail_pages[node],
+ node_outstanding_claims[node], memflags,
+ 1UL << order, node) )
+ goto try_next_node;
+
zone = zone_hi;
do {
- /* Check if target node can support the allocation. */
+ /* Check if this target zone on node can support the allocation. */
if ( !avail[node] || (avail[node][zone] < (1UL << order)) )
continue;
@@ -996,6 +1261,8 @@ static struct page_info *get_free_buddy(unsigned int zone_lo,
}
} while ( zone-- > zone_lo ); /* careful: unsigned zone may wrap */
+ try_next_node:
+ /* If MEMF_exact_node was passed, we may not skip to a different node */
if ( (memflags & MEMF_exact_node) && req_node != NUMA_NO_NODE )
return NULL;
@@ -1063,7 +1330,7 @@ static struct page_info *alloc_heap_pages(
* is made by a domain with sufficient unclaimed pages.
*/
if ( !claims_permit_request(d, total_avail_pages, outstanding_claims,
- memflags, request) )
+ memflags, request, NUMA_NO_NODE) )
{
spin_unlock(&heap_lock);
return NULL;
@@ -1113,7 +1380,7 @@ static struct page_info *alloc_heap_pages(
total_avail_pages -= request;
ASSERT(total_avail_pages >= 0);
- if ( d && d->global_claims && !(memflags & MEMF_no_refcount) )
+ if ( d && !(memflags & MEMF_no_refcount) )
{
/*
* Adjust claims in the same locked region where total_avail_pages is
@@ -1128,7 +1395,7 @@ static struct page_info *alloc_heap_pages(
* the domain being destroyed before creation is finished. Losing part
* of the claim makes no difference.
*/
- deduct_global_claims(d, request);
+ redeem_claims_for_allocation(d, request, node);
}
check_low_mem_virq();
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index 8f6708c0a7cd..a4f08248e6ee 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -1276,6 +1276,36 @@ struct xen_domctl_get_domain_state {
uint64_t unique_id; /* Unique domain identifier. */
};
+struct xen_memory_claim {
+ uint64_aligned_t pages; /* Number of pages to claim */
+ uint32_t target; /* Node and/or claim type like legacy or a global claim */
+ uint32_t pad; /* Explicit padding: Reserved, initialize to 0 on input */
+};
+typedef struct xen_memory_claim memory_claim_t;
+DEFINE_XEN_GUEST_HANDLE(memory_claim_t);
+
+/* Special claim targets for the target field of memory_claim_t */
+#define XEN_DOMCTL_CLAIM_MEMORY_LEGACY 0x40000000U /* No node, legacy claim */
+#define XEN_DOMCTL_CLAIM_MEMORY_GLOBAL 0x80000000U /* No node, global claim */
+
+/*
+ * XEN_DOMCTL_claim_memory
+ *
+ * Claim memory for a guest domain. It sets aside an amount of memory
+ * (it is not pre-allocated) for the purpose of satisfying future memory
+ * allocation requests for building the guest's physical address space.
+ * Memory can be claimed on a set of nodes, or globally (without node
+ * affinity), or with legacy behavior. The set of claims is a claim set. See
+ * docs/guest-guide/dom/DOMCTL_claim_memory.rst and docs/designs/claims/.
+ */
+struct xen_domctl_claim_memory {
+ /* IN: Array of struct xen_memory_claim */
+ XEN_GUEST_HANDLE_64(memory_claim_t) claims;
+ /* IN: Number of claims in the claims array handle. */
+ uint32_t nr_claims;
+ uint32_t pad; /* Explicit padding: Reserved, initialize to 0 on input */
+};
+
struct xen_domctl {
/* Stable domctl ops: interface_version is required to be 0. */
uint32_t cmd;
@@ -1368,6 +1398,7 @@ struct xen_domctl {
#define XEN_DOMCTL_gsi_permission 88
#define XEN_DOMCTL_set_llc_colors 89
#define XEN_DOMCTL_get_domain_state 90 /* stable interface */
+#define XEN_DOMCTL_claim_memory 91
#define XEN_DOMCTL_gdbsx_guestmemio 1000
#define XEN_DOMCTL_gdbsx_pausevcpu 1001
#define XEN_DOMCTL_gdbsx_unpausevcpu 1002
@@ -1436,6 +1467,7 @@ struct xen_domctl {
#endif
struct xen_domctl_set_llc_colors set_llc_colors;
struct xen_domctl_get_domain_state get_domain_state;
+ struct xen_domctl_claim_memory claim_memory;
uint8_t pad[128];
} u;
};
diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
index bd9fc37b5297..ccec095f8660 100644
--- a/xen/include/public/memory.h
+++ b/xen/include/public/memory.h
@@ -569,6 +569,15 @@ DEFINE_XEN_GUEST_HANDLE(xen_mem_sharing_op_t);
* for 10, only 7 additional pages are claimed.
*
* Caller must be privileged or the hypercall fails.
+ *
+ * Note: This hypercall is deprecated in favour of XEN_DOMCTL_claim_memory,
+ * which provides the same claim semantics described above, and thus can be
+ * used as a drop-in replacement; it also adds NUMA-node-specific claims.
+ * This hypercall should not be used by new code.
+ *
+ * See the following documentation pages for more information:
+ * docs/guest-guide/dom/DOMCTL_claim_memory.rst
+ * docs/guest-guide/mem/XENMEM_claim_pages.rst
*/
#define XENMEM_claim_pages 24
diff --git a/xen/include/xen/mm.h b/xen/include/xen/mm.h
index b80bec00c124..b2d6a6f266ba 100644
--- a/xen/include/xen/mm.h
+++ b/xen/include/xen/mm.h
@@ -69,6 +69,7 @@
#include <xen/list.h>
#include <xen/spinlock.h>
#include <xen/perfc.h>
+#include <public/domctl.h>
#include <public/memory.h>
struct page_info;
@@ -131,6 +132,8 @@ int populate_pt_range(unsigned long virt, unsigned long nr_mfns);
/* Claim handling */
unsigned long __must_check domain_adjust_tot_pages(struct domain *d,
long pages);
+int domain_install_claim_set(struct domain *d, unsigned int nr_claims,
+ memory_claim_t *claims);
int domain_set_outstanding_pages(struct domain *d, unsigned long pages);
void get_outstanding_claims(uint64_t *free_pages, uint64_t *outstanding_pages);
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 2995c99aa34a..d31f4546e3b0 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -417,6 +417,12 @@ struct domain
/* All claims are protected by the heap_lock */
unsigned int global_claims; /* Global Host-level claims */
unsigned int node_claims; /* Sum of the Node-specific claims */
+ /*
+ * struct domain is allocated in a dedicated page with more than enough
+ * spare capacity. It's efficient to use that page to store the
+ * per-node claims in it instead of allocating a separate array for it.
+ */
+ unsigned int claims[MAX_NUMNODES]; /* Array of per-NUMA-node claims */
unsigned int max_pages; /* maximum value for domain_tot_pages() */
unsigned int extra_pages; /* pages not included in domain_tot_pages() */
diff --git a/xen/xsm/flask/hooks.c b/xen/xsm/flask/hooks.c
index 28522dcbd271..9e999ac1ba73 100644
--- a/xen/xsm/flask/hooks.c
+++ b/xen/xsm/flask/hooks.c
@@ -651,6 +651,7 @@ static int cf_check flask_domctl(struct domain *d, unsigned int cmd,
/* These have individual XSM hooks (common/domctl.c) */
case XEN_DOMCTL_getdomaininfo:
+ case XEN_DOMCTL_claim_memory:
case XEN_DOMCTL_scheduler_op:
case XEN_DOMCTL_irq_permission:
case XEN_DOMCTL_iomem_permission:
diff --git a/xen/xsm/flask/policy/access_vectors b/xen/xsm/flask/policy/access_vectors
index bbb9c117ec4a..e7defa117a16 100644
--- a/xen/xsm/flask/policy/access_vectors
+++ b/xen/xsm/flask/policy/access_vectors
@@ -216,6 +216,7 @@ class domain2
# XEN_DOMCTL_scheduler_op with XEN_DOMCTL_SCHEDOP_putinfo
setscheduler
# XENMEM_claim_pages
+# XEN_DOMCTL_claim_memory
setclaim
# XEN_DOMCTL_cacheflush
cacheflush
--
2.39.5
^ permalink raw reply related [flat|nested] 15+ messages in thread* [PATCH v6 7/7] tools/tests/mem-claim: Add a test suite for the memory claim API
2026-04-14 13:15 [PATCH v6 0/7] xen/mm: Introduce NUMA-aware claim sets for domains Bernhard Kaindl
` (5 preceding siblings ...)
2026-04-14 13:22 ` [PATCH v6 6/7] tools/ocaml/libs/xc: Add an OCaml binding for NUMA-aware claims Bernhard Kaindl
@ 2026-04-14 13:22 ` Bernhard Kaindl
6 siblings, 0 replies; 15+ messages in thread
From: Bernhard Kaindl @ 2026-04-14 13:22 UTC (permalink / raw)
To: xen-devel; +Cc: Bernhard Kaindl, Anthony PERARD
Add a comprehensive test suite for the xc_domain_claim_memory() API
that covers argument validation and the protection semantics of
memory claims across domains and NUMA nodes.
The suite verifies claim behaviour both globally and per-node, and
checks how claims interact with allocations and replacement semantics.
Key checks include:
- Argument validation and correct errno handling for invalid inputs.
- NUMA-aware claim sets, covering node-specific and global targets.
- Replacement semantics after allocations (global and node-scoped).
- Inter-domain protection and claim movement across NUMA nodes.
Signed-off-by: Bernhard Kaindl <bernhard.kaindl@citrix.com>
---
tools/tests/mem-claim/.gitignore | 1 +
tools/tests/mem-claim/Makefile | 17 +-
tools/tests/mem-claim/accounting-1.h | 401 +++++++++
tools/tests/mem-claim/input-phase1.h | 171 ++++
tools/tests/mem-claim/input-phase2.h | 91 ++
tools/tests/mem-claim/libtestclaims.c | 995 ++++++++++++++++++++++
tools/tests/mem-claim/libtestclaims.h | 202 +++++
tools/tests/mem-claim/test-claim-memory.c | 129 +++
8 files changed, 2000 insertions(+), 7 deletions(-)
create mode 100644 tools/tests/mem-claim/accounting-1.h
create mode 100644 tools/tests/mem-claim/input-phase1.h
create mode 100644 tools/tests/mem-claim/input-phase2.h
create mode 100644 tools/tests/mem-claim/libtestclaims.c
create mode 100644 tools/tests/mem-claim/libtestclaims.h
create mode 100644 tools/tests/mem-claim/test-claim-memory.c
diff --git a/tools/tests/mem-claim/.gitignore b/tools/tests/mem-claim/.gitignore
index cfcee00b819b..08365576b22b 100644
--- a/tools/tests/mem-claim/.gitignore
+++ b/tools/tests/mem-claim/.gitignore
@@ -1 +1,2 @@
test-mem-claim
+test-claim-memory
diff --git a/tools/tests/mem-claim/Makefile b/tools/tests/mem-claim/Makefile
index 76ba3e3c8bef..77decd4b0d05 100644
--- a/tools/tests/mem-claim/Makefile
+++ b/tools/tests/mem-claim/Makefile
@@ -1,14 +1,16 @@
XEN_ROOT = $(CURDIR)/../../..
include $(XEN_ROOT)/tools/Rules.mk
-TARGET := test-mem-claim
+TARGETS := test-mem-claim test-claim-memory
+
+LIB_OBJ := libtestclaims.o
.PHONY: all
-all: $(TARGET)
+all: $(TARGETS)
.PHONY: clean
clean:
- $(RM) -- *.o $(TARGET) $(DEPS_RM)
+ $(RM) -- *.o $(TARGETS) $(DEPS_RM)
.PHONY: distclean
distclean: clean
@@ -17,11 +19,11 @@ distclean: clean
.PHONY: install
install: all
$(INSTALL_DIR) $(DESTDIR)$(LIBEXEC)/tests
- $(INSTALL_PROG) $(TARGET) $(DESTDIR)$(LIBEXEC)/tests
+ $(INSTALL_PROG) $(TARGETS) $(DESTDIR)$(LIBEXEC)/tests
.PHONY: uninstall
uninstall:
- $(RM) -- $(DESTDIR)$(LIBEXEC)/tests/$(TARGET)
+ $(RM) -- $(patsubst %,$(DESTDIR)$(LIBEXEC)/tests/%,$(TARGETS))
CFLAGS += $(CFLAGS_xeninclude)
CFLAGS += $(CFLAGS_libxenctrl)
@@ -32,7 +34,8 @@ LDFLAGS += $(APPEND_LDFLAGS)
%.o: Makefile
-$(TARGET): test-mem-claim.o
- $(CC) -o $@ $< $(LDFLAGS)
+
+$(TARGETS): %: %.o $(LIB_OBJ)
+ $(CC) -o $@ $^ $(LDFLAGS)
-include $(DEPS_INCLUDE)
diff --git a/tools/tests/mem-claim/accounting-1.h b/tools/tests/mem-claim/accounting-1.h
new file mode 100644
index 000000000000..70bae2215702
--- /dev/null
+++ b/tools/tests/mem-claim/accounting-1.h
@@ -0,0 +1,401 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * accounting-1.h - Test claiming memory and claims protection
+ * with NUMA-aware claim sets.
+ *
+ * Check claiming memory and allocation against claims with NUMA-aware
+ * claim sets, including:
+ *
+ * - Claiming all or nearly all free memory on a node or globally and
+ * verifying the claim is reflected in physinfo and that memory can
+ * be allocated against the claim.
+ * - Exercise xc_domain_claim_memory() with different valid claim sets.
+ * - Exercise xc_domain_claim_pages() and verify that claims are reflected
+ * in the outstanding pages in Xen.
+ */
+#include "libtestclaims.h"
+
+/*
+ * A1-1: basic single-node claim is tracked in outstanding pages and released
+ * when the domain is destroyed.
+ *
+ * Smoke test: claims all but one of the primary node's free pages, verifies
+ * they appear in physinfo.outstanding_pages, then returns. The fixture destroys
+ * the domain, which releases the claim, and verifies outstanding pages returns
+ * to the pre-test baseline.
+ *
+ * Skipped when there are fewer than 2 free pages available on the primary node.
+ *
+ * Returns 0 on success, otherwise the non-zero result of the failing helper.
+ */
+static int test_basic_node_claim(struct test_ctx *ctx)
+{
+    uint64_t pre_existing_claims, free_pages = 0;
+    memory_claim_t claim;
+
+    /* Get the free memory on the test node; stop if the query itself fails. */
+    ctx->target1 = ctx->env->primary_node;
+    rc = lib_get_node_free_pages(ctx, ctx->target1, &free_pages, NULL);
+    if ( rc )
+        return rc;
+
+    /* Too little memory is an unmet precondition, not a test failure. */
+    if ( free_pages < 2 )
+        return lib_skip_test(
+            ctx, "need >= 2 free pages on node %" PRIu64 ", got %" PRIu64,
+            ctx->target1, free_pages);
+
+    /*
+     * Leave one page of slack between the claim and the free pages
+     * so claim does not fail due to a small concurrent allocation.
+     */
+    ctx->alloc_pages = free_pages - 1;
+    snprintf(ctx->result->params, sizeof(ctx->result->params),
+             "node=%" PRIu64 " claim_pages=%" PRIu64 " free_pages=%" PRIu64,
+             ctx->target1, ctx->alloc_pages, free_pages);
+
+    /*
+     * Capture the baseline after domain creation (domain creation doesn't
+     * change outstanding pages, so this equals the fixture's baseline).
+     */
+    rc = lib_get_total_claims(ctx, &pre_existing_claims);
+    if ( rc )
+        return rc;
+
+    claim = (memory_claim_t){.pages = ctx->alloc_pages, .target = ctx->target1};
+    rc = lib_claim_memory(ctx, ctx->dom_1, 1, &claim, "set basic node claim");
+    if ( rc )
+        return rc;
+
+    /* Domain teardown releases claim; fixture verifies baseline is restored. */
+    return lib_check_claim(
+        ctx, pre_existing_claims, ctx->alloc_pages,
+        "check node claim is reflected in outstanding pages");
+}
+
+/*
+ * A1-2: global claim is replaced atomically after an allocation.
+ *
+ * Sets an initial global claim, allocates one extent (consuming part of it),
+ * then sets a smaller replacement claim, checks that the replacement is
+ * accepted, and finally releases all claims of the domain.
+ *
+ * NOTE(review): the outstanding count after the replacement is not verified
+ * here; only the success of the replacement call is checked.
+ *
+ * Returns 0 on success, otherwise the non-zero result of the failing helper.
+ */
+static int test_global_replace_after_alloc(struct test_ctx *ctx)
+{
+    /* uint64_t matches lib_get_global_free_pages() and the PRIu64 formats. */
+    uint64_t free_pages = 0;
+    uint64_t initial_pages;
+
+    /* Get the global free memory for sizing the initial claim */
+    rc = lib_get_global_free_pages(ctx, &free_pages);
+    if ( rc )
+        return rc;
+
+    if ( free_pages < 2 )
+        return lib_fail(ctx, "need >= 2 free pages global, got %" PRIu64,
+                        free_pages);
+
+    /*
+     * This test needs two valid claim targets: an initial larger claim
+     * and a smaller replacement target after consuming one claimed page.
+     */
+    initial_pages = free_pages;
+    ctx->alloc_pages = free_pages / 2;
+
+    snprintf(ctx->result->params, sizeof(ctx->result->params),
+             "alloc_pages=%" PRIu64 " initial=%" PRIu64 " "
+             "replacement=%" PRIu64 " global_free=%" PRIu64,
+             ctx->alloc_pages, initial_pages, ctx->alloc_pages, free_pages);
+
+    rc = lib_claim_memory(ctx, ctx->dom_1, 1, /* one claim */
+                          &(memory_claim_t){
+                              .pages = initial_pages,
+                              .target = XEN_DOMCTL_CLAIM_MEMORY_GLOBAL,
+                          },
+                          "set initial global replacement claim");
+    if ( rc )
+        return rc;
+
+    lib_set_step(ctx, "Allocate one extent to consume part of claim");
+    rc = lib_populate_success(ctx, (lib_populate_args_t){
+                                       .domid = ctx->dom_1,
+                                       .nr_extents = 1,
+                                   });
+    if ( rc )
+        return rc;
+
+    rc = lib_claim_memory(ctx, ctx->dom_1, 1, /* one claim */
+                          &(memory_claim_t){
+                              .pages = ctx->alloc_pages,
+                              .target = XEN_DOMCTL_CLAIM_MEMORY_GLOBAL,
+                          },
+                          "replace global claim with a new absolute target");
+    if ( rc )
+        return rc;
+
+    return lib_release_all_claims(ctx, ctx->dom_1);
+}
+
+/*
+ * A1-3: node-specific claim is replaced atomically after an allocation.
+ *
+ * Same as A1-2 but scoped to the primary NUMA node:
+ * Sets an initial node claim, allocates one extent on that node (consuming
+ * part of the claim), then sets a smaller replacement claim, checks that the
+ * replacement is accepted, and finally releases all claims of the domain.
+ *
+ * NOTE(review): the outstanding count after the replacement is not verified
+ * here; only the success of the replacement call is checked.
+ *
+ * Skipped when fewer than 2 pages are free on the primary node.
+ *
+ * Returns 0 on success, otherwise the non-zero result of the failing helper.
+ */
+static int test_node_replace_after_alloc(struct test_ctx *ctx)
+{
+    uint64_t free_pages = 0, initial_pages;
+
+    /* Node used for claim sizing, claiming and allocation */
+    ctx->target1 = ctx->env->primary_node;
+
+    /* Get the free memory on the test node for sizing the initial claim */
+    rc = lib_get_node_free_pages(ctx, ctx->target1, &free_pages, NULL);
+    if ( rc )
+        return rc;
+
+    if ( free_pages < 2 )
+        return lib_skip_test(
+            ctx, "need >= 2 pages on node %" PRIu64 ", got %" PRIu64,
+            ctx->target1, free_pages);
+
+    /*
+     * This test needs two valid claim targets: an initial larger claim
+     * and a smaller replacement target after consuming one claimed page.
+     *
+     * Leave one page of slack between the claim and the free pages
+     * so claim does not fail due to a small concurrent allocation.
+     */
+    initial_pages = free_pages - 1;
+    ctx->alloc_pages = free_pages / 2;
+
+    /* Logging of test parameters */
+    snprintf(ctx->result->params, sizeof(ctx->result->params),
+             "node=%" PRIu64 " init=%" PRIu64 " replacement=%" PRIu64
+             " free=%" PRIu64,
+             ctx->target1, initial_pages, ctx->alloc_pages, free_pages);
+
+    /* Create the claim with initial_pages */
+    rc = lib_claim_memory(ctx, ctx->dom_1, 1, /* one claim */
+                          &(memory_claim_t){
+                              .pages = initial_pages,
+                              .target = ctx->target1,
+                          },
+                          "set initial node-specific replacement claim");
+    if ( rc )
+        return rc;
+
+    lib_set_step(ctx, "Allocate one extent to consume part of claim");
+    rc = lib_populate_success(ctx,
+                              (lib_populate_args_t){
+                                  .domid = ctx->dom_1,
+                                  .nr_extents = 1,
+                                  .flags = XENMEMF_exact_node(ctx->target1),
+                              });
+    if ( rc )
+        return rc;
+
+    /* Update the claim with ctx->alloc_pages */
+    rc = lib_claim_memory(ctx, ctx->dom_1, 1, /* one claim */
+                          &(memory_claim_t){
+                              .pages = ctx->alloc_pages,
+                              .target = ctx->target1,
+                          },
+                          "replace node claim with a new absolute target");
+    if ( rc )
+        return rc;
+
+    return lib_release_all_claims(ctx, ctx->dom_1);
+}
+
+/*
+ * A1-4: legacy xc_domain_claim_pages() global claim is tracked in outstanding
+ * pages, reduced by an allocation, and released when the domain is destroyed.
+ *
+ * Claims half of the globally free pages, populates a fixed number of extents
+ * and checks that the outstanding pages drop by that number.
+ * (Assumes one page per extent, as the expected delta is the extent count.)
+ *
+ * Skipped when half of the free pages cannot cover the allocation, because
+ * the expected outstanding value could not be computed without underflow.
+ *
+ * Returns 0 on success, otherwise the non-zero result of the failing helper.
+ */
+static int test_legacy_global_claim(struct test_ctx *ctx)
+{
+    const uint64_t nr_extents = 10; /* Extents redeemed against the claim */
+    uint64_t pre_existing_claims, free_pages = 0;
+
+    /* Get the global free memory for sizing the claim */
+    rc = lib_get_global_free_pages(ctx, &free_pages);
+    if ( rc )
+        return rc;
+
+    /* The claim must be at least as large as the allocation made below. */
+    if ( free_pages / 2 < nr_extents )
+        return lib_skip_test(
+            ctx, "need >= %" PRIu64 " free pages global, got %" PRIu64,
+            nr_extents * 2, free_pages);
+
+    ctx->alloc_pages = free_pages / 2;
+    snprintf(ctx->result->params, sizeof(ctx->result->params), "claim=%" PRIu64,
+             ctx->alloc_pages);
+
+    rc = lib_get_total_claims(ctx, &pre_existing_claims);
+    if ( rc )
+        return rc;
+
+    rc = lib_claim_pages_legacy(ctx, ctx->dom_1, ctx->alloc_pages,
+                                "set legacy global claim");
+    if ( rc )
+        return rc;
+
+    rc = lib_check_claim(ctx, pre_existing_claims, ctx->alloc_pages,
+                         "claim is added the outstanding pages");
+    if ( rc )
+        return rc;
+
+    lib_set_step(ctx, "allocate extents to redeem a part of claim");
+    rc = lib_populate_success(ctx, (lib_populate_args_t){
+                                       .domid = ctx->dom_1,
+                                       .nr_extents = nr_extents,
+                                   });
+    if ( rc )
+        return rc;
+
+    /* Teardown releases the claim; fixture verifies baseline is restored. */
+    return lib_check_claim(
+        ctx, pre_existing_claims, ctx->alloc_pages - nr_extents,
+        "allocated against claim, outstanding pages reduced");
+}
+
+/*
+ * A1-5: Test blocking allocation with claims and claim movement between nodes.
+ *
+ * This test performs a sequence of claims and allocations to verify that claims
+ * block allocations on the claimed node, that moving a claim to another node
+ * allows allocation on the original node, and that the new node is now blocked
+ * by the claim until it is released.
+ *
+ * The claims are made for dom_1; the second test domain (dom_2) is used for
+ * the allocation attempts. The test:
+ *
+ * Claims most free pages on the primary node, verifies allocation is blocked,
+ * then moves the claim to the secondary node, verifies the original allocation
+ * can now succeed on the primary node.
+ *
+ * It then verifies allocation is now blocked on the secondary node,
+ * releases the claim, and verifies the allocation can now succeed on the
+ * secondary node as well.
+ *
+ * Requires at least two online NUMA nodes.
+ *
+ * Returns 0 on success, otherwise the non-zero result of the failing helper.
+ */
+static int test_move_claim_between_nodes(struct test_ctx *ctx)
+{
+    uint64_t free_src = 0, free_dst = 0, spare_pages = 10;
+
+    if ( !ctx->env->have_secondary_node )
+        return lib_skip_test(ctx, "Requires at least two online NUMA nodes.");
+
+    ctx->target1 = ctx->env->primary_node;
+    ctx->target2 = ctx->env->secondary_node;
+
+    /* Both sizes feed the precondition check below: stop if a query fails. */
+    rc = lib_get_node_free_pages(ctx, ctx->target1, &free_src, NULL);
+    if ( !rc )
+        rc = lib_get_node_free_pages(ctx, ctx->target2, &free_dst, NULL);
+    if ( rc )
+        return rc;
+
+    if ( free_src < spare_pages + 1 || free_dst < spare_pages + 1 )
+        return lib_fail(ctx, "Need more pages, got %" PRIu64 "/%" PRIu64 ".",
+                        free_src, free_dst);
+
+    lib_set_step(ctx, "Claim most memory on source node.");
+    rc = lib_claim_all_on_node(ctx, ctx->dom_1, ctx->target1, spare_pages);
+    if ( rc )
+        return rc;
+
+    /* Ask for twice the unclaimed spare so the request must hit the claim. */
+    lib_set_step(ctx, "The claim blocks the allocation on the source node.");
+    rc = lib_populate_failure(ctx,
+                              (lib_populate_args_t){
+                                  .domid = ctx->dom_2,
+                                  .nr_extents = spare_pages * 2,
+                                  .flags = XENMEMF_exact_node(ctx->target1),
+                              });
+    if ( rc )
+        return rc;
+
+    lib_set_step(ctx, "Move the claim to most memory on the destination node.");
+    rc = lib_claim_all_on_node(ctx, ctx->dom_1, ctx->target2, spare_pages);
+    if ( rc )
+        return rc;
+
+    lib_set_step(ctx, "Moved claim no longer blocks allocs on source node.");
+    rc = lib_populate_success(ctx,
+                              (lib_populate_args_t){
+                                  .domid = ctx->dom_2,
+                                  .start = spare_pages * 2,
+                                  .nr_extents = spare_pages * 2,
+                                  .flags = XENMEMF_exact_node(ctx->target1),
+                              });
+    if ( rc )
+        return rc;
+
+    lib_set_step(ctx, "Moved claim now blocks allocs on destination node.");
+    rc = lib_populate_failure(ctx,
+                              (lib_populate_args_t){
+                                  .domid = ctx->dom_2,
+                                  .nr_extents = spare_pages * 2,
+                                  .flags = XENMEMF_exact_node(ctx->target2),
+                              });
+    if ( rc )
+        return rc;
+
+    rc = lib_release_all_claims(ctx, ctx->dom_1);
+    if ( rc )
+        return rc;
+
+    /*
+     * NOTE(review): this reuses the same .start/.nr_extents range that was
+     * already populated from the source node above - confirm that populating
+     * the same range twice is intended.
+     */
+    lib_set_step(ctx, "Claim released, allocs on destination node succeed.");
+    rc = lib_populate_success(ctx,
+                              (lib_populate_args_t){
+                                  .domid = ctx->dom_2,
+                                  .start = spare_pages * 2,
+                                  .nr_extents = spare_pages * 2,
+                                  .flags = XENMEMF_exact_node(ctx->target2),
+                              });
+    if ( rc )
+        return rc;
+
+    return lib_release_all_claims(ctx, ctx->dom_1);
+}
+
+/*
+ * A1-6: Check that calling xc_domain_claim_pages(claim_pages = 0)
+ * resets the claims to the baseline.
+ *
+ * Records the outstanding pages, makes a small legacy claim, replaces it
+ * with a zero claim and checks the outstanding pages equal the recorded value.
+ *
+ * Returns 0 on success, otherwise the non-zero result of the failing helper.
+ */
+static int test_zero_claim_resets_claim(struct test_ctx *ctx)
+{
+    uint64_t pre_existing_claims;
+
+    rc = lib_get_total_claims(ctx, &pre_existing_claims);
+    if ( rc )
+        return rc;
+
+    /* Make a claim first to move outstanding away from the baseline. */
+    rc = lib_claim_pages_legacy(
+        ctx, ctx->dom_1, 8,
+        "make a claim to move outstanding away from baseline");
+    if ( rc )
+        return rc;
+
+    /* Now set a zero claim to reset outstanding back to the baseline. */
+    rc = lib_claim_pages_legacy(ctx, ctx->dom_1, 0,
+                                "zero claim to reset outstanding to baseline");
+    if ( rc )
+        return rc;
+
+    return lib_check_claim(ctx, pre_existing_claims, 0,
+                           "check zero claim resets outstanding to baseline");
+}
+
+/*
+ * A1-7: Check that calling xc_domain_claim_memory() with a zero-page claim
+ * resets the claims to the baseline.
+ *
+ * Records the outstanding pages, makes a small claim on the primary node,
+ * replaces it with a zero-page global claim and checks the outstanding pages
+ * equal the recorded value.
+ *
+ * Returns 0 on success, otherwise the non-zero result of the failing helper.
+ */
+static int test_zero_claim_memory_resets(struct test_ctx *ctx)
+{
+    uint64_t pre_existing_claims;
+
+    /*
+     * lib_claim_memory() reports the state of ctx->target1 when a claim
+     * fails, so record the node this test claims on.
+     */
+    ctx->target1 = ctx->env->primary_node;
+
+    rc = lib_get_total_claims(ctx, &pre_existing_claims);
+    if ( rc )
+        return rc;
+
+    /* Make a claim first to move outstanding away from the baseline. */
+    rc = lib_claim_memory(
+        ctx, ctx->dom_1, 1,
+        &(memory_claim_t){.pages = 8, .target = ctx->env->primary_node},
+        "make a claim to move outstanding away from baseline");
+    if ( rc )
+        return rc;
+
+    /* Now set a zero claim to reset outstanding back to the baseline. */
+    rc = lib_claim_memory(
+        ctx, ctx->dom_1, 1,
+        &(memory_claim_t){.pages = 0, .target = XEN_DOMCTL_CLAIM_MEMORY_GLOBAL},
+        "set a zero claim to reset outstanding to baseline");
+    if ( rc )
+        return rc;
+
+    return lib_check_claim(ctx, pre_existing_claims, 0,
+                           "check zero claim resets outstanding to baseline");
+}
diff --git a/tools/tests/mem-claim/input-phase1.h b/tools/tests/mem-claim/input-phase1.h
new file mode 100644
index 000000000000..5a97bb5eeb20
--- /dev/null
+++ b/tools/tests/mem-claim/input-phase1.h
@@ -0,0 +1,171 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * input-phase1.h - Test Phase 1 argument validation for memory claims,
+ * including invalid claim parameters and claiming more pages than are free.
+ *
+ * I1-1 - I1-9: invalid claims are rejected with appropriate error codes.
+ *
+ * This file contains test cases to validate argument handling when dealing
+ * with NUMA-aware claim sets.
+ *
+ * Tests various invalid claim parameters (non-present node, too many claims,
+ * node id above UINT8_MAX, pages above INT32_MAX, non-zero pad, zero claim
+ * count, null claims pointer with non-zero count, and non-null claims pointer
+ * with zero count) and verify they are rejected with the expected error code.
+ */
+#include "libtestclaims.h"
+
+/*
+ * A one-page claim whose target equals the number of nodes (one past the
+ * last valid node id) is expected to fail with ENOENT.
+ */
+static int test_reject_non_present_node(struct test_ctx *ctx)
+{
+    /* Out-of-range node id: first id past the known nodes */
+    memory_claim_t bad = {.target = ctx->env->num_nodes, .pages = 1};
+
+    snprintf(ctx->result->params, sizeof(ctx->result->params),
+             "node=%u num_nodes=%u", bad.target, ctx->env->num_nodes);
+
+    /* Node reported in diagnostics should the expectation not hold. */
+    ctx->target1 = ctx->env->primary_node;
+
+    return lib_expect_claim_memory_failure(
+        ctx, ctx->dom_1, 1, &bad, ENOENT,
+        "reject claim on a non-present NUMA node");
+}
+
+/*
+ * A claim list of 0x100 identical one-page claims on the primary node is
+ * expected to fail with E2BIG.
+ */
+static int test_reject_too_many_claims(struct test_ctx *ctx)
+{
+    enum { NR_CLAIMS = 0x100 }; /* Xen does not support such nr_claims */
+    const memory_claim_t one_page = {
+        .pages = 1,
+        .target = ctx->env->primary_node,
+    };
+    memory_claim_t claims[NR_CLAIMS];
+    uint32_t filled = 0;
+
+    ctx->target1 = ctx->env->primary_node;
+
+    /* Every entry is the same single-page claim. */
+    while ( filled < NR_CLAIMS )
+        claims[filled++] = one_page;
+
+    snprintf(ctx->result->params, sizeof(ctx->result->params), "nr_claims=%u",
+             (uint32_t)NR_CLAIMS);
+
+    return lib_expect_claim_memory_failure(
+        ctx, ctx->dom_1, NR_CLAIMS, claims, E2BIG,
+        "reject claim list larger than the supported maximum");
+}
+
+/*
+ * A one-page claim whose target is UINT8_MAX + 1 is expected to fail
+ * with ENOENT.
+ */
+static int test_reject_node_gt_uint8_max(struct test_ctx *ctx)
+{
+    memory_claim_t wide_node = {.target = UINT8_MAX + 1U, .pages = 1};
+
+    snprintf(ctx->result->params, sizeof(ctx->result->params), "node=%u",
+             wide_node.target);
+    ctx->target1 = ctx->env->primary_node;
+
+    return lib_expect_claim_memory_failure(
+        ctx, ctx->dom_1, 1, &wide_node, ENOENT,
+        "reject claim with node id above UINT8_MAX");
+}
+
+/*
+ * A claim of INT32_MAX + 1 pages on the primary node is expected to fail
+ * with ENOMEM.
+ */
+static int test_reject_pages_gt_int32_max(struct test_ctx *ctx)
+{
+    memory_claim_t huge = {
+        .target = ctx->env->primary_node,
+        .pages = INT32_MAX + 1UL,
+    };
+
+    snprintf(ctx->result->params, sizeof(ctx->result->params),
+             "pages=%" PRIu64 " node=%u", huge.pages, huge.target);
+    ctx->target1 = ctx->env->primary_node;
+
+    return lib_expect_claim_memory_failure(
+        ctx, ctx->dom_1, 1, &huge, ENOMEM,
+        "reject claim with pages larger than INT32_MAX");
+}
+
+/*
+ * An otherwise valid one-page claim on the primary node with a non-zero
+ * pad field is expected to fail with EINVAL.
+ */
+static int test_reject_nonzero_pad(struct test_ctx *ctx)
+{
+    memory_claim_t padded = {
+        .pad = 1,
+        .target = ctx->env->primary_node,
+        .pages = 1,
+    };
+
+    snprintf(ctx->result->params, sizeof(ctx->result->params), "node=%u pad=%u",
+             padded.target, padded.pad);
+    ctx->target1 = ctx->env->primary_node;
+
+    return lib_expect_claim_memory_failure(
+        ctx, ctx->dom_1, 1, &padded, EINVAL,
+        "reject claim with non-zero padding");
+}
+
+/*
+ * A call with nr_claims == 0 and a NULL claims pointer is expected to fail
+ * with EINVAL.
+ */
+static int test_reject_zero_claim_count(struct test_ctx *ctx)
+{
+    memory_claim_t *const no_claims = NULL;
+
+    snprintf(ctx->result->params, sizeof(ctx->result->params), "nr_claims=0");
+
+    return lib_expect_claim_memory_failure(
+        ctx, ctx->dom_1, 0, no_claims, EINVAL,
+        "reject xc_domain_claim_memory() with nr_claims == 0");
+}
+
+/*
+ * A call with nr_claims == 1 but a NULL claims pointer is expected to fail
+ * with EFAULT.
+ */
+static int test_null_claims_nonzero_count(struct test_ctx *ctx)
+{
+    memory_claim_t *const no_claims = NULL;
+
+    snprintf(ctx->result->params, sizeof(ctx->result->params),
+             "nr_claims=1 claims=NULL");
+
+    return lib_expect_claim_memory_failure(
+        ctx, ctx->dom_1, 1, no_claims, EFAULT,
+        "reject xc_domain_claim_memory() with claims=NULL and nr_claims == 1");
+}
+
+/*
+ * A call with nr_claims == 0 but a pointer to a valid claim is expected to
+ * fail with EINVAL.
+ */
+static int test_zero_count_valid_pointer(struct test_ctx *ctx)
+{
+    memory_claim_t unused = {.target = ctx->env->primary_node, .pages = 1};
+
+    snprintf(ctx->result->params, sizeof(ctx->result->params),
+             "nr_claims=0 claims=valid node=%u", unused.target);
+
+    return lib_expect_claim_memory_failure(
+        ctx, ctx->dom_1, 0, &unused, EINVAL,
+        "reject xc_domain_claim_memory() with !nr_claims but a claims pointer");
+}
+
+/*
+ * I1-9: Check both xc_domain_claim_pages() and xc_domain_claim_memory()
+ * with pages > free pages fail with ENOMEM.
+ *
+ * Three requests are checked in turn, each one page larger than the free
+ * memory it is sized from: a legacy global claim, a claim_memory() global
+ * claim, and a claim_memory() claim on the primary node.
+ *
+ * Returns 0 when all three are rejected as expected, otherwise the non-zero
+ * result of the first failing helper.
+ */
+static int test_claim_pages_gt_free_enomem(struct test_ctx *ctx)
+{
+    uint64_t free_pages = 0;
+
+    /* Node reported in diagnostics and used for the node-specific check. */
+    ctx->target1 = ctx->env->primary_node;
+
+    /* Get the global free memory for sizing the claim */
+    rc = lib_get_global_free_pages(ctx, &free_pages);
+    if ( rc )
+        return rc;
+    ctx->alloc_pages = free_pages + 1;
+
+    snprintf(ctx->result->params, sizeof(ctx->result->params),
+             "claim=%" PRIu64 " global=%" PRIu64, ctx->alloc_pages, free_pages);
+
+    rc = lib_claim_pages_legacy_failure(
+        ctx, ctx->dom_1, ctx->alloc_pages, ENOMEM,
+        "reject xc_domain_claim_pages() with pages > global free page");
+    if ( rc )
+        return rc;
+
+    rc = lib_expect_claim_memory_failure(
+        ctx, ctx->dom_1, 1,
+        &(memory_claim_t){.pages = ctx->alloc_pages,
+                          .target = XEN_DOMCTL_CLAIM_MEMORY_GLOBAL},
+        ENOMEM, "reject claim_memory() with pages > global free pages");
+    /* Do not let the node check below overwrite a failed global check. */
+    if ( rc )
+        return rc;
+
+    /* Get the free pages on the primary node and fail to claim more than it */
+    rc = lib_get_node_free_pages(ctx, ctx->env->primary_node, &free_pages,
+                                 NULL);
+    if ( rc )
+        return rc;
+
+    return lib_expect_claim_memory_failure(
+        ctx, ctx->dom_1, 1,
+        &(memory_claim_t){.pages = free_pages + 1,
+                          .target = ctx->env->primary_node},
+        ENOMEM, "reject claim_memory() with pages > node free pages");
+}
diff --git a/tools/tests/mem-claim/input-phase2.h b/tools/tests/mem-claim/input-phase2.h
new file mode 100644
index 000000000000..d57a7e8a37c0
--- /dev/null
+++ b/tools/tests/mem-claim/input-phase2.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * input-phase2.h - Test argument validation for memory claims
+ *
+ * This file contains test cases to validate argument handling when dealing
+ * with NUMA-aware claim sets.
+ */
+#include "libtestclaims.h"
+
+/*
+ * Expect both global claim interfaces to fail with ENOMEM for dom_2 when
+ * asked for @claims pages: first xc_domain_claim_pages(), then a single
+ * global claim via xc_domain_claim_memory().
+ *
+ * Returns 0 if both failed as expected, otherwise the helper's result.
+ */
+static int d2_claim_expect_enomem_global(struct test_ctx *ctx, uint64_t claims)
+{
+    /* Request more than the spare to ensure failure */
+    memory_claim_t global_claim = {
+        .target = XEN_DOMCTL_CLAIM_MEMORY_GLOBAL,
+        .pages = claims,
+    };
+
+    rc = lib_claim_pages_legacy_failure(
+        ctx, ctx->dom_2, claims, ENOMEM,
+        "expect ENOMEM for xc_domain_claim_pages() with claims > spare page");
+    if ( rc != 0 )
+        return rc;
+
+    return lib_expect_claim_memory_failure(
+        ctx, ctx->dom_2, 1, &global_claim, ENOMEM,
+        "expect ENOMEM for claim_memory() with claims > spare pages");
+}
+
+/*
+ * Expect a single xc_domain_claim_memory() claim of @claims pages on node
+ * ctx->target1 to fail with ENOMEM for dom_2.
+ *
+ * Returns 0 if it failed as expected, otherwise the helper's result.
+ */
+static int d2_claim_expect_enomem_node(struct test_ctx *ctx, uint64_t claims)
+{
+    /* Request more than the spare to ensure failure */
+    memory_claim_t node_claim = {.target = ctx->target1, .pages = claims};
+
+    return lib_expect_claim_memory_failure(
+        ctx, ctx->dom_2, 1, &node_claim, ENOMEM,
+        "expect ENOMEM for claim_memory() with claims > spare pages");
+}
+
+/*
+ * I2-1
+ *
+ * Create a legacy global claim for d1 using claim_pages and assert that
+ * claim calls for d2 that exceed the unclaimed memory fail with ENOMEM.
+ *
+ * d1 claims all but SPARE_PAGES of the free memory; d2 then requests twice
+ * SPARE_PAGES globally and on the primary node.
+ *
+ * Skipped when no more than SPARE_PAGES pages are free, as the claim size
+ * could not be computed without underflow.
+ *
+ * Returns 0 on success, otherwise the non-zero result of the failing helper.
+ */
+static int test_claim_pages_causes_enomem(struct test_ctx *ctx)
+{
+    uint64_t free_pages = 0;
+
+    /* d2_claim_expect_enomem_node() claims on ctx->target1: select the node. */
+    ctx->target1 = ctx->env->primary_node;
+
+    /* Get the global free memory for sizing the claim */
+    rc = lib_get_global_free_pages(ctx, &free_pages);
+    if ( rc )
+        return rc;
+
+    if ( free_pages <= SPARE_PAGES )
+        return lib_skip_test(
+            ctx, "need > %" PRIu64 " free pages global, got %" PRIu64,
+            (uint64_t)SPARE_PAGES, free_pages);
+
+    ctx->alloc_pages = free_pages - SPARE_PAGES;
+
+    snprintf(ctx->result->params, sizeof(ctx->result->params),
+             "claim=%" PRIu64 " global=%" PRIu64, ctx->alloc_pages, free_pages);
+
+    rc = lib_claim_pages_legacy(
+        ctx, ctx->dom_1, ctx->alloc_pages,
+        "dom_1: claim nearly all global memory with claim_pages");
+    if ( rc )
+        return rc;
+
+    rc = d2_claim_expect_enomem_global(ctx, SPARE_PAGES * 2);
+    if ( !rc )
+        rc = d2_claim_expect_enomem_node(ctx, SPARE_PAGES * 2);
+    return rc;
+}
+
+/*
+ * I2-2
+ *
+ * Create a global claim for d1 using claim_memory and assert that
+ * claim calls for d2 that exceed the unclaimed memory fail with ENOMEM.
+ *
+ * Returns 0 on success, -1 if the initial claim for d1 could not be made,
+ * otherwise the non-zero result of the failing expectation helper.
+ */
+static int test_claim_memory_causes_enomem(struct test_ctx *ctx)
+{
+    /* d2_claim_expect_enomem_node() claims on ctx->target1: select the node. */
+    ctx->target1 = ctx->env->primary_node;
+
+    if ( lib_claim_all_on_host(ctx, ctx->dom_1, SPARE_PAGES) )
+        return -1;
+
+    rc = d2_claim_expect_enomem_global(ctx, SPARE_PAGES * 2);
+    if ( !rc )
+        rc = d2_claim_expect_enomem_node(ctx, SPARE_PAGES * 2);
+    return rc;
+}
+
+/*
+ * I2-3
+ *
+ * Create a primary-node claim for d1 using claim_memory and assert that
+ * claim calls for d2 that exceed the unclaimed memory fail with ENOMEM.
+ *
+ * Returns 0 on success, -1 if the initial claim for d1 could not be made,
+ * otherwise the non-zero result of the failing expectation helper.
+ */
+static int test_claim_prima_causes_enomem(struct test_ctx *ctx)
+{
+    /* Both the d1 claim and the d2 expectation operate on ctx->target1. */
+    ctx->target1 = ctx->env->primary_node;
+
+    if ( lib_claim_all_on_node(ctx, ctx->dom_1, ctx->target1, SPARE_PAGES) )
+        return -1;
+    return d2_claim_expect_enomem_node(ctx, SPARE_PAGES * 2);
+}
diff --git a/tools/tests/mem-claim/libtestclaims.c b/tools/tests/mem-claim/libtestclaims.c
new file mode 100644
index 000000000000..c4c1a63e1856
--- /dev/null
+++ b/tools/tests/mem-claim/libtestclaims.c
@@ -0,0 +1,995 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Functional system test framework for testing memory claims in Xen.
+ *
+ * See test-claim-memory.c for details on a test suite using this framework
+ * and the verifications it is designed to perform on behalf of a test suite.
+ *
+ * Besides providing the test suite with functionality to validate the
+ * effects of memory claims on the system, such as querying memory state,
+ * creating and destroying test domains, performing claim operations and
+ * populating memory, it also provides a consistent way to manage test
+ * state and record failures with detailed messages that include the
+ * current step, test parameters, and a snapshot of relevant memory
+ * state to aid in diagnosing issues when a test fails.
+ *
+ * It also provides the framework to select test cases, run them, and
+ * restore the system to a clean state after tests by destroying test
+ * domains to release populated memory and claims, even if a test fails
+ * partway through.
+ *
+ * The functions cover:
+ *
+ * - Managing the test_ctx structure which holds the test environment,
+ * configuration, and results.
+ *
+ * - Providing helper functions to:
+ * - Create and destroy domains for testing, which are needed to make claims
+ * - Query the system's memory state in terms of free pages and outstanding
+ * claims, which are used for sizing claims and verifying their effects.
+ * - Perform claim operations and check their effects on the system.
+ * - Populate memory to test the blocking effects of claims.
+ * - Record failures with detailed messages that include the current step,
+ * test parameters, and a snapshot of relevant memory state.
+ *
+ * - Cleanup the test environment by destroying domains after tests, ensuring
+ * that claims are released and the system is left in a clean state even
+ * if a test fails partway through.
+ *
+ * - Providing a consistent way to skip tests when preconditions are not met,
+ * such as insufficient free memory or lack of multiple NUMA nodes.
+ *
+ * - Ensuring that all interactions with Xen are checked for errors, and that
+ * any failures are reported with detailed diagnostics.
+ *
+ * - Test cases should use the provided helper functions to perform all
+ * operations that interact with Xen or manage test state to ensure
+ * consistent failure reporting and cleanup.
+ */
+#include <err.h>
+#include <errno.h>
+#include <getopt.h>
+#include <inttypes.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include <xen-tools/common-macros.h>
+
+#include "libtestclaims.h"
+
+/*
+ * Shared scratch return code with external linkage: helpers in this file
+ * (e.g. lib_destroy_domain(), lib_claim_memory()) store the result of their
+ * last libxenctrl call here, and the test cases assign to it as well.
+ */
+int rc;
+/* Running step number, incremented by lib_set_step() in verbose mode. */
+static int step;
+/*
+ * Long command-line options in the getopt_long() table format, terminated by
+ * an all-zero entry. (The parser using it is outside this part of the file.)
+ */
+static const struct option long_options[] = {
+ {"help", no_argument, NULL, 'h'},
+ {"list", no_argument, NULL, 'l'},
+ {"test", required_argument, NULL, 't'},
+ {"verbose", no_argument, NULL, 'v'},
+ {NULL, 0, NULL, 0 },
+};
+
+/* --- diagnostics helpers --- */
+
+/*
+ * Format text onto the end of the NUL-terminated string in @buf.
+ *
+ * @size is the total capacity of @buf. Output that does not fit is truncated
+ * by vsnprintf(), which keeps the result NUL-terminated. Nothing is written
+ * when the existing string already occupies @size bytes or more.
+ */
+void lib_appendf(char *buf, size_t size, const char *fmt, ...)
+{
+    const size_t len = strlen(buf);
+    va_list args;
+
+    if ( len < size )
+    {
+        va_start(args, fmt);
+        vsnprintf(&buf[len], size - len, fmt, args);
+        va_end(args);
+    }
+}
+
+/*
+ * Emit one formatted debug line on stdout, prefixed with " debug: " and
+ * followed by a newline. Does nothing unless ctx->cfg->verbose is set.
+ */
+void lib_debugf(struct test_ctx *ctx, const char *fmt, ...)
+{
+    if ( ctx->cfg->verbose )
+    {
+        va_list args;
+
+        fputs(" debug: ", stdout);
+
+        va_start(args, fmt);
+        vprintf(fmt, args);
+        va_end(args);
+
+        putchar('\n');
+    }
+}
+
+/*
+ * Format the description of the current test step into ctx->step, from
+ * where failure reports pick it up.
+ *
+ * In verbose mode the step is also printed to stdout together with a
+ * running step number.
+ */
+void lib_set_step(struct test_ctx *ctx, const char *fmt, ...)
+{
+    va_list args;
+
+    va_start(args, fmt);
+    vsnprintf(ctx->step, sizeof(ctx->step), fmt, args);
+    va_end(args);
+
+    if ( !ctx->cfg->verbose )
+        return;
+
+    /* The counter only advances for steps that are actually printed. */
+    step++;
+    printf(" step %d: %s\n", step, ctx->step);
+}
+
+/*
+ * Append a snapshot of the memory state to the failure details: the global
+ * free and outstanding pages, followed by the free and total pages of the
+ * test's target nodes (target1 and, if different, target2).
+ *
+ * This runs while a failure is being recorded, so it must not report
+ * failures itself: it queries Xen directly instead of using the lib_get_*()
+ * helpers, which call lib_fail() on error and would both discard the details
+ * collected so far and re-enter this function. Query errors are noted in the
+ * snapshot instead.
+ */
+static void append_snapshot(struct test_ctx *ctx)
+{
+    struct test_env *env = ctx->env;
+    xc_physinfo_t physinfo;
+    const unsigned int nodes[2] = {ctx->target1, ctx->target2};
+    /* Compare the untruncated values, as lib_fail_with_errno() does. */
+    const int wanted[2] = {ctx->target1 != INVALID_NODE,
+                           ctx->target2 != INVALID_NODE};
+
+    if ( xc_physinfo(env->xch, &physinfo) )
+    {
+        ctx_appendf(ctx, "\n snapshot: xc_physinfo failed: %d (%s)", errno,
+                    strerror(errno));
+    }
+    else
+    {
+        ctx_appendf(ctx,
+                    "\n snapshot: free_pages=%" PRIu64
+                    ", outstanding_pages=%" PRIu64,
+                    physinfo.free_pages, physinfo.outstanding_pages);
+    }
+
+    if ( !wanted[0] && !wanted[1] )
+        return;
+
+    /* One refresh of the per-node memory info serves both target nodes. */
+    if ( xc_numainfo(env->xch, &env->num_nodes, env->meminfo, NULL) )
+    {
+        ctx_appendf(ctx, "\n snapshot: xc_numainfo failed: %d (%s)", errno,
+                    strerror(errno));
+        return;
+    }
+
+    /* Include their free/total pages at the time of failure in the snapshot. */
+    for ( size_t i = 0; i < ARRAY_SIZE(nodes); i++ )
+    {
+        unsigned int node = nodes[i];
+
+        if ( !wanted[i] || node >= env->num_nodes )
+            continue;
+        /* Do not report the same node twice. */
+        if ( i == 1 && node == nodes[0] )
+            continue;
+
+        ctx_appendf(ctx,
+                    "\n snapshot: node%u free=%" PRIu64 " total=%" PRIu64,
+                    node,
+                    (uint64_t)(env->meminfo[node].memfree / XC_PAGE_SIZE),
+                    (uint64_t)(env->meminfo[node].memsize / XC_PAGE_SIZE));
+    }
+}
+
+/*
+ * Record a test failure with a formatted message and the given errno.
+ *
+ * Marks the result as TEST_FAILED and rebuilds the details from scratch:
+ * the current step, the test domains and target nodes, alloc_pages, the
+ * formatted cause, the errno text (only if @errnum is non-zero) and finally
+ * a snapshot of the memory state.
+ *
+ * Always returns -1 so callers can write "return lib_fail_with_errno(...)".
+ */
+int lib_fail_with_errno(struct test_ctx *ctx, int errnum, const char *fmt, ...)
+{
+    va_list ap;
+    size_t used;
+
+    ctx->result->status = TEST_FAILED;
+    ctx->result->details[0] = '\0';
+
+    ctx_appendf(ctx, "step=%s", ctx->step[0] ? ctx->step : "(not set)");
+    ctx_appendf(ctx, "\n dom_1=%u dom_2=%u target1=%s target2=%s",
+                ctx->dom_1, ctx->dom_2,
+                ctx->target1 == INVALID_NODE ? "n/a" : "set",
+                ctx->target2 == INVALID_NODE ? "n/a" : "set");
+
+    /* The casts keep the arguments in step with the PRIu64 conversions. */
+    if ( ctx->target1 != INVALID_NODE )
+        ctx_appendf(ctx, " (%" PRIu64 ")", (uint64_t)ctx->target1);
+    if ( ctx->target2 != INVALID_NODE )
+        ctx_appendf(ctx, " (%" PRIu64 ")", (uint64_t)ctx->target2);
+
+    ctx_appendf(ctx, "\n alloc_pages=%" PRIu64, (uint64_t)ctx->alloc_pages);
+
+    ctx_appendf(ctx, "\n cause: ");
+
+    /* Format the caller's message directly behind what was collected. */
+    used = strlen(ctx->result->details);
+    if ( used < sizeof(ctx->result->details) )
+    {
+        va_start(ap, fmt);
+        vsnprintf(ctx->result->details + used,
+                  sizeof(ctx->result->details) - used, fmt, ap);
+        va_end(ap);
+    }
+
+    if ( errnum )
+        ctx_appendf(ctx, "\n errno=%d (%s)", errnum, strerror(errnum));
+
+    append_snapshot(ctx);
+    return -1;
+}
+
+/*
+ * Record a test failure with a formatted message and the current errno.
+ *
+ * errno is captured on entry, the message is formatted into a local buffer
+ * and both are handed to lib_fail_with_errno(), whose result is returned.
+ */
+int lib_fail(struct test_ctx *ctx, const char *fmt, ...)
+{
+    const int err = errno; /* Capture first, before any call can change it */
+    char text[1024];
+    va_list args;
+
+    va_start(args, fmt);
+    vsnprintf(text, sizeof(text), fmt, args);
+    va_end(args);
+
+    /* Pass the text as an argument so it is not parsed as a format again. */
+    return lib_fail_with_errno(ctx, err, "%s", text);
+}
+
+/*
+ * Record that a test was skipped.
+ *
+ * Sets the result status to TEST_SKIPPED and stores the formatted reason as
+ * the result details. Always returns 1.
+ */
+int lib_skip_test(struct test_ctx *ctx, const char *fmt, ...)
+{
+    va_list args;
+
+    ctx->result->details[0] = '\0';
+    ctx->result->status = TEST_SKIPPED;
+
+    va_start(args, fmt);
+    vsnprintf(ctx->result->details, sizeof(ctx->result->details), fmt, args);
+    va_end(args);
+
+    return 1;
+}
+
+/* --- memory-state queries --- */
+
+/*
+ * Get the number of free and total pages for a specific NUMA node.
+ *
+ * Refreshes env->meminfo (and env->num_nodes) through xc_numainfo() and
+ * converts the node's memfree/memsize from bytes to pages.
+ *
+ * @free_pages must not be NULL; @total_pages may be NULL if not needed.
+ * Both outputs are set to 0 before anything can fail, so a caller that does
+ * not check the result never reads an indeterminate value.
+ *
+ * Returns 0 on success. On failure the failure is recorded in the test
+ * result and the value returned by the recording helper (-1) is returned.
+ */
+int lib_get_node_free_pages(struct test_ctx *ctx, unsigned int node,
+                            uint64_t *free_pages, uint64_t *total_pages)
+{
+    struct test_env *env = ctx->env;
+
+    *free_pages = 0;
+    if ( total_pages )
+        *total_pages = 0;
+
+    /* A bad node id is not a libxc error: do not report a stale errno. */
+    if ( node >= env->num_nodes ) /* Check node validity */
+        return lib_fail_with_errno(ctx, 0, "Invalid node %u/%u", node,
+                                   env->num_nodes);
+
+    if ( xc_numainfo(env->xch, &env->num_nodes, env->meminfo, NULL) )
+        return lib_fail(ctx, "xc_numainfo failed to get node memory info");
+
+    *free_pages = env->meminfo[node].memfree / XC_PAGE_SIZE;
+    if ( total_pages )
+        *total_pages = env->meminfo[node].memsize / XC_PAGE_SIZE;
+    return 0;
+}
+
+/*
+ * Get the number of free pages reported by xc_availheap() when called
+ * without width limits and with node -1, converted from bytes to pages.
+ *
+ * Returns 0 on success; on failure records it and returns lib_fail()'s
+ * result, leaving *free_pages untouched.
+ */
+int lib_get_global_free_pages(struct test_ctx *ctx, uint64_t *free_pages)
+{
+    uint64_t bytes;
+
+    if ( xc_availheap(ctx->env->xch, 0, 0, -1, &bytes) != 0 )
+        return lib_fail(ctx, "xc_availheap failed to get global pages");
+
+    *free_pages = bytes / XC_PAGE_SIZE;
+
+    return 0;
+}
+
+/*
+ * Read the current outstanding_pages value from xc_physinfo().
+ *
+ * Returns 0 on success; on failure records it and returns lib_fail()'s
+ * result, leaving *outstanding_pages_global untouched.
+ */
+int lib_get_total_claims(struct test_ctx *ctx,
+                         uint64_t *outstanding_pages_global)
+{
+    xc_physinfo_t info;
+
+    if ( xc_physinfo(ctx->env->xch, &info) != 0 )
+        return lib_fail(ctx, "xc_physinfo failed to get outstanding pages");
+
+    *outstanding_pages_global = info.outstanding_pages;
+
+    return 0;
+}
+
+/* --- claim check operations --- */
+
+/*
+ * Check that the outstanding pages reported by xc_physinfo() equal
+ * @baseline_outstanding + @expected_delta.
+ *
+ * @reason becomes the current test step. Returns 0 on a match; otherwise the
+ * mismatch (or the failed query) is recorded and the recording helper's
+ * result is returned.
+ */
+int lib_check_claim(struct test_ctx *ctx, uint64_t baseline_outstanding,
+                    uint64_t expected_delta, const char *reason)
+{
+    const uint64_t wanted = baseline_outstanding + expected_delta;
+    xc_physinfo_t info;
+
+    lib_set_step(ctx, "%s", reason);
+
+    if ( xc_physinfo(ctx->env->xch, &info) != 0 )
+        return lib_fail(ctx, "xc_physinfo failed to get outstanding pages");
+
+    if ( info.outstanding_pages == wanted )
+        return 0;
+
+    /* A plain value mismatch carries no errno, hence the explicit 0. */
+    return lib_fail_with_errno(
+        ctx, 0, "expected outstanding_pages=%" PRIu64 ", got %" PRIu64,
+        wanted, info.outstanding_pages);
+}
+
+/* --- domain lifecycle --- */
+
+/*
+ * Create a domain from the environment's template and set its maxmem to -1.
+ * @label names the domain in step descriptions and failure messages.
+ *
+ * On success, the new domain ID is stored in *domid and 0 is returned.
+ *
+ * On failure, the failure is recorded and its result (-1) returned. If only
+ * setting maxmem failed, the failure is recorded first - while errno and the
+ * current step still describe it - and the domain is destroyed afterwards,
+ * which leaves *domid as DOMID_INVALID.
+ */
+int lib_create_domain(struct test_ctx *ctx, uint32_t *domid, const char *label)
+{
+    struct xen_domctl_createdomain create = ctx->env->create_template;
+
+    lib_set_step(ctx, "create %s domain", label);
+    *domid = DOMID_INVALID;
+    if ( xc_domain_create(ctx->env->xch, domid, &create) )
+        return lib_fail(ctx, "xc_domain_create(%s) failed", label);
+
+    lib_set_step(ctx, "set maxmem for %s domain", label);
+    if ( xc_domain_setmaxmem(ctx->env->xch, *domid, -1) )
+    {
+        int failed = lib_fail(ctx, "xc_domain_setmaxmem(%s) failed", label);
+
+        /*
+         * With the failure recorded, a failing destroy only appends a
+         * cleanup note to the details instead of replacing them.
+         */
+        lib_destroy_domain(ctx, domid, label);
+        return failed;
+    }
+
+    return 0;
+}
+
+/*
+ * Destroy the domain in *domid unless it is DOMID_INVALID, and reset *domid
+ * to DOMID_INVALID regardless of the outcome.
+ *
+ * The destroy becomes the current test step, described using @label.
+ *
+ * Returns 0 if there was nothing to destroy or the destroy succeeded.
+ * If the destroy fails while the test has already failed, a cleanup note is
+ * appended to the existing details and -1 is returned; otherwise the destroy
+ * failure is recorded as the test's failure.
+ *
+ * This should be called during test cleanup to ensure domains are destroyed
+ * and claims are released even if a test fails partway through.
+ */
+int lib_destroy_domain(struct test_ctx *ctx, uint32_t *domid, const char *label)
+{
+    if ( *domid == DOMID_INVALID )
+        return 0;
+
+    lib_set_step(ctx, "destroy %s domain", label);
+    rc = xc_domain_destroy(ctx->env->xch, *domid);
+    *domid = DOMID_INVALID;
+
+    if ( !rc )
+        return 0;
+
+    /* First failure of this test: record it as such. */
+    if ( ctx->result->status != TEST_FAILED )
+        return lib_fail(ctx, "xc_domain_destroy(%s) failed", label);
+
+    /* Keep the earlier failure report and only add a note to it. */
+    ctx_appendf(ctx, "\n cleanup: xc_domain_destroy(%s) failed: %d (%s)",
+                label, errno, strerror(errno));
+    return -1;
+}
+
+/* --- claim operations --- */
+
+/*
+ * Install a claim set for domid with xc_domain_claim_memory().
+ *
+ * ctx:       test context; reason becomes the current step.
+ * domid:     domain to install the claims for.
+ * nr_claims: number of entries in claims.
+ * claims:    the claim set passed through to libxenctrl.
+ *
+ * Returns 0 on success. On failure, a snapshot of the free/total pages of
+ * ctx->target1 and of the host-wide outstanding claims is added to the
+ * failure message and the value of lib_fail() is returned.
+ */
+int lib_claim_memory(struct test_ctx *ctx, uint32_t domid, uint32_t nr_claims,
+                     memory_claim_t *claims, const char *reason)
+{
+    /* Zero-initialised so a failing query cannot print indeterminate data. */
+    unsigned long node_free = 0, total_pages = 0;
+    uint64_t outstanding_pages = 0;
+    int saved_errno;
+
+    lib_set_step(ctx, "%s", reason);
+    rc = xc_domain_claim_memory(ctx->env->xch, domid, nr_claims, claims);
+    if ( !rc )
+        return 0;
+
+    /*
+     * The diagnostic queries below issue further hypercalls which may
+     * overwrite errno; preserve the errno of the failed claim so that it
+     * is still current when the failure is recorded.
+     */
+    saved_errno = errno;
+    lib_get_node_free_pages(ctx, ctx->target1, &node_free, &total_pages);
+    lib_get_total_claims(ctx, &outstanding_pages);
+    errno = saved_errno;
+
+    return lib_fail(ctx,
+                    "xc_domain_claim_memory failed: node=%" PRIu64
+                    "\n total_outstanding: %" PRIu64
+                    "\n total_pages: %lu"
+                    "\n node free_pages: %lu",
+                    ctx->target1, outstanding_pages, total_pages,
+                    node_free);
+}
+
+/*
+ * Call xc_domain_claim_memory() and expect it to fail with expected_errno.
+ *
+ * Returns 0 when the call returns -1 with errno == expected_errno.
+ * Any other outcome (success, or a different errno) is recorded as a
+ * failure and the value of lib_fail_with_errno() is returned.
+ */
+int lib_expect_claim_memory_failure(struct test_ctx *ctx, uint32_t domid,
+                                    uint32_t nr_claims, memory_claim_t *claims,
+                                    int expected_errno, const char *reason)
+{
+    int call_errno;
+
+    lib_set_step(ctx, "%s", reason);
+    rc = xc_domain_claim_memory(ctx->env->xch, domid, nr_claims, claims);
+    call_errno = errno;
+    if ( rc == -1 && call_errno == expected_errno )
+        return 0;
+
+    /*
+     * When the call unexpectedly succeeded, errno is a leftover from some
+     * earlier call; report 0 instead of a misleading stale error.
+     */
+    return lib_fail_with_errno(ctx, rc ? call_errno : 0,
+                               "expected xc_domain_claim_memory() to fail with "
+                               "errno=%d (%s), got rc=%d",
+                               expected_errno, strerror(expected_errno), rc);
+}
+
+/*
+ * Drop every claim of domid by installing a single global claim of zero
+ * pages. Returns 0 on success, otherwise the value returned by lib_fail().
+ */
+int lib_release_all_claims(struct test_ctx *ctx, uint32_t domid)
+{
+    memory_claim_t zero_claim = {
+        .target = XEN_DOMCTL_CLAIM_MEMORY_GLOBAL,
+        .pages = 0,
+    };
+
+    lib_set_step(ctx, "release all claims with global zero claim");
+
+    rc = xc_domain_claim_memory(ctx->env->xch, domid, 1, &zero_claim);
+    if ( rc == 0 )
+        return 0;
+
+    return lib_fail(ctx, "xc_domain_claim_memory(..., global=0) failed");
+}
+
+/*
+ * Claim all free memory of the host, minus spare pages, as a global claim
+ * for domid.
+ *
+ * The chosen numbers are written to ctx->result->params, which is also
+ * used as the step description of the claim.
+ *
+ * Returns 0 on success and non-zero if the free page count cannot be
+ * obtained, if no more than spare pages are free, or if the claim fails.
+ */
+int lib_claim_all_on_host(struct test_ctx *ctx, uint32_t domid,
+                          unsigned int spare)
+{
+    unsigned long free_pages = 0;
+    unsigned long claim_pages;
+    int ret;
+
+    lib_set_step(ctx, "claim all except %u pages on host", spare);
+
+    /*
+     * Do not continue with an unset free_pages if the query failed.
+     * NOTE(review): assumes the getter returns non-zero on failure, like
+     * lib_get_total_claims() does - confirm.
+     */
+    ret = lib_get_global_free_pages(ctx, &free_pages);
+    if ( ret )
+        return ret;
+
+    if ( free_pages <= spare )
+        return lib_fail(ctx, "Not enough free pages @ host, spare=%u, free=%lu",
+                        spare, free_pages);
+
+    claim_pages = free_pages - spare;
+    snprintf(ctx->result->params, sizeof(ctx->result->params),
+             "claim all pages except %u pages on host claim=%lu free=%lu",
+             spare, claim_pages, free_pages);
+    return lib_claim_memory(
+        ctx, domid, 1,
+        &(memory_claim_t){.pages = claim_pages,
+                          .target = XEN_DOMCTL_CLAIM_MEMORY_GLOBAL},
+        ctx->result->params);
+}
+
+/*
+ * Claim all free memory of one NUMA node, minus spare pages, for domid and
+ * verify the resulting accounting.
+ *
+ * The spare pages leave some breathing room on the node so that small
+ * fluctuations in free memory between the query and the claim do not make
+ * the claim (and with it the test) fail.
+ *
+ * After the claim is installed, two checks are made:
+ *  - the domain's outstanding pages equal the number of pages claimed;
+ *  - the change of the host-wide outstanding claims equals the claimed
+ *    pages minus the claims the domain already held before, because those
+ *    earlier claims were already part of the host-wide total.
+ *
+ * The chosen numbers are written to ctx->result->params, which is also
+ * used as the step description of the claim.
+ *
+ * Returns 0 on success, non-zero if any query, the claim, or a check fails.
+ */
+int lib_claim_all_on_node(struct test_ctx *ctx, uint32_t domid,
+                          unsigned int node, unsigned int spare)
+{
+    uint64_t global_before = 0, global_after = 0;
+    uint64_t claim_pages, dom_claims_before, expected_delta, actual_delta;
+    unsigned long free_pages = 0;
+    xc_domaininfo_t info;
+    int ret;
+
+    lib_set_step(ctx, "claim all except %u pages on node %u", spare, node);
+
+    /* Stop here on a failed query instead of using an unset free_pages. */
+    ret = lib_get_node_free_pages(ctx, node, &free_pages, NULL);
+    if ( ret )
+        return ret;
+
+    if ( free_pages <= spare )
+        return lib_fail(ctx, "Not enough pages @ node %u, spare=%u, free=%lu",
+                        node, spare, free_pages);
+
+    claim_pages = free_pages - spare;
+    snprintf(ctx->result->params, sizeof(ctx->result->params),
+             "claim all pages except %u pages on node %u claim=%" PRIu64
+             " free=%lu",
+             spare, node, claim_pages, free_pages);
+
+    /* Claims the domain already holds are part of the host-wide total. */
+    if ( xc_domain_getinfo_single(ctx->env->xch, domid, &info) )
+        return lib_fail(ctx, "xc_domain_getinfo failed to get domain claims");
+    dom_claims_before = info.outstanding_pages;
+
+    ret = lib_get_total_claims(ctx, &global_before);
+    if ( ret )
+        return ret;
+
+    ret = lib_claim_memory(
+        ctx, domid, 1, &(memory_claim_t){.pages = claim_pages, .target = node},
+        ctx->result->params);
+    if ( ret )
+        return ret;
+
+    ret = lib_get_total_claims(ctx, &global_after);
+    if ( ret )
+        return ret;
+
+    if ( xc_domain_getinfo_single(ctx->env->xch, domid, &info) )
+        return lib_fail(ctx, "xc_domain_getinfo failed to get domain claims");
+
+    if ( info.outstanding_pages != claim_pages )
+        return lib_fail(ctx,
+                        "unexpected claims installation: exp=%" PRIu64
+                        ", got=%" PRIu64,
+                        claim_pages, (uint64_t)info.outstanding_pages);
+
+    /*
+     * Both deltas are computed modulo 2^64, so the comparison is also
+     * correct when the domain's claims shrink. They are printed as signed
+     * values so that a negative delta stays readable.
+     */
+    expected_delta = claim_pages - dom_claims_before;
+    actual_delta = global_after - global_before;
+    if ( actual_delta != expected_delta )
+        return lib_fail(ctx,
+                        "unexp. global delta @ node %u: exp=%" PRId64
+                        ", got=%" PRId64,
+                        node, (int64_t)expected_delta, (int64_t)actual_delta);
+    return 0;
+}
+
+/*
+ * Install a claim of nr_pages for domid through the legacy
+ * xc_domain_claim_pages() call, using reason as the current step.
+ * Returns 0 on success, otherwise the value returned by lib_fail().
+ */
+int lib_claim_pages_legacy(struct test_ctx *ctx, uint32_t domid,
+                           unsigned long nr_pages, const char *reason)
+{
+    lib_set_step(ctx, "%s", reason);
+
+    rc = xc_domain_claim_pages(ctx->env->xch, domid, nr_pages);
+    if ( rc == 0 )
+        return 0;
+
+    return lib_fail(ctx, "xc_domain_claim_pages(%lu) failed", nr_pages);
+}
+
+/*
+ * Call the legacy xc_domain_claim_pages() and expect it to fail with
+ * expected_errno.
+ *
+ * Returns 0 when the call returns -1 with errno == expected_errno.
+ * A success, a different errno, or a broken precondition (see below) is
+ * recorded as a failure and a non-zero value is returned.
+ */
+int lib_claim_pages_legacy_failure(struct test_ctx *ctx, uint32_t domid,
+                                   unsigned long request, int expected_errno,
+                                   const char *reason)
+{
+    int call_errno;
+
+    /*
+     * Sanity check, only meaningful when ENOMEM is the expected error:
+     * the claim can only run out of memory if fewer unclaimed pages are
+     * available than requested. If that does not hold, the earlier steps
+     * of the test did not set up the expected conditions, so report that
+     * as the cause of the failure.
+     */
+    if ( expected_errno == ENOMEM )
+    {
+        uint64_t outstanding_claims = 0, unclaimed;
+        unsigned long free_pages = 0;
+        int ret;
+
+        ret = lib_get_total_claims(ctx, &outstanding_claims);
+        if ( ret )
+            return ret;
+        ret = lib_get_global_free_pages(ctx, &free_pages);
+        if ( ret )
+            return ret;
+
+        /* Clamp at 0 so that claims >= free cannot wrap around. */
+        unclaimed = free_pages > outstanding_claims
+                    ? free_pages - outstanding_claims : 0;
+        if ( request < unclaimed )
+            return lib_fail(ctx,
+                            "too many unclaimed pages to expect ENOMEM: "
+                            "request=%lu, free=%lu - claims=%" PRIu64
+                            "=%" PRIu64,
+                            request, free_pages, outstanding_claims,
+                            unclaimed);
+    }
+
+    lib_set_step(ctx, "%s", reason);
+    rc = xc_domain_claim_pages(ctx->env->xch, domid, request);
+    call_errno = errno;
+    if ( rc == -1 && call_errno == expected_errno )
+        return 0;
+
+    /* After an unexpected success errno is stale, so report 0 instead. */
+    return lib_fail_with_errno(ctx, rc ? call_errno : 0,
+                               "expected xc_domain_claim_pages() to fail "
+                               "with errno=%d(%s), got rc=%d",
+                               expected_errno, strerror(expected_errno), rc);
+}
+
+/* --- physmap population --- */
+
+/*
+ * Private helper: populate args.nr_extents extents for args.domid with
+ * xc_domain_populate_physmap_exact() and return its result code.
+ *
+ * The GPFN list is built as args.start, args.start + 1, ... and errno is
+ * cleared right before the call.
+ *
+ * Tests use the wrappers lib_populate_success() or lib_populate_failure(),
+ * which record a failure if the result does not match the expectation.
+ */
+static int lib_populate_physmap(struct test_ctx *ctx, lib_populate_args_t args)
+{
+    xen_pfn_t *gpfns = calloc(args.nr_extents, sizeof(*gpfns));
+    unsigned long idx = 0;
+
+    if ( gpfns == NULL )
+        return lib_fail(ctx, "calloc(%lu) failed", args.nr_extents);
+
+    while ( idx < args.nr_extents )
+    {
+        gpfns[idx] = args.start + idx;
+        idx++;
+    }
+
+    errno = 0;
+    rc = xc_domain_populate_physmap_exact(ctx->env->xch, args.domid,
+                                          args.nr_extents, args.order,
+                                          args.flags, gpfns);
+    free(gpfns);
+
+    return rc;
+}
+
+/*
+ * Populate the extents described by args and require the operation to
+ * succeed. Returns 0 on success; otherwise a failure naming the node
+ * encoded in args.flags is recorded and lib_fail()'s value is returned.
+ */
+int lib_populate_success(struct test_ctx *ctx, lib_populate_args_t args)
+{
+    rc = lib_populate_physmap(ctx, args);
+    if ( rc == 0 )
+        return 0;
+
+    return lib_fail(ctx, "expected populate to succeed for node %u",
+                    XENMEMF_get_node(args.flags));
+}
+
+/*
+ * Populate the extents described by args and require the operation to
+ * fail. Returns 0 when it failed; if it unexpectedly succeeded, a failure
+ * naming the node encoded in args.flags is recorded (with errnum 0).
+ */
+int lib_populate_failure(struct test_ctx *ctx, lib_populate_args_t args)
+{
+    rc = lib_populate_physmap(ctx, args);
+    if ( rc != 0 )
+        return 0;
+
+    return lib_fail_with_errno(
+        ctx, 0, "expected exact-node populate to fail for node %u",
+        XENMEMF_get_node(args.flags));
+}
+
+/* --- test runner --- */
+
+/* Return the time elapsed from *start to *end in milliseconds. */
+static double timespec_diff_ms(const struct timespec *start,
+                               const struct timespec *end)
+{
+    const double whole_ms = (double)(end->tv_sec - start->tv_sec) * 1000.0;
+    const double frac_ms = (double)(end->tv_nsec - start->tv_nsec) / 1e6;
+
+    return whole_ms + frac_ms;
+}
+
+/* Print the command-line help for program prog to stream. */
+static void usage(FILE *stream, const char *prog)
+{
+    fprintf(stream, "Usage: %s [OPTIONS]\n\n", prog);
+    fputs("Options:\n"
+          " -l, --list List available test IDs and exit\n"
+          " -t, --test ID Run only the specified test ID (repeatable)\n"
+          " -v, --verbose Print per-step progress\n"
+          " -h, --help Show this help text\n",
+          stream);
+}
+
+/* Print the ID and name of each of the num_cases test cases; returns 0. */
+int lib_print_available_tests(const struct test_case *cases, size_t num_cases)
+{
+    size_t idx = 0;
+
+    puts("Available tests:");
+    while ( idx < num_cases )
+    {
+        const struct test_case *tc = &cases[idx++];
+
+        printf(" %s %s\n", tc->id, tc->name);
+    }
+
+    return 0;
+}
+
+/*
+ * Parse the command-line arguments into *cfg.
+ *
+ * Recognised options:
+ *   -h  print the usage text to stdout and exit the process with status 0
+ *   -l  set cfg->list_only; the caller should list the tests and exit
+ *   -t  append the given test ID to cfg->selected_ids (repeatable; the
+ *       process exits via errx() when the array is full)
+ *   -v  set cfg->verbose
+ *
+ * Unless cfg->list_only is set, a banner and the selected test IDs (if
+ * any) are printed. Without -t, no IDs are stored in cfg.
+ *
+ * Returns 0 on success, or 1 on an invalid option (usage goes to stderr).
+ */
+int lib_parse_args(int argc, char *argv[], struct runtime_config *cfg)
+{
+    int opt;
+
+    while ( (opt = getopt_long(argc, argv, "hlt:v", long_options, NULL)) != -1 )
+    {
+        switch ( opt )
+        {
+        case 'h':
+            usage(stdout, argv[0]);
+            /*
+             * Help is a complete request of its own: exit here instead of
+             * falling through into the --list handling below.
+             */
+            exit(0);
+
+        case 'l':
+            cfg->list_only = true;
+            break;
+
+        case 't':
+            if ( cfg->nr_selected_ids >= ARRAY_SIZE(cfg->selected_ids) )
+                errx(1, "too many --test selectors (max %zu)",
+                     ARRAY_SIZE(cfg->selected_ids));
+            cfg->selected_ids[cfg->nr_selected_ids++] = optarg;
+            break;
+
+        case 'v':
+            cfg->verbose = true;
+            break;
+
+        default:
+            usage(stderr, argv[0]);
+            return 1;
+        }
+    }
+
+    if ( cfg->list_only )
+        return 0;
+
+    printf("========= testcase program: %s ==========\n", argv[0]);
+    if ( cfg->nr_selected_ids )
+    {
+        printf("Selected %zu test(s):\n", cfg->nr_selected_ids);
+        for ( size_t i = 0; i < cfg->nr_selected_ids; i++ )
+            printf(" %s\n", cfg->selected_ids[i]);
+    }
+    return 0;
+}
+
+/*
+ * Run a single test case, capturing its result and always cleaning up.
+ *
+ * The host-wide outstanding claims are sampled as a baseline before the
+ * test domains are created. After the test body has run and both domains
+ * have been destroyed, the outstanding claims are checked to be back at
+ * that baseline, which verifies that all claims were released.
+ *
+ * env:    test environment (Xen handle, node selection, domain template).
+ * cfg:    runtime configuration, made available to the test via the ctx.
+ * test:   the test case to run.
+ * result: output; reset here and filled with status, details and duration.
+ *
+ * Returns 0 when the test passed or was skipped, and a non-zero value
+ * otherwise; a negative value also forces result->status to TEST_FAILED.
+ */
+int lib_run_one_test(struct test_env *env, const struct runtime_config *cfg,
+                     const struct test_case *test, struct test_result *result)
+{
+    struct test_ctx ctx = {
+        .env = env,
+        .cfg = cfg,
+        .result = result,
+        .dom_1 = DOMID_INVALID,
+        .dom_2 = DOMID_INVALID,
+        .target1 = env->primary_node,
+        .target2 = INVALID_NODE,
+        .alloc_pages = 0,
+        .step = "",
+    };
+    uint64_t baseline_outstanding = 0;
+    struct timespec start, end;
+    /*
+     * The outcome is kept in a local on purpose: the helpers called during
+     * cleanup assign the shared global rc, which would otherwise overwrite
+     * the result of the test body before it is evaluated below.
+     */
+    int ret;
+
+    if ( env->num_nodes >= 2 )
+        ctx.target2 = env->secondary_node;
+
+    result->test = test;
+    result->status = TEST_PASSED;
+    result->params[0] = '\0';
+    result->details[0] = '\0';
+    result->duration_ms = 0.0;
+
+    /*
+     * Fixture: capture the baseline, create both test domains, run the
+     * test body, then always destroy any remaining test domains and verify
+     * that the outstanding pages returned to the baseline.
+     */
+    ret = lib_get_total_claims(&ctx, &baseline_outstanding);
+    if ( ret )
+        goto out;
+
+    ret = lib_create_domain(&ctx, &ctx.dom_1, "primary");
+    if ( ret )
+        goto out;
+
+    ret = lib_create_domain(&ctx, &ctx.dom_2, "secondary");
+    if ( ret )
+        goto out;
+
+    clock_gettime(CLOCK_MONOTONIC, &start);
+
+    errno = 0;
+    ret = test->test(&ctx); /* Run the test body */
+
+    clock_gettime(CLOCK_MONOTONIC, &end);
+    result->duration_ms = timespec_diff_ms(&start, &end);
+
+    /* A skipped test (positive return plus TEST_SKIPPED) is not an error. */
+    if ( ret > 0 && result->status == TEST_SKIPPED )
+        ret = 0;
+
+out:
+    /* Clean up the domains; only a so-far successful run can turn to -1. */
+    if ( lib_destroy_domain(&ctx, &ctx.dom_2, "helper") && !ret )
+        ret = -1;
+
+    if ( lib_destroy_domain(&ctx, &ctx.dom_1, "primary") && !ret )
+        ret = -1;
+
+    /* The baseline is only meaningful when everything else went well. */
+    if ( !ret &&
+         lib_check_claim(&ctx, baseline_outstanding, 0,
+                         "check cleanup restored baseline claimed pages") )
+        ret = -1;
+
+    if ( ret < 0 )
+        result->status = TEST_FAILED;
+
+    return ret;
+}
+
+/*
+ * Run every selected test case and print one result line per test.
+ *
+ * Test i stores its outcome in results[i]; entries of tests that are not
+ * selected are left untouched. For failed and skipped tests the recorded
+ * details are printed below the result line.
+ */
+void lib_run_tests(struct test_env *env, char *argv0,
+                   const struct runtime_config *cfg,
+                   const struct test_case *test_cases,
+                   unsigned int num_test_cases, struct test_result *results)
+{
+    for ( size_t idx = 0; idx < num_test_cases; idx++ )
+    {
+        const struct test_case *tc = &test_cases[idx];
+        struct test_result *res = &results[idx];
+        const char *params;
+
+        if ( !test_is_selected(cfg, tc) )
+            continue;
+
+        lib_run_one_test(env, cfg, tc, res);
+
+        /* Result line: program::id [parameters] status (duration). */
+        params = res->params[0] ? res->params : "default";
+        printf("%s::%s [%s] %s (%.2f ms)\n", argv0, res->test->id,
+               params, status_name(res->status), res->duration_ms);
+
+        switch ( res->status )
+        {
+        case TEST_FAILED:
+        case TEST_SKIPPED:
+            printf(" %s\n", res->details);
+            break;
+
+        default:
+            break;
+        }
+    }
+}
+
+/*
+ * Print a short summary: one line per executed test, the recorded details
+ * of failed and skipped tests, and the passed/failed/skipped totals.
+ * Entries without a test pointer are ignored.
+ *
+ * Returns the number of failed tests.
+ */
+int lib_summary(const struct test_result *results, unsigned int num_results)
+{
+    unsigned int passed = 0, failed = 0, skipped = 0;
+
+    puts("================== short test summary info =================");
+    for ( size_t idx = 0; idx < num_results; idx++ )
+    {
+        const struct test_result *res = &results[idx];
+
+        if ( res->test == NULL )
+            continue;
+
+        printf("%s %s %s\n", status_name(res->status),
+               res->test->id, res->test->name);
+
+        if ( res->status == TEST_PASSED )
+        {
+            passed++;
+            continue;
+        }
+
+        if ( res->status == TEST_FAILED )
+            failed++;
+        else if ( res->status == TEST_SKIPPED )
+            skipped++;
+        else
+            continue;
+
+        /* Failed and skipped tests carry an explanation. */
+        printf(" %s\n", res->details);
+    }
+    printf("============ %u passed, %u failed, %u skipped ============\n",
+           passed, failed, skipped);
+    return failed;
+}
+
+/*
+ * Adapt the domain creation template to the capabilities in *physinfo.
+ *
+ * On x86, the hap flag is dropped when the host does not report hap, and
+ * the hvm flag and the emulation flags are dropped unless the host reports
+ * hvm together with at least one of hap or shadow. Other architectures
+ * leave the template unchanged.
+ */
+static void fixup_create_template(struct xen_domctl_createdomain *create,
+                                  const xc_physinfo_t *physinfo)
+{
+#if defined(__x86_64__) || defined(__i386__)
+    const bool has_hap = physinfo->capabilities & XEN_SYSCTL_PHYSCAP_hap;
+    const bool has_shadow = physinfo->capabilities & XEN_SYSCTL_PHYSCAP_shadow;
+    const bool has_hvm = physinfo->capabilities & XEN_SYSCTL_PHYSCAP_hvm;
+
+    if ( !has_hap )
+        create->flags &= ~XEN_DOMCTL_CDF_hap;
+
+    /* Keep hvm only with hvm support and at least one of hap/shadow. */
+    if ( has_hvm && (has_hap || has_shadow) )
+        return;
+
+    create->flags &= ~XEN_DOMCTL_CDF_hvm;
+    create->arch.emulation_flags = 0;
+#else
+    (void)physinfo;
+#endif
+}
+
+/*
+ * Initialise the test environment: open the Xen control interface, query
+ * the NUMA nodes and their memory, prepare the domain creation template
+ * and choose the primary and secondary test nodes.
+ *
+ * Any failure to set up the environment terminates the process through
+ * err()/errx() with exit status 1, so this function only returns on
+ * success, and then returns 0.
+ */
+int lib_initialise_test_env(struct test_env *env)
+{
+    xc_physinfo_t physinfo;
+
+    env->xch = xc_interface_open(NULL, NULL, 0);
+    if ( !env->xch )
+        err(1, "xc_interface_open");
+
+    /*
+     * Get the number of nodes to allocate xc_meminfo_t structures for.
+     * If NUMA is disabled, this will return one node, so we can still
+     * run tests that don't require > 1 NUMA node on non-NUMA hosts.
+     */
+    if ( xc_numainfo(env->xch, &env->num_nodes, NULL, NULL) )
+        err(1, "xc_numainfo (node count)");
+    if ( !env->num_nodes )
+        errx(1, "xc_numainfo reported no NUMA nodes");
+
+    /* Allocate memory for xc_meminfo_t structures */
+    env->meminfo = calloc(env->num_nodes, sizeof(*env->meminfo));
+    if ( !env->meminfo )
+        err(1, "calloc");
+
+    /*
+     * Populate the meminfo structures and physinfo with current data.
+     * Both results are used below, so a failed query must not be ignored.
+     */
+    if ( xc_numainfo(env->xch, &env->num_nodes, env->meminfo, NULL) )
+        err(1, "xc_numainfo (meminfo)");
+    if ( xc_physinfo(env->xch, &physinfo) )
+        err(1, "xc_physinfo");
+
+    /* Initialise the create_template structure */
+    env->create_template = (struct xen_domctl_createdomain){
+        .flags = XEN_DOMCTL_CDF_hvm | XEN_DOMCTL_CDF_hap,
+        .max_vcpus = 1,
+        .max_grant_frames = 1,
+        .grant_opts = XEN_DOMCTL_GRANT_version(1),
+#if defined(__x86_64__) || defined(__i386__)
+        .arch = { .emulation_flags = XEN_X86_EMU_LAPIC, },
+#endif
+    };
+    /* Update the create_template structure based on the host's capabilities */
+    fixup_create_template(&env->create_template, &physinfo);
+
+    env->primary_node = 0;
+    env->secondary_node = 0;
+    env->have_secondary_node = false;
+
+    /*
+     * Pick the node with the most free memory as the primary node, and if
+     * there's a second node, pick the one with the next most free memory as
+     * the secondary.
+     */
+    for ( unsigned int i = 1; i < env->num_nodes; i++ )
+    {
+        if ( env->meminfo[i].memfree > env->meminfo[env->primary_node].memfree )
+        {
+            /* New best node: the previous best becomes the runner-up. */
+            env->secondary_node = env->primary_node;
+            env->primary_node = i;
+            env->have_secondary_node = true;
+        }
+        else if ( !env->have_secondary_node ||
+                  env->meminfo[i].memfree >
+                  env->meminfo[env->secondary_node].memfree )
+        {
+            env->secondary_node = i;
+            env->have_secondary_node = true;
+        }
+    }
+
+    if ( env->num_nodes < 2 )
+        env->have_secondary_node = false;
+    else if ( env->secondary_node == env->primary_node )
+    {
+        /* Safety net: fall back to the first node that is not the primary. */
+        for ( unsigned int i = 0; i < env->num_nodes; i++ )
+        {
+            if ( i != env->primary_node )
+            {
+                env->secondary_node = i;
+                env->have_secondary_node = true;
+                break;
+            }
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Release the test environment: close the Xen control interface if it is
+ * open and free the meminfo array. Both pointers are reset to NULL.
+ */
+void lib_release_test_env(struct test_env *env)
+{
+    if ( env->xch != NULL )
+    {
+        xc_interface_close(env->xch);
+        env->xch = NULL;
+    }
+
+    free(env->meminfo);
+    env->meminfo = NULL;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/tools/tests/mem-claim/libtestclaims.h b/tools/tests/mem-claim/libtestclaims.h
new file mode 100644
index 000000000000..66aa6bc2da10
--- /dev/null
+++ b/tools/tests/mem-claim/libtestclaims.h
@@ -0,0 +1,202 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Header file for the functional system test framework
+ * testing for memory claims in the Xen hypervisor.
+ *
+ * This header declares the interface for the test framework implemented
+ * in libtestclaims.c.
+ *
+ * It includes the definitions of the test environment, test context, and
+ * helper functions for performing memory claim operations, querying memory
+ * state, managing test domains, and recording test results.
+ */
+#ifndef _LIBTEST_MEM_CLAIMS_
+#define _LIBTEST_MEM_CLAIMS_
+
+#include <limits.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <xenctrl.h>
+
+/* Capacity of runtime_config.selected_ids, i.e. the maximum of --test IDs. */
+#define MAX_SELECTED_TESTS 32
+/* Marks "no node"; the runner initialises test_ctx.target2 with it. */
+#define INVALID_NODE UINT_MAX
+/* NOTE(review): presumably the default "spare" count for the claim-all helpers - confirm against the tests. */
+#define SPARE_PAGES 200
+
+/* Host-wide state shared by all tests; set up by lib_initialise_test_env(). */
+struct test_env {
+ /* libxenctrl handle from xc_interface_open(). */
+ xc_interface *xch;
+ /* Template copied by lib_create_domain() for every new test domain. */
+ struct xen_domctl_createdomain create_template;
+ /* Number of NUMA nodes as reported by xc_numainfo(). */
+ unsigned int num_nodes;
+ /* Node with the most free memory at initialisation time. */
+ unsigned int primary_node;
+ /* Node with the next most free memory; see have_secondary_node. */
+ unsigned int secondary_node;
+ /* False when fewer than two nodes exist. */
+ bool have_secondary_node;
+ /* calloc()ed array of num_nodes entries filled by xc_numainfo(). */
+ xc_meminfo_t *meminfo;
+};
+
+/* Command-line configuration filled in by lib_parse_args(). */
+struct runtime_config {
+ /* Test IDs given with -t/--test (pointers to the option arguments). */
+ const char *selected_ids[MAX_SELECTED_TESTS];
+ /* Number of valid entries in selected_ids; 0 selects every test. */
+ size_t nr_selected_ids;
+ /* Set by -l/--list: the caller should list the tests and exit. */
+ bool list_only;
+ /* Set by -v/--verbose. */
+ bool verbose;
+};
+
+/* Outcome of a test; each result starts as TEST_PASSED in lib_run_one_test(). */
+enum test_status {
+ TEST_PASSED,
+ TEST_FAILED,
+ TEST_SKIPPED,
+};
+
+struct test_case;
+
+/* Result record of one test case, reset and filled by lib_run_one_test(). */
+struct test_result {
+ /* The test this result belongs to; NULL marks a test that was not run. */
+ const struct test_case *test;
+ enum test_status status;
+ /* Parameter description shown in the result line ("default" if empty). */
+ char params[256];
+ /* Explanation printed for failed and skipped tests. */
+ char details[4096];
+ /* Run time of the test body in milliseconds. */
+ double duration_ms;
+};
+
+/* Per-test context created by lib_run_one_test() and handed to the test. */
+struct test_ctx {
+ struct test_env *env;
+ const struct runtime_config *cfg;
+ /* Where status, params and details of the running test are recorded. */
+ struct test_result *result;
+ /* Domain created by the fixture with the label "primary". */
+ uint32_t dom_1;
+ /* Domain created by the fixture with the label "secondary". */
+ uint32_t dom_2;
+ /* Initialised to env->primary_node. */
+ uint64_t target1;
+ /* env->secondary_node with two or more nodes, otherwise INVALID_NODE. */
+ uint64_t target2;
+ /* Initialised to 0 by the fixture; NOTE(review): usage is not visible here. */
+ uint64_t alloc_pages;
+ /* Starts empty; presumably the text set by lib_set_step() - confirm. */
+ char step[160];
+};
+
+/* Arguments of lib_populate_success() and lib_populate_failure(). */
+struct lib_populate_physmap_args {
+ /* Domain whose physmap is populated. */
+ uint32_t domid;
+ /* First GPFN; extent i is requested at start + i. */
+ xen_pfn_t start;
+ /* Number of extents to populate. */
+ unsigned long nr_extents;
+ /* Extent order passed to xc_domain_populate_physmap_exact(). */
+ unsigned int order;
+ /* Memory flags for the call; the node for messages is taken from them. */
+ unsigned int flags;
+};
+typedef struct lib_populate_physmap_args lib_populate_args_t;
+
+/*
+ * test_fn_t: the test body. Called after the fixture has created the
+ * domains ctx->dom_1 and ctx->dom_2 and captured a baseline
+ * outstanding-pages count. Tests needing extra domains should create and
+ * destroy them explicitly.
+ * Returns 0 on pass, -1 on fail, 1 on skip.
+ */
+typedef int (*test_fn_t)(struct test_ctx *ctx);
+
+/* A test case: the ID used for --test selection, a name, and the body. */
+struct test_case {
+ const char *id;
+ const char *name;
+ test_fn_t test;
+};
+
+/* --- diagnostics helpers --- */
+/* All helpers below take printf-style format strings (checked by the compiler). */
+void lib_appendf(char *buf, size_t size, const char *fmt, ...)
+__attribute__((format(printf, 3, 4)));
+
+/* Append a formatted string to ctx->result->details. */
+#define ctx_appendf(ctx, ...) \
+ lib_appendf((ctx)->result->details, sizeof((ctx)->result->details), \
+ __VA_ARGS__)
+void lib_debugf(struct test_ctx *ctx, const char *fmt, ...)
+__attribute__((format(printf, 2, 3)));
+/* Describe the step about to be executed. */
+void lib_set_step(struct test_ctx *ctx, const char *fmt, ...)
+__attribute__((format(printf, 2, 3)));
+/* Record a failure with an explicit errnum; callers pass 0 when no errno applies. */
+int lib_fail_with_errno(struct test_ctx *ctx, int errnum, const char *fmt, ...)
+__attribute__((format(printf, 3, 4)));
+/* Record a failure; callers return its result as their failure code. */
+int lib_fail(struct test_ctx *ctx, const char *fmt, ...)
+__attribute__((format(printf, 2, 3)));
+/* NOTE(review): presumably marks the test as TEST_SKIPPED - confirm in libtestclaims.c. */
+int lib_skip_test(struct test_ctx *ctx, const char *fmt, ...)
+__attribute__((format(printf, 2, 3)));
+
+/* --- memory-state queries --- */
+/* NOTE(review): several callers pass NULL for total_pages - confirm it is accepted. */
+int lib_get_node_free_pages(struct test_ctx *ctx, unsigned int node,
+ unsigned long *free_pages,
+ unsigned long *total_pages);
+int lib_get_global_free_pages(struct test_ctx *ctx, unsigned long *free_pages);
+/* Store the host-wide outstanding claims (from xc_physinfo()) in the output. */
+int lib_get_total_claims(struct test_ctx *ctx,
+ uint64_t *outstanding_pages_global);
+/* Check that outstanding claims equal baseline_outstanding + expected_delta. */
+int lib_check_claim(struct test_ctx *ctx, uint64_t baseline_outstanding,
+ uint64_t expected_delta, const char *reason);
+
+/* --- domain lifecycle --- */
+/* Both set *domid to DOMID_INVALID when no domain is (any longer) attached. */
+int lib_create_domain(struct test_ctx *ctx, uint32_t *domid, const char *label);
+int lib_destroy_domain(struct test_ctx *ctx, uint32_t *domid,
+ const char *label);
+
+/* --- claim operations --- */
+/* The functions below return 0 when the expectation was met. */
+int lib_claim_memory(struct test_ctx *ctx, uint32_t domid, uint32_t nr_claims,
+ memory_claim_t *claims, const char *reason);
+int lib_expect_claim_memory_failure(struct test_ctx *ctx, uint32_t domid,
+ uint32_t nr_claims, memory_claim_t *claims,
+ int expected_errno, const char *reason);
+int lib_release_all_claims(struct test_ctx *ctx, uint32_t domid);
+int lib_claim_pages_legacy(struct test_ctx *ctx, uint32_t domid,
+ unsigned long nr_pages, const char *reason);
+int lib_claim_pages_legacy_failure(struct test_ctx *ctx, uint32_t domid,
+ unsigned long nr_pages, int expected_errno,
+ const char *reason);
+int lib_claim_all_on_host(struct test_ctx *ctx, uint32_t domid,
+ unsigned int spare);
+/* NOTE(review): the definition spells node and spare as unsigned int. */
+int lib_claim_all_on_node(struct test_ctx *ctx, uint32_t domid, uint32_t node,
+ uint32_t spare);
+
+/* --- physmap --- */
+/* Populate extents and expect success or failure respectively. */
+int lib_populate_success(struct test_ctx *ctx, lib_populate_args_t args);
+int lib_populate_failure(struct test_ctx *ctx, lib_populate_args_t args);
+
+/* --- test runner --- */
+int lib_print_available_tests(const struct test_case *cases, size_t num_cases);
+/* Returns 0 on success and 1 on invalid arguments. */
+int lib_parse_args(int argc, char *argv[], struct runtime_config *cfg);
+int lib_run_one_test(struct test_env *env, const struct runtime_config *cfg,
+ const struct test_case *test, struct test_result *result);
+/* Test i stores its outcome in results[i]. */
+void lib_run_tests(struct test_env *env, char *argv0,
+ const struct runtime_config *cfg,
+ const struct test_case *test_cases,
+ unsigned int num_test_cases, struct test_result *results);
+/* Returns the number of failed tests. */
+int lib_summary(const struct test_result *results, unsigned int num_results);
+int lib_initialise_test_env(struct test_env *env);
+void lib_release_test_env(struct test_env *env);
+unsigned long lib_default_alloc_pages(unsigned long free_pages);
+
+/*
+ * Shared scratch variable that library helpers assign results of libxenctrl
+ * calls to. NOTE(review): a helper call may overwrite it, so callers should
+ * keep results they still need in local variables.
+ */
+extern int rc;
+
+/* Map a test status to its printable name; unknown values give "UNKNOWN". */
+static inline const char *status_name(enum test_status status)
+{
+    if ( status == TEST_PASSED )
+        return "PASSED";
+
+    if ( status == TEST_FAILED )
+        return "FAILED";
+
+    if ( status == TEST_SKIPPED )
+        return "SKIPPED";
+
+    return "UNKNOWN";
+}
+
+/*
+ * Decide whether test should run under the configuration cfg.
+ *
+ * Without any --test selectors every test is selected; otherwise the ID of
+ * the test must equal one of cfg->selected_ids (exact string comparison,
+ * which needs strcmp() from <string.h>).
+ */
+static inline bool test_is_selected(const struct runtime_config *cfg,
+                                    const struct test_case *test)
+{
+    /* No selectors given: run everything. */
+    if ( !cfg->nr_selected_ids )
+        return true;
+
+    for ( size_t i = 0; i < cfg->nr_selected_ids; i++ )
+    {
+        if ( strcmp(cfg->selected_ids[i], test->id) == 0 )
+            return true;
+    }
+
+    return false;
+}
+
+#endif /* _LIBTEST_MEM_CLAIMS_ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/tools/tests/mem-claim/test-claim-memory.c b/tools/tests/mem-claim/test-claim-memory.c
new file mode 100644
index 000000000000..4ea94515ed22
--- /dev/null
+++ b/tools/tests/mem-claim/test-claim-memory.c
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Functional system test suite for testing memory claims in Xen.
+ *
+ * It is designed to test the xc_domain_claim_memory() API and to reconfirm
+ * the xc_domain_claim_pages() API. It runs in Dom0 and interacts with the
+ * running Xen hypervisor using libxenctrl.
+ *
+ * The verifications performed by the test cases include:
+ *
+ * - Validating that claims can be successfully made with valid parameters
+ * and that they have the expected effects on the system's memory state,
+ * such as increasing the number of outstanding claimed pages.
+ *
+ * - Validating that invalid claim attempts are rejected with the expected
+ * error codes, such as EINVAL for invalid parameters or ENOMEM when
+ * claiming more pages than are free.
+ *
+ * - Validating the effects of memory claims on the system, such as blocking
+ * effects when claiming more pages than are free or left unclaimed by
+ * other domains, and the guarantees provided by claims such as reserved
+ * claimed pages not being allocated to other domains.
+ *
+ * To perform these verifications, the test cases interact
+ * with the Xen hypervisor to query the system's memory state, create and
+ * destroy test domains, perform claim operations, and populate memory to
+ * test the blocking effects of claims.
+ *
+ * As the act of testing the blocking effects of claims involves allocating
+ * memory from the system, other operations that interact with the system's
+ * memory state should be avoided or kept to a minimum during the test run
+ * to avoid interference with the test results.
+ *
+ * During these interactions, the test cases record successes and failures
+ * with detailed messages that include the current step, test parameters,
+ * and a snapshot of relevant memory state to aid in diagnosing issues
+ * when a test fails.
+ *
+ * The test suite also ensures that domains are destroyed after tests to
+ * clean up claims and leave the system in a clean state, even if a test
+ * fails partway through.
+ *
+ * Some test cases that require multiple NUMA nodes can be skipped if the
+ * system does not have a 2nd NUMA node, allowing the test suite to be run
+ * on single-node systems as well.
+ *
+ * It is designed to run on a quiet system, as it stakes claims on the system's
+ * memory and verifies their effects by allocating memory from the running Xen
+ * hypervisor through libxenctrl in Dom0.
+ */
+#include <err.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdio.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <xen-tools/common-macros.h>
+
+#include "libtestclaims.h"
+#include "accounting-1.h"
+#include "input-phase1.h"
+#include "input-phase2.h"
+
+ /* Expand to a test_case initialiser with the given id, name and function. */
+ #define CASE(ID, NAME, FN) \
+ { \
+ .id = (ID), .name = (NAME), .test = (FN) \
+ }
+
+/*
+ * List of test cases. lib_run_tests() iterates over this list to run tests.
+ *
+ * Tests are identified by their id (e.g. "A1-1") and have a descriptive name
+ * and a function pointer to the test implementation.
+ */
+static const struct test_case cases[] = {
+ CASE("A1-1", "basic_node_claim", test_basic_node_claim),
+ CASE("A1-2", "global_replace_after_alloc", test_global_replace_after_alloc),
+ CASE("A1-3", "node_replace_after_alloc", test_node_replace_after_alloc),
+ CASE("A1-4", "legacy_global_claim", test_legacy_global_claim),
+ CASE("A1-5", "move_claim_between_nodes", test_move_claim_between_nodes),
+ CASE("A1-6", "zero_claim_resets_claim", test_zero_claim_resets_claim),
+ CASE("A1-7", "zero_claim_memory_reset", test_zero_claim_memory_resets),
+ CASE("I1-1", "reject_non_present_node", test_reject_non_present_node),
+ CASE("I1-2", "reject_too_many_claims", test_reject_too_many_claims),
+ CASE("I1-3", "reject_node_gt_uint8_max", test_reject_node_gt_uint8_max),
+ CASE("I1-4", "reject_pages_gt_int32_max", test_reject_pages_gt_int32_max),
+ CASE("I1-5", "reject_nonzero_pad", test_reject_nonzero_pad),
+ CASE("I1-6", "reject_zero_claim_count", test_reject_zero_claim_count),
+ CASE("I1-7", "null_claims_nonzero_count", test_null_claims_nonzero_count),
+ CASE("I1-8", "zero_count_with_pointer", test_zero_count_valid_pointer),
+ CASE("I1-9", "claim_pages_gt_free_enomem", test_claim_pages_gt_free_enomem),
+ CASE("I2-1", "claim_pages_causes_enomem", test_claim_pages_causes_enomem),
+ CASE("I2-2", "claim_memory_causes_enomem", test_claim_memory_causes_enomem),
+ CASE("I2-3", "claim_prima_causes_enomem", test_claim_prima_causes_enomem),
+};
+
+ /* Entry point; tests run only if env setup returns 0 (assumed success). */
+ int main(int argc, char **argv)
+ {
+     struct test_result results[ARRAY_SIZE(cases)] = {};
+     struct runtime_config cfg = {};
+     struct test_env env = {};
+     int retval = lib_parse_args(argc, argv, &cfg);
+
+     if ( cfg.list_only )
+         return lib_print_available_tests(cases, ARRAY_SIZE(cases));
+     if ( !retval )
+         retval = lib_initialise_test_env(&env);
+     if ( !retval )
+     {
+         lib_run_tests(&env, argv[0], &cfg, cases, ARRAY_SIZE(cases), results);
+         retval = lib_summary(results, ARRAY_SIZE(results));
+         lib_release_test_env(&env);
+     }
+     return retval ? EXIT_FAILURE : EXIT_SUCCESS;
+ }
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--
2.39.5
^ permalink raw reply related [flat|nested] 15+ messages in thread