Xen project Mailing List

[PATCH v3 2/2] xen/x86: Change stub page allocation/free

From: Jason Andryuk <jason.andryuk@xxxxxxx>

Date: Mon, 8 Jun 2026 20:06:38 -0400

Arc-authentication-results: i=1; mx.microsoft.com 1; spf=pass (sender ip is 165.204.84.17) smtp.rcpttodomain=lists.xenproject.org smtp.mailfrom=amd.com; dmarc=pass (p=quarantine sp=quarantine pct=100) action=none header.from=amd.com; dkim=none (message not signed); arc=none (0)

Arc-message-signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com; s=arcselector10001; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-AntiSpam-MessageData-ChunkCount:X-MS-Exchange-AntiSpam-MessageData-0:X-MS-Exchange-AntiSpam-MessageData-1; bh=yTwb8ekE7mBmp1WJz2KP3LjX28ggFVhtoF/cTlRbrXY=; b=nPG2dmPiAf3w8VJsZ/9wkW2FuG2yq0Z0HBlt9j0Az0uA5fErROFRvZjpq1IaSC6xwUWJOrEjHORr+Dq/AwBhtfEfFUBPTKF+A/s7/yHdTVw6ZYexqAwL4trGadqwJRvi3pF9M+C+776B/qcq3EHT0LHrwi0W+eCD1j9C3TsbIlQikNIryLmycriFCdG5OGPdew0+XMc0YFUIT/36TeaLhARcpiK83fIQnMr1JWwvjN3s3PxNh7X0IdTl6q2peCYQ3xHXLAuOEjt7av3YllIejz2EmAkKI0BMHvPf1l5xQ/IT5bZurF3Iwyv+oevMavWa3TthBXIWgpLHDZIe4SfDrg==

Arc-seal: i=1; a=rsa-sha256; s=arcselector10001; d=microsoft.com; cv=none; b=nn4xTzilSUUyPxo9EYBK1kyL8PH96khgDf5KkXISvlRmAzSMy+wG8y8n/6kuFxsag1+LU56B/ePs15rl2um4eZAqVlQA1rYA2pJKMPqwRGctIlm1UEMf2ikWI4LRUm9Um8uZ1W8ahOGtHze7ur6YVeFbMYzL82/VxmrF5UI2P0aiHKa1jJlVnlifAtpj+PYcg9lomZfC8N9n1tUlumnum6PmN/20aUZ0SLHO4eU4eZq8jE/vOqWsHsQEEX44ALpmVFWZ5DWeOaSE/6nMo3mpFFV2Geu4oMEKrWL++Z4GHZGPUkXUwqHLHC61P3dDrKw1w5vKAKwvWsGS24dndXUgVQ==

Authentication-results: eu.smtp.expurgate.cloud; dkim=pass header.s=selector1 header.d=amd.com header.i="@amd.com" header.h="From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-SenderADCheck"

Cc: Jason Andryuk <jason.andryuk@xxxxxxx>, Jan Beulich <jbeulich@xxxxxxxx>, Andrew Cooper <andrew.cooper3@xxxxxxxxxx>, Roger Pau Monné <roger.pau@xxxxxxxxxx>, Teddy Astie <teddy.astie@xxxxxxxxxx>

Delivery-date: Tue, 09 Jun 2026 00:09:27 +0000

List-id: Xen developer discussion <xen-devel.lists.xenproject.org>

Today the inline tracking of the stub page is problematic. 0xcc is used to indicate unused, but it is also a "clear value." A !CONFIG_PV build with smt=0 will bring up CPU0, bring up CPU1, bring down CPU1, and free the in-use stub page. Subsequent CPU onlining can write to the re-used page. The new approach uses a global, CPU-indexed array of stub pages. However, to handle NUMA-aware allocations, we cannot allocate all the pages in advance because the NUMA information is not available. Keep track of 1 current page for each NUMA node, allocated on demand, and allocate the stub buffers out of those pages. The current NUMA allocation approach is opportunistic sharing among the groups of 32 processors. The new approach will allocate buffers densely in a NUMA node. Stub pages are no longer freed. They remain referenced in the global CPU-indexed array and are re-used if the CPU is re-onlined. The stubs and stub_nodes arrays don't have an explicit lock. During boot they are accessed single-threaded. During runtime, cpu_add_remove_lock serializes access. Fixes: 7a66ac8d1633 ("x86: move syscall trampolines off the stack") Signed-off-by: Jason Andryuk <jason.andryuk@xxxxxxx> --- I'm not sure how to test the NUMA part - I don't have a NUMA system. Also, if NUMA is active, is a CPU node of NUMA_NO_NODE still possible? I used the MAX_NUMNODES + 1 array sizing to handle that, but it's not obvious to me if that is necessary. Roger mentioned removing the per-cpu stubs.mfn. We'd need to replace that with exposing the stubs array for traps and the emulator. I have no idea if that will be an improvement and am looking for agreement on this patch before attempting. 
--- xen/arch/x86/include/asm/stubs.h | 2 +- xen/arch/x86/setup.c | 3 +- xen/arch/x86/smpboot.c | 110 +++++++++++++++++++++---------- 3 files changed, 77 insertions(+), 38 deletions(-) diff --git a/xen/arch/x86/include/asm/stubs.h b/xen/arch/x86/include/asm/stubs.h index a520928e9a..9d776f81dd 100644 --- a/xen/arch/x86/include/asm/stubs.h +++ b/xen/arch/x86/include/asm/stubs.h @@ -32,6 +32,6 @@ struct stubs { }; DECLARE_PER_CPU(struct stubs, stubs); -unsigned long alloc_stub_page(unsigned int cpu, unsigned long *mfn); +unsigned long assign_stub_page(unsigned int cpu); #endif /* X86_ASM_STUBS_H */ diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c index 19ee857abf..0cac94cbdb 100644 --- a/xen/arch/x86/setup.c +++ b/xen/arch/x86/setup.c @@ -2089,8 +2089,7 @@ void asmlinkage __init noreturn __start_xen(void) init_idle_domain(); - this_cpu(stubs.addr) = alloc_stub_page(smp_processor_id(), - &this_cpu(stubs).mfn); + this_cpu(stubs.addr) = assign_stub_page(0); BUG_ON(!this_cpu(stubs.addr)); bsp_traps_reinit(); /* Needs stubs allocated, must be before presmp_initcalls. */ diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c index d7619f534b..d9cd90389d 100644 --- a/xen/arch/x86/smpboot.c +++ b/xen/arch/x86/smpboot.c @@ -641,41 +641,96 @@ static int do_boot_cpu(int apicid, int cpu) return rc; } -#define STUB_BUF_CPU_OFFS(cpu) (((cpu) & (STUBS_PER_PAGE - 1)) * STUB_BUF_SIZE) +/* + * Indexed by CPU. `pg` may be shared by up to STUBS_PER_PAGE CPUs. Offset + * is the byte offset into the stub page for the CPU's stub buffer. + */ +struct stub_info { + struct page_info *pg; + unsigned int offset; +}; +struct stub_info __read_mostly stubs[NR_CPUS]; -unsigned long alloc_stub_page(unsigned int cpu, unsigned long *mfn) +/* + * Index by NUMA node. + * + * `pg` is the current stub page for the node. + * `next` is the next available stub index (STUBS_PER_PAGE available). + * + * if `pg` is NULL, allocate a new one. 
+ * if `pg` is !NULL, use `pg` and stub `next` + * When STUBS_PER_PAGE are all assigned, clear `pg` and `next`. + */ +struct stub_node { + struct page_info *pg; + unsigned int next; +}; +struct stub_node stub_nodes[MAX_NUMNODES + 1]; + +nodeid_t cpu_to_stub_node(unsigned int cpu) { - unsigned long stub_va; + nodeid_t node = cpu_to_node(cpu); + + return node == NUMA_NO_NODE ? MAX_NUMNODES : node; +} + +static struct page_info *alloc_stub_page(unsigned int cpu) +{ + nodeid_t node = cpu_to_stub_node(cpu); + unsigned int stub_idx; struct page_info *pg; BUILD_BUG_ON(STUBS_PER_PAGE & (STUBS_PER_PAGE - 1)); - if ( *mfn ) - pg = mfn_to_page(_mfn(*mfn)); - else + if ( !stub_nodes[node].pg ) { - nodeid_t node = cpu_to_node(cpu); unsigned int memflags = node != NUMA_NO_NODE ? MEMF_node(node) : 0; - pg = alloc_domheap_page(NULL, memflags); - if ( !pg ) - return 0; + stub_nodes[node].pg = alloc_domheap_page(NULL, memflags); + stub_nodes[node].next = 0; + + if ( !stub_nodes[node].pg ) + return NULL; - unmap_domain_page(memset(__map_domain_page(pg), 0xcc, PAGE_SIZE)); + unmap_domain_page(memset(__map_domain_page(stub_nodes[node].pg), + 0xcc, PAGE_SIZE)); } + stub_idx = stub_nodes[node].next++; + pg = stub_nodes[node].pg; + stubs[cpu].pg = stub_nodes[node].pg; + stubs[cpu].offset = stub_idx * STUB_BUF_SIZE; + if ( stub_nodes[node].next == STUBS_PER_PAGE ) + { + stub_nodes[node].pg = NULL; + stub_nodes[node].next = 0; + } + + return pg; +} + +unsigned long assign_stub_page(unsigned int cpu) +{ + unsigned long stub_va; + struct page_info *pg = stubs[cpu].pg; + + if ( !pg ) + pg = alloc_stub_page(cpu); + + if ( !pg ) + return 0; + stub_va = XEN_VIRT_END - FIXADDR_X_SIZE - (cpu + 1) * PAGE_SIZE; if ( map_pages_to_xen(stub_va, page_to_mfn(pg), 1, PAGE_HYPERVISOR_RX | MAP_SMALL_PAGES) ) - { - if ( !*mfn ) - free_domheap_page(pg); stub_va = 0; + else + { + per_cpu(stubs.mfn, cpu) = mfn_x(page_to_mfn(pg)); + stub_va += stubs[cpu].offset; } - else if ( !*mfn ) - *mfn = 
mfn_x(page_to_mfn(pg)); - return stub_va ? stub_va + STUB_BUF_CPU_OFFS(cpu) : 0; + return stub_va; } void cpu_exit_clear(unsigned int cpu) @@ -990,19 +1045,12 @@ static void cpu_smpboot_free(unsigned int cpu, bool remove) { mfn_t mfn = _mfn(per_cpu(stubs.mfn, cpu)); unsigned char *stub_page = map_domain_page(mfn); - unsigned int i; - memset(stub_page + STUB_BUF_CPU_OFFS(cpu), 0xcc, STUB_BUF_SIZE); - for ( i = 0; i < STUBS_PER_PAGE; ++i ) - if ( stub_page[i * STUB_BUF_SIZE] != 0xcc ) - break; + memset(stub_page + stubs[cpu].offset, 0xcc, STUB_BUF_SIZE); unmap_domain_page(stub_page); destroy_xen_mappings(per_cpu(stubs.addr, cpu) & PAGE_MASK, (per_cpu(stubs.addr, cpu) | ~PAGE_MASK) + 1); per_cpu(stubs.addr, cpu) = 0; - per_cpu(stubs.mfn, cpu) = 0; - if ( i == STUBS_PER_PAGE ) - free_domheap_page(mfn_to_page(mfn)); } if ( IS_ENABLED(CONFIG_PV32) ) @@ -1041,7 +1089,7 @@ void *cpu_alloc_stack(unsigned int cpu) static int cpu_smpboot_alloc(unsigned int cpu) { struct cpu_info *info; - unsigned int i, memflags = 0; + unsigned int memflags = 0; nodeid_t node = cpu_to_node(cpu); seg_desc_t *gdt; unsigned long stub_va; @@ -1091,15 +1139,7 @@ static int cpu_smpboot_alloc(unsigned int cpu) memcpy(per_cpu(idt, cpu), bsp_idt, sizeof(bsp_idt)); disable_each_ist(per_cpu(idt, cpu)); - for ( stub_page = 0, i = cpu & ~(STUBS_PER_PAGE - 1); - i < nr_cpu_ids && i <= (cpu | (STUBS_PER_PAGE - 1)); ++i ) - if ( cpu_online(i) && cpu_to_node(i) == node ) - { - per_cpu(stubs.mfn, cpu) = per_cpu(stubs.mfn, i); - break; - } - BUG_ON(i == cpu); - stub_va = alloc_stub_page(cpu, &per_cpu(stubs.mfn, cpu)); + stub_va = assign_stub_page(cpu); if ( !stub_va ) goto out; per_cpu(stubs.addr, cpu) = stub_va; -- 2.54.0

©2013 Xen Project, A Linux Foundation Collaborative Project. All Rights Reserved.
Linux Foundation is a registered trademark of The Linux Foundation.
Xen Project is a trademark of The Linux Foundation.