|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [PATCH] xen/arm: skip holes in physical address space when setting up frametable
On 17/04/2026 13:55, Luca Fancellu wrote:
> HI Michal,
>
>> On 17 Apr 2026, at 10:11, Michal Orzel <michal.orzel@xxxxxxx> wrote:
>>
>> Refactor setup_frametable_mappings() into init_frametable(), modeled
>> after x86's implementation. Instead of mapping one contiguous frametable
>> covering ram_start to ram_end (including holes), iterate the
>> pdx_group_valid bitmap to allocate and map frametable memory only for
>> valid PDX groups, skipping gaps in the physical address space. At the
>> moment we don't really take into account pdx_group_valid bitmap.
>>
>> This reduces memory consumption on systems with sparse RAM layouts by
>> not allocating frametable entries for non-existent memory regions.
>>
>> A file-local pdx_to_page() override is needed because the generic macro
>> in xen/include/xen/pdx.h does not account for ARM's non-zero
>> frametable_base_pdx.
>>
>> Update the MPU implementation to match the new init_frametable()
>> signature. Since MPU has no virtual address translation (ma == va),
>> hole-skipping is not possible and the frametable remains a single
>> contiguous allocation.
>>
>> Signed-off-by: Michal Orzel <michal.orzel@xxxxxxx>
>> ---
>> We've been using this approach at AMD for a while now. Without this we would
>> not
>> be able to boot some of our boards that have huge holes in the PA space, so I
>> consider this patch a great improvement.
>>
>> Two things to consider as a follow-up in the future:
>> - change generic pdx_to_page, page_to_pdx to take into account offset that
>> on x86 is zero but on other arches it is not. The page list code is
>> for now unaffected because the offset cancels out,
>> - use the same on RISCV.
>> ---
>> xen/arch/arm/arm32/mmu/mm.c | 3 +-
>> xen/arch/arm/include/asm/mm.h | 4 +-
>> xen/arch/arm/mm.c | 2 +-
>> xen/arch/arm/mmu/mm.c | 77 ++++++++++++++++++++++++-----------
>> xen/arch/arm/mpu/mm.c | 23 ++++++-----
>> 5 files changed, 70 insertions(+), 39 deletions(-)
>>
>> diff --git a/xen/arch/arm/arm32/mmu/mm.c b/xen/arch/arm/arm32/mmu/mm.c
>> index 5e4766ddcf65..0b595baa11b3 100644
>> --- a/xen/arch/arm/arm32/mmu/mm.c
>> +++ b/xen/arch/arm/arm32/mmu/mm.c
>> @@ -178,8 +178,7 @@ void __init setup_mm(void)
>>
>> setup_directmap_mappings(mfn_x(directmap_mfn_start), xenheap_pages);
>>
>> - /* Frame table covers all of RAM region, including holes */
>> - setup_frametable_mappings(ram_start, ram_end);
>> + init_frametable(ram_start);
>>
>> /*
>> * The allocators may need to use map_domain_page() (such as for
>> diff --git a/xen/arch/arm/include/asm/mm.h b/xen/arch/arm/include/asm/mm.h
>> index 72a692862420..2eb8465aa904 100644
>> --- a/xen/arch/arm/include/asm/mm.h
>> +++ b/xen/arch/arm/include/asm/mm.h
>> @@ -196,8 +196,8 @@ extern void *early_fdt_map(paddr_t fdt_paddr);
>> extern void remove_early_mappings(void);
>> /* Prepare the memory subystem to bring-up the given secondary CPU */
>> extern int prepare_secondary_mm(int cpu);
>> -/* Map a frame table to cover physical addresses ps through pe */
>> -extern void setup_frametable_mappings(paddr_t ps, paddr_t pe);
>> +/* Map a frame table */
>
> NIT: Would /* Initialize the frame table */ fit better the new helper
> description?
>
>> +void init_frametable(paddr_t ram_start);
>> /* Helper function to setup memory management */
>> void setup_mm_helper(void);
>> /* map a physical range in virtual memory */
>> diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c
>> index 6df8b616e464..e6b651956927 100644
>> --- a/xen/arch/arm/mm.c
>> +++ b/xen/arch/arm/mm.c
>> @@ -62,7 +62,7 @@ void __init setup_mm(void)
>>
>> setup_mm_helper();
>>
>> - setup_frametable_mappings(ram_start, ram_end);
>> + init_frametable(ram_start);
>>
>> init_staticmem_pages();
>> init_sharedmem_pages();
>> diff --git a/xen/arch/arm/mmu/mm.c b/xen/arch/arm/mmu/mm.c
>> index 6604f3bf4e6a..4b4da349c16c 100644
>> --- a/xen/arch/arm/mmu/mm.c
>> +++ b/xen/arch/arm/mmu/mm.c
>> @@ -8,16 +8,37 @@
>> #include <xen/pdx.h>
>> #include <xen/string.h>
>>
>> -/* Map a frame table to cover physical addresses ps through pe */
>> -void __init setup_frametable_mappings(paddr_t ps, paddr_t pe)
>> +#undef pdx_to_page
>> +#define pdx_to_page(pdx) gcc11_wrap(frame_table + ((pdx) -
>> frametable_base_pdx))
>> +
>> +static void __init
>> +init_frametable_chunk(unsigned long pdx_s, unsigned long pdx_e)
>> {
>> - unsigned long nr_pdxs = mfn_to_pdx(mfn_add(maddr_to_mfn(pe), -1)) -
>> - mfn_to_pdx(maddr_to_mfn(ps)) + 1;
>> - unsigned long frametable_size = nr_pdxs * sizeof(struct page_info);
>> - mfn_t base_mfn;
>> - const unsigned long mapping_size = frametable_size < MB(32) ? MB(2)
>> - : MB(32);
>> + unsigned long nr_pdxs = pdx_e - pdx_s;
>> + unsigned long chunk_size = nr_pdxs * sizeof(struct page_info);
>> + const unsigned long mapping_size = chunk_size < MB(32) ? MB(2) : MB(32);
>> + unsigned long virt;
>> int rc;
>> + mfn_t base_mfn;
>> +
>> + /* Round up to 2M or 32M boundary, as appropriate. */
>> + chunk_size = ROUNDUP(chunk_size, mapping_size);
>> + base_mfn = alloc_boot_pages(chunk_size >> PAGE_SHIFT, 32 << (20 - 12));
>> +
>> + virt = (unsigned long)pdx_to_page(pdx_s);
>> + rc = map_pages_to_xen(virt, base_mfn, chunk_size >> PAGE_SHIFT,
>> + PAGE_HYPERVISOR_RW | _PAGE_BLOCK);
>> + if ( rc )
>> + panic("Unable to setup the frametable mappings\n");
>> +
>> + memset(pdx_to_page(pdx_s), 0, nr_pdxs * sizeof(struct page_info));
>> + memset(pdx_to_page(pdx_e), -1,
>> + chunk_size - nr_pdxs * sizeof(struct page_info));
>> +}
>> +
>> +void __init init_frametable(paddr_t ram_start)
>> +{
>> + unsigned int sidx, nidx, max_idx;
>>
>> /*
>> * The size of paddr_t should be sufficient for the complete range of
>> @@ -26,24 +47,34 @@ void __init setup_frametable_mappings(paddr_t ps,
>> paddr_t pe)
>> BUILD_BUG_ON((sizeof(paddr_t) * BITS_PER_BYTE) < PADDR_BITS);
>> BUILD_BUG_ON(sizeof(struct page_info) != PAGE_INFO_SIZE);
>>
>> - if ( frametable_size > FRAMETABLE_SIZE )
>> - panic("The frametable cannot cover the physical region %#"PRIpaddr"
>> - %#"PRIpaddr"\n",
>> - ps, pe);
>> + max_idx = DIV_ROUND_UP(max_pdx, PDX_GROUP_COUNT);
>> + frametable_base_pdx = mfn_to_pdx(maddr_to_mfn(ram_start));
>>
>> - frametable_base_pdx = mfn_to_pdx(maddr_to_mfn(ps));
>> - /* Round up to 2M or 32M boundary, as appropriate. */
>> - frametable_size = ROUNDUP(frametable_size, mapping_size);
>> - base_mfn = alloc_boot_pages(frametable_size >> PAGE_SHIFT, 32<<(20-12));
>> + /*
>> + * pdx_to_page(pdx_s) in init_frametable_chunk must be page-aligned
>> + * for map_pages_to_xen(). Aligning to PDX_GROUP_COUNT guarantees this
>> + * because PDX_GROUP_COUNT * sizeof(page_info) is always a multiple of
>> + * PAGE_SIZE by construction.
>> + */
>> + frametable_base_pdx = ROUNDDOWN(frametable_base_pdx, PDX_GROUP_COUNT);
>
> We are now rounding down frametable_base_pdx which before this patch it was
> the start of the ram,
> but in xen/xen/arch/arm/include/asm/mm.h, mfn_valid(mfn) is using
> frametable_base_pdx to check for
> mfn validity, this means that we could pass an mfn before the start of the
> ram and if __mfn_valid is happy,
> we are getting a regression.
>
> Can this happen or am I missing something?
mfn_valid() can indeed return true for an MFN below ram_start that falls
in the same PDX group, but this is safe. init_frametable_chunk() maps
and zeroes the frametable for that range, so mfn_to_page() won't fault.
The zeroed page_info has count_info == 0 and no owner, so any get_page()
call on it will fail — the page is effectively inert.
~Michal
>
>>
>> - rc = map_pages_to_xen(FRAMETABLE_VIRT_START, base_mfn,
>> - frametable_size >> PAGE_SHIFT,
>> - PAGE_HYPERVISOR_RW | _PAGE_BLOCK);
>> - if ( rc )
>> - panic("Unable to setup the frametable mappings.\n");
>> + if ( (max_pdx - frametable_base_pdx) > FRAMETABLE_NR )
>> + panic("Frametable too small\n");
>> +
>> + for ( sidx = (frametable_base_pdx / PDX_GROUP_COUNT); ; sidx = nidx )
>> + {
>> + unsigned int eidx;
>> +
>> + eidx = find_next_zero_bit(pdx_group_valid, max_idx, sidx);
>> + nidx = find_next_bit(pdx_group_valid, max_idx, eidx);
>> +
>> + if ( nidx >= max_idx )
>> + break;
>> +
>> + init_frametable_chunk(sidx * PDX_GROUP_COUNT, eidx *
>> PDX_GROUP_COUNT);
>> + }
>>
>> - memset(&frame_table[0], 0, nr_pdxs * sizeof(struct page_info));
>> - memset(&frame_table[nr_pdxs], -1,
>> - frametable_size - (nr_pdxs * sizeof(struct page_info)));
>> + init_frametable_chunk(sidx * PDX_GROUP_COUNT, max_pdx);
>> }
>>
>> /*
>> diff --git a/xen/arch/arm/mpu/mm.c b/xen/arch/arm/mpu/mm.c
>> index aff88bd3a9c1..9c568831c128 100644
>> --- a/xen/arch/arm/mpu/mm.c
>> +++ b/xen/arch/arm/mpu/mm.c
>> @@ -186,16 +186,15 @@ static int is_mm_attr_match(pr_t *region, unsigned int
>> attributes)
>> return 0;
>> }
>>
>> -/* Map a frame table to cover physical addresses ps through pe */
>> -void __init setup_frametable_mappings(paddr_t ps, paddr_t pe)
>> +/*
>> + * Allocate a contiguous frame table covering ram_start through max_pdx.
>> + * Unlike the MMU version, MPU cannot skip holes because there is no virtual
>> + * address translation (ma == va).
>> + */
>> +void __init init_frametable(paddr_t ram_start)
>> {
>> + unsigned long nr_pdxs, frametable_size;
>> mfn_t base_mfn;
>> - paddr_t aligned_ps = ROUNDUP(ps, PAGE_SIZE);
>> - paddr_t aligned_pe = ROUNDDOWN(pe, PAGE_SIZE);
>> -
>> - unsigned long nr_pdxs = mfn_to_pdx(mfn_add(maddr_to_mfn(aligned_pe),
>> -1)) -
>> - mfn_to_pdx(maddr_to_mfn(aligned_ps)) + 1;
>> - unsigned long frametable_size = nr_pdxs * sizeof(struct page_info);
>>
>> /*
>> * The size of paddr_t should be sufficient for the complete range of
>> @@ -204,11 +203,13 @@ void __init setup_frametable_mappings(paddr_t ps,
>> paddr_t pe)
>> BUILD_BUG_ON((sizeof(paddr_t) * BITS_PER_BYTE) < PADDR_BITS);
>> BUILD_BUG_ON(sizeof(struct page_info) != PAGE_INFO_SIZE);
>>
>> + frametable_base_pdx = mfn_to_pdx(maddr_to_mfn(ram_start));
>> + nr_pdxs = max_pdx - frametable_base_pdx;
>> + frametable_size = nr_pdxs * sizeof(struct page_info);
>> +
>> if ( frametable_size > FRAMETABLE_SIZE )
>> - panic("The frametable cannot cover the physical region %#"PRIpaddr"
>> - %#"PRIpaddr"\n",
>> - ps, pe);
>> + panic("Frametable too small\n");
>>
>> - frametable_base_pdx = paddr_to_pdx(aligned_ps);
>> frametable_size = ROUNDUP(frametable_size, PAGE_SIZE);
>>
>> base_mfn = alloc_boot_pages(frametable_size >> PAGE_SHIFT, 1);
>> --
>> 2.43.0
>>
>
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |