[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[RFC PATCH] device-tree: size first hwdom bank for boot modules



From: Mykola Kvach <mykola_kvach@xxxxxxxx>

With LLC coloring enabled, the hardware domain memory comes from
allocate_hwdom_memory(), not from the fixed direct-map banks used when
coloring is off.

Commit de99f3263555 ("device-tree: Improve hwdom memory allocation for
DMA") made that allocator sort free host regions by ascending address so
Dom0 gets DMA-capable low memory first. The first bank filter still only
required 128MB. That can select a low region which is large enough for
the heuristic, but not large enough for place_modules() to put the Dom0
kernel, generated DTB and initrd contiguously in bank 0.

Ask arch code for any additional first-bank size requirement. On Arm,
compute it from the actual Dom0 kernel placement, rounded initrd size and
generated DTB size hint. For 64-bit Image kernels, include the text offset
from the candidate bank start, because the returned requirement is compared
with a bank size measured from that start. The hint covers both the normal
Device Tree path and the minimal DTB created for ACPI boot.

Check the first-bank threshold against the size which will actually be
assigned to Dom0, after capping the host region by the remaining unassigned
Dom0 memory. Otherwise a large host region could pass the test but still
produce a first guest bank too small for place_modules().

Use the typed min()/max() helpers for this normal allocation arithmetic;
MIN()/MAX() are intended for preprocessor-style contexts and skip the type
checking provided by the lowercase helpers.

This keeps the DMA-oriented allocation policy from de99f3263555 while
preventing a too-small bank 0 from reaching place_modules().

Fixes: de99f3263555 ("device-tree: Improve hwdom memory allocation for DMA")
Signed-off-by: Mykola Kvach <mykola_kvach@xxxxxxxx>
---
Test/setup notes:

The failure was reproduced on a Renesas H3ULCB/R-Car H3 (r8a7795)
arm64 board booted through U-Boot/TFTP and using huge initrd.

Relevant Xen command line excerpt:
  dom0_mem=2048M llc-coloring=on

Boot module layout from Xen:
  MODULE[2]: 0x0000000084000040-0x000000008e75d92f Ramdisk
  MODULE[3]: 0x00000000a0000000-0x00000000a3ffffff Kernel
  MODULE[4]: 0x00000000a4000000-0x00000000a400ffff XSM Policy

The initrd is about 168MB. With LLC coloring enabled and the low-address
allocation policy from de99f3263555, Dom0 can receive a 192MB first bank:
  d0 BANK[0] 0x00000048000000-0x00000054000000 (192MB)

That bank satisfies the old 128MB minimum but is too small for the
rounded Dom0 kernel, generated DTB and initrd placement. The observed
failure before this patch was:
  Panic on CPU 0:
  Not enough memory in the first bank for the kernel+dtb+initrd

With this patch, the same boot skips the too-small low region for bank 0
and reaches Dom0:
  d0 BANK[0] 0x00000057000000-0x00000084000000 (720MB)
  d0 BANK[1] 0x0000008e800000-0x000000c0000000 (792MB)
  d0 BANK[2] 0x00000500000000-0x00000521800000 (536MB)
  d0: extended region 0: 0x48000000->0x54000000
  Loading zImage from 0x00000000a0000000 to 0x57000000-0x5b000000
  Loading d0 initrd from 0x0000000084000040 to 0x5f200000-0x6995d8f0
  Loading d0 DTB to 0x5f000000-0x5f011c80
  Linux version 5.10.194-yocto-standard
---
 xen/arch/arm/acpi/domain_build.c        |  2 --
 xen/arch/arm/domain_build.c             |  8 ++++++
 xen/arch/arm/include/asm/domain_build.h |  4 +++
 xen/arch/arm/include/asm/kernel.h       |  8 ++++++
 xen/arch/arm/kernel.c                   | 35 +++++++++++++++++++++++++
 xen/common/device-tree/domain-build.c   | 27 ++++++++++++++-----
 xen/include/xen/fdt-kernel.h            |  8 ++++++
 7 files changed, 83 insertions(+), 9 deletions(-)

diff --git a/xen/arch/arm/acpi/domain_build.c b/xen/arch/arm/acpi/domain_build.c
index 249d899c33..db16f7fa94 100644
--- a/xen/arch/arm/acpi/domain_build.c
+++ b/xen/arch/arm/acpi/domain_build.c
@@ -26,8 +26,6 @@
 #undef virt_to_mfn
 #define virt_to_mfn(va) _mfn(__virt_to_mfn(va))
 
-#define ACPI_DOM0_FDT_MIN_SIZE 4096
-
 static int __init acpi_iomem_deny_access(struct domain *d)
 {
     acpi_status status;
diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
index 1efddc60ef..226e053c68 100644
--- a/xen/arch/arm/domain_build.c
+++ b/xen/arch/arm/domain_build.c
@@ -115,6 +115,14 @@ int __init parse_arch_dom0_param(const char *s, const char 
*e)
                              (IS_ENABLED(CONFIG_STATIC_SHM) ?         \
                               (NR_SHMEM_BANKS * (160 + 16)) : 0))
 
+paddr_t __init dom0_get_fdt_size_hint(void)
+{
+    if ( !acpi_disabled )
+        return ACPI_DOM0_FDT_MIN_SIZE;
+
+    return fdt_totalsize(device_tree_flattened) + DOM0_FDT_EXTRA_SIZE;
+}
+
 unsigned int __init dom0_max_vcpus(void)
 {
     if ( opt_dom0_max_vcpus == 0 )
diff --git a/xen/arch/arm/include/asm/domain_build.h 
b/xen/arch/arm/include/asm/domain_build.h
index df8b361b3d..45687c5d6f 100644
--- a/xen/arch/arm/include/asm/domain_build.h
+++ b/xen/arch/arm/include/asm/domain_build.h
@@ -19,6 +19,10 @@ int prepare_acpi(struct domain *d, struct kernel_info 
*kinfo);
 
 int add_ext_regions(unsigned long s_gfn, unsigned long e_gfn, void *data);
 
+#define ACPI_DOM0_FDT_MIN_SIZE 4096
+
+paddr_t dom0_get_fdt_size_hint(void);
+
 #if defined(CONFIG_MPU) && defined(CONFIG_ARM_64)
 /* Utility function to determine if an Armv8-R processor supports VMSA. */
 bool has_v8r_vmsa_support(void);
diff --git a/xen/arch/arm/include/asm/kernel.h 
b/xen/arch/arm/include/asm/kernel.h
index 21f4273fa1..17c5b9bce4 100644
--- a/xen/arch/arm/include/asm/kernel.h
+++ b/xen/arch/arm/include/asm/kernel.h
@@ -8,12 +8,20 @@
 
 #include <asm/domain.h>
 
+#include <xen/types.h>
+
+struct kernel_info;
+
 struct arch_kernel_info
 {
     /* Enable pl011 emulation */
     bool vpl011;
 };
 
+#define arch_get_min_first_bank_size arch_get_min_first_bank_size
+paddr_t arch_get_min_first_bank_size(struct kernel_info *info,
+                                     paddr_t bank_start);
+
 #endif /* #ifdef __ARCH_ARM_KERNEL_H__ */
 
 /*
diff --git a/xen/arch/arm/kernel.c b/xen/arch/arm/kernel.c
index b72585b7fe..3644663e2f 100644
--- a/xen/arch/arm/kernel.c
+++ b/xen/arch/arm/kernel.c
@@ -128,6 +128,41 @@ static paddr_t __init kernel_zimage_place(struct 
kernel_info *info)
     return load_addr;
 }
 
+static paddr_t __init kernel_placement_size(paddr_t load_addr, paddr_t len)
+{
+    return ROUNDUP(load_addr + len, MB(2)) - load_addr;
+}
+
+paddr_t __init arch_get_min_first_bank_size(struct kernel_info *info,
+                                            paddr_t bank_start)
+{
+    const struct boot_module *mod = info->bd.initrd;
+    const paddr_t initrd_len = ROUNDUP(mod ? mod->size : 0, MB(2));
+    const paddr_t dtb_len = ROUNDUP(dom0_get_fdt_size_hint(), MB(2));
+    paddr_t kernsize;
+
+#ifdef CONFIG_HAS_DOMAIN_TYPE
+    if ( (info->type == DOMAIN_64BIT) && (info->image.start == 0) )
+    {
+        paddr_t load_addr = bank_start + info->image.text_offset;
+
+        /*
+         * The caller compares this value with a size measured from
+         * bank_start, so include the text offset before the kernel.
+         */
+        kernsize = ROUNDUP(load_addr + info->image.len, MB(2)) - bank_start;
+        return kernsize + initrd_len + dtb_len;
+    }
+#endif
+
+    if ( info->image.start == 0 )
+        kernsize = ROUNDUP(info->image.len, MB(2));
+    else
+        kernsize = kernel_placement_size(info->image.start, info->image.len);
+
+    return kernsize + initrd_len + dtb_len;
+}
+
 static void __init kernel_zimage_load(struct kernel_info *info)
 {
     paddr_t load_addr = kernel_zimage_place(info);
diff --git a/xen/common/device-tree/domain-build.c 
b/xen/common/device-tree/domain-build.c
index 2a760b007b..d8865db259 100644
--- a/xen/common/device-tree/domain-build.c
+++ b/xen/common/device-tree/domain-build.c
@@ -299,20 +299,33 @@ static bool __init allocate_hwdom_memory(struct 
kernel_info *kinfo)
 
     for ( i = 0; (kinfo->unassigned_mem > 0) && (i < nr_banks); i++ )
     {
-        paddr_t bank_size;
+        const paddr_t bank_start = hwdom_free_mem->bank[i].start;
+        paddr_t bank_size = hwdom_free_mem->bank[i].size;
+
+        /*
+         * Check the size that would actually be assigned, not just the size
+         * of the host region.
+         */
+        bank_size = min(bank_size, kinfo->unassigned_mem);
 
         /*
          * The first bank must be large enough for place_modules() to
          * fit the kernel, DTB and initrd.  Skip small regions to avoid
          * ending up with a tiny first bank.
          */
-        if ( !mem->nr_banks && (hwdom_free_mem->bank[i].size < min_bank_size) )
-            continue;
+        if ( !mem->nr_banks )
+        {
+            paddr_t arch_min_size;
+            paddr_t required_first_bank_size;
+
+            arch_min_size = arch_get_min_first_bank_size(kinfo, bank_start);
+            required_first_bank_size = max(min_bank_size, arch_min_size);
+
+            if ( bank_size < required_first_bank_size )
+                continue;
+        }
 
-        bank_size = MIN(hwdom_free_mem->bank[i].size, kinfo->unassigned_mem);
-        if ( !allocate_bank_memory(kinfo,
-                                   gaddr_to_gfn(hwdom_free_mem->bank[i].start),
-                                   bank_size) )
+        if ( !allocate_bank_memory(kinfo, gaddr_to_gfn(bank_start), bank_size) 
)
         {
             xfree(hwdom_free_mem);
             return false;
diff --git a/xen/include/xen/fdt-kernel.h b/xen/include/xen/fdt-kernel.h
index 8cd1670c2c..931b3e1686 100644
--- a/xen/include/xen/fdt-kernel.h
+++ b/xen/include/xen/fdt-kernel.h
@@ -86,6 +86,14 @@ kernel_info_get_mem_const(const struct kernel_info *kinfo)
     return container_of(&kinfo->mem.common, const struct membanks, common);
 }
 
+#ifndef arch_get_min_first_bank_size
+static inline paddr_t arch_get_min_first_bank_size(struct kernel_info *info,
+                                                   paddr_t bank_start)
+{
+    return 0;
+}
+#endif
+
 #ifndef KERNEL_INFO_SHM_MEM_INIT
 
 #ifdef CONFIG_STATIC_SHM
-- 
2.43.0




 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.