[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH for-4.22 v2 2/2] device-tree: validate hwdom bank 0 boot placement



From: Mykola Kvach <mykola_kvach@xxxxxxxx>

With LLC coloring enabled, the hardware domain memory is allocated by
allocate_hwdom_memory() rather than by using the fixed direct-map layout.

Commit de99f3263555 ("device-tree: Improve hwdom memory allocation for
DMA") made that allocator prefer lower host regions. The first-bank
filter, however, still only checked the old 128MB heuristic. A low region
can satisfy that heuristic but still be too small, or otherwise
unsuitable, for the hardware-domain kernel and the DTB/initrd area to fit
in bank 0 according to the Arm placement rules.

Keep the existing first-bank size policy and add an architecture-specific
candidate check. On Arm, compute the kernel load address for the
candidate bank using the same logic as kernel_zimage_place(), verify that
the kernel range is covered by that bank, and then reuse the same
DTB/initrd placement helper as place_dtb_initrd(). The FDT is generated
later, so use the hardware-domain FDT allocation size as a conservative
upper bound for the final DTB size.

Check the candidate after capping the host region by the remaining
unassigned hardware-domain memory, so the validation is performed against
the size that would actually become bank 0.

This keeps the DMA-oriented allocation policy from de99f3263555 while
preventing a too-small bank 0 from reaching place_dtb_initrd().

Make kernel_zimage_place_in_bank() return INVALID_PADDR when a
position-independent zImage cannot be placed in the supplied bank; the
real load path turns this into a panic, while the hwdom candidate check
uses it to reject the bank.

Fixes: de99f3263555 ("device-tree: Improve hwdom memory allocation for DMA")
Signed-off-by: Mykola Kvach <mykola_kvach@xxxxxxxx>
---
Changes in v2:
- Split the behavior-preserving placement refactoring into the previous
  patch.
- Reuse the refactored Arm kernel and DTB/initrd placement helpers for
  the first-bank candidate check.

Link to v1:
  
https://patchew.org/Xen/4f862bb2dc323914b8120b0f16af7516140cf42b.1780065103.git.mykola._5Fkvach@xxxxxxxx/

Changes since RFC:
- Do not keep the RFC scalar minimum-size check.  It can both reject valid
  layouts and accept layouts which still fail later.  Instead, validate
  the candidate bank using the same kernel and DTB/initrd placement rules
  as the load path.
- Replace the scalar minimum-size check with arch_hwdom_first_bank_ok().
- Validate fixed-address and AArch32 start == 0 kernel placement against
  the candidate bank.
- Check the candidate after capping the host region by the remaining
  unassigned hardware-domain memory.
- Treat the hardware-domain FDT allocation size as a conservative upper
  bound because the final FDT is generated later.

Link to RFC: 
https://patchew.org/Xen/9ae4f7dd49f5b1f761193adae573c2675c92e883.1779051035.git.mykola._5Fkvach@xxxxxxxx/

Why the RFC scalar approach was not kept:

A simple minimum-size check is not sufficient here because the validity
of the first bank depends on the actual Arm placement rules, not only on
the aggregate size of the kernel, DTB and initrd. The DTB/initrd area may
fit before a 64-bit Image loaded with a text offset, while an AArch32
position-independent kernel may leave no valid module location even when
the aggregate size appears to fit. Fixed-address kernels also need the
candidate bank start to be considered.
---
 xen/arch/arm/acpi/domain_build.c        |  2 -
 xen/arch/arm/domain_build.c             |  8 ++++
 xen/arch/arm/include/asm/domain_build.h |  4 ++
 xen/arch/arm/include/asm/kernel.h       |  9 ++++
 xen/arch/arm/kernel.c                   | 57 ++++++++++++++++++++++++-
 xen/common/device-tree/domain-build.c   | 24 ++++++++---
 xen/include/xen/fdt-kernel.h            |  9 ++++
 7 files changed, 102 insertions(+), 11 deletions(-)

diff --git a/xen/arch/arm/acpi/domain_build.c b/xen/arch/arm/acpi/domain_build.c
index 249d899c33..db16f7fa94 100644
--- a/xen/arch/arm/acpi/domain_build.c
+++ b/xen/arch/arm/acpi/domain_build.c
@@ -26,8 +26,6 @@
 #undef virt_to_mfn
 #define virt_to_mfn(va) _mfn(__virt_to_mfn(va))
 
-#define ACPI_DOM0_FDT_MIN_SIZE 4096
-
 static int __init acpi_iomem_deny_access(struct domain *d)
 {
     acpi_status status;
diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
index 1efddc60ef..550617f152 100644
--- a/xen/arch/arm/domain_build.c
+++ b/xen/arch/arm/domain_build.c
@@ -115,6 +115,14 @@ int __init parse_arch_dom0_param(const char *s, const char 
*e)
                              (IS_ENABLED(CONFIG_STATIC_SHM) ?         \
                               (NR_SHMEM_BANKS * (160 + 16)) : 0))
 
+paddr_t __init hwdom_get_fdt_alloc_size(void)
+{
+    if ( acpi_disabled )
+        return fdt_totalsize(device_tree_flattened) + DOM0_FDT_EXTRA_SIZE;
+
+    return ACPI_DOM0_FDT_MIN_SIZE;
+}
+
 unsigned int __init dom0_max_vcpus(void)
 {
     if ( opt_dom0_max_vcpus == 0 )
diff --git a/xen/arch/arm/include/asm/domain_build.h 
b/xen/arch/arm/include/asm/domain_build.h
index df8b361b3d..85cf46a958 100644
--- a/xen/arch/arm/include/asm/domain_build.h
+++ b/xen/arch/arm/include/asm/domain_build.h
@@ -19,6 +19,10 @@ int prepare_acpi(struct domain *d, struct kernel_info 
*kinfo);
 
 int add_ext_regions(unsigned long s_gfn, unsigned long e_gfn, void *data);
 
+#define ACPI_DOM0_FDT_MIN_SIZE 4096
+
+paddr_t hwdom_get_fdt_alloc_size(void);
+
 #if defined(CONFIG_MPU) && defined(CONFIG_ARM_64)
 /* Utility function to determine if an Armv8-R processor supports VMSA. */
 bool has_v8r_vmsa_support(void);
diff --git a/xen/arch/arm/include/asm/kernel.h 
b/xen/arch/arm/include/asm/kernel.h
index 21f4273fa1..bf14fb208a 100644
--- a/xen/arch/arm/include/asm/kernel.h
+++ b/xen/arch/arm/include/asm/kernel.h
@@ -8,12 +8,21 @@
 
 #include <asm/domain.h>
 
+#include <xen/types.h>
+
+struct kernel_info;
+
 struct arch_kernel_info
 {
     /* Enable pl011 emulation */
     bool vpl011;
 };
 
+#define arch_hwdom_first_bank_ok arch_hwdom_first_bank_ok
+bool arch_hwdom_first_bank_ok(const struct kernel_info *info,
+                              paddr_t bank_start,
+                              paddr_t bank_size);
+
 #endif /* #ifdef __ARCH_ARM_KERNEL_H__ */
 
 /*
diff --git a/xen/arch/arm/kernel.c b/xen/arch/arm/kernel.c
index d1be4d8074..ecea2822a1 100644
--- a/xen/arch/arm/kernel.c
+++ b/xen/arch/arm/kernel.c
@@ -64,9 +64,15 @@ kernel_zimage_place_in_bank(const struct kernel_info *info,
         load_end = bank_start + bank_size;
         load_end = MIN(bank_start + MB(128), load_end);
 
+        if ( load_end - bank_start < info->image.len )
+            return INVALID_PADDR;
+
         load_addr = load_end - info->image.len;
         /* Align to 2MB */
         load_addr &= ~(MB(2) - 1);
+
+        if ( load_addr < bank_start )
+            return INVALID_PADDR;
     }
     else
         load_addr = info->image.start;
@@ -164,9 +170,56 @@ static void __init place_dtb_initrd(struct kernel_info 
*info,
 static paddr_t __init kernel_zimage_place(struct kernel_info *info)
 {
     const struct membanks *mem = kernel_info_get_mem(info);
+    paddr_t load_addr;
+
+    load_addr = kernel_zimage_place_in_bank(info, mem->bank[0].start,
+                                            mem->bank[0].size);
+    if ( load_addr == INVALID_PADDR )
+        panic("Unable to find suitable location for the kernel\n");
+
+    return load_addr;
+}
+
+bool __init arch_hwdom_first_bank_ok(const struct kernel_info *info,
+                                     paddr_t bank_start,
+                                     paddr_t bank_size)
+{
+    const struct boot_module *initrd = info->bd.initrd;
+    /*
+     * place_dtb_initrd() rounds the DTB and initrd placement to 2MB 
boundaries;
+     * use the same granularity when checking whether the first bank can hold
+     * them.
+     */
+    const paddr_t initrd_len = ROUNDUP(initrd ? initrd->size : 0, MB(2));
+    /*
+     * The hardware domain FDT has not been generated yet. Use the allocation
+     * size as a conservative upper bound for the final DTB size.
+     */
+    const paddr_t dtb_len = ROUNDUP(hwdom_get_fdt_alloc_size(), MB(2));
+    const paddr_t rambase = bank_start;
+    const paddr_t ramsize = bank_size;
+    const paddr_t dtb_initrd_size = initrd_len + dtb_len;
+    const paddr_t ramend = rambase + ramsize;
+    paddr_t kernbase;
+    paddr_t kernend;
+    paddr_t dtb_base;
+
+    kernbase = kernel_zimage_place_in_bank(info, bank_start, bank_size);
+    if ( kernbase == INVALID_PADDR ||
+         info->image.len > INVALID_PADDR - kernbase )
+        return false;
+
+    kernend = kernbase + info->image.len;
+
+    if ( kernbase < rambase || kernend > ramend )
+        return false;
+
+    if ( !first_bank_can_fit_modules(ramsize, kernbase, kernend,
+                                     dtb_initrd_size) )
+        return false;
 
-    return kernel_zimage_place_in_bank(info, mem->bank[0].start,
-                                       mem->bank[0].size);
+    return find_dtb_initrd_placement(rambase, ramend, kernbase, kernend,
+                                     dtb_initrd_size, &dtb_base);
 }
 
 static void __init kernel_zimage_load(struct kernel_info *info)
diff --git a/xen/common/device-tree/domain-build.c 
b/xen/common/device-tree/domain-build.c
index f3ba496f1e..2e806c1b09 100644
--- a/xen/common/device-tree/domain-build.c
+++ b/xen/common/device-tree/domain-build.c
@@ -299,20 +299,30 @@ static bool __init allocate_hwdom_memory(struct 
kernel_info *kinfo)
 
     for ( i = 0; (kinfo->unassigned_mem > 0) && (i < nr_banks); i++ )
     {
-        paddr_t bank_size;
+        const paddr_t bank_start = hwdom_free_mem->bank[i].start;
+        paddr_t bank_size = hwdom_free_mem->bank[i].size;
+
+        /*
+         * Check the size that would actually be assigned, not just the size
+         * of the host region.
+         */
+        bank_size = min(bank_size, kinfo->unassigned_mem);
 
         /*
          * The first bank must be large enough for place_dtb_initrd() to
          * fit the kernel, DTB and initrd.  Skip small regions to avoid
          * ending up with a tiny first bank.
          */
-        if ( !mem->nr_banks && (hwdom_free_mem->bank[i].size < min_bank_size) )
-            continue;
+        if ( !mem->nr_banks )
+        {
+            if ( bank_size < min_bank_size )
+                continue;
+
+            if ( !arch_hwdom_first_bank_ok(kinfo, bank_start, bank_size) )
+                continue;
+        }
 
-        bank_size = MIN(hwdom_free_mem->bank[i].size, kinfo->unassigned_mem);
-        if ( !allocate_bank_memory(kinfo,
-                                   gaddr_to_gfn(hwdom_free_mem->bank[i].start),
-                                   bank_size) )
+        if ( !allocate_bank_memory(kinfo, gaddr_to_gfn(bank_start), bank_size) 
)
         {
             xfree(hwdom_free_mem);
             return false;
diff --git a/xen/include/xen/fdt-kernel.h b/xen/include/xen/fdt-kernel.h
index 00c37be101..71e2344b97 100644
--- a/xen/include/xen/fdt-kernel.h
+++ b/xen/include/xen/fdt-kernel.h
@@ -93,6 +93,15 @@ kernel_info_get_mem_const(const struct kernel_info *kinfo)
     return container_of(&kinfo->mem.common, const struct membanks, common);
 }
 
+#ifndef arch_hwdom_first_bank_ok
+static inline bool arch_hwdom_first_bank_ok(const struct kernel_info *info,
+                                            paddr_t bank_start,
+                                            paddr_t bank_size)
+{
+    return true;
+}
+#endif
+
 #ifndef KERNEL_INFO_SHM_MEM_INIT
 
 #ifdef CONFIG_STATIC_SHM
-- 
2.43.0




 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.