Subject: add and use XEN_DOMCTL_getpageframeinfo3 To support wider than 28-bit MFNs, add XEN_DOMCTL_getpageframeinfo3 (with the type replacing the passed in MFN rather than getting or-ed into it) to properly back xc_get_pfn_type_batch(). With xc_get_pfn_type_batch() only used internally to libxc, move its prototype from xenctrl.h to xc_private.h. This also fixes a couple of bugs in pre-existing code: - the failure path for init_mem_info() leaked minfo->pfn_type, - one error path of the XEN_DOMCTL_getpageframeinfo2 handler used put_domain() where rcu_unlock_domain() was meant, and - the XEN_DOMCTL_getpageframeinfo2 handler could call xsm_getpageframeinfo() with an invalid struct page_info pointer. Signed-off-by: Jan Beulich --- 2010-01-06.orig/tools/libxc/xc_domain_save.c 2010-01-11 14:39:23.000000000 +0100 +++ 2010-01-06/tools/libxc/xc_domain_save.c 2010-01-12 10:22:58.000000000 +0100 @@ -959,6 +959,12 @@ int xc_domain_save(int xc_handle, int io /* Get the size of the P2M table */ dinfo->p2m_size = xc_memory_op(xc_handle, XENMEM_maximum_gpfn, &dom) + 1; + if ( dinfo->p2m_size > ~XEN_DOMCTL_PFINFO_LTAB_MASK ) + { + ERROR("Cannot save this big a guest"); + goto out; + } + /* Domain is still running at this point */ if ( live ) { @@ -1296,17 +1302,11 @@ int xc_domain_save(int xc_handle, int io else { /* Get page types */ - for ( j = 0; j < batch; j++ ) - ((uint32_t *)pfn_type)[j] = pfn_type[j]; - if ( xc_get_pfn_type_batch(xc_handle, dom, batch, - (uint32_t *)pfn_type) ) + if ( xc_get_pfn_type_batch(xc_handle, dom, batch, pfn_type) ) { ERROR("get_pfn_type_batch failed"); goto out; } - for ( j = batch-1; j >= 0; j-- ) - pfn_type[j] = ((uint32_t *)pfn_type)[j] & - XEN_DOMCTL_PFINFO_LTAB_MASK; for ( j = 0; j < batch; j++ ) { --- 2010-01-06.orig/tools/libxc/xc_offline_page.c 2010-01-12 10:13:31.000000000 +0100 +++ 2010-01-06/tools/libxc/xc_offline_page.c 2010-01-12 10:23:27.000000000 +0100 @@ -24,7 +24,7 @@ struct domain_mem_info{ int domid; unsigned int pt_level; 
unsigned int guest_width; - uint32_t *pfn_type; + xen_pfn_t *pfn_type; xen_pfn_t *p2m_table; unsigned long p2m_size; xen_pfn_t *m2p_table; @@ -266,19 +266,18 @@ static int init_mem_info(int xc_handle, } /* Get pfn type */ - minfo->pfn_type = malloc(sizeof(uint32_t) * minfo->p2m_size); + minfo->pfn_type = calloc(sizeof(*minfo->pfn_type), minfo->p2m_size); if (!minfo->pfn_type) { ERROR("Failed to malloc pfn_type\n"); goto failed; } - memset(minfo->pfn_type, 0, sizeof(uint32_t) * minfo->p2m_size); for (i = 0; i < minfo->p2m_size; i++) minfo->pfn_type[i] = pfn_to_mfn(i, minfo->p2m_table, minfo->guest_width); - if ( lock_pages(minfo->pfn_type, minfo->p2m_size * sizeof(uint32_t)) ) + if ( lock_pages(minfo->pfn_type, minfo->p2m_size * sizeof(*minfo->pfn_type)) ) { ERROR("Unable to lock pfn_type array"); goto failed; @@ -297,12 +296,12 @@ static int init_mem_info(int xc_handle, return 0; unlock: - unlock_pages(minfo->pfn_type, minfo->p2m_size * sizeof(uint32_t)); + unlock_pages(minfo->pfn_type, minfo->p2m_size * sizeof(*minfo->pfn_type)); failed: if (minfo->pfn_type) { - minfo->pfn_type = NULL; free(minfo->pfn_type); + minfo->pfn_type = NULL; } if (live_shinfo) munmap(live_shinfo, PAGE_SIZE); @@ -418,7 +417,9 @@ static int change_pte(int xc_handle, int uint64_t pte, new_pte; int j; - if ( table_mfn == INVALID_P2M_ENTRY ) + if ( (table_mfn == INVALID_P2M_ENTRY) || + ((minfo->pfn_type[i] & XEN_DOMCTL_PFINFO_LTAB_MASK) == + XEN_DOMCTL_PFINFO_XTAB) ) continue; if ( minfo->pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK ) --- 2010-01-06.orig/tools/libxc/xc_private.c 2010-01-12 10:13:31.000000000 +0100 +++ 2010-01-06/tools/libxc/xc_private.c 2010-01-12 10:22:33.000000000 +0100 @@ -149,14 +149,14 @@ void unlock_pages(void *addr, size_t len } /* NB: arr must be locked */ -int xc_get_pfn_type_batch(int xc_handle, - uint32_t dom, int num, uint32_t *arr) +int xc_get_pfn_type_batch(int xc_handle, uint32_t dom, + unsigned int num, xen_pfn_t *arr) { DECLARE_DOMCTL; - domctl.cmd = 
XEN_DOMCTL_getpageframeinfo2; + domctl.cmd = XEN_DOMCTL_getpageframeinfo3; domctl.domain = (domid_t)dom; - domctl.u.getpageframeinfo2.num = num; - set_xen_guest_handle(domctl.u.getpageframeinfo2.array, arr); + domctl.u.getpageframeinfo3.num = num; + set_xen_guest_handle(domctl.u.getpageframeinfo3.array, arr); return do_domctl(xc_handle, &domctl); } --- 2010-01-06.orig/tools/libxc/xc_private.h 2009-06-19 11:11:23.000000000 +0200 +++ 2010-01-06/tools/libxc/xc_private.h 2010-01-12 10:22:08.000000000 +0100 @@ -191,6 +191,9 @@ void *xc_map_foreign_ranges(int xc_handl size_t size, int prot, size_t chunksize, privcmd_mmap_entry_t entries[], int nentries); +int xc_get_pfn_type_batch(int xc_handle, uint32_t dom, + unsigned int num, xen_pfn_t *); + void bitmap_64_to_byte(uint8_t *bp, const uint64_t *lp, int nbits); void bitmap_byte_to_64(uint64_t *lp, const uint8_t *bp, int nbits); --- 2010-01-06.orig/tools/libxc/xenctrl.h 2010-01-08 13:50:58.000000000 +0100 +++ 2010-01-06/tools/libxc/xenctrl.h 2010-01-12 10:21:39.000000000 +0100 @@ -802,9 +802,6 @@ int xc_mmuext_op(int xc_handle, struct m int xc_memory_op(int xc_handle, int cmd, void *arg); -int xc_get_pfn_type_batch(int xc_handle, uint32_t dom, - int num, uint32_t *arr); - /* Get current total pages allocated to a domain. 
*/ long xc_get_tot_pages(int xc_handle, uint32_t domid); --- 2010-01-06.orig/xen/arch/x86/domctl.c 2010-01-12 10:13:31.000000000 +0100 +++ 2010-01-06/xen/arch/x86/domctl.c 2010-01-11 14:15:24.000000000 +0100 @@ -160,6 +160,108 @@ long arch_do_domctl( } break; + case XEN_DOMCTL_getpageframeinfo3: +#ifdef __x86_64__ + if (!has_32bit_shinfo(current->domain)) + { + unsigned int n, j; + unsigned int num = domctl->u.getpageframeinfo3.num; + domid_t dom = domctl->domain; + struct domain *d; + struct page_info *page; + xen_pfn_t *arr; + + ret = -ESRCH; + if ( unlikely((d = rcu_lock_domain_by_id(dom)) == NULL) ) + break; + + if ( unlikely(num > 1024) || + unlikely(num != domctl->u.getpageframeinfo3.num) ) + { + ret = -E2BIG; + rcu_unlock_domain(d); + break; + } + + page = alloc_domheap_page(NULL, 0); + if ( !page ) + { + ret = -ENOMEM; + rcu_unlock_domain(d); + break; + } + arr = page_to_virt(page); + + for ( n = ret = 0; n < num; ) + { + /* arr is a single page of xen_pfn_t: bound each pass by its capacity. */ + unsigned int k = min_t(unsigned int, num - n, + PAGE_SIZE / sizeof(*arr)); + + if ( copy_from_guest_offset(arr, + domctl->u.getpageframeinfo3.array, + n, k) ) + { + ret = -EFAULT; + break; + } + + for ( j = 0; j < k; j++ ) + { + unsigned long type = 0, mfn = arr[j]; + + page = mfn_to_page(mfn); + + if ( unlikely(!mfn_valid(mfn)) ) + type = XEN_DOMCTL_PFINFO_XTAB; + else if ( xsm_getpageframeinfo(page) != 0 ) + ; + else if ( likely(get_page(page, d)) ) + { + switch( page->u.inuse.type_info & PGT_type_mask ) + { + case PGT_l1_page_table: + type = XEN_DOMCTL_PFINFO_L1TAB; + break; + case PGT_l2_page_table: + type = XEN_DOMCTL_PFINFO_L2TAB; + break; + case PGT_l3_page_table: + type = XEN_DOMCTL_PFINFO_L3TAB; + break; + case PGT_l4_page_table: + type = XEN_DOMCTL_PFINFO_L4TAB; + break; + } + + if ( page->u.inuse.type_info & PGT_pinned ) + type |= XEN_DOMCTL_PFINFO_LPINTAB; + + put_page(page); + } + else + type = XEN_DOMCTL_PFINFO_XTAB; + + arr[j] = type; + } + + if ( copy_to_guest_offset(domctl->u.getpageframeinfo3.array, + n, arr, k) ) + { + ret = -EFAULT; 
+ break; + } + + n += k; + } + + free_domheap_page(virt_to_page(arr)); + + rcu_unlock_domain(d); + break; + } +#endif + /* fall thru */ case XEN_DOMCTL_getpageframeinfo2: { int n,j; @@ -183,7 +285,7 @@ long arch_do_domctl( if ( !arr32 ) { ret = -ENOMEM; - put_domain(d); + rcu_unlock_domain(d); break; } @@ -209,11 +311,14 @@ long arch_do_domctl( page = mfn_to_page(mfn); - ret = xsm_getpageframeinfo(page); - if ( ret ) - continue; + if ( domctl->cmd == XEN_DOMCTL_getpageframeinfo3) + arr32[j] = 0; - if ( likely(mfn_valid(mfn) && get_page(page, d)) ) + if ( unlikely(!mfn_valid(mfn)) ) + arr32[j] |= XEN_DOMCTL_PFINFO_XTAB; + else if ( xsm_getpageframeinfo(page) != 0 ) + continue; + else if ( likely(get_page(page, d)) ) { unsigned long type = 0; --- 2010-01-06.orig/xen/include/public/domctl.h 2010-01-12 10:13:31.000000000 +0100 +++ 2010-01-06/xen/include/public/domctl.h 2010-01-11 12:13:16.000000000 +0100 @@ -161,6 +161,14 @@ struct xen_domctl_getpageframeinfo2 { typedef struct xen_domctl_getpageframeinfo2 xen_domctl_getpageframeinfo2_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_getpageframeinfo2_t); +/* XEN_DOMCTL_getpageframeinfo3 */ +struct xen_domctl_getpageframeinfo3 { + /* IN variables. */ + uint64_aligned_t num; + /* IN/OUT variables. 
*/ + XEN_GUEST_HANDLE_64(xen_pfn_t) array; +}; + /* * Control shadow pagetables operation @@ -832,6 +840,7 @@ struct xen_domctl { #define XEN_DOMCTL_disable_migrate 58 #define XEN_DOMCTL_gettscinfo 59 #define XEN_DOMCTL_settscinfo 60 +#define XEN_DOMCTL_getpageframeinfo3 61 #define XEN_DOMCTL_gdbsx_guestmemio 1000 #define XEN_DOMCTL_gdbsx_pausevcpu 1001 #define XEN_DOMCTL_gdbsx_unpausevcpu 1002 @@ -844,6 +853,7 @@ struct xen_domctl { struct xen_domctl_getmemlist getmemlist; struct xen_domctl_getpageframeinfo getpageframeinfo; struct xen_domctl_getpageframeinfo2 getpageframeinfo2; + struct xen_domctl_getpageframeinfo3 getpageframeinfo3; struct xen_domctl_vcpuaffinity vcpuaffinity; struct xen_domctl_shadow_op shadow_op; struct xen_domctl_max_mem max_mem;