Subject: linux: add new (replacement) mmap-batch ioctl While the error indicator of IOCTL_PRIVCMD_MMAPBATCH should be in the top nibble (it is documented that way in include/xen/public/privcmd.h and include/xen/compat_ioctl.h), it really wasn't for 64-bit implementations. With MFNs now possibly being 32 or more bits wide on x86-64, using bits 28-31 as failure indicator (and bit 31 as paged-out indicator) is no longer acceptable. Instead, a new ioctl with a separate error indication array is being introduced. As usual, written against 2.6.32.3 and made to apply to the 2.6.18 tree without further testing. Signed-off-by: Jan Beulich --- head-2010-01-04.orig/drivers/xen/privcmd/compat_privcmd.c 2010-01-04 13:50:00.000000000 +0100 +++ head-2010-01-04/drivers/xen/privcmd/compat_privcmd.c 2010-01-04 15:13:52.000000000 +0100 @@ -96,6 +96,56 @@ int privcmd_ioctl_32(int fd, unsigned in #endif } break; + case IOCTL_PRIVCMD_MMAP_BATCH_32: { + struct privcmd_mmap_batch *p; + struct privcmd_mmap_batch_32 *p32; + struct privcmd_mmap_batch_32 n32; +#ifdef xen_pfn32_t + xen_pfn_t *__user arr; + xen_pfn32_t *__user arr32; + unsigned int i; +#endif + + p32 = compat_ptr(arg); + p = compat_alloc_user_space(sizeof(*p)); + if (copy_from_user(&n32, p32, sizeof(n32)) || + put_user(n32.num, &p->num) || + put_user(n32.dom, &p->dom) || + put_user(n32.addr, &p->addr) || + put_user(compat_ptr(n32.err), &p->err)) + return -EFAULT; +#ifdef xen_pfn32_t + arr = compat_alloc_user_space(n32.num * sizeof(*arr) + + sizeof(*p)); + arr32 = compat_ptr(n32.arr); + for (i = 0; i < n32.num; ++i) { + xen_pfn32_t mfn; + + if (get_user(mfn, arr32 + i) || put_user(mfn, arr + i)) + return -EFAULT; + } + + if (put_user(arr, &p->arr)) + return -EFAULT; +#else + if (put_user(compat_ptr(n32.arr), &p->arr)) + return -EFAULT; +#endif + + ret = sys_ioctl(fd, IOCTL_PRIVCMD_MMAP_BATCH, (unsigned long)p); + +#ifdef xen_pfn32_t + for (i = 0; !ret && i < n32.num; ++i) { + xen_pfn_t mfn; + + if (get_user(mfn, arr + i) || 
put_user(mfn, arr32 + i)) + ret = -EFAULT; + else if (mfn != (xen_pfn32_t)mfn) + ret = -ERANGE; + } +#endif + } + break; default: ret = -EINVAL; break; --- head-2010-01-04.orig/drivers/xen/privcmd/privcmd.c 2010-01-05 11:21:42.000000000 +0100 +++ head-2010-01-04/drivers/xen/privcmd/privcmd.c 2010-01-05 11:22:28.000000000 +0100 @@ -299,6 +299,108 @@ static long privcmd_ioctl(struct file *f mmapbatch_out: list_for_each_safe(l,l2,&pagelist) free_page((unsigned long)l); + } + break; + + case IOCTL_PRIVCMD_MMAP_BATCH: { + privcmd_mmap_batch_t m; + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + const xen_pfn_t __user *p; + xen_pfn_t *mfn; + unsigned long addr, nr_pages; + unsigned int i, nr; + LIST_HEAD(pagelist); + struct list_head *l, *l2; + int *err, paged_out; + + if (!is_initial_xendomain()) + return -EPERM; + + if (copy_from_user(&m, udata, sizeof(m))) + return -EFAULT; + + nr_pages = m.num; + addr = m.addr; + if (m.num <= 0 || nr_pages > (ULONG_MAX >> PAGE_SHIFT) || + addr != m.addr || nr_pages > (-addr >> PAGE_SHIFT)) + return -EINVAL; + + p = m.arr; + for (i = 0; i < nr_pages; i += nr, p += nr) { + nr = min(nr_pages - i, MMAPBATCH_NR_PER_PAGE); + + ret = -ENOMEM; + l = (struct list_head *)__get_free_page(GFP_KERNEL); + if (l == NULL) + goto mmap_batch_out; + + INIT_LIST_HEAD(l); + list_add_tail(l, &pagelist); + + mfn = (void *)(l + 1); + ret = -EFAULT; + if (copy_from_user(mfn, p, nr * sizeof(*mfn))) + goto mmap_batch_out; + } + + down_write(&mm->mmap_sem); + + vma = find_vma(mm, addr); + ret = -EINVAL; + if (!vma || + addr < vma->vm_start || + addr + (nr_pages << PAGE_SHIFT) > vma->vm_end || + !enforce_singleshot_mapping(vma, addr, nr_pages)) { + up_write(&mm->mmap_sem); + goto mmap_batch_out; + } + + i = 0; + ret = 0; + paged_out = 0; + list_for_each(l, &pagelist) { + int rc; + + nr = i + min(nr_pages - i, MMAPBATCH_NR_PER_PAGE); + mfn = (void *)(l + 1); + err = (void *)(l + 1); + BUILD_BUG_ON(sizeof(*err) > sizeof(*mfn)); + + while (i < 
nr) { + rc = direct_remap_pfn_range(vma, addr & PAGE_MASK, + *mfn, PAGE_SIZE, + vma->vm_page_prot, m.dom); + if (rc < 0) { + if (rc == -ENOENT) + paged_out = 1; + ret++; + } else + BUG_ON(rc > 0); + *err++ = rc; + mfn++; i++; addr += PAGE_SIZE; + } + } + + up_write(&mm->mmap_sem); + + if (ret > 0) { + int __user *p = m.err; + + ret = paged_out ? -ENOENT : 0; + i = 0; + list_for_each(l, &pagelist) { + nr = min(nr_pages - i, MMAPBATCH_NR_PER_PAGE); + err = (void *)(l + 1); + if (copy_to_user(p, err, nr * sizeof(*err))) + ret = -EFAULT; + i += nr; p += nr; + } + } + + mmap_batch_out: + list_for_each_safe(l, l2, &pagelist) + free_page((unsigned long)l); #undef MMAPBATCH_NR_PER_PAGE } break; --- head-2010-01-04.orig/fs/compat_ioctl.c 2009-12-17 16:11:48.000000000 +0100 +++ head-2010-01-04/fs/compat_ioctl.c 2010-01-05 11:00:04.000000000 +0100 @@ -2937,6 +2937,7 @@ IGNORE_IOCTL(FBIOGCURSOR32) #ifdef CONFIG_XEN HANDLE_IOCTL(IOCTL_PRIVCMD_MMAP_32, privcmd_ioctl_32) HANDLE_IOCTL(IOCTL_PRIVCMD_MMAPBATCH_32, privcmd_ioctl_32) +HANDLE_IOCTL(IOCTL_PRIVCMD_MMAP_BATCH_32, privcmd_ioctl_32) COMPATIBLE_IOCTL(IOCTL_PRIVCMD_HYPERCALL) COMPATIBLE_IOCTL(IOCTL_EVTCHN_BIND_VIRQ) COMPATIBLE_IOCTL(IOCTL_EVTCHN_BIND_INTERDOMAIN) --- head-2010-01-04.orig/include/xen/compat_ioctl.h 2009-12-17 15:40:40.000000000 +0100 +++ head-2010-01-04/include/xen/compat_ioctl.h 2010-01-04 14:01:56.000000000 +0100 @@ -49,9 +49,27 @@ struct privcmd_mmapbatch_32 { #endif compat_uptr_t arr; /* array of mfns - top nibble set on err */ }; + +struct privcmd_mmap_batch_32 { + unsigned int num; /* number of pages to populate */ + domid_t dom; /* target domain */ +#if defined(CONFIG_X86) || defined(CONFIG_IA64) + union { /* virtual address */ + __u64 addr __packed; + __u32 va; + }; +#else + __u64 addr; /* virtual address */ +#endif + compat_uptr_t arr; /* array of mfns */ + compat_uptr_t err; /* array of error codes */ +}; + #define IOCTL_PRIVCMD_MMAP_32 \ _IOC(_IOC_NONE, 'P', 2, sizeof(struct privcmd_mmap_32)) 
-#define IOCTL_PRIVCMD_MMAPBATCH_32 \ +#define IOCTL_PRIVCMD_MMAPBATCH_32 \ _IOC(_IOC_NONE, 'P', 3, sizeof(struct privcmd_mmapbatch_32)) +#define IOCTL_PRIVCMD_MMAP_BATCH_32 \ + _IOC(_IOC_NONE, 'P', 4, sizeof(struct privcmd_mmap_batch_32)) #endif /* __LINUX_XEN_COMPAT_H__ */ --- head-2010-01-04.orig/include/xen/public/privcmd.h 2009-12-18 10:38:09.000000000 +0100 +++ head-2010-01-04/include/xen/public/privcmd.h 2010-01-04 14:02:06.000000000 +0100 @@ -60,6 +60,14 @@ typedef struct privcmd_mmapbatch { xen_pfn_t __user *arr; /* array of mfns - top nibble set on err */ } privcmd_mmapbatch_t; +typedef struct privcmd_mmap_batch { + unsigned int num; /* number of pages to populate */ + domid_t dom; /* target domain */ + __u64 addr; /* virtual address */ + const xen_pfn_t __user *arr; /* array of mfns */ + int __user *err; /* array of error codes */ +} privcmd_mmap_batch_t; + /* * @cmd: IOCTL_PRIVCMD_HYPERCALL * @arg: &privcmd_hypercall_t @@ -71,5 +79,7 @@ typedef struct privcmd_mmapbatch { _IOC(_IOC_NONE, 'P', 2, sizeof(privcmd_mmap_t)) #define IOCTL_PRIVCMD_MMAPBATCH \ _IOC(_IOC_NONE, 'P', 3, sizeof(privcmd_mmapbatch_t)) +#define IOCTL_PRIVCMD_MMAP_BATCH \ + _IOC(_IOC_NONE, 'P', 4, sizeof(privcmd_mmap_batch_t)) #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */