| # HG changeset patch
# User kfraser@xxxxxxxxxxxxxxxxxxxxx
# Node ID e1f3af226a8e8cb32de079b283cdcac718d3e076
# Parent  b92104e0cf08256a818a7d902326898583f13ad8
[LINUX] Various fixes for mmapping I/O and foreign memory pages.
First, auto-translate guests can use remap_pfn_range() rather than
direct_remap_pfn_range(). This actually works better because
remap_pfn_range() can legitimately assert VM_PFNMAP (this patch
removes this flag for direct_remap_pfn_range()).
There are various cleanups and fixes to the privcmd interface:
 1. VMAs should be searched and used under the mmap semaphore.
 2. Mapping should be single shot (since direct_remap_pfn_range()
    expects the PTEs to be empty when it is called).
 3. Demand-fault population of the privcmd vma should be disallowed.
 4. Various others, including a more thorough check of input args.
Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>
---
 linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c    |    8 -
 linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c |  167 +++++++++++----------
 linux-2.6-xen-sparse/mm/memory.c                   |    5 
 3 files changed, 104 insertions(+), 76 deletions(-)
diff -r b92104e0cf08 -r e1f3af226a8e 
linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c   Mon Oct 09 10:18:11 
2006 +0100
+++ b/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c   Mon Oct 09 10:56:17 
2006 +0100
@@ -28,6 +28,8 @@ static int direct_remap_area_pte_fn(pte_
                                    void *data)
 {
        mmu_update_t **v = (mmu_update_t **)data;
+
+       BUG_ON(!pte_none(*pte));
 
        (*v)->ptr = ((u64)pfn_to_mfn(page_to_pfn(pmd_page)) <<
                     PAGE_SHIFT) | ((unsigned long)pte & ~PAGE_MASK);
@@ -110,11 +112,13 @@ int direct_remap_pfn_range(struct vm_are
                           pgprot_t prot,
                           domid_t  domid)
 {
-       /* Same as remap_pfn_range(). */
-       vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
+       if (xen_feature(XENFEAT_auto_translated_physmap))
+               return remap_pfn_range(vma, address, mfn, size, prot);
 
        if (domid == DOMID_SELF)
                return -EINVAL;
+
+       vma->vm_flags |= VM_IO | VM_RESERVED;
 
        vma->vm_mm->context.has_foreign_mappings = 1;
 
diff -r b92104e0cf08 -r e1f3af226a8e 
linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c
--- a/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c        Mon Oct 09 
10:18:11 2006 +0100
+++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c        Mon Oct 09 
10:56:17 2006 +0100
@@ -100,10 +100,12 @@ static int privcmd_ioctl(struct inode *i
        break;
 
        case IOCTL_PRIVCMD_MMAP: {
-#define PRIVCMD_MMAP_SZ 32
                privcmd_mmap_t mmapcmd;
-               privcmd_mmap_entry_t msg[PRIVCMD_MMAP_SZ];
+               privcmd_mmap_entry_t msg;
                privcmd_mmap_entry_t __user *p;
+               struct mm_struct *mm = current->mm;
+               struct vm_area_struct *vma;
+               unsigned long va;
                int i, rc;
 
                if (!is_initial_xendomain())
@@ -113,47 +115,62 @@ static int privcmd_ioctl(struct inode *i
                        return -EFAULT;
 
                p = mmapcmd.entry;
-
-               for (i = 0; i < mmapcmd.num;
-                    i += PRIVCMD_MMAP_SZ, p += PRIVCMD_MMAP_SZ) {
-                       int j, n = ((mmapcmd.num-i)>PRIVCMD_MMAP_SZ)?
-                               PRIVCMD_MMAP_SZ:(mmapcmd.num-i);
-
-                       if (copy_from_user(&msg, p,
-                                          n*sizeof(privcmd_mmap_entry_t)))
-                               return -EFAULT;
-     
-                       for (j = 0; j < n; j++) {
-                               struct vm_area_struct *vma = 
-                                       find_vma( current->mm, msg[j].va );
-
-                               if (!vma)
-                                       return -EINVAL;
-
-                               if (msg[j].va > PAGE_OFFSET)
-                                       return -EINVAL;
-
-                               if ((msg[j].va + (msg[j].npages << PAGE_SHIFT))
-                                   > vma->vm_end )
-                                       return -EINVAL;
-
-                               if ((rc = direct_remap_pfn_range(
-                                       vma,
-                                       msg[j].va&PAGE_MASK, 
-                                       msg[j].mfn, 
-                                       msg[j].npages<<PAGE_SHIFT, 
-                                       vma->vm_page_prot,
-                                       mmapcmd.dom)) < 0)
-                                       return rc;
-                       }
-               }
-               ret = 0;
+               if (copy_from_user(&msg, p, sizeof(msg)))
+                       return -EFAULT;
+
+               down_read(&mm->mmap_sem);
+
+               vma = find_vma(mm, msg.va);
+               rc = -EINVAL;
+               if (!vma || (msg.va != vma->vm_start) || vma->vm_private_data)
+                       goto mmap_out;
+
+               /* Mapping is a one-shot operation per vma. */
+               vma->vm_private_data = (void *)1;
+
+               va = vma->vm_start;
+
+               for (i = 0; i < mmapcmd.num; i++, p++) {
+                       rc = -EFAULT;
+                       if (copy_from_user(&msg, p, sizeof(msg)))
+                               goto mmap_out;
+
+                       /* Do not allow range to wrap the address space. */
+                       rc = -EINVAL;
+                       if ((msg.npages > (INT_MAX >> PAGE_SHIFT)) ||
+                           ((unsigned long)(msg.npages << PAGE_SHIFT) >= -va))
+                               goto mmap_out;
+
+                       /* Range chunks must be contiguous in va space. */
+                       if ((msg.va != va) ||
+                           ((msg.va+(msg.npages<<PAGE_SHIFT)) > vma->vm_end))
+                               goto mmap_out;
+
+                       if ((rc = direct_remap_pfn_range(
+                               vma,
+                               msg.va & PAGE_MASK, 
+                               msg.mfn, 
+                               msg.npages << PAGE_SHIFT, 
+                               vma->vm_page_prot,
+                               mmapcmd.dom)) < 0)
+                               goto mmap_out;
+
+                       p++;
+                       va += msg.npages << PAGE_SHIFT;
+               }
+
+               rc = 0;
+
+       mmap_out:
+               up_read(&mm->mmap_sem);
+               ret = rc;
        }
        break;
 
        case IOCTL_PRIVCMD_MMAPBATCH: {
                privcmd_mmapbatch_t m;
-               struct vm_area_struct *vma = NULL;
+               struct mm_struct *mm = current->mm;
+               struct vm_area_struct *vma;
                xen_pfn_t __user *p;
                unsigned long addr, mfn;
                int i;
@@ -161,37 +178,33 @@ static int privcmd_ioctl(struct inode *i
                if (!is_initial_xendomain())
                        return -EPERM;
 
-               if (copy_from_user(&m, udata, sizeof(m))) {
-                       ret = -EFAULT;
-                       goto batch_err;
-               }
-
-               if (m.dom == DOMID_SELF) {
-                       ret = -EINVAL;
-                       goto batch_err;
-               }
-
-               vma = find_vma(current->mm, m.addr);
-               if (!vma) {
-                       ret = -EINVAL;
-                       goto batch_err;
-               }
-
-               if (m.addr > PAGE_OFFSET) {
-                       ret = -EFAULT;
-                       goto batch_err;
-               }
-
-               if ((m.addr + (m.num<<PAGE_SHIFT)) > vma->vm_end) {
-                       ret = -EFAULT;
-                       goto batch_err;
-               }
+               if (copy_from_user(&m, udata, sizeof(m)))
+                       return -EFAULT;
+
+               if ((m.num <= 0) || (m.num > (INT_MAX >> PAGE_SHIFT)))
+                       return -EINVAL;
+
+               down_read(&mm->mmap_sem);
+
+               vma = find_vma(mm, m.addr);
+               if (!vma ||
+                   (m.addr != vma->vm_start) ||
+                   ((m.addr + (m.num<<PAGE_SHIFT)) != vma->vm_end) ||
+                   vma->vm_private_data) {
+                       up_read(&mm->mmap_sem);
+                       return -EINVAL;
+               }
+
+               /* Mapping is a one-shot operation per vma. */
+               vma->vm_private_data = (void *)1;
 
                p = m.arr;
                addr = m.addr;
                for (i = 0; i < m.num; i++, addr += PAGE_SIZE, p++) {
-                       if (get_user(mfn, p))
+                       if (get_user(mfn, p)) {
+                               up_read(&mm->mmap_sem);
                                return -EFAULT;
+                       }
 
                        ret = direct_remap_pfn_range(vma, addr & PAGE_MASK,
                                                     mfn, PAGE_SIZE,
@@ -200,15 +213,8 @@ static int privcmd_ioctl(struct inode *i
                                put_user(0xF0000000 | mfn, p);
                }
 
+               up_read(&mm->mmap_sem);
                ret = 0;
-               break;
-
-       batch_err:
-               printk("batch_err ret=%d vma=%p addr=%lx "
-                      "num=%d arr=%p %lx-%lx\n", 
-                      ret, vma, (unsigned long)m.addr, m.num, m.arr,
-                      vma ? vma->vm_start : 0, vma ? vma->vm_end : 0);
-               break;
        }
        break;
 
@@ -221,10 +227,27 @@ static int privcmd_ioctl(struct inode *i
 }
 
 #ifndef HAVE_ARCH_PRIVCMD_MMAP
+static struct page *privcmd_nopage(struct vm_area_struct *vma,
+                                  unsigned long address,
+                                  int *type)
+{
+       return NOPAGE_SIGBUS;
+}
+
+static struct vm_operations_struct privcmd_vm_ops = {
+       .nopage = privcmd_nopage
+};
+
 static int privcmd_mmap(struct file * file, struct vm_area_struct * vma)
 {
+       /* Unsupported for auto-translate guests. */
+       if (xen_feature(XENFEAT_auto_translated_physmap))
+               return -ENOSYS;
+
        /* DONTCOPY is essential for Xen as copy_page_range is broken. */
        vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY;
+       vma->vm_ops = &privcmd_vm_ops;
+       vma->vm_private_data = NULL;
 
        return 0;
 }
diff -r b92104e0cf08 -r e1f3af226a8e linux-2.6-xen-sparse/mm/memory.c
--- a/linux-2.6-xen-sparse/mm/memory.c  Mon Oct 09 10:18:11 2006 +0100
+++ b/linux-2.6-xen-sparse/mm/memory.c  Mon Oct 09 10:56:17 2006 +0100
@@ -390,7 +390,7 @@ struct page *vm_normal_page(struct vm_ar
 
        if (vma->vm_flags & VM_PFNMAP) {
                unsigned long off = (addr - vma->vm_start) >> PAGE_SHIFT;
-               if ((pfn == vma->vm_pgoff + off) || !pfn_valid(pfn))
+               if (pfn == vma->vm_pgoff + off)
                        return NULL;
                if (!is_cow_mapping(vma->vm_flags))
                        return NULL;
@@ -405,7 +405,8 @@ struct page *vm_normal_page(struct vm_ar
         * Remove this test eventually!
         */
        if (unlikely(!pfn_valid(pfn))) {
-               print_bad_pte(vma, pte, addr);
+               if (!(vma->vm_flags & VM_RESERVED))
+                       print_bad_pte(vma, pte, addr);
                return NULL;
        }
 
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
 |