# HG changeset patch
# User kfraser@xxxxxxxxxxxxxxxxxxxxx
# Node ID e1f3af226a8e8cb32de079b283cdcac718d3e076
# Parent b92104e0cf08256a818a7d902326898583f13ad8
[LINUX] Various fixes for mmapping I/O and foreign memory pages.
First, auto-translated guests can use remap_pfn_range() rather than
direct_remap_pfn_range(). This actually works better because
remap_pfn_range() can legitimately assert VM_PFNMAP (this patch
removes that flag from direct_remap_pfn_range()).
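In outline, the new entry logic of direct_remap_pfn_range() is as
follows (a simplified sketch of the first hunk below; the MMU-update
loop that fills the PTEs is unchanged and elided here):

    int direct_remap_pfn_range(struct vm_area_struct *vma,
                               unsigned long address, unsigned long mfn,
                               unsigned long size, pgprot_t prot,
                               domid_t domid)
    {
            /* Auto-translated guests take the generic kernel path,
             * which may legitimately set VM_PFNMAP on the vma. */
            if (xen_feature(XENFEAT_auto_translated_physmap))
                    return remap_pfn_range(vma, address, mfn, size, prot);

            /* The foreign-mapping path makes no sense for our own frames. */
            if (domid == DOMID_SELF)
                    return -EINVAL;

            /* Note: VM_PFNMAP is deliberately absent here. */
            vma->vm_flags |= VM_IO | VM_RESERVED;
            vma->vm_mm->context.has_foreign_mappings = 1;

            /* ... batch mmu_update hypercalls to populate the PTEs ... */
    }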
There are various cleanups and fixes to the privcmd interface:
1. VMAs should be searched and used under the mmap semaphore.
2. Mapping should be single shot, since direct_remap_pfn_range()
expects the PTEs to be empty when it is called (a caller-side sketch
of what this implies follows this list).
3. Demand-fault population of the privcmd vma should be disallowed.
4. Various others, including a more thorough check of input args.
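Concretely, the single-shot rule means a tool must mmap() a fresh
region of the privcmd device and populate the whole vma with exactly
one batch ioctl: the kernel now rejects a vma whose vm_private_data is
already set or whose bounds do not match the request. A minimal
caller-side sketch, assuming the usual privcmd_mmapbatch_t layout,
ioctl number, and device path from the tools' public privcmd header
(those are conventions of the userspace tools, not part of this patch):

    #include <fcntl.h>
    #include <sys/ioctl.h>
    #include <sys/mman.h>
    #include <unistd.h>
    #include <xen/sys/privcmd.h> /* privcmd_mmapbatch_t, IOCTL_PRIVCMD_MMAPBATCH */

    /* Map `num` foreign frames of domain `dom` (frame numbers in mfns[]).
     * Returns the mapped address, or NULL on failure. 4KiB pages assumed. */
    static void *map_foreign_frames(int num, domid_t dom, xen_pfn_t *mfns)
    {
            size_t size = (size_t)num * 4096;
            privcmd_mmapbatch_t m;
            void *addr;
            int fd = open("/proc/xen/privcmd", O_RDWR);

            if (fd < 0)
                    return NULL;

            /* A fresh vma: one that has already been the target of a
             * mapping ioctl (vm_private_data set) is now rejected. */
            addr = mmap(NULL, size, PROT_READ | PROT_WRITE,
                        MAP_SHARED, fd, 0);
            if (addr == MAP_FAILED) {
                    close(fd);
                    return NULL;
            }

            /* One shot: the batch must cover the vma exactly
             * (m.addr == vm_start and m.addr + size == vm_end). */
            m.num  = num;
            m.dom  = dom;
            m.addr = (unsigned long)addr;
            m.arr  = mfns;
            if (ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH, &m) < 0) {
                    munmap(addr, size);
                    addr = NULL;
            }

            close(fd);
            return addr;
    }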
Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>
---
 linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c    |    8 -
 linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c |  167 +++++++++++----------
 linux-2.6-xen-sparse/mm/memory.c                   |    5
 3 files changed, 104 insertions(+), 76 deletions(-)
diff -r b92104e0cf08 -r e1f3af226a8e linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c	Mon Oct 09 10:18:11 2006 +0100
+++ b/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c	Mon Oct 09 10:56:17 2006 +0100
@@ -28,6 +28,8 @@ static int direct_remap_area_pte_fn(pte_
void *data)
{
mmu_update_t **v = (mmu_update_t **)data;
+
+ BUG_ON(!pte_none(*pte));
(*v)->ptr = ((u64)pfn_to_mfn(page_to_pfn(pmd_page)) <<
PAGE_SHIFT) | ((unsigned long)pte & ~PAGE_MASK);
@@ -110,11 +112,13 @@ int direct_remap_pfn_range(struct vm_are
pgprot_t prot,
domid_t domid)
{
- /* Same as remap_pfn_range(). */
- vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ return remap_pfn_range(vma, address, mfn, size, prot);
if (domid == DOMID_SELF)
return -EINVAL;
+
+ vma->vm_flags |= VM_IO | VM_RESERVED;
vma->vm_mm->context.has_foreign_mappings = 1;
diff -r b92104e0cf08 -r e1f3af226a8e linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c
--- a/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c	Mon Oct 09 10:18:11 2006 +0100
+++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c	Mon Oct 09 10:56:17 2006 +0100
@@ -100,10 +100,12 @@ static int privcmd_ioctl(struct inode *i
break;
case IOCTL_PRIVCMD_MMAP: {
-#define PRIVCMD_MMAP_SZ 32
privcmd_mmap_t mmapcmd;
- privcmd_mmap_entry_t msg[PRIVCMD_MMAP_SZ];
+ privcmd_mmap_entry_t msg;
privcmd_mmap_entry_t __user *p;
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ unsigned long va;
int i, rc;
if (!is_initial_xendomain())
@@ -113,47 +115,62 @@ static int privcmd_ioctl(struct inode *i
return -EFAULT;
p = mmapcmd.entry;
-
- for (i = 0; i < mmapcmd.num;
- i += PRIVCMD_MMAP_SZ, p += PRIVCMD_MMAP_SZ) {
- int j, n = ((mmapcmd.num-i)>PRIVCMD_MMAP_SZ)?
- PRIVCMD_MMAP_SZ:(mmapcmd.num-i);
-
- if (copy_from_user(&msg, p,
- n*sizeof(privcmd_mmap_entry_t)))
- return -EFAULT;
-
- for (j = 0; j < n; j++) {
- struct vm_area_struct *vma =
- find_vma( current->mm, msg[j].va );
-
- if (!vma)
- return -EINVAL;
-
- if (msg[j].va > PAGE_OFFSET)
- return -EINVAL;
-
- if ((msg[j].va + (msg[j].npages << PAGE_SHIFT))
- > vma->vm_end )
- return -EINVAL;
-
- if ((rc = direct_remap_pfn_range(
- vma,
- msg[j].va&PAGE_MASK,
- msg[j].mfn,
- msg[j].npages<<PAGE_SHIFT,
- vma->vm_page_prot,
- mmapcmd.dom)) < 0)
- return rc;
- }
- }
- ret = 0;
+ if (copy_from_user(&msg, p, sizeof(msg)))
+ return -EFAULT;
+
+ down_read(&mm->mmap_sem);
+
+ vma = find_vma(mm, msg.va);
+ rc = -EINVAL;
+ if (!vma || (msg.va != vma->vm_start) || vma->vm_private_data)
+ goto mmap_out;
+
+ /* Mapping is a one-shot operation per vma. */
+ vma->vm_private_data = (void *)1;
+
+ va = vma->vm_start;
+
+ for (i = 0; i < mmapcmd.num; i++, p++) {
+ rc = -EFAULT;
+ if (copy_from_user(&msg, p, sizeof(msg)))
+ goto mmap_out;
+
+ /* Do not allow range to wrap the address space. */
+ rc = -EINVAL;
+ if ((msg.npages > (INT_MAX >> PAGE_SHIFT)) ||
+ ((unsigned long)(msg.npages << PAGE_SHIFT) >= -va))
+ goto mmap_out;
+
+ /* Range chunks must be contiguous in va space. */
+ if ((msg.va != va) ||
+ ((msg.va+(msg.npages<<PAGE_SHIFT)) > vma->vm_end))
+ goto mmap_out;
+
+ if ((rc = direct_remap_pfn_range(
+ vma,
+ msg.va & PAGE_MASK,
+ msg.mfn,
+ msg.npages << PAGE_SHIFT,
+ vma->vm_page_prot,
+ mmapcmd.dom)) < 0)
+ goto mmap_out;
+
+ va += msg.npages << PAGE_SHIFT;
+ }
+
+ rc = 0;
+
+ mmap_out:
+ up_read(&mm->mmap_sem);
+ ret = rc;
}
break;
case IOCTL_PRIVCMD_MMAPBATCH: {
privcmd_mmapbatch_t m;
- struct vm_area_struct *vma = NULL;
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
xen_pfn_t __user *p;
unsigned long addr, mfn;
int i;
@@ -161,37 +178,33 @@ static int privcmd_ioctl(struct inode *i
if (!is_initial_xendomain())
return -EPERM;
- if (copy_from_user(&m, udata, sizeof(m))) {
- ret = -EFAULT;
- goto batch_err;
- }
-
- if (m.dom == DOMID_SELF) {
- ret = -EINVAL;
- goto batch_err;
- }
-
- vma = find_vma(current->mm, m.addr);
- if (!vma) {
- ret = -EINVAL;
- goto batch_err;
- }
-
- if (m.addr > PAGE_OFFSET) {
- ret = -EFAULT;
- goto batch_err;
- }
-
- if ((m.addr + (m.num<<PAGE_SHIFT)) > vma->vm_end) {
- ret = -EFAULT;
- goto batch_err;
- }
+ if (copy_from_user(&m, udata, sizeof(m)))
+ return -EFAULT;
+
+ if ((m.num <= 0) || (m.num > (INT_MAX >> PAGE_SHIFT)))
+ return -EINVAL;
+
+ down_read(&mm->mmap_sem);
+
+ vma = find_vma(mm, m.addr);
+ if (!vma ||
+ (m.addr != vma->vm_start) ||
+ ((m.addr + (m.num<<PAGE_SHIFT)) != vma->vm_end) ||
+ vma->vm_private_data) {
+ up_read(&mm->mmap_sem);
+ return -EINVAL;
+ }
+
+ /* Mapping is a one-shot operation per vma. */
+ vma->vm_private_data = (void *)1;
p = m.arr;
addr = m.addr;
for (i = 0; i < m.num; i++, addr += PAGE_SIZE, p++) {
- if (get_user(mfn, p))
+ if (get_user(mfn, p)) {
+ up_read(&mm->mmap_sem);
return -EFAULT;
+ }
ret = direct_remap_pfn_range(vma, addr & PAGE_MASK,
mfn, PAGE_SIZE,
@@ -200,15 +213,8 @@ static int privcmd_ioctl(struct inode *i
put_user(0xF0000000 | mfn, p);
}
+ up_read(&mm->mmap_sem);
ret = 0;
- break;
-
- batch_err:
- printk("batch_err ret=%d vma=%p addr=%lx "
- "num=%d arr=%p %lx-%lx\n",
- ret, vma, (unsigned long)m.addr, m.num, m.arr,
- vma ? vma->vm_start : 0, vma ? vma->vm_end : 0);
- break;
}
break;
@@ -221,10 +227,27 @@ static int privcmd_ioctl(struct inode *i
}
#ifndef HAVE_ARCH_PRIVCMD_MMAP
+static struct page *privcmd_nopage(struct vm_area_struct *vma,
+ unsigned long address,
+ int *type)
+{
+ return NOPAGE_SIGBUS;
+}
+
+static struct vm_operations_struct privcmd_vm_ops = {
+ .nopage = privcmd_nopage
+};
+
static int privcmd_mmap(struct file * file, struct vm_area_struct * vma)
{
+ /* Unsupported for auto-translate guests. */
+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ return -ENOSYS;
+
/* DONTCOPY is essential for Xen as copy_page_range is broken. */
vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY;
+ vma->vm_ops = &privcmd_vm_ops;
+ vma->vm_private_data = NULL;
return 0;
}
diff -r b92104e0cf08 -r e1f3af226a8e linux-2.6-xen-sparse/mm/memory.c
--- a/linux-2.6-xen-sparse/mm/memory.c Mon Oct 09 10:18:11 2006 +0100
+++ b/linux-2.6-xen-sparse/mm/memory.c Mon Oct 09 10:56:17 2006 +0100
@@ -390,7 +390,7 @@ struct page *vm_normal_page(struct vm_ar
if (vma->vm_flags & VM_PFNMAP) {
unsigned long off = (addr - vma->vm_start) >> PAGE_SHIFT;
- if ((pfn == vma->vm_pgoff + off) || !pfn_valid(pfn))
+ if (pfn == vma->vm_pgoff + off)
return NULL;
if (!is_cow_mapping(vma->vm_flags))
return NULL;
@@ -405,7 +405,8 @@ struct page *vm_normal_page(struct vm_ar
* Remove this test eventually!
*/
if (unlikely(!pfn_valid(pfn))) {
- print_bad_pte(vma, pte, addr);
+ if (!(vma->vm_flags & VM_RESERVED))
+ print_bad_pte(vma, pte, addr);
return NULL;
}