# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1240922662 -3600
# Node ID 978499ee4f39e277c9617f0aef5e34ea5ef40bf9
# Parent cb25b9d5a594a1d48594b6552a4f30be3575face
linux/blktap: fix vma_close() for partial munmap.
vm_area_struct::vm_private_data is used
by get_user_pages() so that we can't override
it. So in order to make blktap work, set it
to a array of struct page*.
Without mm->mmap_sem, virtual mapping can be changed.
so remembering vma which was passed to mmap callback
is bogus because later the vma can be freed or changed.
So don't remember vma and put necessary infomations into
tap_blkif_t. and use find_vma() to get necessary vma's.
Signed-off-by: Isaku Yamahata <yamahata@xxxxxxxxxxxxx>
---
drivers/xen/blktap/blktap.c | 160 ++++++++++++++++++++++++++++----------------
1 files changed, 104 insertions(+), 56 deletions(-)
diff -r cb25b9d5a594 -r 978499ee4f39 drivers/xen/blktap/blktap.c
--- a/drivers/xen/blktap/blktap.c Tue Apr 28 13:43:46 2009 +0100
+++ b/drivers/xen/blktap/blktap.c Tue Apr 28 13:44:22 2009 +0100
@@ -99,7 +99,7 @@ typedef struct domid_translate_ext {
/*Data struct associated with each of the tapdisk devices*/
typedef struct tap_blkif {
- struct vm_area_struct *vma; /*Shared memory area */
+ struct mm_struct *mm; /*User address space */
unsigned long rings_vstart; /*Kernel memory mapping */
unsigned long user_vstart; /*User memory mapping */
unsigned long dev_inuse; /*One process opens device at a time. */
@@ -116,6 +116,7 @@ typedef struct tap_blkif {
[req id, idx] tuple */
blkif_t *blkif; /*Associate blkif with tapdev */
struct domid_translate_ext trans; /*Translation from domid to bus. */
+ struct page **map; /*Mapping page */
} tap_blkif_t;
static struct tap_blkif *tapfds[MAX_TAP_DEV];
@@ -293,10 +294,6 @@ static inline int OFFSET_TO_SEG(int offs
/******************************************************************
* BLKTAP VM OPS
*/
-struct tap_vma_priv {
- tap_blkif_t *info;
- struct page *map[];
-};
static struct page *blktap_nopage(struct vm_area_struct *vma,
unsigned long address,
@@ -315,11 +312,10 @@ static pte_t blktap_clear_pte(struct vm_
pte_t *ptep, int is_fullmm)
{
pte_t copy;
- tap_blkif_t *info;
+ tap_blkif_t *info = NULL;
int offset, seg, usr_idx, pending_idx, mmap_idx;
unsigned long uvstart;
unsigned long kvaddr;
- struct tap_vma_priv *priv;
struct page *pg;
struct grant_handle_pair *khandle;
struct gnttab_unmap_grant_ref unmap[2];
@@ -338,12 +334,9 @@ static pte_t blktap_clear_pte(struct vm_
return ptep_get_and_clear_full(vma->vm_mm, uvaddr,
ptep, is_fullmm);
- priv = vma->vm_private_data;
-
/* TODO Should these be changed to if statements? */
BUG_ON(!info);
BUG_ON(!info->idx_map);
- BUG_ON(!priv);
offset = (int) ((uvaddr - uvstart) >> PAGE_SHIFT);
usr_idx = OFFSET_TO_USR_IDX(offset);
@@ -355,7 +348,7 @@ static pte_t blktap_clear_pte(struct vm_
kvaddr = idx_to_kaddr(mmap_idx, pending_idx, seg);
pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
ClearPageReserved(pg);
- priv->map[offset + RING_PAGES] = NULL;
+ info->map[offset + RING_PAGES] = NULL;
khandle = &pending_handle(mmap_idx, pending_idx, seg);
@@ -396,19 +389,43 @@ static pte_t blktap_clear_pte(struct vm_
return copy;
}
+static void blktap_vma_open(struct vm_area_struct *vma)
+{
+ tap_blkif_t *info;
+ if (vma->vm_file == NULL)
+ return;
+
+ info = vma->vm_file->private_data;
+ vma->vm_private_data =
+ &info->map[(vma->vm_start - info->rings_vstart) >> PAGE_SHIFT];
+}
+
+/* tricky part
+ * When partial munmapping, ->open() is called only splitted vma which
+ * will be released soon. * See split_vma() and do_munmap() in mm/mmap.c
+ * So there is no chance to fix up vm_private_data of the end vma.
+ */
static void blktap_vma_close(struct vm_area_struct *vma)
{
- struct tap_vma_priv *priv = vma->vm_private_data;
-
- if (priv) {
- priv->info->vma = NULL;
- kfree(priv);
- }
-}
-
-struct vm_operations_struct blktap_vm_ops = {
+ tap_blkif_t *info;
+ struct vm_area_struct *next = vma->vm_next;
+
+ if (next == NULL ||
+ vma->vm_ops != next->vm_ops ||
+ vma->vm_end != next->vm_start ||
+ vma->vm_file == NULL ||
+ vma->vm_file != next->vm_file)
+ return;
+
+ info = vma->vm_file->private_data;
+ next->vm_private_data =
+ &info->map[(next->vm_start - info->rings_vstart) >> PAGE_SHIFT];
+}
+
+static struct vm_operations_struct blktap_vm_ops = {
nopage: blktap_nopage,
zap_pte: blktap_clear_pte,
+ open: blktap_vma_open,
close: blktap_vma_close,
};
@@ -455,7 +472,7 @@ static tap_blkif_t *get_next_free_dev(vo
info = tapfds[minor];
/* we could have failed a previous attempt. */
if (!info ||
- ((info->dev_inuse == 0) &&
+ ((!test_bit(0, &info->dev_inuse)) &&
(info->dev_pending == 0)) ) {
info->dev_pending = 1;
goto found;
@@ -592,7 +609,7 @@ static int blktap_open(struct inode *ino
FRONT_RING_INIT(&info->ufe_ring, sring, PAGE_SIZE);
filp->private_data = info;
- info->vma = NULL;
+ info->mm = NULL;
info->idx_map = kmalloc(sizeof(unsigned long) * MAX_PENDING_REQS,
GFP_KERNEL);
@@ -624,8 +641,10 @@ static int blktap_release(struct inode *
info->ring_ok = 0;
smp_wmb();
- info->dev_inuse = 0;
- DPRINTK("Freeing device [/dev/xen/blktap%d]\n",info->minor);
+ mmput(info->mm);
+ info->mm = NULL;
+ kfree(info->map);
+ info->map = NULL;
/* Free the ring page. */
ClearPageReserved(virt_to_page(info->ufe_ring.sring));
@@ -643,6 +662,9 @@ static int blktap_release(struct inode *
}
info->status = CLEANSHUTDOWN;
}
+
+ clear_bit(0, &info->dev_inuse);
+ DPRINTK("Freeing device [/dev/xen/blktap%d]\n",info->minor);
return 0;
}
@@ -669,7 +691,6 @@ static int blktap_mmap(struct file *filp
static int blktap_mmap(struct file *filp, struct vm_area_struct *vma)
{
int size;
- struct tap_vma_priv *priv;
tap_blkif_t *info = filp->private_data;
int ret;
@@ -706,16 +727,14 @@ static int blktap_mmap(struct file *filp
}
/* Mark this VM as containing foreign pages, and set up mappings. */
- priv = kzalloc(sizeof(*priv) + ((vma->vm_end - vma->vm_start)
- >> PAGE_SHIFT) * sizeof(*priv->map),
- GFP_KERNEL);
- if (priv == NULL) {
+ info->map = kzalloc(((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) *
+ sizeof(*info->map), GFP_KERNEL);
+ if (info->map == NULL) {
WPRINTK("Couldn't alloc VM_FOREIGN map.\n");
goto fail;
}
- priv->info = info;
-
- vma->vm_private_data = priv;
+
+ vma->vm_private_data = info->map;
vma->vm_flags |= VM_FOREIGN;
vma->vm_flags |= VM_DONTCOPY;
@@ -723,7 +742,7 @@ static int blktap_mmap(struct file *filp
vma->vm_mm->context.has_foreign_mappings = 1;
#endif
- info->vma = vma;
+ info->mm = get_task_mm(current);
smp_wmb();
info->ring_ok = 1;
return 0;
@@ -997,6 +1016,24 @@ static void free_req(pending_req_t *req)
wake_up(&pending_free_wq);
}
+static void blktap_zap_page_range(struct mm_struct *mm,
+ unsigned long uvaddr, int nr_pages)
+{
+ unsigned long end = uvaddr + (nr_pages << PAGE_SHIFT);
+ struct vm_area_struct *vma;
+
+ vma = find_vma(mm, uvaddr);
+ while (vma && uvaddr < end) {
+ unsigned long s = max(uvaddr, vma->vm_start);
+ unsigned long e = min(end, vma->vm_end);
+
+ zap_page_range(vma, s, e - s, NULL);
+
+ uvaddr = e;
+ vma = vma->vm_next;
+ }
+}
+
static void fast_flush_area(pending_req_t *req, int k_idx, int u_idx,
int tapidx)
{
@@ -1017,14 +1054,13 @@ static void fast_flush_area(pending_req_
return;
}
- mm = info->vma ? info->vma->vm_mm : NULL;
-
- if (info->vma != NULL &&
- xen_feature(XENFEAT_auto_translated_physmap)) {
+ mm = info->mm;
+
+ if (mm != NULL && xen_feature(XENFEAT_auto_translated_physmap)) {
down_write(&mm->mmap_sem);
- zap_page_range(info->vma,
- MMAP_VADDR(info->user_vstart, u_idx, 0),
- req->nr_pages << PAGE_SHIFT, NULL);
+ blktap_zap_page_range(mm,
+ MMAP_VADDR(info->user_vstart, u_idx, 0),
+ req->nr_pages);
up_write(&mm->mmap_sem);
return;
}
@@ -1075,13 +1111,12 @@ static void fast_flush_area(pending_req_
GNTTABOP_unmap_grant_ref, unmap, invcount);
BUG_ON(ret);
- if (info->vma != NULL &&
- !xen_feature(XENFEAT_auto_translated_physmap)) {
+ if (mm != NULL && !xen_feature(XENFEAT_auto_translated_physmap)) {
if (!locked++)
down_write(&mm->mmap_sem);
- zap_page_range(info->vma,
- MMAP_VADDR(info->user_vstart, u_idx, 0),
- req->nr_pages << PAGE_SHIFT, NULL);
+ blktap_zap_page_range(mm,
+ MMAP_VADDR(info->user_vstart, u_idx, 0),
+ req->nr_pages);
}
if (locked)
@@ -1195,7 +1230,6 @@ static int blktap_read_ufe_ring(tap_blki
for (j = 0; j < pending_req->nr_pages; j++) {
unsigned long kvaddr, uvaddr;
- struct tap_vma_priv *priv = info->vma->vm_private_data;
struct page *pg;
int offset;
@@ -1205,7 +1239,7 @@ static int blktap_read_ufe_ring(tap_blki
pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
ClearPageReserved(pg);
offset = (uvaddr - info->rings_vstart) >> PAGE_SHIFT;
- priv->map[offset] = NULL;
+ info->map[offset] = NULL;
}
fast_flush_area(pending_req, pending_idx, usr_idx, info->minor);
info->idx_map[usr_idx] = INVALID_REQ;
@@ -1267,7 +1301,8 @@ static int do_block_io_op(blkif_t *blkif
info = tapfds[blkif->dev_num];
- if (blkif->dev_num > MAX_TAP_DEV || !info || !info->dev_inuse) {
+ if (blkif->dev_num > MAX_TAP_DEV || !info ||
+ !test_bit(0, &info->dev_inuse)) {
if (print_dbug) {
WPRINTK("Can't get UE info!\n");
print_dbug = 0;
@@ -1363,12 +1398,12 @@ static void dispatch_rw_block_io(blkif_t
unsigned int nseg;
int ret, i, nr_sects = 0;
tap_blkif_t *info;
- struct tap_vma_priv *priv;
blkif_request_t *target;
int pending_idx = RTN_PEND_IDX(pending_req,pending_req->mem_idx);
int usr_idx;
uint16_t mmap_idx = pending_req->mem_idx;
struct mm_struct *mm;
+ struct vm_area_struct *vma = NULL;
if (blkif->dev_num < 0 || blkif->dev_num > MAX_TAP_DEV)
goto fail_response;
@@ -1413,8 +1448,7 @@ static void dispatch_rw_block_io(blkif_t
pending_req->status = BLKIF_RSP_OKAY;
pending_req->nr_pages = nseg;
op = 0;
- priv = info->vma->vm_private_data;
- mm = info->vma->vm_mm;
+ mm = info->mm;
if (!xen_feature(XENFEAT_auto_translated_physmap))
down_write(&mm->mmap_sem);
for (i = 0; i < nseg; i++) {
@@ -1497,7 +1531,7 @@ static void dispatch_rw_block_io(blkif_t
>> PAGE_SHIFT));
offset = (uvaddr - info->rings_vstart) >> PAGE_SHIFT;
pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
- priv->map[offset] = pg;
+ info->map[offset] = pg;
}
} else {
for (i = 0; i < nseg; i++) {
@@ -1524,7 +1558,7 @@ static void dispatch_rw_block_io(blkif_t
offset = (uvaddr - info->rings_vstart) >> PAGE_SHIFT;
pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
- priv->map[offset] = pg;
+ info->map[offset] = pg;
}
}
@@ -1542,9 +1576,23 @@ static void dispatch_rw_block_io(blkif_t
pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
SetPageReserved(pg);
if (xen_feature(XENFEAT_auto_translated_physmap)) {
- ret = vm_insert_page(info->vma,
- MMAP_VADDR(info->user_vstart,
- usr_idx, i), pg);
+ unsigned long uvaddr = MMAP_VADDR(info->user_vstart,
+ usr_idx, i);
+ if (vma && uvaddr >= vma->vm_end) {
+ vma = vma->vm_next;
+ if (vma &&
+ (uvaddr < vma->vm_start ||
+ uvaddr >= vma->vm_end))
+ vma = NULL;
+ }
+ if (vma == NULL) {
+ vma = find_vma(mm, uvaddr);
+ /* this virtual area was already munmapped.
+ so skip to next page */
+ if (!vma)
+ continue;
+ }
+ ret = vm_insert_page(vma, uvaddr, pg);
if (ret) {
up_write(&mm->mmap_sem);
goto fail_flush;
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
|