WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

Re: [Xen-devel] [PATCH] [PVOPS] fix gntdev on PAE

To: Stefano Stabellini <stefano.stabellini@xxxxxxxxxxxxx>
Subject: Re: [Xen-devel] [PATCH] [PVOPS] fix gntdev on PAE
From: Jeremy Fitzhardinge <jeremy@xxxxxxxx>
Date: Tue, 01 Jun 2010 09:31:36 -0700
Cc: "xen-devel@xxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxx>, Gerd Hoffmann <kraxel@xxxxxxxxxx>
Delivery-date: Tue, 01 Jun 2010 10:48:11 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
In-reply-to: <alpine.DEB.2.00.1006011036520.3028@kaball-desktop>
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
References: <alpine.DEB.2.00.1002011504070.24837@kaball-desktop> <alpine.DEB.2.00.1002011544380.24837@kaball-desktop> <4B71DA53.1080404@xxxxxxxx> <alpine.DEB.2.00.1002101219120.12653@kaball-desktop> <4BFFFD7C.5080708@xxxxxxxx> <alpine.DEB.2.00.1006011036520.3028@kaball-desktop>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
User-agent: Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.1.9) Gecko/20100430 Fedora/3.0.4-2.fc12 Lightning/1.0b2pre Thunderbird/3.0.4
On 06/01/2010 02:38 AM, Stefano Stabellini wrote:
> On Fri, 28 May 2010, Jeremy Fitzhardinge wrote:
>   
>> I managed to catch a lockdep problem in gntdev, which may be the same as
>> before:
>>
>> BUG: sleeping function called from invalid context at kernel/rwsem.c:21
>> in_atomic(): 1, irqs_disabled(): 0, pid: 4091, name: qemu-dm
>> 2 locks held by qemu-dm/4091:
>>  #0:  (&mm->mmap_sem){++++++}, at: [<ffffffff810bb50f>] sys_munmap+0x33/0x58
>>  #1:  (rcu_read_lock){.+.+..}, at: [<ffffffff810cd63a>] 
>> __mmu_notifier_invalidate_range_start+0x0/0xc7
>> Pid: 4091, comm: qemu-dm Not tainted 2.6.32.13 #23
>> Call Trace:
>>  [<ffffffff8106705b>] ? __debug_show_held_locks+0x22/0x24
>>  [<ffffffff81039522>] __might_sleep+0x123/0x127
>>  [<ffffffff810a8536>] ? release_pages+0xd2/0x1e7
>>  [<ffffffff81498849>] down_read+0x1f/0x57
>>  [<ffffffff81010142>] ? check_events+0x12/0x20
>>  [<ffffffff810a8536>] ? release_pages+0xd2/0x1e7
>>  [<ffffffff810cd63a>] ? __mmu_notifier_invalidate_range_start+0x0/0xc7
>>  [<ffffffff8123e069>] mn_invl_range_start+0x32/0x118
>>  [<ffffffff810cd69c>] __mmu_notifier_invalidate_range_start+0x62/0xc7
>>  [<ffffffff810cd63a>] ? __mmu_notifier_invalidate_range_start+0x0/0xc7
>>  [<ffffffff810b54bc>] unmap_vmas+0x8c/0x91a
>>  [<ffffffff810ba363>] unmap_region+0xda/0x178
>>  [<ffffffff810bb472>] do_munmap+0x2ae/0x318
>>  [<ffffffff810bb51d>] sys_munmap+0x41/0x58
>>  [<ffffffff81013b82>] system_call_fastpath+0x16/0x1b
>>
>>
>> The problem is that mn_invl_range_start does a down_read(), but it is
>> called from __mmu_notifier_invalidate_range_start(), which does an
>> rcu_read_lock, which has the side-effect of disabling preemption.
>>
>> The mmu notifier code seems to have always used rcu_read_lock this way,
>> so I guess this bug has always been there.  It's not immediately obvious
>> how to fix it.
>>
>> Thoughts?
>>     
> What about turning the semaphore into a rwlock?
> Performances shouldn't matter in this case.
> Something like this:
>   

The problem is that the rcu lock disables preemption, so anything inside
it must be non-scheduling.  So it would need to be a spinlock type
thing, I think.

    J

> ---
>
>
> diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
> index ddc59cc..91846b9 100644
> --- a/drivers/xen/gntdev.c
> +++ b/drivers/xen/gntdev.c
> @@ -28,7 +28,7 @@
>  #include <linux/types.h>
>  #include <linux/uaccess.h>
>  #include <linux/sched.h>
> -#include <linux/rwsem.h>
> +#include <linux/spinlock_types.h>
>  
>  #include <xen/xen.h>
>  #include <xen/grant_table.h>
> @@ -51,7 +51,7 @@ struct gntdev_priv {
>       struct list_head maps;
>       uint32_t used;
>       uint32_t limit;
> -     struct rw_semaphore sem;
> +     rwlock_t rwlock;
>       struct mm_struct *mm;
>       struct mmu_notifier mn;
>  };
> @@ -277,7 +277,7 @@ static void mn_invl_range_start(struct mmu_notifier *mn,
>       unsigned long mstart, mend;
>       int err;
>  
> -     down_read(&priv->sem);
> +     read_lock(&priv->rwlock);
>       list_for_each_entry(map, &priv->maps, next) {
>               if (!map->vma)
>                       continue;
> @@ -299,7 +299,7 @@ static void mn_invl_range_start(struct mmu_notifier *mn,
>                                       (mend - mstart) >> PAGE_SHIFT);
>               WARN_ON(err);
>       }
> -     up_read(&priv->sem);
> +     read_unlock(&priv->rwlock);
>  }
>  
>  static void mn_invl_page(struct mmu_notifier *mn,
> @@ -316,7 +316,7 @@ static void mn_release(struct mmu_notifier *mn,
>       struct grant_map *map;
>       int err;
>  
> -     down_read(&priv->sem);
> +     read_lock(&priv->rwlock);
>       list_for_each_entry(map, &priv->maps, next) {
>               if (!map->vma)
>                       continue;
> @@ -327,7 +327,7 @@ static void mn_release(struct mmu_notifier *mn,
>               err = unmap_grant_pages(map, 0, map->count);
>               WARN_ON(err);
>       }
> -     up_read(&priv->sem);
> +     read_unlock(&priv->rwlock);
>  }
>  
>  struct mmu_notifier_ops gntdev_mmu_ops = {
> @@ -347,7 +347,7 @@ static int gntdev_open(struct inode *inode, struct file 
> *flip)
>               return -ENOMEM;
>  
>       INIT_LIST_HEAD(&priv->maps);
> -     init_rwsem(&priv->sem);
> +     priv->rwlock = RW_LOCK_UNLOCKED;
>       priv->limit = limit;
>  
>       priv->mm = get_task_mm(current);
> @@ -375,13 +375,13 @@ static int gntdev_release(struct inode *inode, struct 
> file *flip)
>       if (debug)
>               printk("%s: priv %p\n", __FUNCTION__, priv);
>  
> -     down_write(&priv->sem);
> +     write_lock(&priv->rwlock);
>       while (!list_empty(&priv->maps)) {
>               map = list_entry(priv->maps.next, struct grant_map, next);
>               err = gntdev_del_map(map);
>               WARN_ON(err);
>       }
> -     up_write(&priv->sem);
> +     write_unlock(&priv->rwlock);
>       mmu_notifier_unregister(&priv->mn, priv->mm);
>       kfree(priv);
>       return 0;
> @@ -404,7 +404,7 @@ static long gntdev_ioctl_map_grant_ref(struct gntdev_priv 
> *priv,
>       if (unlikely(op.count > priv->limit))
>               return -EINVAL;
>  
> -     down_write(&priv->sem);
> +     write_lock(&priv->rwlock);
>       err = -ENOMEM;
>       map = gntdev_add_map(priv, op.count);
>       if (!map)
> @@ -417,13 +417,13 @@ static long gntdev_ioctl_map_grant_ref(struct 
> gntdev_priv *priv,
>       op.index = map->index << PAGE_SHIFT;
>       if (copy_to_user(u, &op, sizeof(op)) != 0)
>               goto err_free;
> -     up_write(&priv->sem);
> +     write_unlock(&priv->rwlock);
>       return 0;
>  
>  err_free:
>       gntdev_del_map(map);
>  err_unlock:
> -     up_write(&priv->sem);
> +     write_unlock(&priv->rwlock);
>       return err;
>  }
>  
> @@ -440,11 +440,11 @@ static long gntdev_ioctl_unmap_grant_ref(struct 
> gntdev_priv *priv,
>               printk("%s: priv %p, del %d+%d\n", __FUNCTION__, priv,
>                      (int)op.index, (int)op.count);
>  
> -     down_write(&priv->sem);
> +     write_lock(&priv->rwlock);
>       map = gntdev_find_map_index(priv, op.index >> PAGE_SHIFT, op.count);
>       if (map)
>               err = gntdev_del_map(map);
> -     up_write(&priv->sem);
> +     write_unlock(&priv->rwlock);
>       return err;
>  }
>  
> @@ -460,16 +460,16 @@ static long gntdev_ioctl_get_offset_for_vaddr(struct 
> gntdev_priv *priv,
>               printk("%s: priv %p, offset for vaddr %lx\n", __FUNCTION__, 
> priv,
>                      (unsigned long)op.vaddr);
>  
> -     down_read(&priv->sem);
> +     read_lock(&priv->rwlock);
>       map = gntdev_find_map_vaddr(priv, op.vaddr);
>       if (map == NULL ||
>           map->vma->vm_start != op.vaddr) {
> -             up_read(&priv->sem);
> +             read_unlock(&priv->rwlock);
>               return -EINVAL;
>       }
>       op.offset = map->index << PAGE_SHIFT;
>       op.count = map->count;
> -     up_read(&priv->sem);
> +     read_unlock(&priv->rwlock);
>  
>       if (copy_to_user(u, &op, sizeof(op)) != 0)
>               return -EFAULT;
> @@ -488,9 +488,9 @@ static long gntdev_ioctl_set_max_grants(struct 
> gntdev_priv *priv,
>       if (op.count > limit)
>               return -EINVAL;
>  
> -     down_write(&priv->sem);
> +     write_lock(&priv->rwlock);
>       priv->limit = op.count;
> -     up_write(&priv->sem);
> +     write_unlock(&priv->rwlock);
>       return 0;
>  }
>  
> @@ -538,7 +538,7 @@ static int gntdev_mmap(struct file *flip, struct 
> vm_area_struct *vma)
>               printk("%s: map %d+%d at %lx (pgoff %lx)\n", __FUNCTION__,
>                      index, count, vma->vm_start, vma->vm_pgoff);
>  
> -     down_read(&priv->sem);
> +     read_lock(&priv->rwlock);
>       map = gntdev_find_map_index(priv, index, count);
>       if (!map)
>               goto unlock_out;
> @@ -580,7 +580,7 @@ static int gntdev_mmap(struct file *flip, struct 
> vm_area_struct *vma)
>       map->is_mapped = 1;
>  
>  unlock_out:
> -     up_read(&priv->sem);
> +     read_unlock(&priv->rwlock);
>       return err;
>  }
>  
>
>   


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel