Hi Jan.
I think this one resulted in blkfront_remove (from xenbus) racing
against blkif_release (run from close(bdev)).
Both running for their respective kfree(info) vs. subsequent
dereferences. Or just to leak info entirely.
We only had blkfront_closing() on either thread before, which serializes
around the bd_mutex.
I'm currently thinking that info now rather wants a refcount of its own.
Ideally replacing is_ready entirely, maybe.
Any thoughts?
Daniel
On Mon, 2010-01-18 at 05:19 -0500, Jan Beulich wrote:
> Prevent prematurely freeing 'struct blkfront_info' instances (when the
> xenbus data structures are gone, but the Linux ones are still needed).
>
> Prevent adding a disk with the same (major, minor) [and hence the same
> name and sysfs entries, which leads to oopses] when the previous
> instance wasn't fully de-allocated yet.
>
> This still doesn't address all issues resulting from forced detach:
> I/O submitted after the detach still blocks forever, likely preventing
> subsequent un-mounting from completing. It's not clear to me (not
> knowing much about the block layer) how this can be avoided.
>
> This also doesn't address issues with duplicate device creation caused
> by re-using the hdXX and sdXX name spaces - this would require
> synchronization with the respective native code.
>
> As usual, written against 2.6.32.3 and made apply to the 2.6.18
> tree without further testing.
>
> As I believe that pv-ops Xen has the same issue, I'm also attaching
> a respective (compile tested only) patch against 2.6.33-rc4.
>
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
>
> --- sle11-2010-01-12.orig/drivers/xen/blkfront/blkfront.c 2009-06-04
> 10:47:04.000000000 +0200
> +++ sle11-2010-01-12/drivers/xen/blkfront/blkfront.c 2010-01-15
> 16:47:42.000000000 +0100
> @@ -426,7 +426,10 @@ static int blkfront_remove(struct xenbus
>
> blkif_free(info, 0);
>
> - kfree(info);
> + if(info->users == 0)
> + kfree(info);
> + else
> + info->is_ready = -1;
>
> return 0;
> }
> @@ -488,6 +491,9 @@ static void blkif_restart_queue_callback
> int blkif_open(struct inode *inode, struct file *filep)
> {
> struct blkfront_info *info = inode->i_bdev->bd_disk->private_data;
> +
> + if(info->is_ready < 0)
> + return -ENODEV;
> info->users++;
> return 0;
> }
> @@ -504,7 +510,10 @@ int blkif_release(struct inode *inode, s
> struct xenbus_device * dev = info->xbdev;
> enum xenbus_state state =
> xenbus_read_driver_state(dev->otherend);
>
> - if (state == XenbusStateClosing && info->is_ready)
> + if(info->is_ready < 0) {
> + blkfront_closing(dev);
> + kfree(info);
> + } else if (state == XenbusStateClosing && info->is_ready)
> blkfront_closing(dev);
> }
> return 0;
> @@ -894,7 +903,7 @@ int blkfront_is_ready(struct xenbus_devi
> {
> struct blkfront_info *info = dev->dev.driver_data;
>
> - return info->is_ready;
> + return info->is_ready > 0;
> }
>
>
> --- sle11-2010-01-12.orig/drivers/xen/blkfront/block.h 2009-06-04
> 10:47:04.000000000 +0200
> +++ sle11-2010-01-12/drivers/xen/blkfront/block.h 2010-01-18
> 08:56:41.000000000 +0100
> @@ -78,6 +78,7 @@ struct xlbd_major_info
> int index;
> int usage;
> struct xlbd_type_info *type;
> + struct xlbd_minor_state *minors;
> };
>
> struct blk_shadow {
> --- sle11-2010-01-12.orig/drivers/xen/blkfront/vbd.c 2009-06-04
> 10:47:04.000000000 +0200
> +++ sle11-2010-01-12/drivers/xen/blkfront/vbd.c 2010-01-18
> 08:56:32.000000000 +0100
> @@ -48,6 +48,12 @@
> #define VDEV_IS_EXTENDED(dev) ((dev)&(EXTENDED))
> #define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED))
>
> +struct xlbd_minor_state {
> + unsigned int nr;
> + unsigned long *bitmap;
> + spinlock_t lock;
> +};
> +
> /*
> * For convenience we distinguish between ide, scsi and 'other' (i.e.,
> * potentially combinations of the two) in the naming scheme and in a few
> other
> @@ -97,6 +103,8 @@ static struct xlbd_major_info *major_inf
> #define XLBD_MAJOR_SCSI_RANGE XLBD_MAJOR_SCSI_START ...
> XLBD_MAJOR_VBD_START - 1
> #define XLBD_MAJOR_VBD_RANGE XLBD_MAJOR_VBD_START ... XLBD_MAJOR_VBD_START +
> NUM_VBD_MAJORS - 1
>
> +#define XLBD_MAJOR_VBD_ALT(idx) ((idx) ^ XLBD_MAJOR_VBD_START ^
> (XLBD_MAJOR_VBD_START + 1))
> +
> static struct block_device_operations xlvbd_block_fops =
> {
> .owner = THIS_MODULE,
> @@ -114,6 +122,7 @@ static struct xlbd_major_info *
> xlbd_alloc_major_info(int major, int minor, int index)
> {
> struct xlbd_major_info *ptr;
> + struct xlbd_minor_state *minors;
> int do_register;
>
> ptr = kzalloc(sizeof(struct xlbd_major_info), GFP_KERNEL);
> @@ -121,6 +130,22 @@ xlbd_alloc_major_info(int major, int min
> return NULL;
>
> ptr->major = major;
> + minors = kmalloc(sizeof(*minors), GFP_KERNEL);
> + if (minors == NULL) {
> + kfree(ptr);
> + return NULL;
> + }
> +
> + minors->bitmap = kzalloc(BITS_TO_LONGS(256) * sizeof(*minors->bitmap),
> + GFP_KERNEL);
> + if (minors->bitmap == NULL) {
> + kfree(minors);
> + kfree(ptr);
> + return NULL;
> + }
> +
> + spin_lock_init(&minors->lock);
> + minors->nr = 256;
> do_register = 1;
>
> switch (index) {
> @@ -143,13 +168,19 @@ xlbd_alloc_major_info(int major, int min
> * if someone already registered block major 202,
> * don't try to register it again
> */
> - if (major_info[XLBD_MAJOR_VBD_START] != NULL)
> + if (major_info[XLBD_MAJOR_VBD_ALT(index)] != NULL) {
> + kfree(minors->bitmap);
> + kfree(minors);
> + minors = major_info[XLBD_MAJOR_VBD_ALT(index)]->minors;
> do_register = 0;
> + }
> break;
> }
>
> if (do_register) {
> if (register_blkdev(ptr->major, ptr->type->devname)) {
> + kfree(minors->bitmap);
> + kfree(minors);
> kfree(ptr);
> return NULL;
> }
> @@ -157,6 +188,7 @@ xlbd_alloc_major_info(int major, int min
> printk("xen-vbd: registered block device major %i\n",
> ptr->major);
> }
>
> + ptr->minors = minors;
> major_info[index] = ptr;
> return ptr;
> }
> @@ -209,6 +241,61 @@ xlbd_put_major_info(struct xlbd_major_in
> }
>
> static int
> +xlbd_reserve_minors(struct xlbd_major_info *mi, unsigned int minor,
> + unsigned int nr_minors)
> +{
> + struct xlbd_minor_state *ms = mi->minors;
> + unsigned int end = minor + nr_minors;
> + int rc;
> +
> + if (end > ms->nr) {
> + unsigned long *bitmap, *old;
> +
> + bitmap = kzalloc(BITS_TO_LONGS(end) * sizeof(*bitmap),
> + GFP_KERNEL);
> + if (bitmap == NULL)
> + return -ENOMEM;
> +
> + spin_lock(&ms->lock);
> + if (end > ms->nr) {
> + old = ms->bitmap;
> + memcpy(bitmap, ms->bitmap,
> + BITS_TO_LONGS(ms->nr) * sizeof(*bitmap));
> + ms->bitmap = bitmap;
> + ms->nr = BITS_TO_LONGS(end) * BITS_PER_LONG;
> + } else
> + old = bitmap;
> + spin_unlock(&ms->lock);
> + kfree(old);
> + }
> +
> + spin_lock(&ms->lock);
> + if (find_next_bit(ms->bitmap, end, minor) >= end) {
> + for (; minor < end; ++minor)
> + __set_bit(minor, ms->bitmap);
> + rc = 0;
> + } else
> + rc = -EBUSY;
> + spin_unlock(&ms->lock);
> +
> + return rc;
> +}
> +
> +static void
> +xlbd_release_minors(struct xlbd_major_info *mi, unsigned int minor,
> + unsigned int nr_minors)
> +{
> + struct xlbd_minor_state *ms = mi->minors;
> + unsigned int end = minor + nr_minors;
> +
> + BUG_ON(end > ms->nr);
> + spin_lock(&ms->lock);
> + for (; minor < end; ++minor)
> + __clear_bit(minor, ms->bitmap);
> + spin_unlock(&ms->lock);
> +}
> +
> +static int
> xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
> {
> request_queue_t *rq;
> @@ -285,9 +372,14 @@ xlvbd_add(blkif_sector_t capacity, int v
> if ((minor & ((1 << mi->type->partn_shift) - 1)) == 0)
> nr_minors = 1 << mi->type->partn_shift;
>
> + err = xlbd_reserve_minors(mi, minor, nr_minors);
> + if (err)
> + goto out;
> + err = -ENODEV;
> +
> gd = alloc_disk(nr_minors);
> if (gd == NULL)
> - goto out;
> + goto release;
>
> offset = mi->index * mi->type->disks_per_major +
> (minor >> mi->type->partn_shift);
> @@ -326,7 +418,7 @@ xlvbd_add(blkif_sector_t capacity, int v
>
> if (xlvbd_init_blk_queue(gd, sector_size)) {
> del_gendisk(gd);
> - goto out;
> + goto release;
> }
>
> info->rq = gd->queue;
> @@ -346,6 +438,8 @@ xlvbd_add(blkif_sector_t capacity, int v
>
> return 0;
>
> + release:
> + xlbd_release_minors(mi, minor, nr_minors);
> out:
> if (mi)
> xlbd_put_major_info(mi);
> @@ -356,14 +450,19 @@ xlvbd_add(blkif_sector_t capacity, int v
> void
> xlvbd_del(struct blkfront_info *info)
> {
> + unsigned int minor, nr_minors;
> +
> if (info->mi == NULL)
> return;
>
> BUG_ON(info->gd == NULL);
> + minor = info->gd->first_minor;
> + nr_minors = info->gd->minors;
> del_gendisk(info->gd);
> put_disk(info->gd);
> info->gd = NULL;
>
> + xlbd_release_minors(info->mi, minor, nr_minors);
> xlbd_put_major_info(info->mi);
> info->mi = NULL;
>
>
>
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|