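Misaligned access with respect to the physical block size is a major
performance killer. Alignment offsets are for devices which are
'naturally misaligned', e.g. to compensate for things like DOS
63-sectors-per-track partitioning. Accept a BLKTAP_DEVICE_FLAG_PSZ flag
in the device info, and expect userspace to figure out the physical
details (physical sector size and alignment offset). Without the flag,
the physical block size defaults to the logical one, with offset 0.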
Signed-off-by: Daniel Stodden <daniel.stodden@xxxxxxxxxx>
---
drivers/block/blktap/device.c | 37 +++++++++++++++++++++++++++++++++----
drivers/block/blktap/ring.c | 4 ++++
include/linux/blktap.h | 3 +++
3 files changed, 40 insertions(+), 4 deletions(-)
diff --git a/drivers/block/blktap/device.c b/drivers/block/blktap/device.c
index fcf16c9..0e8eb0a 100644
--- a/drivers/block/blktap/device.c
+++ b/drivers/block/blktap/device.c
@@ -285,10 +285,13 @@ blktap_device_configure(struct blktap *tap,
set_capacity(gd, info->capacity);
set_disk_ro(gd, !!(info->flags & BLKTAP_DEVICE_FLAG_RO));
- /* Hard sector size and max sectors impersonate the equiv. hardware. */
blk_queue_logical_block_size(rq, info->sector_size);
blk_queue_max_sectors(rq, 512);
+ /* Hard sector size and alignment in hardware */
+ blk_queue_physical_block_size(rq, info->phys_sector_size);
+ blk_queue_alignment_offset(rq, info->phys_sector_offset);
+
/* Each segment in a request is up to an aligned page in size. */
blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
blk_queue_max_segment_size(rq, PAGE_SIZE);
@@ -309,6 +312,7 @@ blktap_device_validate_info(struct blktap *tap,
struct blktap_device_info *info)
{
struct device *dev = tap->ring.dev;
+ unsigned int phys_mask;
int sector_order;
sector_order = ffs(info->sector_size) - 1;
@@ -321,11 +325,34 @@ blktap_device_validate_info(struct blktap *tap,
(info->capacity > ULLONG_MAX >> sector_order))
goto fail;
+ /* physical blocks default to logical ones */
+ if (!(info->flags & BLKTAP_DEVICE_FLAG_PSZ)) {
+ info->phys_sector_size = info->sector_size;
+ info->phys_sector_offset = 0;
+ }
+
+ /* phys block size is 2^n and >= logical */
+ sector_order = ilog2(info->phys_sector_size);
+ if (sector_order < 9 ||
+ info->phys_sector_size != 1U<<sector_order ||
+ info->phys_sector_size < info->sector_size)
+ goto fail;
+
+ /* alignment offset < physical/logical */
+ phys_mask = (info->phys_sector_size /
+ info->sector_size) - 1;
+ if ((info->phys_sector_offset & ~phys_mask) != 0)
+ goto fail;
+
return 0;
fail:
- dev_err(dev, "capacity: %llu, sector-size: %u\n",
- info->capacity, info->sector_size);
+ dev_err(dev,
+ "capacity: %llu, sector-size: %u/%u, "
+ "phys-offset: %u\n",
+ info->capacity, info->sector_size,
+ info->phys_sector_size, info->phys_sector_offset);
+
return -EINVAL;
}
@@ -473,9 +500,11 @@ blktap_device_create(struct blktap *tap, struct blktap_device_info *info)
set_bit(BLKTAP_DEVICE, &tap->dev_inuse);
- dev_info(disk_to_dev(gd), "sector-size: %u/%u capacity: %llu\n",
+ dev_info(disk_to_dev(gd),
+ "sector-size: %u/%u phys-offset: %d capacity: %llu\n",
queue_logical_block_size(rq),
queue_physical_block_size(rq),
+ queue_alignment_offset(rq),
(unsigned long long)get_capacity(gd));
return 0;
diff --git a/drivers/block/blktap/ring.c b/drivers/block/blktap/ring.c
index 134583d..25bd311 100644
--- a/drivers/block/blktap/ring.c
+++ b/drivers/block/blktap/ring.c
@@ -446,6 +446,7 @@ blktap_ring_ioctl(struct inode *inode, struct file *filp,
size_t base_sz, sz;
mask = BLKTAP_DEVICE_FLAG_RO;
+ mask |= BLKTAP_DEVICE_FLAG_PSZ;
memset(&info, 0, sizeof(info));
sz = base_sz = BLKTAP_INFO_SIZE_AT(flags);
@@ -453,6 +454,9 @@ blktap_ring_ioctl(struct inode *inode, struct file *filp,
if (copy_from_user(&info, ptr, sz))
return -EFAULT;
+ if ((info.flags & BLKTAP_DEVICE_FLAG_PSZ) != 0)
+ sz = BLKTAP_INFO_SIZE_AT(phys_sector_offset);
+
if (sz > base_sz)
if (copy_from_user(&info, ptr, sz))
return -EFAULT;
diff --git a/include/linux/blktap.h b/include/linux/blktap.h
index 2c3c924..9a280d9 100644
--- a/include/linux/blktap.h
+++ b/include/linux/blktap.h
@@ -17,6 +17,7 @@
#define BLKTAP_IOCTL_REMOVE_DEVICE 207
#define BLKTAP_DEVICE_FLAG_RO 0x00000001UL /* disk is R/O */
+#define BLKTAP_DEVICE_FLAG_PSZ 0x00000002UL /* physical sector size */
struct blktap_info {
unsigned int ring_major;
@@ -28,6 +29,8 @@ struct blktap_device_info {
unsigned long long capacity;
unsigned int sector_size;
unsigned long flags;
+ unsigned int phys_sector_size;
+ unsigned int phys_sector_offset;
};
/*
--
1.7.0.4
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|