This patch provides a turn-based I/O request scheduler.
The turn-based I/O scheduler makes virtual block device threads control I/O
performance based on the number of requests per "turn".
--------------------------------------------------
Satoshi UCHIDA
NEC Corporation, Japan
# HG changeset patch
# User s-uchida@xxxxxxxxxxxxx
# Date 1184302442 -32400
# Node ID 216f7a2a56c03aecc08a40ed0f687874f56c89c5
# Parent aee77a9230c1b19873e60761d080af517bbfb189
[IOMGR] Add the turn-based I/O scheduler.
This module controls I/O requests based on the number of requests per "turn".
Signed-off-by Satoshi UCHIDA <s-uchida@xxxxxxxxxxxxx>
diff -r aee77a9230c1 -r 216f7a2a56c0 drivers/xen/Kconfig
--- a/drivers/xen/Kconfig Fri Jun 15 13:33:47 2007 -0600
+++ b/drivers/xen/Kconfig Fri Jul 13 13:45:30 2007 +0900
@@ -74,6 +74,14 @@ config XEN_BLKDEV_TAP
The Backend I/O request manager framework provides interface
which makes backend driver to control I/O requests by I/O
control modules
+
+config XEN_IOSCHED_TURN
+	tristate "Turn-based I/O request scheduler"
+	depends on XEN_IOMGR
+	default m
+	help
+	  The turn-based I/O scheduler controls I/O requests based on the number
+	  of requests "per turn".
config XEN_NETDEV_BACKEND
tristate "Network-device backend driver"
diff -r aee77a9230c1 -r 216f7a2a56c0 drivers/xen/Makefile
--- a/drivers/xen/Makefile Fri Jun 15 13:33:47 2007 -0600
+++ b/drivers/xen/Makefile Fri Jul 13 13:45:30 2007 +0900
@@ -11,6 +11,7 @@ obj-y += util.o
obj-$(CONFIG_XEN_BLKDEV_BACKEND) += blkback/
obj-$(CONFIG_XEN_BLKDEV_TAP) += blktap/
obj-$(CONFIG_XEN_IOMGR) += iomgr/
+obj-$(CONFIG_XEN_IOSCHED_TURN) += iomgr/
obj-$(CONFIG_XEN_NETDEV_BACKEND) += netback/
obj-$(CONFIG_XEN_TPMDEV_BACKEND) += tpmback/
obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += blkfront/
diff -r aee77a9230c1 -r 216f7a2a56c0 drivers/xen/iomgr/Makefile
--- a/drivers/xen/iomgr/Makefile Fri Jul 13 13:52:39 2007 +0900
+++ b/drivers/xen/iomgr/Makefile Fri Jul 13 13:52:39 2007 +0900
@@ -1,3 +1,4 @@
xeniomgr-y := iomgr.o
+# Append with "+=": ":=" would discard objects already assigned to obj-y/obj-m.
+obj-$(CONFIG_XEN_IOSCHED_TURN) += turn_iosched.o
diff -r aee77a9230c1 -r 216f7a2a56c0 drivers/xen/iomgr/turn_iosched.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/drivers/xen/iomgr/turn_iosched.c Fri Jul 13 13:54:02 2007 +0900
@@ -0,0 +1,368 @@
+/*****************************************************************************
+ * turn_iosched.c
+ *
+ * Manages the stream of I/O requests among virtual machines.
+ * The turn-based I/O scheduler controls the amount of requests in a turn.
+ *
+ * Copyright(c) 2007, Satoshi UCHIDA, NEC Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/wait.h>
+#include <linux/list.h>
+#include <asm/atomic.h>
+#include <linux/kthread.h>
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+#include <xen/iomgr.h>
+#include <xen/xenbus.h>
+
+
+/*
+ * Request capacity given per turn to each newly created device entry.
+ * The permission argument must be octal: decimal 644 is 01204, not rw-r--r--.
+ */
+static int default_max_cap = 64;
+module_param(default_max_cap, int, 0644);
+
+/* VBD/TAP threads that have used up their capacity sleep on this queue. */
+static DECLARE_WAIT_QUEUE_HEAD(noncap_wq);
+
+/* Head of the list holding one turn_data entry per device. */
+struct hlist_head turn_hash;
+
+
+/*
+ * Turn scheduler parameters kept for one VBD/TAP device.
+ * Entries are linked into turn_hash and looked up by their dev pointer.
+ */
+struct turn_data {
+ /* Identification of VBD/TAP device */
+ struct xenbus_device *dev;
+
+ /* Set by turn_wake_up(); tells the device to reload req_cap from max_cap */
+ int update_f;
+
+ /* req_cap: capacity left in the current turn; max_cap: capacity per turn */
+ atomic_t req_cap;
+ atomic_t max_cap;
+
+ /* Linkage into turn_hash */
+ struct hlist_node list;
+};
+
+
+/*
+ * Look up the turn_data entry that belongs to a VBD/TAP device.
+ * Walks turn_hash and returns the first entry whose dev pointer equals
+ * @dev, or NULL when no entry matches.
+ */
+struct turn_data *find_turn_data(struct xenbus_device *dev)
+{
+	struct turn_data *found = NULL;
+	struct turn_data *cur;
+	struct hlist_node *pos;
+
+	hlist_for_each_entry(cur, pos, &turn_hash, list) {
+		if (cur->dev == dev) {
+			found = cur;
+			break;
+		}
+	}
+
+	return found;
+}
+
+
+/* Sysfs Interface */
+
+/*
+ * Show the capacity the device has left in the current turn.
+ * Returns the number of characters written to @buf, or -ENODEV when no
+ * turn_data entry is registered for the device (find_turn_data() returns
+ * NULL in that case and must not be dereferenced).
+ */
+static ssize_t show_req_cap(struct device *_dev,
+			    struct device_attribute *attr,
+			    char *buf)
+{
+	struct turn_data *data = find_turn_data(to_xenbus_device(_dev));
+
+	if (!data)
+		return -ENODEV;
+
+	return sprintf(buf, "%d\n", atomic_read(&data->req_cap));
+}
+
+DEVICE_ATTR(req_cap, S_IRUGO , show_req_cap, NULL);
+
+/*
+ * Show the per-turn capacity configured for the device.
+ * Returns the number of characters written to @buf, or -ENODEV when no
+ * turn_data entry is registered for the device (find_turn_data() returns
+ * NULL in that case and must not be dereferenced).
+ */
+static ssize_t show_max_cap(struct device *_dev,
+			    struct device_attribute *attr,
+			    char *buf)
+{
+	struct turn_data *data = find_turn_data(to_xenbus_device(_dev));
+
+	if (!data)
+		return -ENODEV;
+
+	return sprintf(buf, "%d\n", atomic_read(&data->max_cap));
+}
+
+/*
+ * Set the per-turn capacity of the device from a decimal string.
+ * The value is copied into req_cap the next time turn_waiting_request()
+ * reloads the capacity.
+ * Returns the length of the input, -EINVAL when @buf does not start with a
+ * decimal number or the number does not fit in an int, or -ENODEV when no
+ * turn_data entry is registered for the device.
+ */
+static ssize_t store_max_cap(struct device *_dev,
+			     struct device_attribute *attr,
+			     const char *buf,
+			     size_t count)
+{
+	struct turn_data *data = find_turn_data(to_xenbus_device(_dev));
+	unsigned long val;
+	char *endp;
+
+	if (!data)
+		return -ENODEV;
+
+	val = simple_strtoul(buf, &endp, 10);
+	/* A value above INT_MAX would turn negative once stored in the atomic_t. */
+	if (endp == buf || val > INT_MAX)
+		return -EINVAL;
+
+	atomic_set(&data->max_cap, (int)val);
+
+	return strnlen(buf, count);
+}
+
+DEVICE_ATTR(max_cap, S_IRUGO | S_IWUSR, show_max_cap, store_max_cap);
+
+/* Attributes exported for each device: req_cap and max_cap. */
+static struct attribute *turn_attrs[] = {
+ &dev_attr_req_cap.attr,
+ &dev_attr_max_cap.attr,
+ NULL
+};
+
+/* Attribute group named "iomgr" that bundles the attributes above. */
+static struct attribute_group turn_group = {
+ .name = "iomgr",
+ .attrs = turn_attrs,
+};
+
+/*
+ * Create the "iomgr" attribute group on the device's kobject.
+ * Returns 0 on success or the error reported by sysfs_create_group().
+ * There is nothing to undo on failure: the group was not created, so it
+ * must not be handed to sysfs_remove_group().
+ */
+int turn_sysfs_addif(struct xenbus_device *dev)
+{
+	return sysfs_create_group(&dev->dev.kobj, &turn_group);
+}
+
+/* Remove the "iomgr" attribute group from the device's kobject. */
+void turn_sysfs_delif(struct xenbus_device *dev)
+{
+ sysfs_remove_group(&dev->dev.kobj, &turn_group);
+}
+
+
+/*
+ * Create the turn parameters for a VBD/TAP device and link them into
+ * turn_hash.
+ * If an entry already exists for @dev it is returned unchanged, so a device
+ * is never listed twice.
+ * Returns the entry, or NULL when memory allocation fails.  A failure to
+ * create the sysfs group is logged but does not fail the call.
+ */
+struct turn_data *create_turn_data(struct xenbus_device *dev)
+{
+	struct turn_data *data;
+	int cap = default_max_cap;	/* one snapshot for both counters */
+	int error;
+
+	data = find_turn_data(dev);
+	if (data)
+		return data;
+
+	/* kzalloc() zeroes the entry, which also clears update_f. */
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return NULL;
+
+	/* Fill in every field before the entry becomes reachable via turn_hash. */
+	data->dev = dev;
+	atomic_set(&data->max_cap, cap);
+	atomic_set(&data->req_cap, cap);
+	hlist_add_head(&data->list, &turn_hash);
+
+	error = turn_sysfs_addif(dev);
+	if (error)
+		printk(KERN_WARNING
+		       "TURN_IOSCHED : sysfs registration failed (%d)\n",
+		       error);
+
+	return data;
+}
+
+
+/*
+ * Start a new turn: flag every entry in turn_hash so that its capacity is
+ * reloaded, then wake the threads sleeping on noncap_wq.
+ */
+void turn_wake_up(void)
+{
+	struct turn_data *cur;
+	struct hlist_node *pos;
+
+	hlist_for_each_entry(cur, pos, &turn_hash, list)
+		cur->update_f = 1;
+
+	wake_up(&noncap_wq);
+}
+
+
+/*
+ * Waiting condition for the turn-based I/O scheduler.
+ *
+ * Does nothing when no turn_data entry exists for @dev.  When the entry is
+ * flagged for update, its capacity is reloaded from max_cap right away.
+ * Otherwise, if the capacity has reached zero, the calling thread sleeps on
+ * noncap_wq until the entry is flagged for update, turn_hash becomes empty
+ * or the kthread is asked to stop, and the capacity is reloaded afterwards.
+ * Always returns 0.
+ */
+int turn_waiting_request(struct xenbus_device *dev)
+{
+	struct turn_data *data = find_turn_data(dev);
+
+	if (!data)
+		return 0;
+
+	if (!data->update_f) {
+		/* Capacity left and no new turn announced: nothing to do. */
+		if (!atomic_sub_and_test(0, &data->req_cap))
+			return 0;
+
+		wait_event_interruptible(
+			noncap_wq,
+			(data->update_f) ||
+			hlist_empty(&turn_hash) ||
+			kthread_should_stop());
+	}
+
+	/* Reload the capacity for the new turn and acknowledge the flag. */
+	data->req_cap = data->max_cap;
+	data->update_f = 0;
+
+	return 0;
+}
+
+
+/*
+ * Check whether the VBD/TAP thread still has capacity in the current turn.
+ * Creates the turn_data entry on the fly when none exists for @dev yet.
+ * Returns 0 when a request may be issued, -IOMGR_ALLOW_NG when the
+ * remaining capacity is zero, or -ENOMEM when the entry cannot be created.
+ */
+int turn_allow_request(struct xenbus_device *dev)
+{
+	struct turn_data *data;
+
+	data = find_turn_data(dev);
+	if (data == NULL) {
+		data = create_turn_data(dev);
+		if (data == NULL) {
+			/* The literal must stay on one line to compile. */
+			printk("FAILED : TURN_IOSCHED : allocating paramater\n");
+			return -ENOMEM;
+		}
+	}
+
+	if (atomic_sub_and_test(0, &data->req_cap))
+		return -IOMGR_ALLOW_NG;
+
+	return 0;
+}
+
+
+/*
+ * Account for a newly allocated request by taking one unit from the
+ * remaining capacity of the VBD/TAP thread.
+ * Does nothing when no turn_data entry exists for @dev.  Always returns 0.
+ */
+int turn_alloc_request(struct xenbus_device *dev)
+{
+	struct turn_data *data = find_turn_data(dev);
+
+	if (data)
+		atomic_dec(&data->req_cap);
+
+	return 0;
+}
+
+
+/*
+ * Advance to the next turn once no request is pending any more.
+ * At that point either
+ *   1. all threads have run out of capacity, or
+ *   2. no thread has a request that can be processed right away,
+ * so every device is flagged for a capacity reload and the waiters are woken.
+ * @dev is not used.
+ */
+void turn_free_request(struct xenbus_device *dev)
+{
+	if (!atomic_sub_and_test(0, num_pending_req()))
+		return;
+
+	turn_wake_up();
+}
+
+
+/*
+ * Management when VBD/TAP thread starts.
+ * Sets up the turn parameters for @dev through create_turn_data().
+ * Returns 0 on success, or -ENOMEM (after logging) when that call
+ * returns NULL.
+ */
+int turn_dev_start(struct xenbus_device *dev)
+{
+	if (create_turn_data(dev) != NULL)
+		return 0;
+
+	printk("FAILED : TURN_IOSCHED : allocating paramater\n");
+	return -ENOMEM;
+}
+
+
+/*
+ * Management when VBD/TAP thread stops.
+ * Unlinks the device's turn_data entry from turn_hash, removes its sysfs
+ * group and frees the entry.  Does nothing when no entry exists for @dev.
+ */
+void turn_dev_stop(struct xenbus_device *dev)
+{
+	struct turn_data *data = find_turn_data(dev);
+
+	if (data == NULL)
+		return;
+
+	hlist_del(&data->list);
+	turn_sysfs_delif(data->dev);
+	kfree(data);
+}
+
+
+/*
+ * Definition of the turn-based I/O scheduler.
+ * This is the object passed to xen_iomgr_register() and
+ * xen_iomgr_unregister(); it maps each I/O manager hook to the turn_*
+ * function implementing it.
+ */
+static struct iomgr iomgr_turn = {
+ .ops = {
+ .iomgr_waiting_request_fn = turn_waiting_request,
+ .iomgr_allow_request_fn = turn_allow_request,
+ .iomgr_alloc_request_fn = turn_alloc_request,
+ /* this hook is left unset */
+ .iomgr_oo_abort_request_fn = NULL,
+ .iomgr_free_request_fn = turn_free_request,
+ .iomgr_dev_start_fn = turn_dev_start,
+ .iomgr_dev_stop_fn = turn_dev_stop,
+ },
+ .iomgr_name = "TURN",
+};
+
+
+/*
+ * Initializing function.
+ * Resets turn_hash to an empty list and registers the scheduler; the
+ * result of xen_iomgr_register() is returned as the module init status.
+ */
+static int __init turn_init(void)
+{
+ INIT_HLIST_HEAD(&turn_hash);
+ return xen_iomgr_register(&iomgr_turn);
+}
+module_init(turn_init);
+
+
+/*
+ * Finishing function.
+ * Unregisters the scheduler, releases every remaining turn_data entry
+ * together with its sysfs group, and wakes the threads sleeping on
+ * noncap_wq.
+ *
+ * NOTE(review): a thread sleeping in turn_waiting_request() still holds a
+ * pointer to its entry and touches it after being woken; confirm that
+ * xen_iomgr_unregister() guarantees no thread is inside the scheduler when
+ * the entries are freed here.
+ */
+static void __exit turn_exit(void)
+{
+	struct hlist_node *pos, *next;
+	struct turn_data *cur;
+
+	xen_iomgr_unregister(&iomgr_turn);
+
+	/* The iterator never yields a NULL entry, so no per-entry check is needed. */
+	hlist_for_each_entry_safe(cur, pos, next, &turn_hash, list) {
+		/* Unlink first so the list never points at freed memory. */
+		hlist_del(&cur->list);
+		turn_sysfs_delif(cur->dev);
+		kfree(cur);
+	}
+
+	/* turn_hash is empty now, which satisfies the waiters' wake-up condition. */
+	wake_up(&noncap_wq);
+}
+module_exit(turn_exit);
+
+MODULE_AUTHOR("Satoshi UCHIDA");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("Turn-based IO scheduler");
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|