This includes various bits of patches which were
Signed-off-by: Jose Renato Santos <jsantos@xxxxxxxxxx>
Signed-off-by: Mitch Williams <mitch.a.williams@xxxxxxxxx>
Signed-off-by: Steven Smith <steven.smith@xxxxxxxxxx>
All bugs are mine, of course.
---
include/linux/netdevice.h | 5 +
include/linux/netvmq.h | 399 +++++++++++++++++++++++++++++++++++++++++++++
net/Kconfig | 6 +
3 files changed, 410 insertions(+), 0 deletions(-)
create mode 100644 include/linux/netvmq.h
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2b7b804..f439800 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -753,6 +753,11 @@ struct net_device
#define GSO_MAX_SIZE 65536
unsigned int gso_max_size;
+#ifdef CONFIG_NET_VMQ
+ /* multi-queue for virtualization */
+ struct net_vmq *vmq;
+#endif
+
#ifdef CONFIG_DCBNL
/* Data Center Bridging netlink ops */
struct dcbnl_rtnl_ops *dcbnl_ops;
diff --git a/include/linux/netvmq.h b/include/linux/netvmq.h
new file mode 100644
index 0000000..108807b
--- /dev/null
+++ b/include/linux/netvmq.h
@@ -0,0 +1,399 @@
+/******************************************************************************
+ * netvmq.h
+ *
+ * Interface between the I/O virtualization layer and multi-queue devices to
+ * enable direct data placement in guest memory
+ *
+ * Copyright (c) 2008, Jose Renato Santos, Hewlett-Packard Co.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+/*
+ * This file defines the vmq API for Linux network device drivers
+ * to enable the use of multi-queue NICs for virtualization.
+ * The goal is to enable network device drivers to dedicate
+ * each RX queue to a specific guest. This means network
+ * drivers should be able to allocate physical memory from
+ * the set of memory pages assigned to a specific guest.
+ *
+ * The interface between network device drivers and the virtualization
+ * layer has two components:
+ * 1) A set of functions implemented by the virtualization layer that
+ * can be called from new multi-queue network device drivers
+ * 2) A set of new functions implemented by the device drivers to support
+ * multi-queue
+ */
+
+#ifndef _NETVMQ_H
+#define _NETVMQ_H
+
+#ifdef CONFIG_NET_VMQ
+
+#include <linux/netdevice.h>
+
+/*
+ * Status flags for the flags field of struct vmq_queue.  Each flag has
+ * two macros: _VMQ_queue_<name> is the bit number and VMQ_queue_<name>
+ * is the corresponding bit mask.
+ */
+/* allocated/free queue (going by the name: bit set = allocated) */
+#define _VMQ_queue_allocated (0)
+#define VMQ_queue_allocated (1U<<_VMQ_queue_allocated)
+
+/* queue type, RX/TX (presumably bit set = RX queue -- TODO confirm) */
+#define _VMQ_queue_rx (1)
+#define VMQ_queue_rx (1U<<_VMQ_queue_rx)
+
+/* enabled/disabled queue (going by the name: bit set = enabled) */
+#define _VMQ_queue_enabled (2)
+#define VMQ_queue_enabled (1U<<_VMQ_queue_enabled)
+
+/*
+ * Queue type used to allocate a queue or to check the number of
+ * available queues (the queue_type argument of those operations).
+ * VMQ_TYPE_TX_RX is the bitwise OR of the RX and TX values.
+ */
+#define VMQ_TYPE_RX (1)
+#define VMQ_TYPE_TX (2)
+#define VMQ_TYPE_TX_RX (VMQ_TYPE_RX | VMQ_TYPE_TX)
+
+
+struct vmq_queue {
+ /*
+  * Per-queue state; struct net_vmq holds a vector of these entries.
+  * flags: queue flags - VMQ_queue_* bit masks
+  */
+ unsigned int flags;
+ /* pointer to an opaque struct with guest information; */
+ /* its format is specific to the virtualization layer used */
+ void *guest;
+ /* pointer to an opaque per-queue struct in the device driver */
+ void *devqueue;
+};
+typedef struct vmq_queue vmq_queue_t;
+
+struct net_vmq {
+ /*
+  * Multi-queue (vmq) state of a network device; struct net_device
+  * points to it through its vmq member.
+  *
+  * The function pointers below are the device driver specific
+  * functions for multi-queue.  Each one is invoked by the
+  * corresponding vmq_*() inline wrapper in this header, where its
+  * arguments and return value are described.
+  */
+
+ int (*avail_queues)(struct net_device *netdev,
+ unsigned int queue_type);
+ int (*alloc_queue)(struct net_device *netdev,
+ unsigned int queue_type);
+ int (*free_queue)(struct net_device *netdev, int queue);
+ int (*get_maxsize)(struct net_device *netdev);
+ int (*get_size)(struct net_device *netdev, int queue);
+ int (*set_size)(struct net_device *netdev, int queue, int size);
+ int (*set_mac)(struct net_device *netdev, int queue, u8 *mac_addr);
+ int (*set_vlan)(struct net_device *netdev, int queue, int vlan_id);
+ int (*enable)(struct net_device *netdev, int queue);
+ int (*disable)(struct net_device *netdev, int queue);
+
+ /* maximum number of vm queues that the device can allocate */
+ int nvmq;
+
+ /* variable size vector with per-queue info (struct vmq_queue) */
+ /* nvmq defines the vector size, i.e. the number of entries */
+ vmq_queue_t *queue;
+};
+typedef struct net_vmq net_vmq_t;
+
+/**
+ * alloc_vmq - Allocate net_vmq struct used for multi-queue devices
+ * @max_queues: Maximum number of queues that can be allocated
+ *	for virtualization; must not be negative
+ *
+ * Allocates a zeroed net_vmq together with a zeroed vector of
+ * @max_queues vmq_queue entries and records the vector size in ->nvmq.
+ * All function pointers start out NULL; the driver fills them in.
+ * Both allocations use GFP_KERNEL.
+ *
+ * Returns the new struct (release it with free_vmq()), or NULL if
+ * @max_queues is negative or memory could not be allocated.
+ */
+static inline net_vmq_t *alloc_vmq(int max_queues)
+{
+	net_vmq_t *vmq;
+
+	/* a negative count would turn into a huge size_t below */
+	if (max_queues < 0)
+		return NULL;
+
+	vmq = kzalloc(sizeof(*vmq), GFP_KERNEL);
+	if (!vmq)
+		return NULL;
+
+	/* kcalloc() fails cleanly if count * size would overflow */
+	vmq->queue = kcalloc(max_queues, sizeof(*vmq->queue), GFP_KERNEL);
+	if (!vmq->queue) {
+		kfree(vmq);
+		return NULL;
+	}
+	/* nvmq defines the size of the queue vector */
+	vmq->nvmq = max_queues;
+
+	return vmq;
+}
+
+/**
+ * free_vmq - Free net_vmq struct
+ * @vmq: pointer to net_vmq struct; may be NULL
+ *
+ * Frees the queue vector and then the net_vmq struct itself.  Like
+ * kfree(), a NULL @vmq is a no-op, so error paths can call this
+ * unconditionally.  The guest/devqueue pointers stored in the queue
+ * entries are not freed here.
+ */
+static inline void free_vmq(net_vmq_t *vmq)
+{
+	if (!vmq)
+		return;
+
+	kfree(vmq->queue);
+	kfree(vmq);
+}
+
+/*================================================================*
+ * 1) Functions provided by the virtualization layer to support *
+ * multi-queue devices. *
+ * Device drivers that support multi-queue should use these new *
+ * functions instead of the ones they replace *
+ *================================================================*/
+
+
+/* vmq_alloc_skb : This function should be used instead of the usual
+ * netdev_alloc_skb() in order to post RX buffers to a RX queue
+ * dedicated to a guest. Queues not dedicated to a guest should
+ * use the regular netdev_alloc_skb() function
+ *
+ * It will return buffers from memory belonging to a given guest
+ * The device driver should not try to change the data alignment
+ * or change the skb data pointer in any way.
+ * The function should already return an skb with the right alignment
+ *
+ * The device driver should be prepared to handle a NULL return value
+ * indicating no memory for that guest is currently available. In this case
+ * the device driver should only postpone the buffer allocation
+ * (probably until the next buffer is used by the device) and continue
+ * operating with the previously posted buffers
+ *
+ * netdev: network device allocating the skb
+ * queue: Queue id of a queue dedicated to a guest
+ * individual queues are identified by an integer in
+ * the range [0, MAX-1]. Negative values are used to indicate error
+ * The maximum number of queues (MAX) is determined by the device
+ *
+ * length: size to allocate
+ */
+struct sk_buff *vmq_alloc_skb(struct net_device *netdev, int queue,
+ unsigned int length);
+
+
+/* vmq_free_skb : Free an skb allocated with vmq_alloc_skb()
+ *
+ * skb: socket buffer to be freed
+ * queue: Queue id of a queue dedicated to a guest
+ * We could add a queue id field in sk_buff struct and avoid passing it
+ * as a parameter in vmq_free_skb() and vmq_netif_rx()
+ */
+void vmq_free_skb(struct sk_buff *skb, int queue);
+
+/* vmq_alloc_page : Allocate full pages from guest memory.
+ * This can only be used when the device MTU is larger than a page
+ * and multiple pages are needed to receive a packet.
+ *
+ * Similarly to vmq_alloc_skb(),
+ * the device driver should be prepared to handle a NULL return value
+ * indicating no memory for that guest is currently available. In this case
+ * the device driver should only postpone the buffer allocation
+ * (probably until the next buffer is used by the device) and continue
+ * operating with the previously posted buffers
+ *
+ * netdev: network device allocating the page
+ * queue: Queue id of a queue dedicated to a guest
+ * individual queues are identified by an integer in
+ * the range [0, MAX-1]. Negative values are used to indicate error
+ * The maximum number of queues (MAX) is determined by the device
+ */
+struct page *vmq_alloc_page(struct net_device *netdev, int queue);
+
+/* vmq_free_page : Free a guest page allocated with vmq_alloc_page()
+ *
+ * page: page to be freed
+ * queue: Queue id of a queue dedicated to a guest
+ * (presumably the id that was passed to vmq_alloc_page() - TODO confirm)
+ */
+void vmq_free_page(struct page *page, int queue);
+
+/*
+ * vmq_netif_rx: This function is a replacement for the generic netif_rx()
+ * and allows packets received on a particular queue to be forwarded directly
+ * to a particular guest, bypassing the regular network stack (the bridge in
+ * Xen). In Xen this function will be implemented by the Xen netback driver.
+ * The use of this function by the driver is optional and may be configured
+ * using a kernel CONFIG option (CONFIG option to be defined)
+ *
+ * skb: Received socket buffer
+ * queue: Queue id of a queue dedicated to a guest
+ * RETURN VALUE:
+ * not specified in this header; presumably mirrors netif_rx() - TODO confirm
+ */
+int vmq_netif_rx(struct sk_buff *skb, int queue);
+
+/*==============================================================*
+ * 2) New device driver functions for multi-queue devices *
+ *==============================================================*/
+
+/* vmq_avail_queues: Returns number of available queues that can be allocated
+ * It does not include already allocated queues or queues used for receive
+ * side scaling. It should return 0 when vmq_alloc_queue() would fail
+ *
+ * netdev: network device
+ * queue_type: Queue type, (VMQ_TYPE_*)
+ * RETURN VALUE:
+ * number of available queues as reported by the driver (0 if none);
+ * -EINVAL if the device has no vmq struct or the driver does not
+ * provide this operation
+ */
+static inline int vmq_avail_queues(struct net_device *netdev,
+				   unsigned int queue_type)
+{
+	/* ops start out NULL (zeroed struct), so check before calling */
+	if (!netdev->vmq || !netdev->vmq->avail_queues)
+		return -EINVAL;
+	return netdev->vmq->avail_queues(netdev, queue_type);
+}
+
+/* vmq_alloc_queue: allocate a queue
+ *
+ * netdev: network device
+ * queue_type: Queue type, (VMQ_TYPE_*)
+ * RETURN VALUE:
+ * queue id of the allocated queue as returned by the driver (the qid
+ * should be an integer which cannot exceed or be equal to the maximum
+ * number of queues);
+ * a negative value indicates error; -EINVAL is returned if the device
+ * has no vmq struct or the driver does not provide this operation
+ */
+static inline int vmq_alloc_queue(struct net_device *netdev,
+				  unsigned int queue_type)
+{
+	/* ops start out NULL (zeroed struct), so check before calling */
+	if (!netdev->vmq || !netdev->vmq->alloc_queue)
+		return -EINVAL;
+	return netdev->vmq->alloc_queue(netdev, queue_type);
+}
+
+/* vmq_free_queue: free a queue previously allocated with vmq_alloc_queue()
+ *
+ * netdev: network device
+ * queue: id of queue to be freed
+ * RETURN VALUE:
+ * the driver's return value: a negative value indicates error,
+ * 0 indicates success;
+ * -EINVAL if the device has no vmq struct or the driver does not
+ * provide this operation
+ */
+static inline int vmq_free_queue(struct net_device *netdev, int queue)
+{
+	/* ops start out NULL (zeroed struct), so check before calling */
+	if (!netdev->vmq || !netdev->vmq->free_queue)
+		return -EINVAL;
+	return netdev->vmq->free_queue(netdev, queue);
+}
+
+/* vmq_get_maxsize: Get maximum size that can be set for a queue
+ * (max number of HW descriptors)
+ *
+ * netdev: network device
+ * RETURN VALUE:
+ * max size of a queue as reported by the driver;
+ * a negative value indicates error; -EINVAL is returned if the device
+ * has no vmq struct or the driver does not provide this operation
+ */
+static inline int vmq_get_maxsize(struct net_device *netdev)
+{
+	/* ops start out NULL (zeroed struct), so check before calling */
+	if (!netdev->vmq || !netdev->vmq->get_maxsize)
+		return -EINVAL;
+	return netdev->vmq->get_maxsize(netdev);
+}
+
+/* vmq_get_size: Get size of queue (number of HW descriptors)
+ *
+ * netdev: network device
+ * queue: queue id
+ * RETURN VALUE:
+ * size of queue as reported by the driver;
+ * a negative value indicates error; -EINVAL is returned if the device
+ * has no vmq struct or the driver does not provide this operation
+ */
+static inline int vmq_get_size(struct net_device *netdev, int queue)
+{
+	/* ops start out NULL (zeroed struct), so check before calling */
+	if (!netdev->vmq || !netdev->vmq->get_size)
+		return -EINVAL;
+	return netdev->vmq->get_size(netdev, queue);
+}
+
+/* vmq_set_size: Set size of queue (number of HW descriptors)
+ * It can return error if size exceeds maximum hw capability;
+ * vmq_get_maxsize() reports the maximum HW queue size
+ *
+ * netdev: network device
+ * queue: queue id
+ * size: Queue size (number of HW descriptors)
+ * RETURN VALUE:
+ * the driver's return value: a negative value indicates error,
+ * 0 indicates success;
+ * -EINVAL if the device has no vmq struct or the driver does not
+ * provide this operation
+ */
+static inline int vmq_set_size(struct net_device *netdev, int queue, int size)
+{
+	/* ops start out NULL (zeroed struct), so check before calling */
+	if (!netdev->vmq || !netdev->vmq->set_size)
+		return -EINVAL;
+	return netdev->vmq->set_size(netdev, queue, size);
+}
+
+/* vmq_set_mac: Set MAC address filter for a queue
+ *
+ * netdev: network device
+ * queue: queue id
+ * mac_addr: pointer to a 6 byte array with the MAC address
+ * MAC address FF:FF:FF:FF:FF:FF is used to reset the filter
+ * RETURN VALUE:
+ * the driver's return value: a negative value indicates error,
+ * 0 indicates success;
+ * -EINVAL if the device has no vmq struct or the driver does not
+ * provide this operation
+ */
+static inline int vmq_set_mac(struct net_device *netdev, int queue,
+			      u8 *mac_addr)
+{
+	/* ops start out NULL (zeroed struct), so check before calling */
+	if (!netdev->vmq || !netdev->vmq->set_mac)
+		return -EINVAL;
+	return netdev->vmq->set_mac(netdev, queue, mac_addr);
+}
+
+/* vmq_set_vlan: Set VLAN filter for a queue
+ *
+ * netdev: network device
+ * queue: queue id
+ * vlan_id: VLAN id
+ * The invalid VLAN id -1 is used to reset the VLAN filter
+ * RETURN VALUE:
+ * the driver's return value: a negative value indicates error,
+ * 0 indicates success;
+ * -EINVAL if the device has no vmq struct or the driver does not
+ * provide this operation
+ */
+static inline int vmq_set_vlan(struct net_device *netdev, int queue,
+			       int vlan_id)
+{
+	/* ops start out NULL (zeroed struct), so check before calling */
+	if (!netdev->vmq || !netdev->vmq->set_vlan)
+		return -EINVAL;
+	return netdev->vmq->set_vlan(netdev, queue, vlan_id);
+}
+
+/* vmq_enable_queue: Enable queue
+ * For receive queues this will trigger allocating and posting buffers
+ *
+ * netdev: network device
+ * queue: queue id
+ * RETURN VALUE:
+ * the driver's return value: a negative value indicates error,
+ * 0 indicates success;
+ * -EINVAL if the device has no vmq struct or the driver does not
+ * provide this operation
+ */
+static inline int vmq_enable_queue(struct net_device *netdev, int queue)
+{
+	/* ops start out NULL (zeroed struct), so check before calling */
+	if (!netdev->vmq || !netdev->vmq->enable)
+		return -EINVAL;
+	return netdev->vmq->enable(netdev, queue);
+}
+
+/* vmq_disable_queue: Disable queue
+ * This will flush all buffers in the queue and will free the respective
+ * skb's or fragment pages
+ *
+ * netdev: network device
+ * queue: queue id
+ * RETURN VALUE:
+ * the driver's return value: a negative value indicates error,
+ * 0 indicates success;
+ * -EINVAL if the device has no vmq struct or the driver does not
+ * provide this operation
+ */
+static inline int vmq_disable_queue(struct net_device *netdev, int queue)
+{
+	/* ops start out NULL (zeroed struct), so check before calling */
+	if (!netdev->vmq || !netdev->vmq->disable)
+		return -EINVAL;
+	return netdev->vmq->disable(netdev, queue);
+}
+
+#endif /* CONFIG_NET_VMQ */
+
+#endif /* _NETVMQ_H */
diff --git a/net/Kconfig b/net/Kconfig
index 0732cb3..7837a9e 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -37,6 +37,12 @@ source "net/unix/Kconfig"
source "net/xfrm/Kconfig"
source "net/iucv/Kconfig"
+config NET_VMQ
+ bool "Virtual-machine multi-queue support"
+ default n
+ help
+ Add support for the VMQ features of certain modern network cards.
+
config INET
bool "TCP/IP networking"
---help---
--
1.6.3.1
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|