WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

[Xen-devel] [PATCH 19/22] Add the basic VMQ APIs. Nobody uses or implements them at the moment, but that will change shortly.

To: <xen-devel@xxxxxxxxxxxxxxxxxxx>
Subject: [Xen-devel] [PATCH 19/22] Add the basic VMQ APIs. Nobody uses or implements them at the moment, but that will change shortly.
From: <steven.smith@xxxxxxxxxx>
Date: Sun, 4 Oct 2009 16:04:12 +0100
Cc: Mitch Williams <mitch.a.williams@xxxxxxxxx>, Jose Renato Santos <jsantos@xxxxxxxxxx>, jean.guyader@xxxxxxxxxx, keir.fraser@xxxxxxxxxx, Steven Smith <steven.smith@xxxxxxxxxx>
Delivery-date: Sun, 04 Oct 2009 08:31:10 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
In-reply-to: <cover.1254666837.git.ssmith@xxxxxxxxxxxxxxxxxxxxxxxxxx>
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
References: <cover.1254666837.git.ssmith@xxxxxxxxxxxxxxxxxxxxxxxxxx>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
This includes various bits of patches which were

Signed-off-by: Jose Renato Santos <jsantos@xxxxxxxxxx>
Signed-off-by: Mitch Williams <mitch.a.williams@xxxxxxxxx>
Signed-off-by: Steven Smith <steven.smith@xxxxxxxxxx>

All bugs are mine, of course.
---
 include/linux/netdevice.h |    5 +
 include/linux/netvmq.h    |  399 +++++++++++++++++++++++++++++++++++++++++++++
 net/Kconfig               |    6 +
 3 files changed, 410 insertions(+), 0 deletions(-)
 create mode 100644 include/linux/netvmq.h

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2b7b804..f439800 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -753,6 +753,11 @@ struct net_device
 #define GSO_MAX_SIZE           65536
        unsigned int            gso_max_size;
 
+#ifdef CONFIG_NET_VMQ
+       /* multi-queue for virtualization */
+       struct net_vmq          *vmq;
+#endif
+
 #ifdef CONFIG_DCBNL
        /* Data Center Bridging netlink ops */
        struct dcbnl_rtnl_ops *dcbnl_ops;
diff --git a/include/linux/netvmq.h b/include/linux/netvmq.h
new file mode 100644
index 0000000..108807b
--- /dev/null
+++ b/include/linux/netvmq.h
@@ -0,0 +1,399 @@
+/******************************************************************************
+ * netvmq.h
+ *
+ * Interface between the I/O virtualization layer and multi-queue devices to
+ * enable direct data placement in guest memory
+ *
+ * Copyright (c) 2008, Jose Renato Santos, Hewlett-Packard Co.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+/*
+ * This file defines the vmq API for Linux network device drivers
+ * to enable the use of multi-queue NICs for virtualization.
+ * The goal is to enable network device drivers to dedicate
+ * each RX queue to a specific guest. This means network
+ * drivers should be able to allocate physical memory from
+ * the set of memory pages assigned to a specific guest.
+ *
+ * The interface between network device drivers and the virtualization
+ * layer has two components:
+ *   1) A set of functions implemented by the virtualization layer that
+ *      can be called from new multi-queue network device drivers
+ *   2) A set of new functions implemented by the device drivers to support
+ *    multi-queue
+ */
+
+#ifndef _NETVMQ_H
+#define _NETVMQ_H
+
+#ifdef CONFIG_NET_VMQ
+
+#include <linux/netdevice.h>
+
+/* status flags for vmq_queue struct  */
+/* allocated/free queue*/
+#define _VMQ_queue_allocated   (0)
+#define VMQ_queue_allocated    (1U<<_VMQ_queue_allocated)
+
+/* queue type. RX/TX */
+#define _VMQ_queue_rx          (1)
+#define VMQ_queue_rx           (1U<<_VMQ_queue_rx)
+
+/* enabled/disabled queue */
+#define _VMQ_queue_enabled     (2)
+#define VMQ_queue_enabled      (1U<<_VMQ_queue_enabled)
+
+/* queue type used to allocate or check number of available queues */
+#define VMQ_TYPE_RX            (1)
+#define VMQ_TYPE_TX            (2)
+#define VMQ_TYPE_TX_RX         (VMQ_TYPE_RX | VMQ_TYPE_TX)
+
+
+struct vmq_queue {
+       /* queue flags - VMQ_queue_*   */
+       unsigned int            flags;
+       /* pointer to opaque struct with guest information     */
+       /* format is specific to the virtualization layer used */
+       void                    *guest;
+       /* pointer to opaque struct in device driver */
+       void                    *devqueue;
+};
+typedef struct vmq_queue vmq_queue_t;
+
+struct net_vmq {
+       /* pointer to device driver specific functions for multi-queue */
+
+       int (*avail_queues)(struct net_device *netdev,
+                           unsigned int queue_type);
+       int (*alloc_queue)(struct net_device *netdev,
+                          unsigned int queue_type);
+       int (*free_queue)(struct net_device *netdev, int queue);
+       int (*get_maxsize)(struct net_device *netdev);
+       int (*get_size)(struct net_device *netdev, int queue);
+       int (*set_size)(struct net_device *netdev, int queue, int size);
+       int (*set_mac)(struct net_device *netdev, int queue, u8 *mac_addr);
+       int (*set_vlan)(struct net_device *netdev, int queue, int vlan_id);
+       int (*enable)(struct net_device *netdev, int queue);
+       int (*disable)(struct net_device *netdev, int queue);
+
+       /* maximum number of vm queues that device can allocate */
+       int                     nvmq;
+
+       /* Variable size Vector with queues info */
+       /* nvmq defines the vector size */
+       vmq_queue_t             *queue;
+};
+typedef struct net_vmq net_vmq_t;
+
+/**
+ *     alloc_vmq - Allocate net_vmq struct used for multi-queue devices
+ *     @max_queues: Maximum number of queues that can be allocated
+ *                  for virtualization
+ */
+static inline net_vmq_t *alloc_vmq(int max_queues)
+{
+       net_vmq_t *vmq;
+       vmq = kzalloc(sizeof(net_vmq_t), GFP_KERNEL);
+       if (!vmq)
+               return NULL;
+       vmq->queue = kzalloc(max_queues * sizeof(vmq_queue_t), GFP_KERNEL);
+       if (!vmq->queue) {
+               kfree(vmq);
+               return NULL;
+       }
+       return vmq;
+}
+
+/**
+ *     free_vmq - Free net_vmq struct
+ *     @vmq: pointer to net_vmq struct
+ */
+static inline void free_vmq(net_vmq_t *vmq)
+{
+       kfree(vmq->queue);
+       kfree(vmq);
+}
+
+/*================================================================*
+ * 1) Functions provided by the virtualization layer to support   *
+ * multi-queue devices.                                           *
+ * Device drivers that support multi-queue should use these new   *
+ * functions instead of the ones they replace                     *
+ *================================================================*/
+
+
+/* vmq_alloc_skb : This function should be used instead of the usual
+ * netdev_alloc_skb() in order to post RX buffers to a RX queue
+ * dedicated to a guest. Queues not dedicated to a guest should
+ * use the regular netdev_alloc_skb() function
+ *
+ * It will return buffers from memory belonging to a given guest
+ * The device driver should not try to change the data alignment
+ * or change the skb data pointer in any way.
+ * The function should already return an skb with the right alignment
+ *
+ * The device driver should be prepared to handle a NULL return value
+ * indicating no memory for that guest is currently available. In this case
+ * the device driver should only postpone the buffer allocation
+ * (probably until the next buffer is used by the device) and continue
+ * operating with the previously posted buffers
+ *
+ *   netdev: network device allocating the skb
+ *   queue: Queue id of a queue dedicated to a guest
+ *          individual queues are identified by an integer in
+ *          the range [0, MAX-1]. Negative values are used to indicate error.
+ *          The maximum number of queues (MAX) is determined by the device
+ *
+ *   length: size to allocate
+ */
+struct sk_buff *vmq_alloc_skb(struct net_device *netdev, int queue,
+                             unsigned int length);
+
+
+/* vmq_free_skb : Free an skb allocated with vmq_alloc_skb()
+ *
+ *   skb: socket buffer to be freed
+ *   queue: Queue id of a queue dedicated to a guest
+ *          We could add a qid field in sk_buff struct and avoid passing it
+ *          as a parameter in vmq_free_skb() and vmq_netif_rx()
+ */
+void vmq_free_skb(struct sk_buff *skb, int queue);
+
+/* vmq_alloc_page : Allocate full pages from guest memory.
+ * This can only be used when the device MTU is larger than a page
+ * and multiple pages are needed to receive a packet.
+ *
+ * Similarly to vmq_alloc_skb(),
+ * the device driver should be prepared to handle a NULL return value
+ * indicating no memory for that guest is currently available. In this case
+ * the device driver should only postpone the buffer allocation
+ * (probably until the next buffer is used by the device) and continue
+ * operating with the previously posted buffers
+ *
+ *   netdev: network device allocating the page
+ *   queue: Queue id of a queue dedicated to a guest
+ *          individual queues are identified by an integer in
+ *          the range [0, MAX-1]. Negative values are used to indicate error.
+ *          The maximum number of queues (MAX) is determined by the device
+ */
+struct page *vmq_alloc_page(struct net_device *netdev, int queue);
+
+/* vmq_free_page : Free a guest page allocated with vmq_alloc_page()
+ *
+ *   page: page to be freed
+ *   queue: Queue id of a queue dedicated to a guest
+ */
+void vmq_free_page(struct page *page, int queue);
+
+/*
+ * vmq_netif_rx: This function is a replacement for the generic netif_rx()
+ * and allows packets received on a particular queue to be forwarded directly
+ * to a particular guest bypassing the regular network stack (bridge in xen).
+ * In Xen this function will be implemented by the Xen netback driver.
+ * The use of this function by the driver is optional and may be configured
+ * using a kernel CONFIG option (CONFIG option to be defined)
+ *
+ *   skb: Received socket buffer
+ *   queue: Queue id of a queue dedicated to a guest
+ */
+int vmq_netif_rx(struct sk_buff *skb, int queue);
+
+/*==============================================================*
+ * 2) New device driver functions for multi-queue devices       *
+ *==============================================================*/
+
+/* vmq_avail_queues: Returns number of available queues that can be allocated
+ *     It does not include already allocated queues or queues used for receive
+ *     side scaling. It should return 0 when vmq_alloc_queue() would fail
+ *
+ *   netdev: network device
+ *   queue_type: Queue type, (VMQ_TYPE_*)
+ *   RETURN VALUE:
+ *     number of available queues;
+ *     a negative value indicates error
+ */
+static inline int vmq_avail_queues(struct net_device *netdev,
+                                  unsigned int queue_type)
+{
+       if (!netdev->vmq)
+               return -EINVAL;
+       return netdev->vmq->avail_queues(netdev, queue_type);
+}
+
+/* vmq_alloc_queue: allocate a queue
+ *
+ *   netdev: network device
+ *   queue_type: Queue type, (VMQ_TYPE_*)
+ *   RETURN VALUE:
+ *     queue id of the allocated queue (the qid should be an integer which
+ *       cannot exceed or be equal to the maximum number of queues);
+ *     a negative value indicates error
+ */
+static inline int vmq_alloc_queue(struct net_device *netdev,
+                                 unsigned int queue_type)
+{
+       if (!netdev->vmq)
+               return -EINVAL;
+       return netdev->vmq->alloc_queue(netdev, queue_type);
+}
+
+/* vmq_free_queue: free a queue previously allocated with vmq_alloc_queue()
+ *
+ *   netdev: network device
+ *   queue: id of queue to be freed
+ *   RETURN VALUE:
+ *     a negative value indicates error;
+ *     returns 0 on success
+ */
+static inline int vmq_free_queue(struct net_device *netdev, int queue)
+{
+       if (!netdev->vmq)
+               return -EINVAL;
+       return netdev->vmq->free_queue(netdev, queue);
+}
+
+/* vmq_get_maxsize: Get maximum size that can be set for a queue
+ * (max number of HW descriptors)
+ *
+ *   netdev: network device
+ *   RETURN VALUE:
+ *     max size of a queue;
+ *     a negative value indicates error
+ */
+static inline int vmq_get_maxsize(struct net_device *netdev)
+{
+       if (!netdev->vmq)
+               return -EINVAL;
+       return netdev->vmq->get_maxsize(netdev);
+}
+
+/* vmq_get_size: Get size of queue (number of HW descriptors)
+ *
+ *   netdev: network device
+ *   queue: queue id
+ *   RETURN VALUE:
+ *     size of queue;
+ *     a negative value indicates error
+ */
+static inline int vmq_get_size(struct net_device *netdev, int queue)
+{
+       if (!netdev->vmq)
+               return -EINVAL;
+       return netdev->vmq->get_size(netdev, queue);
+}
+
+/* vmq_set_size: Set size of queue (number of HW descriptors)
+ *   It can return an error if size exceeds the maximum HW capability;
+ *   use vmq_get_maxsize() to query the maximum HW queue size.
+ *
+ *   netdev: network device
+ *   queue: queue id
+ *   size: Queue size (number of HW descriptors)
+ *   RETURN VALUE:
+ *     a negative value indicates error,
+ *     returns 0 on success
+ */
+static inline int vmq_set_size(struct net_device *netdev, int queue, int size)
+{
+       if (!netdev->vmq)
+               return -EINVAL;
+       return netdev->vmq->set_size(netdev, queue, size);
+}
+
+/* vmq_set_mac: Set MAC address filter for a queue
+ *
+ *   netdev: network device
+ *   queue: queue id
+ *   mac_addr: pointer to a 6 byte array with the MAC address
+ *             MAC address FF:FF:FF:FF:FF:FF is used to reset the filter
+ *   RETURN VALUE:
+ *     a negative value indicates error,
+ *     returns 0 on success
+ */
+static inline int vmq_set_mac(struct net_device *netdev, int queue,
+                             u8 *mac_addr)
+{
+       if (!netdev->vmq)
+               return -EINVAL;
+       return netdev->vmq->set_mac(netdev, queue, mac_addr);
+}
+
+/* vmq_set_vlan: Set VLAN filter for a queue
+ *
+ *   netdev: network device
+ *   queue: queue id
+ *   vlan_id: VLAN id
+ *            The invalid VLAN id -1 is used to reset the VLAN filter
+ *   RETURN VALUE:
+ *     a negative value indicates error,
+ *     returns 0 on success
+ */
+static inline int vmq_set_vlan(struct net_device *netdev, int queue,
+                              int vlan_id)
+{
+       if (!netdev->vmq)
+               return -EINVAL;
+       return netdev->vmq->set_vlan(netdev, queue, vlan_id);
+}
+
+/* vmq_enable_queue: Enable queue
+ *     For receive queues this will trigger allocating and posting buffers
+ *
+ *   netdev: network device
+ *   queue: queue id
+ *   RETURN VALUE:
+ *     a negative value indicates error,
+ *     returns 0 on success
+ */
+static inline int vmq_enable_queue(struct net_device *netdev, int queue)
+{
+       if (!netdev->vmq)
+               return -EINVAL;
+       return netdev->vmq->enable(netdev, queue);
+}
+
+/* vmq_disable_queue: Disable queue
+ *     This will flush all buffers in the queue and will free the respective
+ *     skb's or fragment pages
+ *
+ *   netdev: network device
+ *   queue: queue id
+ *   RETURN VALUE:
+ *     a negative value indicates error,
+ *     returns 0 on success
+ */
+static inline int vmq_disable_queue(struct net_device *netdev, int queue)
+{
+       if (!netdev->vmq)
+               return -EINVAL;
+       return netdev->vmq->disable(netdev, queue);
+}
+
+#endif /* CONFIG_NET_VMQ */
+
+#endif /* _NETVMQ_H */
diff --git a/net/Kconfig b/net/Kconfig
index 0732cb3..7837a9e 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -37,6 +37,12 @@ source "net/unix/Kconfig"
 source "net/xfrm/Kconfig"
 source "net/iucv/Kconfig"
 
+config NET_VMQ
+       bool "Virtual-machine multi-queue support"
+       default n
+       help
+         Add support for the VMQ features of certain modern network cards.
+
 config INET
        bool "TCP/IP networking"
        ---help---
-- 
1.6.3.1


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

<Prev in Thread] Current Thread [Next in Thread>