This only includes the transmit half, because the receiver uses an
unmodified posted buffers mode implementation.
This includes various bits of patches which were contributed by the
people listed in the Signed-off-by lines below.
Signed-off-by: Jose Renato Santos <jsantos@xxxxxxxxxx>
Signed-off-by: Mitch Williams <mitch.a.williams@xxxxxxxxx>
Signed-off-by: Steven Smith <steven.smith@xxxxxxxxxx>
All bugs are mine, of course.
---
drivers/xen/Kconfig | 5 +
drivers/xen/netchannel2/Makefile | 4 +
drivers/xen/netchannel2/chan.c | 7 +-
drivers/xen/netchannel2/netback2.c | 9 +
drivers/xen/netchannel2/netchannel2_core.h | 10 +
drivers/xen/netchannel2/posted_buffer.h | 50 ++
drivers/xen/netchannel2/posted_buffers.c | 20 +-
drivers/xen/netchannel2/util.c | 8 +-
drivers/xen/netchannel2/vmq.c | 805 ++++++++++++++++++++++++++++
drivers/xen/netchannel2/vmq.h | 58 ++
drivers/xen/netchannel2/vmq_def.h | 68 +++
drivers/xen/netchannel2/xmit_packet.c | 6 +
12 files changed, 1029 insertions(+), 21 deletions(-)
create mode 100644 drivers/xen/netchannel2/posted_buffer.h
create mode 100644 drivers/xen/netchannel2/vmq.c
create mode 100644 drivers/xen/netchannel2/vmq.h
create mode 100644 drivers/xen/netchannel2/vmq_def.h
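
An illustrative sketch (not part of the patch): how a vmq-capable physical
NIC driver might drive the transmit-side interface exported by vmq.c below.
The driver hooks (example_refill_queue, example_rx_complete) and the exact
descriptor programming are hypothetical; vmq_alloc_skb(), vmq_netif_rx() and
vmq_free_skb() are the symbols this patch exports, presumably declared in
linux/netvmq.h as part of the separate NET_VMQ infrastructure the Kconfig
option depends on.

    #include <linux/netdevice.h>
    #include <linux/netvmq.h>

    /* Hypothetical refill path: pull an skb whose fragments are
     * already-mapped guest RX pages and post those pages to the HW
     * queue dedicated to this guest. */
    static int example_refill_queue(struct net_device *pdev, int qid)
    {
            struct sk_buff *skb = vmq_alloc_skb(pdev, qid, PAGE_SIZE);
            if (!skb)
                    return -ENOMEM; /* guest has not posted enough buffers yet */
            /* ... program skb_shinfo(skb)->frags[] into the HW RX descriptors ... */
            return 0;
    }

    /* Hypothetical RX-completion path: the HW has DMAed directly into
     * the guest pages, so hand the skb to netchannel2, which forwards
     * the pre-posted buffers to the guest without a copy. */
    static void example_rx_complete(struct sk_buff *skb, int qid,
                                    unsigned int len)
    {
            skb->len = len;
            skb->data_len = len;       /* all data lives in the frags */
            vmq_netif_rx(skb, qid);    /* queued on the vif's rx_queue */
    }

    /* If the HW hands a buffer back unused, return it with
     * vmq_free_skb(skb, qid) so the guest pages can be recycled. */
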
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index a7e5b5c..a37b0cd 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -234,6 +234,11 @@ config XEN_NETDEV2_FRONTEND
depends on XEN_NETCHANNEL2
default y
+config XEN_NETDEV2_VMQ
+ bool "Net channel 2 support for multi-queue devices"
+ depends on XEN_NETDEV2_BACKEND && NET_VMQ
+ default y
+
config XEN_NETDEV2_BYPASSABLE
bool "Net channel 2 bypassee support"
depends on XEN_NETDEV2_BACKEND
diff --git a/drivers/xen/netchannel2/Makefile b/drivers/xen/netchannel2/Makefile
index 11a257e..918d8d8 100644
--- a/drivers/xen/netchannel2/Makefile
+++ b/drivers/xen/netchannel2/Makefile
@@ -12,6 +12,10 @@ ifeq ($(CONFIG_XEN_NETDEV2_FRONTEND),y)
netchannel2-objs += netfront2.o
endif
+ifeq ($(CONFIG_XEN_NETDEV2_VMQ),y)
+netchannel2-objs += vmq.o
+endif
+
ifeq ($(CONFIG_XEN_NETDEV2_BYPASSABLE),y)
netchannel2-objs += bypassee.o
endif
diff --git a/drivers/xen/netchannel2/chan.c b/drivers/xen/netchannel2/chan.c
index 060b49b..8dad6fe 100644
--- a/drivers/xen/netchannel2/chan.c
+++ b/drivers/xen/netchannel2/chan.c
@@ -13,6 +13,7 @@
#include "netchannel2_endpoint.h"
#include "netchannel2_core.h"
+#include "vmq.h"
static int process_ring(struct napi_struct *napi,
int work_avail);
@@ -810,6 +811,8 @@ static int process_ring(struct napi_struct *napi,
/* Pick up incoming messages. */
work_done = nc2_poll(ncrp, work_avail, &rx_queue);
+ do_vmq_work(nc);
+
/* Transmit pending packets. */
if (!skb_queue_empty(&ncrp->pending_tx_queue)) {
skb = __skb_dequeue(&ncrp->pending_tx_queue);
@@ -828,9 +831,11 @@ static int process_ring(struct napi_struct *napi,
This must happen before we flush the rings, since
that's when the PACKET messages will be made
visible to the other end. */
- if (ncrp == &nc->rings)
+ if (ncrp == &nc->rings) {
flush_hypercall_batcher(&nc->batcher,
nc2_posted_on_gntcopy_fail);
+ vmq_flush_unmap_hypercall();
+ }
flush_rings(ncrp);
diff --git a/drivers/xen/netchannel2/netback2.c b/drivers/xen/netchannel2/netback2.c
index 129ef81..eb2a781 100644
--- a/drivers/xen/netchannel2/netback2.c
+++ b/drivers/xen/netchannel2/netback2.c
@@ -10,8 +10,13 @@
#include "netchannel2_core.h"
#include "netchannel2_endpoint.h"
#include "netchannel2_uspace.h"
+#include "vmq.h"
+#ifdef CONFIG_XEN_NETDEV2_VMQ
+#define NR_TX_BUFS (VMQ_MAX_BUFFERS+256)
+#else
#define NR_TX_BUFS 256
+#endif
static atomic_t next_handle;
/* A list of all currently-live netback2 interfaces. */
@@ -168,6 +173,8 @@ static int attach_to_frontend(struct netback2 *nd)
return err;
}
+ nc2_vmq_connect(nc);
+
/* All done */
nd->attached = 1;
@@ -176,6 +183,8 @@ static int attach_to_frontend(struct netback2 *nd)
static void nb2_shutdown(struct netchannel2 *nc)
{
+ nc2_vmq_disconnect(nc);
+
nc2_set_nr_tx_buffers(nc, 0);
}
diff --git a/drivers/xen/netchannel2/netchannel2_core.h b/drivers/xen/netchannel2/netchannel2_core.h
index 1939cbb..8e1657d 100644
--- a/drivers/xen/netchannel2/netchannel2_core.h
+++ b/drivers/xen/netchannel2/netchannel2_core.h
@@ -7,6 +7,8 @@
#include <linux/skbuff.h>
#include <linux/netdevice.h>
+#include "vmq_def.h"
+
/* After we send this number of frags, we request the other end to
* notify us when sending the corresponding finish packet message */
#define MAX_MAX_COUNT_FRAGS_NO_EVENT 192
@@ -43,6 +45,9 @@ enum transmit_policy {
transmit_policy_grant = transmit_policy_first,
transmit_policy_post,
transmit_policy_map,
+#ifdef CONFIG_XEN_NETDEV2_VMQ
+ transmit_policy_vmq,
+#endif
transmit_policy_small,
transmit_policy_last = transmit_policy_small
};
@@ -437,6 +442,11 @@ struct netchannel2 {
struct hypercall_batcher batcher;
+#ifdef CONFIG_XEN_NETDEV2_VMQ
+ /* vmq data for supporting multi-queue devices */
+ nc2_vmq_t vmq;
+#endif
+
#ifdef CONFIG_XEN_NETDEV2_AUTOMATIC_BYPASS
struct nc2_auto_bypass auto_bypass;
#endif
diff --git a/drivers/xen/netchannel2/posted_buffer.h b/drivers/xen/netchannel2/posted_buffer.h
new file mode 100644
index 0000000..e249777
--- /dev/null
+++ b/drivers/xen/netchannel2/posted_buffer.h
@@ -0,0 +1,50 @@
+/* Buffer management related bits, shared between vmq.c and
+ * posted_buffers.c */
+#ifndef NC2_POSTED_BUFFER_H__
+#define NC2_POSTED_BUFFER_H__
+
+/* A buffer which the other end has provided us which we can use to
+ transmit packets to it. */
+struct nc2_tx_buffer {
+ struct list_head list;
+ uint32_t id; /* ID assigned by the remote endpoint. */
+ grant_ref_t gref;
+ uint16_t off_in_page;
+ uint16_t size;
+ grant_handle_t grant_handle;
+};
+
+/* Add a buffer to the pending list of buffers to be returned to the other end */
+static inline void return_tx_buffer(struct netchannel2 *nc,
+ struct nc2_tx_buffer *buffer)
+{
+ list_add(&buffer->list, &nc->pending_tx_buffer_return);
+}
+
+static inline struct nc2_tx_buffer *_get_tx_buffer(struct netchannel2 *nc)
+{
+ struct nc2_tx_buffer *buffer;
+ struct list_head *entry = nc->avail_tx_buffers.next;
+ list_del(entry);
+ buffer = list_entry(entry, struct nc2_tx_buffer, list);
+ nc->nr_avail_tx_buffers--;
+ return buffer;
+}
+
+/* recycle a posted buffer: return it to the list of available buffers */
+static inline void recycle_tx_buffer(struct netchannel2 *nc,
+ struct nc2_tx_buffer *buffer)
+{
+ list_add(&buffer->list, &nc->avail_tx_buffers);
+ nc->nr_avail_tx_buffers++;
+}
+
+/* add a buffer slot to list of unused buffer slots after it has been
+ * returned to other end */
+static inline void free_tx_buffer(struct netchannel2 *nc,
+ struct nc2_tx_buffer *buffer)
+{
+ list_add(&buffer->list, &nc->unused_tx_buffer_slots);
+}
+
+#endif /* !NC2_POSTED_BUFFER_H__ */
diff --git a/drivers/xen/netchannel2/posted_buffers.c b/drivers/xen/netchannel2/posted_buffers.c
index 96de7da..9fb7570 100644
--- a/drivers/xen/netchannel2/posted_buffers.c
+++ b/drivers/xen/netchannel2/posted_buffers.c
@@ -9,6 +9,7 @@
#include <xen/live_maps.h>
#include "netchannel2_endpoint.h"
#include "netchannel2_core.h"
+#include "posted_buffer.h"
#define POSTED_BUFFER_SIZE PAGE_SIZE
@@ -350,17 +351,6 @@ void nc2_handle_set_nr_posted_buffers(struct netchannel2 *nc,
/* -------------------------- Transmit ------------------------------- */
-/* A buffer which the other end has provided us which we can use to
- transmit packets to it. */
-struct nc2_tx_buffer {
- struct list_head list;
- uint32_t id; /* ID assigned by the remote endpoint. */
- grant_ref_t gref;
- uint16_t off_in_page;
- uint16_t size;
- grant_handle_t grant_handle;
-};
-
/* A representation of a packet which is halfway through being
prepared for transmission. */
struct post_packet_plan {
@@ -373,14 +363,6 @@ struct post_packet_plan {
volatile struct netchannel2_fragment *output_frag;
};
-/* add a buffer slot to list of unused buffer slots after it has been
- * returned to other end */
-static void free_tx_buffer(struct netchannel2 *nc,
- struct nc2_tx_buffer *buffer)
-{
- list_add(&buffer->list, &nc->unused_tx_buffer_slots);
-}
-
/* A grant copy failed while we were transmitting a packet. That
indicates that the *receiving* domain gave us a bad RX buffer.
We're too late to send them an error, so there isn't really
diff --git a/drivers/xen/netchannel2/util.c b/drivers/xen/netchannel2/util.c
index 79d9f09..1d96256 100644
--- a/drivers/xen/netchannel2/util.c
+++ b/drivers/xen/netchannel2/util.c
@@ -34,7 +34,13 @@ int allocate_txp_slot(struct netchannel2_ring_pair *ncrp,
static void nc2_free_skb(struct netchannel2 *nc,
struct sk_buff *skb)
{
- dev_kfree_skb(skb);
+#ifdef CONFIG_XEN_NETDEV2_VMQ
+ nc2_vmq_t *vmq = &nc->vmq;
+ if (get_skb_overlay(skb)->policy == transmit_policy_vmq)
+ skb_queue_tail(&vmq->dealloc_queue, skb);
+ else
+#endif
+ dev_kfree_skb(skb);
}
void release_txp_slot(struct netchannel2_ring_pair *ncrp,
diff --git a/drivers/xen/netchannel2/vmq.c b/drivers/xen/netchannel2/vmq.c
new file mode 100644
index 0000000..e36962b
--- /dev/null
+++ b/drivers/xen/netchannel2/vmq.c
@@ -0,0 +1,805 @@
+/*****************************************************************************
+ * vmq.c
+ *
+ * Support multi-queue network devices.
+ *
+ * Copyright (c) 2008, Kaushik Kumar Ram, Rice University.
+ * Copyright (c) 2008, Jose Renato Santos, Hewlett-Packard Co.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+/* This only implements the transmit half of the method; receive is
+ * handled by posted_buffers.c */
+#include <linux/kernel.h>
+#include <linux/netvmq.h>
+#include <linux/skbuff.h>
+#include <xen/xenbus.h>
+#include <xen/balloon.h>
+#include "netchannel2_core.h"
+
+#include "posted_buffer.h"
+#include "vmq.h"
+
+/* state of device queue when operating in vmq mode */
+#define VMQ_QUEUE_DISABLED 0
+#define VMQ_QUEUE_STARTING 1
+#define VMQ_QUEUE_ENABLED 2
+#define VMQ_QUEUE_CLOSING 3
+
+#define VMQ_MAX_UNMAP_OPS 256
+struct vmq_unmap_grants {
+ unsigned n;
+ gnttab_unmap_grant_ref_t gop[VMQ_MAX_UNMAP_OPS];
+};
+typedef struct vmq_unmap_grants vmq_unmap_grants_t;
+
+vmq_unmap_grants_t vmq_unmap_grants;
+
+static inline void vmq_flush_unmap_grants(void)
+{
+ if (vmq_unmap_grants.n == 0)
+ return;
+
+ if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
+ vmq_unmap_grants.gop,
+ vmq_unmap_grants.n))
+ BUG();
+ vmq_unmap_grants.n = 0;
+}
+
+static inline gnttab_unmap_grant_ref_t *vmq_next_unmap_gop(void)
+{
+ if (vmq_unmap_grants.n == VMQ_MAX_UNMAP_OPS)
+ vmq_flush_unmap_grants();
+ return &vmq_unmap_grants.gop[vmq_unmap_grants.n++];
+}
+
+void vmq_flush_unmap_hypercall(void)
+{
+ vmq_flush_unmap_grants();
+}
+
+static inline unsigned long vmq_idx_to_pfn(nc2_vmq_t *vmq, unsigned int idx)
+{
+ return page_to_pfn(vmq->pages[idx]);
+}
+
+static inline unsigned long vmq_idx_to_kaddr(nc2_vmq_t *vmq, unsigned int idx)
+{
+ return (unsigned long)pfn_to_kaddr(vmq_idx_to_pfn(vmq, idx));
+}
+
+/* get vmq idx from page struct */
+static long nc2_vmq_page_index(struct page *page)
+{
+ nc2_vmq_buf_t *vmq_buf;
+ vmq_buf = (nc2_vmq_buf_t *)page->mapping;
+ return vmq_buf - vmq_buf->nc->vmq.buffer;
+}
+
+/* Read the physical device name from xenstore and return a pointer to
+ * the associated net_device structure.
+ * Returns NULL on error. */
+static struct net_device *read_pdev(struct xenbus_device *dev)
+{
+ char *pdevstr;
+ struct net_device *pdev = NULL;
+
+ pdevstr = xenbus_read(XBT_NIL, dev->nodename, "pdev", NULL);
+ if (IS_ERR(pdevstr))
+ return NULL;
+
+ if (pdevstr)
+ pdev = dev_get_by_name(&init_net, pdevstr);
+
+ kfree(pdevstr);
+
+ return pdev;
+}
+
+static void nc2_vmq_page_release(struct page *page, unsigned int order)
+{
+ printk(KERN_CRIT "%s: ERROR: Unexpected release of netchannel2 vmq
page",
+ __func__);
+ BUG_ON(1);
+}
+
+static inline int nc2_vmq_is_disabled(struct netchannel2 *nc)
+{
+ return nc->vmq.vmq_state == VMQ_QUEUE_DISABLED;
+}
+
+static inline int nc2_vmq_is_starting(struct netchannel2 *nc)
+{
+ return nc->vmq.vmq_state == VMQ_QUEUE_STARTING;
+}
+
+static inline int nc2_vmq_is_enabled(struct netchannel2 *nc)
+{
+ return nc->vmq.vmq_state == VMQ_QUEUE_ENABLED;
+}
+
+static inline int nc2_vmq_is_closing(struct netchannel2 *nc)
+{
+ return nc->vmq.vmq_state == VMQ_QUEUE_CLOSING;
+}
+
+static inline void nc2_vmq_enable(struct netchannel2 *nc)
+{
+ nc2_vmq_t *vmq = &nc->vmq;
+ vmq_get(vmq);
+ vmq_enable_queue(vmq->pdev, vmq->vmq_id);
+ vmq->vmq_state = VMQ_QUEUE_ENABLED;
+}
+
+void nc2_vmq_disconnect(struct netchannel2 *nc)
+{
+ nc2_vmq_t *vmq = &nc->vmq;
+
+ if (nc2_vmq_is_enabled(nc)) {
+ vmq_disable_queue(vmq->pdev, vmq->vmq_id);
+ vmq_free_queue(vmq->pdev, vmq->vmq_id);
+ vmq->vmq_state = VMQ_QUEUE_CLOSING;
+ /* wait until all buffers have been returned by dev driver */
+ wait_event(vmq->waiting_to_free,
+ atomic_read(&vmq->refcnt) == 0);
+ return;
+ }
+
+ if (nc2_vmq_is_starting(nc)) {
+ vmq_free_queue(vmq->pdev, vmq->vmq_id);
+ vmq->vmq_state = VMQ_QUEUE_CLOSING;
+ return;
+ }
+
+}
+
+
+static void nc2_vmq_end_map_buffers(gnttab_map_grant_ref_t *mop, int count,
+ struct netchannel2 *nc, u16 *alloc_idx)
+{
+ int i, err;
+ u16 idx;
+ unsigned int prod;
+ nc2_vmq_t *vmq = &nc->vmq;
+
+ prod = vmq->mapped_pages_prod;
+
+ for (i = 0; i < count; i++) {
+ idx = alloc_idx[i];
+
+ /* Check error status */
+ err = mop->status;
+ if (likely(!err)) {
+ set_phys_to_machine(
+ __pa(vmq_idx_to_kaddr(vmq, idx))
+ >> PAGE_SHIFT,
+ FOREIGN_FRAME(mop->dev_bus_addr
+ >> PAGE_SHIFT));
+ /* Store the handle */
+ vmq->buffer[idx].buf->grant_handle = mop->handle;
+
+ /* Add it to the mapped pages list */
+ vmq->mapped_pages[VMQ_IDX_MASK(prod++)] = idx;
+ mop++;
+ continue;
+ }
+
+ /* Error mapping page: return posted buffer to other end.
+ * TODO: We might need an error field on the return buffer
+ * message */
+ return_tx_buffer(nc, vmq->buffer[idx].buf);
+
+ /* Add the page back to the free list */
+ vmq->unmapped_pages[VMQ_IDX_MASK(vmq->unmapped_pages_prod++)]
+ = idx;
+
+ mop++;
+ }
+
+ smp_wmb();
+ vmq->mapped_pages_prod = prod;
+
+ return;
+}
+
+/* Map guest buffers and place them in the mapped buffers list. The mapped
+ * pages in this list are used when allocating a skb (vmq_alloc_skb()).
+ */
+static void nc2_vmq_map_buffers(struct netchannel2 *nc)
+{
+ u16 idx;
+ int count = 0;
+ unsigned int cons;
+ int nbufs;
+ int buf_avail;
+ struct nc2_tx_buffer *buf;
+ struct nc2_vmq *vmq = &nc->vmq;
+ int n_mapped = nr_vmq_bufs(nc);
+
+
+ /*
+ * Putting hundreds of bytes on the stack is considered rude.
+ * Static works because a tasklet can only be on one CPU at any time.
+ */
+ static gnttab_map_grant_ref_t rx_map_ops[VMQ_MAX_BUFFERS];
+ static u16 alloc_idx[VMQ_MAX_BUFFERS];
+
+ /* If there are at least VMQ_MIN_BUFFERS buffers, there is no work to do */
+ if (n_mapped >= VMQ_MIN_BUFFERS)
+ return;
+
+ /* Try to get VMQ_MAX_BUFFERS mapped buffers, if there are
+ sufficient buffers posted by the other end */
+ nbufs = VMQ_MAX_BUFFERS - n_mapped;
+ buf_avail = nc->nr_avail_tx_buffers;
+ if (nbufs > buf_avail)
+ nbufs = buf_avail;
+
+ /* Xen cannot handle more than 512 grant ops in a single hypercall */
+ if (nbufs > 512)
+ nbufs = 512;
+
+ /* give up if there are no buffers available */
+ if (nbufs <= 0)
+ return;
+
+ /* Note that we *should* have free pages to consume here
+ * and no checks are needed.
+ */
+ cons = vmq->unmapped_pages_cons;
+
+ while (count < nbufs) {
+ idx = vmq->unmapped_pages[VMQ_IDX_MASK(cons++)];
+ buf = vmq->buffer[idx].buf = _get_tx_buffer(nc);
+ /* Setup grant map operation */
+ gnttab_set_map_op(&rx_map_ops[count],
+ vmq_idx_to_kaddr(vmq, idx),
+ GNTMAP_host_map,
+ buf->gref,
+ nc->rings.otherend_id);
+ alloc_idx[count] = idx;
+ count++;
+ }
+
+ vmq->unmapped_pages_cons = cons;
+
+ /* Map all the pages */
+ BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
+ rx_map_ops, nbufs));
+
+ /* Finalize buffer mapping after checking if the grant operations
+ succeeded */
+ nc2_vmq_end_map_buffers(rx_map_ops, nbufs, nc, alloc_idx);
+
+ vmq->nbufs += nbufs;
+}
+
+static void nc2_vmq_unmap_buf(struct netchannel2 *nc,
+ unsigned int idx, int recycle)
+{
+ nc2_vmq_t *vmq = &nc->vmq;
+ unsigned long pfn;
+ gnttab_unmap_grant_ref_t *gop;
+ unsigned prod;
+
+ pfn = vmq_idx_to_pfn(vmq, idx);
+ /* Already unmapped? */
+ if (!phys_to_machine_mapping_valid(pfn))
+ return;
+
+ gop = vmq_next_unmap_gop();
+ gnttab_set_unmap_op(gop, vmq_idx_to_kaddr(vmq, idx),
+ GNTMAP_host_map,
+ vmq->buffer[idx].buf->grant_handle);
+
+ vmq->nbufs--;
+
+ set_phys_to_machine(__pa(vmq_idx_to_kaddr(vmq, idx)) >>
+ PAGE_SHIFT,
+ INVALID_P2M_ENTRY);
+ /* Ready for next use. */
+ gnttab_reset_grant_page(vmq->pages[idx]);
+ /* Add the page back to the unmapped list */
+ prod = vmq->unmapped_pages_prod;
+ vmq->unmapped_pages[VMQ_IDX_MASK(prod++)] = idx;
+ if (recycle)
+ recycle_tx_buffer(nc, vmq->buffer[idx].buf);
+ else
+ free_tx_buffer(nc, vmq->buffer[idx].buf);
+ smp_wmb();
+ vmq->unmapped_pages_prod = prod;
+}
+
+static void nc2_vmq_free_mapped_bufs(struct netchannel2 *nc)
+{
+ nc2_vmq_t *vmq = &nc->vmq;
+ unsigned int idx;
+ unsigned prod, cons;
+
+ /* The queue should be disabled before this function is called */
+ BUG_ON(vmq->vmq_state == VMQ_QUEUE_ENABLED);
+
+ cons = vmq->mapped_pages_cons;
+ prod = vmq->mapped_pages_prod;
+ smp_rmb();
+
+ while (cons != prod) {
+ idx = vmq->mapped_pages[VMQ_IDX_MASK(cons++)];
+ nc2_vmq_unmap_buf(nc, idx, 1);
+ }
+
+ vmq_flush_unmap_grants();
+
+ vmq->mapped_pages_cons = cons;
+
+}
+
+static void nc2_vmq_free_skb(struct sk_buff *skb)
+{
+ struct netchannel2 *nc;
+ nc2_vmq_t *vmq;
+ unsigned int idx;
+ int nr_frags, i;
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+ skb_frag_t *frags = shinfo->frags;
+
+ nc = netdev_priv(skb->dev);
+ vmq = &nc->vmq;
+
+ nr_frags = shinfo->nr_frags;
+ for (i = 0; i < nr_frags; i++) {
+ idx = nc2_vmq_page_index(frags[i].page);
+ nc2_vmq_unmap_buf(nc, idx, 1);
+ }
+
+ vmq_flush_unmap_grants();
+
+ shinfo->frag_list = NULL;
+ shinfo->nr_frags = 0;
+
+ /* Add the skb back to the free pool */
+ skb_queue_tail(&vmq->free_skb_list, skb);
+}
+
+/* Initialize the free socket buffer list */
+static int vmq_init_free_skb_list(int n, struct sk_buff_head *free_skb_list)
+{
+ int i;
+ struct sk_buff *skb;
+
+ skb_queue_head_init(free_skb_list);
+
+ for (i = 0; i < n; i++) {
+ skb = alloc_skb(VMQ_SKB_SIZE, GFP_ATOMIC);
+ if (!skb) {
+ printk("Netchannel2 vmq: Failed to allocate socket "
+ "buffer %d (max=%d)\n", i, (int)n);
+ goto error;
+ }
+ skb_queue_tail(free_skb_list, skb);
+ }
+
+ return 0;
+error:
+ /* Free all the allocated buffers and return Error */
+ while (!skb_queue_empty(free_skb_list))
+ kfree_skb(skb_dequeue(free_skb_list));
+
+ return -1;
+}
+
+/* Initialize vmq. Return 1 if vmq is used and 0 otherwise */
+int nc2_vmq_connect(struct netchannel2 *nc)
+{
+ nc2_vmq_t *vmq = &nc->vmq;
+ struct page *page;
+ int q_id;
+ int size;
+ int i;
+
+ vmq->vmq_mode = 0;
+ vmq->pdev = read_pdev(nc->xenbus_device);
+
+ /* cannot use vmq mode if physical device not found */
+ if (!vmq->pdev)
+ return 0;
+
+ /* Allocate a RX queue */
+ q_id = vmq_alloc_queue(vmq->pdev, VMQ_TYPE_RX);
+ if (q_id < 0)
+ /* Allocation failed, cannot use multi-queue */
+ goto free_pdev;
+
+ vmq->vmq_id = q_id;
+
+ /* Set the size of the queue */
+ size = vmq_get_maxsize(vmq->pdev);
+ if (size > VMQ_QUEUE_SIZE)
+ size = VMQ_QUEUE_SIZE;
+ if (vmq_set_size(vmq->pdev, q_id, size) < 0) {
+ /* Failure, free up the queue and return error */
+ printk(KERN_ERR "%s: could not set queue size on net device\n",
+ __func__);
+ goto free_queue;
+ }
+ vmq->vmq_size = size;
+
+ /* Set the mac address of the queue */
+ if (vmq_set_mac(vmq->pdev, q_id, nc->rings.remote_mac) < 0) {
+ /* Failure, free up the queue and return error */
+ printk(KERN_ERR "%s: could not set MAC address for net device
queue\n",
+ __func__);
+ goto free_queue;
+ }
+
+ vmq->pages = alloc_empty_pages_and_pagevec(VMQ_MAX_BUFFERS);
+ if (vmq->pages == NULL) {
+ printk(KERN_ERR "%s: out of memory\n", __func__);
+ goto free_queue;
+ }
+
+ skb_queue_head_init(&vmq->dealloc_queue);
+ skb_queue_head_init(&vmq->rx_queue);
+
+ if (vmq_init_free_skb_list(VMQ_MAX_BUFFERS,
+ &vmq->free_skb_list)) {
+ printk(KERN_ERR "%s: Could not allocate free socket buffers",
+ __func__);
+ goto free_pagevec;
+ }
+
+ for (i = 0; i < VMQ_MAX_BUFFERS; i++) {
+ vmq->buffer[i].nc = nc;
+ page = vmq->pages[i];
+ SetPageForeign(page, nc2_vmq_page_release);
+ page->mapping = (void *)&vmq->buffer[i];
+ vmq->unmapped_pages[i] = i;
+ }
+
+ vmq->unmapped_pages_prod = VMQ_MAX_BUFFERS;
+ vmq->unmapped_pages_cons = 0;
+
+ vmq->mapped_pages_prod = 0;
+ vmq->mapped_pages_cons = 0;
+
+ vmq->nbufs = 0;
+ vmq->vmq_mode = 1;
+
+ /* Store the pointer to netchannel2 device in pdev */
+ BUG_ON((vmq->pdev->vmq == NULL) || (vmq->pdev->vmq->queue == NULL));
+ vmq->pdev->vmq->queue[q_id].guest = (void *)nc->net_device;
+
+ atomic_set(&vmq->refcnt, 0);
+ init_waitqueue_head(&vmq->waiting_to_free);
+
+ printk(KERN_INFO "Netchannel2 using vmq mode for guest %d\n",
+ nc->xenbus_device->otherend_id);
+
+ vmq->vmq_state = VMQ_QUEUE_STARTING;
+
+ return 1; /* Success */
+
+
+free_pagevec:
+ free_empty_pages_and_pagevec(vmq->pages, VMQ_MAX_BUFFERS);
+free_queue:
+ vmq_free_queue(vmq->pdev, vmq->vmq_id);
+free_pdev:
+ dev_put(vmq->pdev);
+ vmq->pdev = NULL;
+ return 0;
+}
+
+void nc2_vmq_shutdown(struct netchannel2 *nc)
+{
+ nc2_vmq_t *vmq = &nc->vmq;
+ int i;
+
+ if (!vmq->vmq_mode)
+ return;
+
+ /* All posted bufs should have been returned */
+ BUG_ON(nr_vmq_bufs(nc) != nr_vmq_mapped_bufs(nc));
+
+ /* free the mapped bufs */
+ nc2_vmq_free_mapped_bufs(nc);
+
+ /* Free the vmq pages */
+ if (vmq->pages) {
+ for (i = 0; i < VMQ_MAX_BUFFERS; i++) {
+ if (PageForeign(vmq->pages[i]))
+ ClearPageForeign(vmq->pages[i]);
+ vmq->pages[i]->mapping = NULL;
+ }
+ free_empty_pages_and_pagevec(vmq->pages, VMQ_MAX_BUFFERS);
+ vmq->pages = NULL;
+ }
+
+ while (!skb_queue_empty(&vmq->free_skb_list)) {
+ /* Free the socket buffer pool */
+ kfree_skb(skb_dequeue(&vmq->free_skb_list));
+ }
+ vmq->vmq_state = VMQ_QUEUE_DISABLED;
+ vmq->vmq_mode = 0;
+
+ if (vmq->pdev) {
+ dev_put(vmq->pdev);
+ vmq->pdev = NULL;
+ }
+
+ vmq_put(vmq);
+}
+
+static int prepare_xmit_allocate_vmq(struct netchannel2 *nc,
+ struct sk_buff *skb)
+{
+ unsigned msg_size;
+
+ msg_size = get_transmitted_packet_msg_size(skb);
+ if (!nc2_reserve_payload_bytes(&nc->rings.prod_ring, msg_size))
+ return -1;
+ return 0;
+}
+
+void do_vmq_work(struct netchannel2 *nc)
+{
+ nc2_vmq_t *vmq = &nc->vmq;
+ struct sk_buff *skb;
+ unsigned long flags;
+
+ /* if not in vmq mode do nothing */
+ if (!nc2_in_vmq_mode(nc))
+ return;
+
+ /* Map guest buffers for dedicated NIC RX queue if needed */
+ if (nr_vmq_bufs(nc) < VMQ_MIN_BUFFERS) {
+ nc2_vmq_map_buffers(nc);
+ /* We delay enabling the queue until we have enough
+ posted buffers. Check if it is time to enable it */
+ if (nc2_vmq_is_starting(nc) &&
+ (nr_vmq_bufs(nc) >= VMQ_MIN_BUFFERS)) {
+ nc2_vmq_enable(nc);
+ }
+ }
+
+ /* free vmq skb's returned by the physical device driver */
+ while (!skb_queue_empty(&nc->vmq.dealloc_queue))
+ nc2_vmq_free_skb(skb_dequeue(&nc->vmq.dealloc_queue));
+
+ /* complete vmq closing after all packets returned by physical
+ * device driver */
+
+ if (nc2_vmq_is_closing(nc) &&
+ (nr_vmq_bufs(nc) == nr_vmq_mapped_bufs(nc))) {
+ nc->vmq.vmq_state = VMQ_QUEUE_DISABLED;
+ nc2_vmq_shutdown(nc);
+ }
+
+ spin_lock_irqsave(&vmq->rx_queue.lock, flags);
+ while (!skb_queue_empty(&vmq->rx_queue)) {
+ skb = __skb_dequeue(&nc->vmq.rx_queue);
+ if (prepare_xmit_allocate_vmq(nc, skb) < 0) {
+ __skb_queue_head(&vmq->rx_queue, skb);
+ spin_unlock_irqrestore(&vmq->rx_queue.lock, flags);
+ return;
+ }
+ __skb_queue_tail(&nc->rings.pending_tx_queue, skb);
+ }
+ spin_unlock_irqrestore(&vmq->rx_queue.lock, flags);
+}
+
+/* Return the netchannel2 device corresponding to the given queue in pdev */
+static inline struct net_device *nc2_vmq_queue_to_vif(struct net_device *pdev,
+ int queue_id)
+{
+ net_vmq_t *n_vmq;
+ vmq_queue_t *vmq_q;
+
+ n_vmq = pdev->vmq;
+ BUG_ON(n_vmq == NULL);
+ vmq_q = &n_vmq->queue[queue_id];
+ BUG_ON(vmq_q == NULL);
+
+ return (struct net_device *)vmq_q->guest;
+}
+
+/* Handle incoming vmq packet */
+int vmq_netif_rx(struct sk_buff *skb, int queue_id)
+{
+ struct skb_cb_overlay *skb_co = get_skb_overlay(skb);
+ struct net_device *dev;
+ struct netchannel2 *nc;
+ nc2_vmq_t *vmq;
+
+ memset(skb_co, 0, sizeof(*skb_co));
+
+ skb_co->nr_fragments = skb_shinfo(skb)->nr_frags;
+ skb_co->type = NC2_PACKET_TYPE_pre_posted;
+ skb_co->policy = transmit_policy_vmq;
+
+ /* get the netchannel2 interface corresponding to this queue */
+ dev = nc2_vmq_queue_to_vif(skb->dev, queue_id);
+ nc = netdev_priv(dev);
+ vmq = &nc->vmq;
+
+ /* replace source dev with destination dev */
+ skb->dev = dev;
+ /* add skb to rx_queue */
+ skb_queue_tail(&vmq->rx_queue, skb);
+
+ /* Trigger thread execution to process new packets */
+ nc2_kick(&nc->rings);
+
+ return 0;
+}
+EXPORT_SYMBOL(vmq_netif_rx);
+
+
+/* Allocate a socket buffer from the free list, get a guest posted
+ * buffer, attach it to the skb, and return it.
+ */
+struct sk_buff *vmq_alloc_skb(struct net_device *netdevice, int queue_id,
+ unsigned int length)
+{
+ struct sk_buff *skb;
+ struct netchannel2 *nc;
+ nc2_vmq_t *vmq;
+ unsigned int idx;
+ int nr_bufs, i;
+ unsigned int cons;
+ unsigned int prod;
+
+ /* get the netchannel2 interface corresponding to this queue */
+ nc = netdev_priv(nc2_vmq_queue_to_vif(netdevice, queue_id));
+
+ vmq = &nc->vmq;
+
+ /* Get a free buffer from the pool */
+ if (skb_queue_empty(&vmq->free_skb_list)) {
+ /* No buffers to allocate */
+ return NULL;
+ }
+
+
+ skb = skb_dequeue(&vmq->free_skb_list);
+ BUG_ON(skb == NULL);
+
+ nr_bufs = VMQ_NUM_BUFFERS(length);
+
+ cons = vmq->mapped_pages_cons;
+ prod = vmq->mapped_pages_prod;
+ smp_rmb();
+
+ if (nr_bufs > (prod - cons))
+ /* Not enough mapped buffers in the pool */
+ goto kick_nc2;
+
+ if (nr_bufs > MAX_SKB_FRAGS)
+ goto error;
+
+ for (i = 0; i < nr_bufs; i++) {
+ idx = vmq->mapped_pages[VMQ_IDX_MASK(cons)];
+ /* FIX ME: This can be simplified */
+ skb_shinfo(skb)->frags[i].page =
+ virt_to_page(vmq_idx_to_kaddr(vmq, idx));
+ skb_shinfo(skb)->frags[i].page_offset = 0;
+ skb_shinfo(skb)->frags[i].size = PAGE_SIZE;
+ skb_shinfo(skb)->nr_frags++;
+ skb->dev = netdevice;
+ cons++;
+ }
+
+ vmq->mapped_pages_cons = cons;
+
+ /* If the number of buffers gets low, run the tasklet to map more buffers */
+ if (nr_vmq_bufs(nc) < VMQ_MIN_BUFFERS)
+ nc2_kick(&nc->rings);
+
+ return skb;
+
+kick_nc2:
+ /* kick netchannel2 interface to get any recently posted buffers */
+ nc2_kick(&nc->rings);
+error:
+ /* Add the skb back to the free pool */
+ skb_queue_tail(&vmq->free_skb_list, skb);
+ return NULL;
+}
+EXPORT_SYMBOL(vmq_alloc_skb);
+
+/* Detach the guest pages and free the socket buffer */
+void vmq_free_skb(struct sk_buff *skb, int queue_id)
+{
+ struct net_device *dev;
+ struct netchannel2 *nc;
+ nc2_vmq_t *vmq;
+
+ /* get the netchannel2 interface corresponding to this queue */
+ dev = nc2_vmq_queue_to_vif(skb->dev, queue_id);
+
+ nc = netdev_priv(dev);
+ vmq = &nc->vmq;
+
+ /* Add skb to the dealloc queue */
+ skb->dev = dev;
+ skb_queue_tail(&vmq->dealloc_queue, skb);
+
+ /* kick netchannel2 interface */
+ nc2_kick(&nc->rings);
+
+}
+EXPORT_SYMBOL(vmq_free_skb);
+
+int nc2_is_vmq_packet(struct netchannel2 *nc, struct sk_buff *skb)
+{
+ int nr_frags;
+ long idx;
+ nc2_vmq_t *vmq = &nc->vmq;
+
+ nr_frags = skb_shinfo(skb)->nr_frags;
+ if (vmq->vmq_mode && nr_frags &&
+ PageForeign(skb_shinfo(skb)->frags[0].page)) {
+ idx = nc2_vmq_page_index(skb_shinfo(skb)->frags[0].page);
+ if ((idx >= 0) && (idx < VMQ_MAX_BUFFERS))
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Prepare to transmit a vmq packet */
+void xmit_vmq(struct netchannel2 *nc, struct sk_buff *skb,
+ volatile void *msg_buf)
+{
+ volatile struct netchannel2_msg_packet *msg = msg_buf;
+ volatile struct netchannel2_fragment *out_frag;
+ nc2_vmq_t *vmq = &nc->vmq;
+ skb_frag_t *frag;
+ struct nc2_tx_buffer *txbuf;
+ int nr_frags;
+ unsigned int idx;
+ unsigned x;
+
+ nr_frags = skb_shinfo(skb)->nr_frags;
+ for (x = 0; x < nr_frags; x++) {
+ frag = &skb_shinfo(skb)->frags[x];
+ out_frag = &msg->frags[x];
+
+ idx = nc2_vmq_page_index(frag->page);
+ txbuf = vmq->buffer[idx].buf;
+ out_frag->pre_post.id = txbuf->id;
+ out_frag->off = frag->page_offset;
+ out_frag->size = frag->size;
+ /* TODO: need to batch unmap grants */
+ nc2_vmq_unmap_buf(nc, idx, 0);
+ }
+
+ /* Avoid unmapping the frag grants again when the skb is freed
+ * later by nc2_vmq_free_skb() */
+ skb_shinfo(skb)->nr_frags = 0;
+}
+
diff --git a/drivers/xen/netchannel2/vmq.h b/drivers/xen/netchannel2/vmq.h
new file mode 100644
index 0000000..fa1cc8a
--- /dev/null
+++ b/drivers/xen/netchannel2/vmq.h
@@ -0,0 +1,58 @@
+#ifndef VMQ_H__
+#define VMQ_H__
+
+#include "netchannel2_core.h"
+
+#ifdef CONFIG_XEN_NETDEV2_VMQ
+
+int nc2_vmq_connect(struct netchannel2 *nc);
+void nc2_vmq_disconnect(struct netchannel2 *nc);
+void do_vmq_work(struct netchannel2 *nc);
+int nc2_is_vmq_packet(struct netchannel2 *nc, struct sk_buff *skb);
+void xmit_vmq(struct netchannel2 *nc, struct sk_buff *skb,
+ volatile void *msg);
+void vmq_flush_unmap_hypercall(void);
+
+#define vmq_get(_b) \
+ atomic_inc(&(_b)->refcnt);
+
+#define vmq_put(_b) \
+ do { \
+ if (atomic_dec_and_test(&(_b)->refcnt)) { \
+ wake_up(&(_b)->waiting_to_free); \
+ } \
+ } while (0)
+
+static inline int nr_vmq_mapped_bufs(struct netchannel2 *nc)
+{
+ return nc->vmq.mapped_pages_prod -
+ nc->vmq.mapped_pages_cons;
+}
+
+static inline int nr_vmq_bufs(struct netchannel2 *nc)
+{
+ return nc->vmq.nbufs;
+}
+
+static inline int nc2_in_vmq_mode(struct netchannel2 *nc)
+{
+ return nc->vmq.vmq_mode;
+}
+
+#else
+static inline int nc2_vmq_connect(struct netchannel2 *nc)
+{
+ return 0;
+}
+static inline void nc2_vmq_disconnect(struct netchannel2 *nc)
+{
+}
+static inline void do_vmq_work(struct netchannel2 *nc)
+{
+}
+static inline void vmq_flush_unmap_hypercall(void)
+{
+}
+#endif /* CONFIG_XEN_NETDEV2_VMQ */
+
+#endif /* !VMQ_H__ */
diff --git a/drivers/xen/netchannel2/vmq_def.h b/drivers/xen/netchannel2/vmq_def.h
new file mode 100644
index 0000000..60f1ccb
--- /dev/null
+++ b/drivers/xen/netchannel2/vmq_def.h
@@ -0,0 +1,68 @@
+#ifndef VMQ_DEF_H__
+#define VMQ_DEF_H__
+
+
+/* size of HW queue in VMQ device */
+#define VMQ_QUEUE_SIZE 1024
+
+/* Minimum number of buffers needed for VMQ.
+ * This is the low-water mark that triggers mapping more guest buffers.
+ * Should be larger than the queue size to allow for in-flight packets.
+ */
+#define VMQ_MIN_BUFFERS 1920
+
+/* Maximum number of posted buffers which are reserved for VMQ.
+ * Should be less than MAX_POSTED_BUFFERS. For now, the difference can be used
+ * for intra-node guest-to-guest traffic. When we map guest buffers we try to
+ * have VMQ_MAX_BUFFERS mapped. The difference (VMQ_MAX_BUFFERS-VMQ_MIN_BUFFERS)
+ * helps batch multiple grant map operations.
+ * VMQ_QUEUE_SIZE < VMQ_MIN_BUFFERS < VMQ_MAX_BUFFERS < MAX_POSTED_BUFFERS
+ * VMQ_MAX_BUFFERS must be a power of 2.
+ */
+#define VMQ_MAX_BUFFERS 2048
+
+/* skb size is zero since packet data uses fragments */
+#define VMQ_SKB_SIZE 0
+
+#define VMQ_NUM_BUFFERS(len) (((len) + PAGE_SIZE - 1) / PAGE_SIZE)
+
+#define VMQ_IDX_MASK(_i) ((_i)&(VMQ_MAX_BUFFERS-1))
+
+typedef struct nc2_vmq_buf {
+ struct nc2_tx_buffer *buf;
+ struct netchannel2 *nc;
+} nc2_vmq_buf_t;
+
+typedef struct nc2_vmq {
+ struct net_device *pdev; /* Pointer to physical device */
+ int vmq_mode; /* indicate if vif is in vmq mode */
+ struct page **pages; /* pages for mapping guest RX bufs */
+ struct sk_buff_head free_skb_list; /* Free socket buffer pool */
+ struct sk_buff_head dealloc_queue; /* list of skbs to be freed */
+ struct sk_buff_head rx_queue; /* list of received packets */
+
+ /* guest mapped buffers */
+ nc2_vmq_buf_t buffer[VMQ_MAX_BUFFERS];
+
+ /* Ring with free pages available for mapping guest RX buffers */
+ u16 unmapped_pages[VMQ_MAX_BUFFERS];
+ unsigned int unmapped_pages_prod;
+ unsigned int unmapped_pages_cons;
+
+ /* Ring of mapped RX pages available for the vmq device */
+ u16 mapped_pages[VMQ_MAX_BUFFERS];
+ unsigned int mapped_pages_prod;
+ unsigned int mapped_pages_cons;
+
+ unsigned int nbufs; /* number of vmq buffers: posted to */
+ /* HW queue or available to be posted */
+ int vmq_id; /* Queue id */
+ int vmq_size; /* Queue size */
+ int vmq_state; /* queue state */
+
+ atomic_t refcnt;
+ wait_queue_head_t waiting_to_free;
+
+} nc2_vmq_t;
+
+#endif /* !VMQ_DEF_H__ */
diff --git a/drivers/xen/netchannel2/xmit_packet.c b/drivers/xen/netchannel2/xmit_packet.c
index 1a879aa..09827fc 100644
--- a/drivers/xen/netchannel2/xmit_packet.c
+++ b/drivers/xen/netchannel2/xmit_packet.c
@@ -3,6 +3,7 @@
#include <linux/kernel.h>
#include <linux/version.h>
#include "netchannel2_core.h"
+#include "vmq.h"
/* You don't normally want to transmit in posted buffers mode, because
grant mode is usually faster, but it's sometimes useful for testing
@@ -189,6 +190,11 @@ int nc2_really_start_xmit(struct netchannel2_ring_pair *ncrp,
set_offload_flags(skb, msg);
switch (skb_co->policy) {
+#ifdef CONFIG_XEN_NETDEV2_VMQ
+ case transmit_policy_vmq:
+ xmit_vmq(nc, skb, msg);
+ break;
+#endif
case transmit_policy_small:
/* Nothing to do */
break;
--
1.6.3.1