# HG changeset patch
# User vh249@xxxxxxxxxxxxxxxxxxxx
# Node ID 7bc4ebdd56605b550cc10360dd8f748f95252f47
# Parent a49bf96419a421637aedf01735141fb207fb43f0
add grant table capabilities to netback and netfront.
Signed-off-by: Vincent Hanquez <vincent@xxxxxxxxxxxxx>
diff -r a49bf96419a4 -r 7bc4ebdd5660 xen/common/grant_table.c
--- a/xen/common/grant_table.c Mon Jul 25 09:56:50 2005
+++ b/xen/common/grant_table.c Mon Jul 25 17:07:31 2005
@@ -809,6 +809,146 @@
}
#endif
+static long
+gnttab_donate(gnttab_donate_t *uop, unsigned int count)
+{
+ struct domain *d = current->domain;
+ struct domain *e;
+ struct pfn_info *page;
+ u32 _d, _nd, x, y;
+ int i;
+ int result = GNTST_okay;
+
+ for (i = 0; i < count; i++) {
+ gnttab_donate_t *gop = &uop[i];
+#if GRANT_DEBUG
+ printk("gnttab_donate: i=%d mfn=%08x domid=%d gref=%08x\n",
+ i, gop->mfn, gop->domid, gop->handle);
+#endif
+ page = &frame_table[gop->mfn];
+
+ if (unlikely(IS_XEN_HEAP_FRAME(page))) {
+ printk("gnttab_donate: xen heap frame mfn=%lx\n", (unsigned long)
gop->mfn);
+ gop->status = GNTST_bad_virt_addr;
+ continue;
+ }
+ if (unlikely(!pfn_valid(page_to_pfn(page)))) {
+ printk("gnttab_donate: invalid pfn for mfn=%lx\n", (unsigned long)
gop->mfn);
+ gop->status = GNTST_bad_virt_addr;
+ continue;
+ }
+ if (unlikely((e = find_domain_by_id(gop->domid)) == NULL)) {
+ printk("gnttab_donate: can't find domain %d\n", gop->domid);
+ gop->status = GNTST_bad_domain;
+ continue;
+ }
+
+ spin_lock(&d->page_alloc_lock);
+
+ /*
+ * The tricky bit: atomically release ownership while
+ * there is just one benign reference to the page
+ * (PGC_allocated). If that reference disappears then the
+ * deallocation routine will safely spin.
+ */
+ _d = pickle_domptr(d);
+ _nd = page->u.inuse._domain;
+ y = page->count_info;
+ do {
+ x = y;
+ if (unlikely((x & (PGC_count_mask|PGC_allocated)) !=
+ (1 | PGC_allocated)) || unlikely(_nd != _d)) {
+ printk("gnttab_donate: Bad page values %p: ed=%p(%u), sd=%p,"
+ " caf=%08x, taf=%08x\n", (void *) page_to_pfn(page),
+ d, d->domain_id, unpickle_domptr(_nd), x,
+ page->u.inuse.type_info);
+ spin_unlock(&d->page_alloc_lock);
+ put_domain(e);
+ return 0;
+ }
+ __asm__ __volatile__(
+ LOCK_PREFIX "cmpxchg8b %2"
+ : "=d" (_nd), "=a" (y),
+ "=m" (*(volatile u64 *)(&page->count_info))
+ : "0" (_d), "1" (x), "c" (NULL), "b" (x) );
+ } while (unlikely(_nd != _d) || unlikely(y != x));
+
+ /*
+ * Unlink from 'd'. At least one reference remains (now
+ * anonymous), so noone else is spinning to try to delete
+ * this page from 'd'.
+ */
+ d->tot_pages--;
+ list_del(&page->list);
+
+ spin_unlock(&d->page_alloc_lock);
+
+ spin_lock(&e->page_alloc_lock);
+
+ /*
+ * Check that 'e' will accept the page and has reservation
+ * headroom. Also, a domain mustn't have PGC_allocated
+ * pages when it is dying.
+ */
+#ifdef GRANT_DEBUG
+ if (unlikely(e->tot_pages >= e->max_pages)) {
+ printk("gnttab_donate: no headroom tot_pages=%d max_pages=%d\n",
+ e->tot_pages, e->max_pages);
+ spin_unlock(&e->page_alloc_lock);
+ put_domain(e);
+ result = GNTST_general_error;
+ break;
+ }
+ if (unlikely(test_bit(DOMFLAGS_DYING, &e->domain_flags))) {
+ printk("gnttab_donate: target domain is dying\n");
+ spin_unlock(&e->page_alloc_lock);
+ put_domain(e);
+ result = GNTST_general_error;
+ break;
+ }
+ if (unlikely(!gnttab_prepare_for_transfer(e, d, gop->handle))) {
+ printk("gnttab_donate: gnttab_prepare_for_transfer fails\n");
+ spin_unlock(&e->page_alloc_lock);
+ put_domain(e);
+ result = GNTST_general_error;
+ break;
+ }
+#else
+ ASSERT(e->tot_pages <= e->max_pages);
+ if (unlikely(test_bit(DOMFLAGS_DYING, &e->domain_flags)) ||
+ unlikely(e->tot_pages == e->max_pages) ||
+ unlikely(!gnttab_prepare_for_transfer(e, d, gop->handle))) {
+ printk("gnttab_donate: Transferee has no reservation headroom
(%d,%d), or "
+ "provided a bad grant ref (%08x), or is dying (%p).\n",
+ e->tot_pages, e->max_pages, gop->handle, e->d_flags);
+ spin_unlock(&e->page_alloc_lock);
+ put_domain(e);
+ result = GNTST_general_error;
+ break;
+ }
+#endif
+ /* Okay, add the page to 'e'. */
+ if (unlikely(e->tot_pages++ == 0)) {
+ get_knownalive_domain(e);
+ }
+ list_add_tail(&page->list, &e->page_list);
+ page_set_owner(page, e);
+
+ spin_unlock(&e->page_alloc_lock);
+
+ /*
+ * Transfer is all done: tell the guest about its new page
+ * frame.
+ */
+ gnttab_notify_transfer(e, d, gop->handle, gop->mfn);
+
+ put_domain(e);
+
+ gop->status = GNTST_okay;
+ }
+ return result;
+}
+
long
do_grant_table_op(
unsigned int cmd, void *uop, unsigned int count)
@@ -843,6 +983,11 @@
rc = gnttab_dump_table((gnttab_dump_table_t *)uop);
break;
#endif
+ case GNTTABOP_donate:
+ if (unlikely(!array_access_ok(uop, count, sizeof(gnttab_donate_t))))
+ goto out;
+ rc = gnttab_donate(uop, count);
+ break;
default:
rc = -ENOSYS;
break;
diff -r a49bf96419a4 -r 7bc4ebdd5660 xen/include/public/grant_table.h
--- a/xen/include/public/grant_table.h Mon Jul 25 09:56:50 2005
+++ b/xen/include/public/grant_table.h Mon Jul 25 17:07:31 2005
@@ -213,6 +213,19 @@
s16 status; /* GNTST_* */
} gnttab_dump_table_t;
+/*
+ * GNTTABOP_donate: Donate <frame> to a foreign domain. The
+ * foreign domain has previously registered the details of the transfer.
+ * These can be identified from <handle>, a grant reference.
+ */
+#define GNTTABOP_donate 4
+typedef struct {
+ memory_t mfn; /* 0 */
+ domid_t domid; /* 4 */
+ u16 handle; /* 8 */
+ s16 status; /* 10: GNTST_* */
+ u32 __pad;
+} gnttab_donate_t; /* 14 bytes */
/*
* Bitfield values for update_pin_status.flags.
diff -r a49bf96419a4 -r 7bc4ebdd5660 linux-2.6-xen-sparse/arch/xen/Kconfig
--- a/linux-2.6-xen-sparse/arch/xen/Kconfig Mon Jul 25 09:56:50 2005
+++ b/linux-2.6-xen-sparse/arch/xen/Kconfig Mon Jul 25 17:07:31 2005
@@ -96,6 +96,20 @@
network interfaces within another guest OS. Unless you are building a
dedicated device-driver domain, or your master control domain
(domain 0), then you almost certainly want to say Y here.
+
+config XEN_NETDEV_GRANT_TX
+ bool "Grant table substrate for net drivers tx path (DANGEROUS)"
+ default n
+ help
+ This introduces the use of grant tables as a data exchange mechanism
+ between the frontend and backend network drivers.
+
+config XEN_NETDEV_GRANT_RX
+ bool "Grant table substrate for net drivers rx path (DANGEROUS)"
+ default n
+ help
+ This introduces the use of grant tables as a data exchange mechanism
+ between the frontend and backend network drivers.
config XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER
bool "Pipelined transmitter (DANGEROUS)"
diff -r a49bf96419a4 -r 7bc4ebdd5660
linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32
--- a/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32 Mon Jul
25 09:56:50 2005
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32 Mon Jul
25 17:07:31 2005
@@ -1,7 +1,7 @@
#
# Automatically generated make config: don't edit
# Linux kernel version: 2.6.12-xen0
-# Sat Jul 9 09:19:47 2005
+# Mon Jul 25 09:48:34 2005
#
CONFIG_XEN=y
CONFIG_ARCH_XEN=y
@@ -18,6 +18,8 @@
CONFIG_XEN_NETDEV_BACKEND=y
CONFIG_XEN_BLKDEV_FRONTEND=y
CONFIG_XEN_NETDEV_FRONTEND=y
+# CONFIG_XEN_NETDEV_GRANT_TX is not set
+# CONFIG_XEN_NETDEV_GRANT_RX is not set
# CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
# CONFIG_XEN_BLKDEV_TAP is not set
# CONFIG_XEN_SHADOW_MODE is not set
@@ -176,38 +178,12 @@
# PCI Hotplug Support
#
# CONFIG_HOTPLUG_PCI is not set
-
-#
-# Kernel hacking
-#
-# CONFIG_PRINTK_TIME is not set
-CONFIG_DEBUG_KERNEL=y
-CONFIG_MAGIC_SYSRQ=y
-CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_SCHEDSTATS is not set
-# CONFIG_DEBUG_SLAB is not set
-# CONFIG_DEBUG_PREEMPT is not set
-# CONFIG_DEBUG_SPINLOCK is not set
-# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
-# CONFIG_DEBUG_KOBJECT is not set
-# CONFIG_DEBUG_HIGHMEM is not set
-CONFIG_DEBUG_BUGVERBOSE=y
-# CONFIG_DEBUG_INFO is not set
-# CONFIG_DEBUG_FS is not set
-# CONFIG_FRAME_POINTER is not set
-CONFIG_EARLY_PRINTK=y
-# CONFIG_DEBUG_STACKOVERFLOW is not set
-# CONFIG_KPROBES is not set
-# CONFIG_DEBUG_STACK_USAGE is not set
-# CONFIG_DEBUG_PAGEALLOC is not set
-# CONFIG_4KSTACKS is not set
-CONFIG_X86_FIND_SMP_CONFIG=y
-CONFIG_X86_MPPARSE=y
CONFIG_GENERIC_HARDIRQS=y
CONFIG_GENERIC_IRQ_PROBE=y
CONFIG_X86_BIOS_REBOOT=y
CONFIG_PC=y
CONFIG_SECCOMP=y
+CONFIG_EARLY_PRINTK=y
#
# Executable file formats
@@ -1274,3 +1250,29 @@
CONFIG_CRC32=y
CONFIG_LIBCRC32C=y
CONFIG_ZLIB_INFLATE=y
+
+#
+# Kernel hacking
+#
+# CONFIG_PRINTK_TIME is not set
+CONFIG_DEBUG_KERNEL=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_LOG_BUF_SHIFT=14
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_DEBUG_SLAB is not set
+# CONFIG_DEBUG_PREEMPT is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_DEBUG_KOBJECT is not set
+# CONFIG_DEBUG_HIGHMEM is not set
+CONFIG_DEBUG_BUGVERBOSE=y
+# CONFIG_DEBUG_INFO is not set
+# CONFIG_DEBUG_FS is not set
+# CONFIG_FRAME_POINTER is not set
+# CONFIG_DEBUG_STACKOVERFLOW is not set
+# CONFIG_KPROBES is not set
+# CONFIG_DEBUG_STACK_USAGE is not set
+# CONFIG_DEBUG_PAGEALLOC is not set
+# CONFIG_4KSTACKS is not set
+CONFIG_X86_FIND_SMP_CONFIG=y
+CONFIG_X86_MPPARSE=y
diff -r a49bf96419a4 -r 7bc4ebdd5660
linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c
--- a/linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c Mon Jul 25 09:56:50 2005
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c Mon Jul 25 17:07:31 2005
@@ -166,8 +166,14 @@
u16 flags;
flags = shared[ref].flags;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ /*
+ * With gnttab_donate on the RX path, flags may legitimately still be
+ * (GTF_accept_transfer | GTF_transfer_committed), so skip the ASSERT.
+ */
+#else
ASSERT(flags == (GTF_accept_transfer | GTF_transfer_committed));
-
+#endif
/*
* If a transfer is committed then wait for the frame address to appear.
* Otherwise invalidate the grant entry against future use.
diff -r a49bf96419a4 -r 7bc4ebdd5660
linux-2.6-xen-sparse/drivers/xen/netback/netback.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Mon Jul 25
09:56:50 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Mon Jul 25
17:07:31 2005
@@ -18,6 +18,24 @@
#include <linux/delay.h>
#endif
+#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
+#include <asm-xen/xen-public/grant_table.h>
+#include <asm-xen/gnttab.h>
+#ifdef GRANT_DEBUG
+static void
+dump_packet(int tag, u32 addr, unsigned char *p)
+{
+ int i;
+
+ printk(KERN_ALERT "#### rx_action %c %08x ", tag & 0xff, addr);
+ for (i = 0; i < 20; i++) {
+ printk("%02x", p[i]);
+ }
+ printk("\n");
+}
+#endif
+#endif
+
static void netif_idx_release(u16 pending_idx);
static void netif_page_release(struct page *page);
static void make_tx_response(netif_t *netif,
@@ -41,7 +59,9 @@
static struct sk_buff_head rx_queue;
static multicall_entry_t rx_mcl[NETIF_RX_RING_SIZE*2+1];
static mmu_update_t rx_mmu[NETIF_RX_RING_SIZE];
+#ifndef CONFIG_XEN_NETDEV_GRANT_RX
static struct mmuext_op rx_mmuext[NETIF_RX_RING_SIZE];
+#endif
static unsigned char rx_notify[NR_EVENT_CHANNELS];
/* Don't currently gate addition of an interface to the tx scheduling list. */
@@ -68,7 +88,20 @@
static PEND_RING_IDX dealloc_prod, dealloc_cons;
static struct sk_buff_head tx_queue;
+
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+static u16 grant_tx_ref[MAX_PENDING_REQS];
+#endif
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+static gnttab_donate_t grant_rx_op[MAX_PENDING_REQS];
+#endif
+#ifndef CONFIG_XEN_NETDEV_GRANT_TX
static multicall_entry_t tx_mcl[MAX_PENDING_REQS];
+#endif
+
+#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
+#define GRANT_INVALID_REF (0xFFFF)
+#endif
static struct list_head net_schedule_list;
static spinlock_t net_schedule_list_lock;
@@ -91,6 +124,7 @@
return mfn;
}
+#ifndef CONFIG_XEN_NETDEV_GRANT_RX
static void free_mfn(unsigned long mfn)
{
unsigned long flags;
@@ -102,6 +136,7 @@
BUG();
spin_unlock_irqrestore(&mfn_lock, flags);
}
+#endif
static inline void maybe_schedule_tx_action(void)
{
@@ -160,7 +195,17 @@
dev_kfree_skb(skb);
skb = nskb;
}
-
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+#ifdef DEBUG_GRANT
+ printk(KERN_ALERT "#### be_xmit: req_prod=%d req_cons=%d id=%04x
gr=%04x\n",
+ netif->rx->req_prod,
+ netif->rx_req_cons,
+ netif->rx->ring[
+ MASK_NETIF_RX_IDX(netif->rx_req_cons)].req.id,
+ netif->rx->ring[
+ MASK_NETIF_RX_IDX(netif->rx_req_cons)].req.gref);
+#endif
+#endif
netif->rx_req_cons++;
netif_get(netif);
@@ -201,7 +246,11 @@
u16 size, id, evtchn;
multicall_entry_t *mcl;
mmu_update_t *mmu;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ gnttab_donate_t *gop;
+#else
struct mmuext_op *mmuext;
+#endif
unsigned long vdata, mdata, new_mfn;
struct sk_buff_head rxq;
struct sk_buff *skb;
@@ -212,7 +261,12 @@
mcl = rx_mcl;
mmu = rx_mmu;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ gop = grant_rx_op;
+#else
mmuext = rx_mmuext;
+#endif
+
while ( (skb = skb_dequeue(&rx_queue)) != NULL )
{
netif = netdev_priv(skb->dev);
@@ -228,7 +282,6 @@
skb_queue_head(&rx_queue, skb);
break;
}
-
/*
* Set the new P2M table entry before reassigning the old data page.
* Heed the comment in pgtable-2level.h:pte_page(). :-)
@@ -239,6 +292,14 @@
pfn_pte_ma(new_mfn, PAGE_KERNEL), 0);
mcl++;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ gop->mfn = mdata >> PAGE_SHIFT;
+ gop->domid = netif->domid;
+ gop->handle = netif->rx->ring[
+ MASK_NETIF_RX_IDX(netif->rx_resp_prod_copy)].req.gref;
+ netif->rx_resp_prod_copy++;
+ gop++;
+#else
mcl->op = __HYPERVISOR_mmuext_op;
mcl->args[0] = (unsigned long)mmuext;
mcl->args[1] = 1;
@@ -249,13 +310,16 @@
mmuext->cmd = MMUEXT_REASSIGN_PAGE;
mmuext->mfn = mdata >> PAGE_SHIFT;
mmuext++;
-
+#endif
mmu->ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
mmu->val = __pa(vdata) >> PAGE_SHIFT;
mmu++;
__skb_queue_tail(&rxq, skb);
+#ifdef DEBUG_GRANT
+ dump_packet('a', mdata, vdata);
+#endif
/* Filled the batch queue? */
if ( (mcl - rx_mcl) == ARRAY_SIZE(rx_mcl) )
break;
@@ -271,12 +335,24 @@
mcl->args[3] = DOMID_SELF;
mcl++;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ mcl[-2].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
+#else
mcl[-3].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
+#endif
if ( unlikely(HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl) != 0) )
BUG();
mcl = rx_mcl;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_donate,
+ grant_rx_op, gop - grant_rx_op))) {
+ BUG();
+ }
+ gop = grant_rx_op;
+#else
mmuext = rx_mmuext;
+#endif
while ( (skb = __skb_dequeue(&rxq)) != NULL )
{
netif = netdev_priv(skb->dev);
@@ -284,9 +360,12 @@
/* Rederive the machine addresses. */
new_mfn = mcl[0].args[1] >> PAGE_SHIFT;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ mdata = (unsigned long)skb->data & ~PAGE_MASK;
+#else
mdata = ((mmuext[0].mfn << PAGE_SHIFT) |
((unsigned long)skb->data & ~PAGE_MASK));
-
+#endif
atomic_set(&(skb_shinfo(skb)->dataref), 1);
skb_shinfo(skb)->nr_frags = 0;
skb_shinfo(skb)->frag_list = NULL;
@@ -299,13 +378,16 @@
/* Check the reassignment error code. */
status = NETIF_RSP_OKAY;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ BUG_ON(gop->status != 0);
+#else
if ( unlikely(mcl[1].result != 0) )
{
DPRINTK("Failed MMU update transferring to DOM%u\n", netif->domid);
free_mfn(mdata >> PAGE_SHIFT);
status = NETIF_RSP_ERROR;
}
-
+#endif
evtchn = netif->evtchn;
id = netif->rx->ring[MASK_NETIF_RX_IDX(netif->rx_resp_prod)].req.id;
if ( make_rx_response(netif, id, status, mdata,
@@ -318,9 +400,13 @@
netif_put(netif);
dev_kfree_skb(skb);
-
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ mcl++;
+ gop++;
+#else
mcl += 2;
mmuext += 1;
+#endif
}
while ( notify_nr != 0 )
@@ -404,6 +490,7 @@
netif_schedule_work(netif);
}
+/* Called after netfront has transmitted */
static void net_tx_action(unsigned long unused)
{
struct list_head *ent;
@@ -412,13 +499,40 @@
netif_tx_request_t txreq;
u16 pending_idx;
NETIF_RING_IDX i;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ gnttab_unmap_grant_ref_t unmap_ops[MAX_PENDING_REQS];
+ gnttab_unmap_grant_ref_t *gop;
+
+ gnttab_map_grant_ref_t map_ops[MAX_PENDING_REQS];
+ gnttab_map_grant_ref_t *mop;
+#else
multicall_entry_t *mcl;
+#endif
PEND_RING_IDX dc, dp;
unsigned int data_len;
+
if ( (dc = dealloc_cons) == (dp = dealloc_prod) )
goto skip_dealloc;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ /*
+ * Free up any grants we have finished using
+ */
+ gop = unmap_ops;
+ while (dc != dp) {
+ pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
+ gop->host_virt_addr = MMAP_VADDR(pending_idx);
+ gop->dev_bus_addr = 0;
+ gop->handle = grant_tx_ref[pending_idx];
+ grant_tx_ref[pending_idx] = GRANT_INVALID_REF;
+ gop++;
+ }
+ if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
+ unmap_ops, gop - unmap_ops))) {
+ BUG();
+ }
+#else
mcl = tx_mcl;
while ( dc != dp )
{
@@ -433,10 +547,13 @@
BUG();
mcl = tx_mcl;
+#endif
while ( dealloc_cons != dp )
{
+#ifndef CONFIG_XEN_NETDEV_GRANT_TX
/* The update_va_mapping() must not fail. */
BUG_ON(mcl[0].result != 0);
+#endif
pending_idx = dealloc_ring[MASK_PEND_IDX(dealloc_cons++)];
@@ -460,11 +577,17 @@
netif_put(netif);
+#ifndef CONFIG_XEN_NETDEV_GRANT_TX
mcl++;
+#endif
}
skip_dealloc:
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ mop = map_ops;
+#else
mcl = tx_mcl;
+#endif
while ( (NR_PENDING_REQS < MAX_PENDING_REQS) &&
!list_empty(&net_schedule_list) )
{
@@ -486,7 +609,6 @@
rmb(); /* Ensure that we see the request before we copy it. */
memcpy(&txreq, &netif->tx->ring[MASK_NETIF_TX_IDX(i)].req,
sizeof(txreq));
-
/* Credit-based scheduling. */
if ( txreq.size > netif->remaining_credit )
{
@@ -566,13 +688,20 @@
/* Packets passed to netif_rx() must have some headroom. */
skb_reserve(skb, 16);
-
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ mop->host_virt_addr = MMAP_VADDR(pending_idx);
+ mop->dom = netif->domid;
+ mop->ref = txreq.addr >> PAGE_SHIFT;
+ mop->flags = GNTMAP_host_map | GNTMAP_readonly;
+ mop++;
+#else
MULTI_update_va_mapping_otherdomain(
mcl, MMAP_VADDR(pending_idx),
pfn_pte_ma(txreq.addr >> PAGE_SHIFT, PAGE_KERNEL),
0, netif->domid);
mcl++;
+#endif
memcpy(&pending_tx_info[pending_idx].req, &txreq, sizeof(txreq));
pending_tx_info[pending_idx].netif = netif;
@@ -582,11 +711,26 @@
pending_cons++;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ if ((mop - map_ops) >= ARRAY_SIZE(map_ops))
+ break;
+#else
/* Filled the batch queue? */
if ( (mcl - tx_mcl) == ARRAY_SIZE(tx_mcl) )
break;
- }
-
+#endif
+ }
+
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ if (mop == map_ops) {
+ return;
+ }
+ if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
+ map_ops, mop - map_ops))) {
+ BUG();
+ }
+ mop = map_ops;
+#else
if ( mcl == tx_mcl )
return;
@@ -594,6 +738,7 @@
BUG();
mcl = tx_mcl;
+#endif
while ( (skb = __skb_dequeue(&tx_queue)) != NULL )
{
pending_idx = *((u16 *)skb->data);
@@ -601,6 +746,20 @@
memcpy(&txreq, &pending_tx_info[pending_idx].req, sizeof(txreq));
/* Check the remap error code. */
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ if (unlikely(mop->dev_bus_addr == 0)) {
+ printk(KERN_ALERT "#### netback grant fails\n");
+ make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
+ netif_put(netif);
+ kfree_skb(skb);
+ mop++;
+ pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
+ continue;
+ }
+ phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT] =
+ FOREIGN_FRAME(mop->dev_bus_addr);
+ grant_tx_ref[pending_idx] = mop->handle;
+#else
if ( unlikely(mcl[0].result != 0) )
{
DPRINTK("Bad page frame\n");
@@ -614,6 +773,7 @@
phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT] =
FOREIGN_FRAME(txreq.addr >> PAGE_SHIFT);
+#endif
data_len = (txreq.size > PKT_PROT_LEN) ? PKT_PROT_LEN : txreq.size;
@@ -621,7 +781,6 @@
memcpy(skb->data,
(void *)(MMAP_VADDR(pending_idx)|(txreq.addr&~PAGE_MASK)),
data_len);
-
if ( data_len < txreq.size )
{
/* Append the packet payload as a fragment. */
@@ -655,7 +814,11 @@
netif_rx(skb);
netif->dev->last_rx = jiffies;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ mop++;
+#else
mcl++;
+#endif
}
}
@@ -775,6 +938,12 @@
return 0;
printk("Initialising Xen netif backend\n");
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ printk("#### netback tx using grant tables\n");
+#endif
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ printk("#### netback rx using grant tables\n");
+#endif
/* We can increase reservation by this much in net_rx_action(). */
balloon_update_driver_allowance(NETIF_RX_RING_SIZE);
diff -r a49bf96419a4 -r 7bc4ebdd5660
linux-2.6-xen-sparse/drivers/xen/netback/common.h
--- a/linux-2.6-xen-sparse/drivers/xen/netback/common.h Mon Jul 25 09:56:50 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/common.h Mon Jul 25 17:07:31 2005
@@ -50,6 +50,9 @@
/* Private indexes into shared ring. */
NETIF_RING_IDX rx_req_cons;
NETIF_RING_IDX rx_resp_prod; /* private version of shared variable */
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ NETIF_RING_IDX rx_resp_prod_copy; /* private version of shared variable */
+#endif
NETIF_RING_IDX tx_req_cons;
NETIF_RING_IDX tx_resp_prod; /* private version of shared variable */
diff -r a49bf96419a4 -r 7bc4ebdd5660 xen/include/public/io/netif.h
--- a/xen/include/public/io/netif.h Mon Jul 25 09:56:50 2005
+++ b/xen/include/public/io/netif.h Mon Jul 25 17:07:31 2005
@@ -23,10 +23,17 @@
typedef struct {
u16 id; /* Echoed in response message. */
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ grant_ref_t gref; /* 2: Reference to incoming granted frame */
+#endif
} netif_rx_request_t;
typedef struct {
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ u32 addr; /* 0: Offset in page of start of received packet */
+#else
memory_t addr; /* Machine address of packet. */
+#endif
u16 csum_valid:1; /* Protocol checksum is validated? */
u16 id:15;
s16 status; /* -ve: BLKIF_RSP_* ; +ve: Rx'ed pkt size. */
diff -r a49bf96419a4 -r 7bc4ebdd5660
linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Mon Jul 25
09:56:50 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Mon Jul 25
17:07:31 2005
@@ -54,6 +54,25 @@
#include <asm/page.h>
#include <asm/uaccess.h>
+#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
+#include <asm-xen/xen-public/grant_table.h>
+#include <asm-xen/gnttab.h>
+#ifdef GRANT_DEBUG
+static void
+dump_packet(int tag, u32 addr, u32 ap)
+{
+ unsigned char *p = (unsigned char *)ap;
+ int i;
+
+ printk(KERN_ALERT "#### rx_poll %c %08x ", tag & 0xff, addr);
+ for (i = 0; i < 20; i++) {
+ printk("%02x", p[i]);
+ }
+ printk("\n");
+}
+#endif
+#endif
+
#ifndef __GFP_NOWARN
#define __GFP_NOWARN 0
#endif
@@ -82,6 +101,21 @@
#define TX_TEST_IDX req_cons /* conservative: not seen all our requests? */
#endif
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+static grant_ref_t gref_tx_head, gref_tx_terminal;
+static grant_ref_t grant_tx_ref[NETIF_TX_RING_SIZE + 1];
+#endif
+
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+static grant_ref_t gref_rx_head, gref_rx_terminal;
+static grant_ref_t grant_rx_ref[NETIF_RX_RING_SIZE + 1];
+#endif
+
+#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
+static domid_t rdomid = 0;
+#define GRANT_INVALID_REF (0xFFFF)
+#endif
+
static void network_tx_buf_gc(struct net_device *dev);
static void network_alloc_rx_buffers(struct net_device *dev);
@@ -322,6 +356,14 @@
for (i = np->tx_resp_cons; i != prod; i++) {
id = np->tx->ring[MASK_NETIF_TX_IDX(i)].resp.id;
skb = np->tx_skbs[id];
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ if (gnttab_query_foreign_access(grant_tx_ref[id]) != 0) {
+ printk(KERN_ALERT "netfront: query foreign access\n");
+ }
+ gnttab_end_foreign_access(grant_tx_ref[id], GNTMAP_readonly);
+ gnttab_release_grant_reference(&gref_tx_head, grant_tx_ref[id]);
+ grant_tx_ref[id] = GRANT_INVALID_REF;
+#endif
ADD_ID_TO_FREELIST(np->tx_skbs, id);
dev_kfree_skb_irq(skb);
}
@@ -356,6 +398,9 @@
struct sk_buff *skb;
int i, batch_target;
NETIF_RING_IDX req_prod = np->rx->req_prod;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ int ref;
+#endif
if (unlikely(np->backend_state != BEST_CONNECTED))
return;
@@ -388,7 +433,16 @@
np->rx_skbs[id] = skb;
np->rx->ring[MASK_NETIF_RX_IDX(req_prod + i)].req.id = id;
-
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ if ((ref = gnttab_claim_grant_reference(&gref_rx_head,
gref_rx_terminal)) < 0) {
+ printk(KERN_ALERT "#### netfront can't claim rx reference\n");
+ BUG();
+ }
+ grant_rx_ref[id] = ref;
+ gnttab_grant_foreign_transfer_ref(ref, rdomid,
+ virt_to_machine(skb->head) >> PAGE_SHIFT);
+ np->rx->ring[MASK_NETIF_RX_IDX(req_prod + i)].req.gref = ref;
+#endif
rx_pfn_array[i] = virt_to_machine(skb->head) >> PAGE_SHIFT;
/* Remove this page from pseudo phys map before passing back to Xen. */
@@ -436,6 +490,10 @@
struct net_private *np = netdev_priv(dev);
netif_tx_request_t *tx;
NETIF_RING_IDX i;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ unsigned int ref;
+ unsigned long mfn;
+#endif
if (unlikely(np->tx_full)) {
printk(KERN_ALERT "%s: full queue wasn't stopped!\n", dev->name);
@@ -470,7 +528,18 @@
tx = &np->tx->ring[MASK_NETIF_TX_IDX(i)].req;
tx->id = id;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ if ((ref = gnttab_claim_grant_reference(&gref_tx_head, gref_tx_terminal))
< 0) {
+ printk(KERN_ALERT "#### netfront can't claim tx grant reference\n");
+ BUG();
+ }
+ mfn = virt_to_machine(skb->data) >> PAGE_SHIFT;
+ gnttab_grant_foreign_access_ref(ref, rdomid, mfn, GNTMAP_readonly);
+ tx->addr = (ref << PAGE_SHIFT) | ((unsigned long)skb->data & ~PAGE_MASK);
+ grant_tx_ref[id] = ref;
+#else
tx->addr = virt_to_machine(skb->data);
+#endif
tx->size = skb->len;
tx->csum_blank = (skb->ip_summed == CHECKSUM_HW);
@@ -530,6 +599,10 @@
int work_done, budget, more_to_do = 1;
struct sk_buff_head rxq;
unsigned long flags;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ unsigned long mfn;
+ grant_ref_t ref;
+#endif
spin_lock(&np->rx_lock);
@@ -542,7 +615,6 @@
if ((budget = *pbudget) > dev->quota)
budget = dev->quota;
-
rp = np->rx->resp_prod;
rmb(); /* Ensure we see queued responses up to 'rp'. */
@@ -550,7 +622,6 @@
(i != rp) && (work_done < budget);
i++, work_done++) {
rx = &np->rx->ring[MASK_NETIF_RX_IDX(i)].resp;
-
/*
* An error here is very odd. Usually indicates a backend bug,
* low-memory condition, or that we didn't have reservation headroom.
@@ -565,11 +636,23 @@
continue;
}
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ ref = grant_rx_ref[rx->id];
+ grant_rx_ref[rx->id] = GRANT_INVALID_REF;
+
+ mfn = gnttab_end_foreign_transfer(ref);
+ gnttab_release_grant_reference(&gref_rx_head, ref);
+#endif
+
skb = np->rx_skbs[rx->id];
ADD_ID_TO_FREELIST(np->rx_skbs, rx->id);
/* NB. We handle skb overflow later. */
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ skb->data = skb->head + rx->addr;
+#else
skb->data = skb->head + (rx->addr & ~PAGE_MASK);
+#endif
skb->len = rx->status;
skb->tail = skb->data + skb->len;
@@ -580,16 +663,32 @@
np->stats.rx_bytes += rx->status;
/* Remap the page. */
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ mmu->ptr = mfn << PAGE_SHIFT | MMU_MACHPHYS_UPDATE;
+#else
mmu->ptr = (rx->addr & PAGE_MASK) | MMU_MACHPHYS_UPDATE;
+#endif
mmu->val = __pa(skb->head) >> PAGE_SHIFT;
mmu++;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ MULTI_update_va_mapping(mcl, (unsigned long)skb->head,
+ pfn_pte_ma(mfn, PAGE_KERNEL), 0);
+#else
MULTI_update_va_mapping(mcl, (unsigned long)skb->head,
pfn_pte_ma(rx->addr >> PAGE_SHIFT,
PAGE_KERNEL), 0);
+#endif
mcl++;
phys_to_machine_mapping[__pa(skb->head) >> PAGE_SHIFT] =
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ mfn;
+#else
rx->addr >> PAGE_SHIFT;
-
+#endif
+#ifdef GRANT_DEBUG
+ printk(KERN_ALERT "#### rx_poll enqueue vdata=%08x mfn=%08x
ref=%04x\n",
+ skb->data, mfn, ref);
+#endif
__skb_queue_tail(&rxq, skb);
}
@@ -608,6 +707,11 @@
}
while ((skb = __skb_dequeue(&rxq)) != NULL) {
+#ifdef GRANT_DEBUG
+ printk(KERN_ALERT "#### rx_poll dequeue vdata=%08x mfn=%08x\n",
+ skb->data, virt_to_machine(skb->data)>>PAGE_SHIFT);
+ dump_packet('d', skb->data, (unsigned long)skb->data);
+#endif
/*
* Enough room in skbuff for the data we were passed? Also, Linux
* expects at least 16 bytes headroom in each receive buffer.
@@ -615,6 +719,7 @@
if (unlikely(skb->tail > skb->end) ||
unlikely((skb->data - skb->head) < 16)) {
nskb = NULL;
+
/* Only copy the packet if it fits in the current MTU. */
if (skb->len <= (dev->mtu + ETH_HLEN)) {
@@ -646,7 +751,6 @@
/* Set the shared-info area, which is hidden behind the real data. */
init_skb_shinfo(skb);
-
/* Ethernet-specific work. Delayed to here as it peeks the header. */
skb->protocol = eth_type_trans(skb, dev);
@@ -919,6 +1023,9 @@
network_connect(dev, status);
np->evtchn = status->evtchn;
np->irq = bind_evtchn_to_irq(np->evtchn);
+#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
+ rdomid = status->domid;
+#endif
(void)request_irq(np->irq, netif_int, SA_SAMPLE_RANDOM, dev->name, dev);
netctrl_connected_count();
(void)send_fake_arp(dev);
@@ -962,10 +1069,18 @@
np->rx_max_target = RX_MAX_TARGET;
/* Initialise {tx,rx}_skbs to be a free chain containing every entry. */
- for (i = 0; i <= NETIF_TX_RING_SIZE; i++)
+ for (i = 0; i <= NETIF_TX_RING_SIZE; i++) {
np->tx_skbs[i] = (void *)((unsigned long) i+1);
- for (i = 0; i <= NETIF_RX_RING_SIZE; i++)
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ grant_tx_ref[i] = GRANT_INVALID_REF;
+#endif
+ }
+ for (i = 0; i <= NETIF_RX_RING_SIZE; i++) {
np->rx_skbs[i] = (void *)((unsigned long) i+1);
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ grant_rx_ref[i] = GRANT_INVALID_REF;
+#endif
+ }
dev->open = network_open;
dev->hard_start_xmit = network_start_xmit;
@@ -1267,6 +1382,22 @@
if (xen_start_info.flags & SIF_INITDOMAIN)
return 0;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ if (gnttab_alloc_grant_references(NETIF_TX_RING_SIZE,
+ &gref_tx_head, &gref_tx_terminal) < 0) {
+ printk(KERN_ALERT "#### netfront can't alloc tx grant refs\n");
+ return 1;
+ }
+ printk(KERN_ALERT "#### netfront tx using grant tables\n");
+#endif
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ if (gnttab_alloc_grant_references(NETIF_RX_RING_SIZE,
+ &gref_rx_head, &gref_rx_terminal) < 0) {
+ printk(KERN_ALERT "#### netfront can't alloc rx grant refs\n");
+ return 1;
+ }
+ printk(KERN_ALERT "#### netfront rx using grant tables\n");
+#endif
if ((err = xennet_proc_init()) != 0)
return err;
@@ -1284,6 +1415,16 @@
DPRINTK("< err=%d\n", err);
return err;
+}
+
+static void netif_exit(void)
+{
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ gnttab_free_grant_references(NETIF_TX_RING_SIZE, gref_tx_head);
+#endif
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ gnttab_free_grant_references(NETIF_RX_RING_SIZE, gref_rx_head);
+#endif
}
static void vif_suspend(struct net_private *np)
@@ -1478,3 +1619,4 @@
#endif
module_init(netif_init);
+module_exit(netif_exit);
diff -r a49bf96419a4 -r 7bc4ebdd5660
linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32
--- a/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32 Mon Jul
25 09:56:50 2005
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32 Mon Jul
25 17:07:31 2005
@@ -1,7 +1,7 @@
#
# Automatically generated make config: don't edit
# Linux kernel version: 2.6.12-xenU
-# Sun Jul 10 17:32:04 2005
+# Mon Jul 25 10:06:06 2005
#
CONFIG_XEN=y
CONFIG_ARCH_XEN=y
@@ -15,6 +15,8 @@
CONFIG_XEN_BLKDEV_GRANT=y
CONFIG_XEN_BLKDEV_FRONTEND=y
CONFIG_XEN_NETDEV_FRONTEND=y
+# CONFIG_XEN_NETDEV_GRANT_TX is not set
+# CONFIG_XEN_NETDEV_GRANT_RX is not set
# CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
# CONFIG_XEN_BLKDEV_TAP is not set
# CONFIG_XEN_SHADOW_MODE is not set
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
|