On Thu, Jun 30, 2005 at 08:28:54PM -0600, Matt Chapman wrote:
> I'm currently looking at getting domU networking working
> on IA64, and to do this I need to make netback/netfront
> use grant tables.
>
> I'm told that there's already a patch floating around,
> can someone tell me where to find it?
Hi Matt,
Here is the patch that makes netback and netfront use grant tables. It adds
two config options (XEN_NETDEV_GRANT_TX and XEN_NETDEV_GRANT_RX), converts
the tx path to grant mapping in netback, converts the rx path to a new
GNTTABOP_donate page-transfer operation in the hypervisor, and extends the
public grant-table and netif interfaces accordingly.
diff -Nru a/linux-2.6.11-xen-sparse/arch/xen/Kconfig b/linux-2.6.11-xen-sparse/arch/xen/Kconfig
--- a/linux-2.6.11-xen-sparse/arch/xen/Kconfig 2005-06-02 15:02:46 +01:00
+++ b/linux-2.6.11-xen-sparse/arch/xen/Kconfig 2005-06-02 15:02:46 +01:00
@@ -97,6 +97,20 @@
dedicated device-driver domain, or your master control domain
(domain 0), then you almost certainly want to say Y here.
+config XEN_NETDEV_GRANT_TX
+ bool "Grant table substrate for net drivers tx path (DANGEROUS)"
+ default y
+ help
+ This introduces the use of grant tables as a data exchange mechanism
+ between the frontend and backend network drivers on the transmit path.
+
+config XEN_NETDEV_GRANT_RX
+ bool "Grant table substrate for net drivers rx path (DANGEROUS)"
+ default y
+ help
+ This introduces the use of grant tables as a data exchange mechanism
+ between the frontend and backend network drivers on the receive path.
+
config XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER
bool "Pipelined transmitter (DANGEROUS)"
depends on XEN_NETDEV_FRONTEND
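
To make the new interface concrete, here is a rough sketch (not part of the
patch) of what the frontend tx path does when XEN_NETDEV_GRANT_TX is enabled.
It only uses the gnttab helpers from asm-xen/gnttab.h that the netfront
changes below rely on, with error handling reduced to BUG():

    /* Sketch: grant the backend read-only access to one tx buffer.
     * gref_tx_head/gref_tx_terminal are a reference pool set up once via
     * gnttab_alloc_grant_references(); rdomid is the backend domain id. */
    int ref = gnttab_claim_grant_reference(&gref_tx_head, gref_tx_terminal);
    if (ref < 0)
        BUG();                /* reference pool exhausted */
    gnttab_grant_foreign_access_ref(ref, rdomid,
        virt_to_machine(skb->data) >> PAGE_SHIFT, GNTMAP_readonly);
    /* The ring request carries the grant reference plus the in-page
     * offset instead of a raw machine address: */
    tx->addr = ((unsigned long)ref << PAGE_SHIFT) |
               ((unsigned long)skb->data & ~PAGE_MASK);
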
diff -Nru a/linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c b/linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c
--- a/linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c 2005-06-02 15:02:46 +01:00
+++ b/linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c 2005-06-02 15:02:46 +01:00
@@ -165,8 +165,14 @@
u16 flags;
flags = shared[ref].flags;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ /*
+ * Can't flags already be (GTF_accept_transfer | GTF_transfer_completed)
+ * here if gnttab_donate executes without interruption? If so, the
+ * assertion below no longer holds and is compiled out.
+ */
+#else
ASSERT(flags == (GTF_accept_transfer | GTF_transfer_committed));
-
+#endif
/*
* If a transfer is committed then wait for the frame address to appear.
* Otherwise invalidate the grant entry against future use.
diff -Nru a/linux-2.6.11-xen-sparse/drivers/xen/netback/common.h b/linux-2.6.11-xen-sparse/drivers/xen/netback/common.h
--- a/linux-2.6.11-xen-sparse/drivers/xen/netback/common.h 2005-06-02 15:02:46 +01:00
+++ b/linux-2.6.11-xen-sparse/drivers/xen/netback/common.h 2005-06-02 15:02:46 +01:00
@@ -50,6 +50,9 @@
/* Private indexes into shared ring. */
NETIF_RING_IDX rx_req_cons;
NETIF_RING_IDX rx_resp_prod; /* private version of shared variable */
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ NETIF_RING_IDX rx_resp_prod_copy; /* private version of shared variable */
+#endif
NETIF_RING_IDX tx_req_cons;
NETIF_RING_IDX tx_resp_prod; /* private version of shared variable */
diff -Nru a/linux-2.6.11-xen-sparse/drivers/xen/netback/netback.c b/linux-2.6.11-xen-sparse/drivers/xen/netback/netback.c
--- a/linux-2.6.11-xen-sparse/drivers/xen/netback/netback.c 2005-06-02 15:02:46 +01:00
+++ b/linux-2.6.11-xen-sparse/drivers/xen/netback/netback.c 2005-06-02 15:02:46 +01:00
@@ -18,6 +18,24 @@
#include <linux/delay.h>
#endif
+#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
+#include <asm-xen/xen-public/grant_table.h>
+#include <asm-xen/gnttab.h>
+#ifdef GRANT_DEBUG
+static void
+dump_packet(int tag, u32 addr, unsigned char *p)
+{
+ int i;
+
+ printk(KERN_ALERT "#### rx_action %c %08x ", tag & 0xff, addr);
+ for (i = 0; i < 20; i++) {
+ printk("%02x", p[i]);
+ }
+ printk("\n");
+}
+#endif
+#endif
+
static void netif_idx_release(u16 pending_idx);
static void netif_page_release(struct page *page);
static void make_tx_response(netif_t *netif,
@@ -41,7 +59,9 @@
static struct sk_buff_head rx_queue;
static multicall_entry_t rx_mcl[NETIF_RX_RING_SIZE*2+1];
static mmu_update_t rx_mmu[NETIF_RX_RING_SIZE];
+#ifndef CONFIG_XEN_NETDEV_GRANT_RX
static struct mmuext_op rx_mmuext[NETIF_RX_RING_SIZE];
+#endif
static unsigned char rx_notify[NR_EVENT_CHANNELS];
/* Don't currently gate addition of an interface to the tx scheduling list. */
@@ -68,7 +88,21 @@
static PEND_RING_IDX dealloc_prod, dealloc_cons;
static struct sk_buff_head tx_queue;
+
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+static gnttab_op_t grant_tx_op[MAX_PENDING_REQS];
+static u16 grant_tx_ref[MAX_PENDING_REQS];
+#endif
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+static gnttab_op_t grant_rx_op[MAX_PENDING_REQS];
+#endif
+#ifndef CONFIG_XEN_NETDEV_GRANT_TX
static multicall_entry_t tx_mcl[MAX_PENDING_REQS];
+#endif
+
+#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
+#define GRANT_INVALID_REF (0xFFFF)
+#endif
static struct list_head net_schedule_list;
static spinlock_t net_schedule_list_lock;
@@ -91,6 +125,7 @@
return mfn;
}
+#ifndef CONFIG_XEN_NETDEV_GRANT_RX
static void free_mfn(unsigned long mfn)
{
unsigned long flags;
@@ -102,6 +137,7 @@
BUG();
spin_unlock_irqrestore(&mfn_lock, flags);
}
+#endif
static inline void maybe_schedule_tx_action(void)
{
@@ -160,7 +196,17 @@
dev_kfree_skb(skb);
skb = nskb;
}
-
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+#ifdef GRANT_DEBUG
+ printk(KERN_ALERT "#### be_xmit: req_prod=%d req_cons=%d id=%04x gr=%04x\n",
+ netif->rx->req_prod,
+ netif->rx_req_cons,
+ netif->rx->ring[
+ MASK_NETIF_RX_IDX(netif->rx_req_cons)].req.id,
+ netif->rx->ring[
+ MASK_NETIF_RX_IDX(netif->rx_req_cons)].req.gref);
+#endif
+#endif
netif->rx_req_cons++;
netif_get(netif);
@@ -201,7 +247,11 @@
u16 size, id, evtchn;
multicall_entry_t *mcl;
mmu_update_t *mmu;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ gnttab_op_t *gop;
+#else
struct mmuext_op *mmuext;
+#endif
unsigned long vdata, mdata, new_mfn;
struct sk_buff_head rxq;
struct sk_buff *skb;
@@ -212,7 +262,12 @@
mcl = rx_mcl;
mmu = rx_mmu;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ gop = grant_rx_op;
+#else
mmuext = rx_mmuext;
+#endif
+
while ( (skb = skb_dequeue(&rx_queue)) != NULL )
{
netif = netdev_priv(skb->dev);
@@ -228,7 +283,6 @@
skb_queue_head(&rx_queue, skb);
break;
}
-
/*
* Set the new P2M table entry before reassigning the old data page.
* Heed the comment in pgtable-2level.h:pte_page(). :-)
@@ -241,6 +295,14 @@
mcl->args[2] = 0;
mcl++;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ gop->u.donate.mfn = mdata >> PAGE_SHIFT;
+ gop->u.donate.domid = netif->domid;
+ gop->u.donate.handle = netif->rx->ring[
+ MASK_NETIF_RX_IDX(netif->rx_resp_prod_copy)].req.gref;
+ netif->rx_resp_prod_copy++;
+ gop++;
+#else
mcl->op = __HYPERVISOR_mmuext_op;
mcl->args[0] = (unsigned long)mmuext;
mcl->args[1] = 1;
@@ -251,13 +313,16 @@
mmuext->cmd = MMUEXT_REASSIGN_PAGE;
mmuext->mfn = mdata >> PAGE_SHIFT;
mmuext++;
-
+#endif
mmu->ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
mmu->val = __pa(vdata) >> PAGE_SHIFT;
mmu++;
__skb_queue_tail(&rxq, skb);
+#ifdef GRANT_DEBUG
+ dump_packet('a', mdata, (unsigned char *)vdata);
+#endif
/* Filled the batch queue? */
if ( (mcl - rx_mcl) == ARRAY_SIZE(rx_mcl) )
break;
@@ -273,12 +338,24 @@
mcl->args[3] = DOMID_SELF;
mcl++;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ mcl[-2].args[2] = UVMF_TLB_FLUSH|UVMF_ALL;
+#else
mcl[-3].args[2] = UVMF_TLB_FLUSH|UVMF_ALL;
+#endif
if ( unlikely(HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl) != 0) )
BUG();
mcl = rx_mcl;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_donate,
+ grant_rx_op, gop - grant_rx_op))) {
+ BUG();
+ }
+ gop = grant_rx_op;
+#else
mmuext = rx_mmuext;
+#endif
while ( (skb = __skb_dequeue(&rxq)) != NULL )
{
netif = netdev_priv(skb->dev);
@@ -286,9 +363,12 @@
/* Rederive the machine addresses. */
new_mfn = mcl[0].args[1] >> PAGE_SHIFT;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ mdata = (unsigned long)skb->data & ~PAGE_MASK;
+#else
mdata = ((mmuext[0].mfn << PAGE_SHIFT) |
((unsigned long)skb->data & ~PAGE_MASK));
-
+#endif
atomic_set(&(skb_shinfo(skb)->dataref), 1);
skb_shinfo(skb)->nr_frags = 0;
skb_shinfo(skb)->frag_list = NULL;
@@ -302,13 +382,18 @@
/* Check the reassignment error code. */
status = NETIF_RSP_OKAY;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ if (unlikely(gop->u.donate.status != 0)) {
+ BUG();
+ }
+#else
if ( unlikely(mcl[1].result != 0) )
{
DPRINTK("Failed MMU update transferring to DOM%u\n", netif->domid);
free_mfn(mdata >> PAGE_SHIFT);
status = NETIF_RSP_ERROR;
}
-
+#endif
evtchn = netif->evtchn;
id = netif->rx->ring[MASK_NETIF_RX_IDX(netif->rx_resp_prod)].req.id;
if ( make_rx_response(netif, id, status, mdata,
@@ -321,9 +406,13 @@
netif_put(netif);
dev_kfree_skb(skb);
-
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ mcl++;
+ gop++;
+#else
mcl += 2;
mmuext += 1;
+#endif
}
while ( notify_nr != 0 )
@@ -407,6 +496,7 @@
netif_schedule_work(netif);
}
+/* Called after netfront has transmitted */
static void net_tx_action(unsigned long unused)
{
struct list_head *ent;
@@ -415,13 +505,36 @@
netif_tx_request_t txreq;
u16 pending_idx;
NETIF_RING_IDX i;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ gnttab_op_t *gop;
+#else
multicall_entry_t *mcl;
+#endif
PEND_RING_IDX dc, dp;
unsigned int data_len;
+
if ( (dc = dealloc_cons) == (dp = dealloc_prod) )
goto skip_dealloc;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ /*
+ * Free up any grants we have finished using
+ */
+ gop = grant_tx_op;
+ while (dc != dp) {
+ pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
+ gop->u.unmap_grant_ref.host_virt_addr = MMAP_VADDR(pending_idx);
+ gop->u.unmap_grant_ref.dev_bus_addr = 0;
+ gop->u.unmap_grant_ref.handle = grant_tx_ref[pending_idx];
+ grant_tx_ref[pending_idx] = GRANT_INVALID_REF;
+ gop++;
+ }
+ if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
+ grant_tx_op, gop - grant_tx_op))) {
+ BUG();
+ }
+#else
mcl = tx_mcl;
while ( dc != dp )
{
@@ -438,11 +551,14 @@
BUG();
mcl = tx_mcl;
+#endif
while ( dealloc_cons != dp )
{
+#ifndef CONFIG_XEN_NETDEV_GRANT_TX
/* The update_va_mapping() must not fail. */
if ( unlikely(mcl[0].result != 0) )
BUG();
+#endif
pending_idx = dealloc_ring[MASK_PEND_IDX(dealloc_cons++)];
@@ -466,11 +582,17 @@
netif_put(netif);
+#ifndef CONFIG_XEN_NETDEV_GRANT_TX
mcl++;
+#endif
}
skip_dealloc:
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ gop = grant_tx_op;
+#else
mcl = tx_mcl;
+#endif
while ( (NR_PENDING_REQS < MAX_PENDING_REQS) &&
!list_empty(&net_schedule_list) )
{
@@ -492,7 +614,6 @@
rmb(); /* Ensure that we see the request before we copy it. */
memcpy(&txreq, &netif->tx->ring[MASK_NETIF_TX_IDX(i)].req,
sizeof(txreq));
-
/* Credit-based scheduling. */
if ( txreq.size > netif->remaining_credit )
{
@@ -572,13 +693,20 @@
/* Packets passed to netif_rx() must have some headroom. */
skb_reserve(skb, 16);
-
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ gop->u.map_grant_ref.host_virt_addr = MMAP_VADDR(pending_idx);
+ gop->u.map_grant_ref.dom = netif->domid;
+ gop->u.map_grant_ref.ref = txreq.addr >> PAGE_SHIFT;
+ gop->u.map_grant_ref.flags = GNTMAP_host_map | GNTMAP_readonly;
+ gop++;
+#else
mcl[0].op = __HYPERVISOR_update_va_mapping_otherdomain;
mcl[0].args[0] = MMAP_VADDR(pending_idx);
mcl[0].args[1] = (txreq.addr & PAGE_MASK) | __PAGE_KERNEL;
mcl[0].args[2] = 0;
mcl[0].args[3] = netif->domid;
mcl++;
+#endif
memcpy(&pending_tx_info[pending_idx].req, &txreq, sizeof(txreq));
pending_tx_info[pending_idx].netif = netif;
@@ -588,11 +716,26 @@
pending_cons++;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ if ((gop - grant_tx_op) >= ARRAY_SIZE(grant_tx_op))
+ break;
+#else
/* Filled the batch queue? */
if ( (mcl - tx_mcl) == ARRAY_SIZE(tx_mcl) )
break;
+#endif
}
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ if (gop == grant_tx_op) {
+ return;
+ }
+ if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
+ grant_tx_op, gop - grant_tx_op))) {
+ BUG();
+ }
+ gop = grant_tx_op;
+#else
if ( mcl == tx_mcl )
return;
@@ -600,6 +743,7 @@
BUG();
mcl = tx_mcl;
+#endif
while ( (skb = __skb_dequeue(&tx_queue)) != NULL )
{
pending_idx = *((u16 *)skb->data);
@@ -607,6 +751,20 @@
memcpy(&txreq, &pending_tx_info[pending_idx].req, sizeof(txreq));
/* Check the remap error code. */
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ if (unlikely(gop->u.map_grant_ref.dev_bus_addr == 0)) {
+ printk(KERN_ALERT "#### netback grant fails\n");
+ make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
+ netif_put(netif);
+ kfree_skb(skb);
+ gop++;
+ pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
+ continue;
+ }
+ phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT] =
+ FOREIGN_FRAME(gop->u.map_grant_ref.dev_bus_addr);
+ grant_tx_ref[pending_idx] = gop->u.map_grant_ref.handle;
+#else
if ( unlikely(mcl[0].result != 0) )
{
DPRINTK("Bad page frame\n");
@@ -620,6 +778,7 @@
phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT] =
FOREIGN_FRAME(txreq.addr >> PAGE_SHIFT);
+#endif
data_len = (txreq.size > PKT_PROT_LEN) ? PKT_PROT_LEN : txreq.size;
@@ -627,7 +786,6 @@
memcpy(skb->data,
(void *)(MMAP_VADDR(pending_idx)|(txreq.addr&~PAGE_MASK)),
data_len);
-
if ( data_len < txreq.size )
{
/* Append the packet payload as a fragment. */
@@ -661,7 +819,11 @@
netif_rx(skb);
netif->dev->last_rx = jiffies;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ gop++;
+#else
mcl++;
+#endif
}
}
@@ -781,6 +943,12 @@
return 0;
printk("Initialising Xen netif backend\n");
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ printk("#### netback tx using grant tables\n");
+#endif
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ printk("#### netback rx using grant tables\n");
+#endif
/* We can increase reservation by this much in net_rx_action(). */
balloon_update_driver_allowance(NETIF_RX_RING_SIZE);
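
On the rx side, the backend no longer reassigns pages with
MMUEXT_REASSIGN_PAGE; it donates each page against the grant reference the
frontend placed in its rx request. A single donate entry looks roughly like
this (sketch; 'gref' stands for the req.gref pulled off the rx ring, and
netback actually batches these in grant_rx_op rather than issuing them one
at a time):

    gnttab_op_t op;
    op.u.donate.mfn    = mdata >> PAGE_SHIFT;  /* frame to hand over   */
    op.u.donate.domid  = netif->domid;         /* receiving guest      */
    op.u.donate.handle = gref;                 /* guest's rx grant ref */
    if (HYPERVISOR_grant_table_op(GNTTABOP_donate, &op, 1))
        BUG();
    /* op.u.donate.status is GNTST_okay on success. */
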
diff -Nru a/linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c b/linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c
--- a/linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c 2005-06-02 15:02:46 +01:00
+++ b/linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c 2005-06-02 15:02:46 +01:00
@@ -54,6 +54,25 @@
#include <asm/page.h>
#include <asm/uaccess.h>
+#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
+#include <asm-xen/xen-public/grant_table.h>
+#include <asm-xen/gnttab.h>
+#ifdef GRANT_DEBUG
+static void
+dump_packet(int tag, u32 addr, u32 ap)
+{
+ unsigned char *p = (unsigned char *)ap;
+ int i;
+
+ printk(KERN_ALERT "#### rx_poll %c %08x ", tag & 0xff, addr);
+ for (i = 0; i < 20; i++) {
+ printk("%02x", p[i]);
+ }
+ printk("\n");
+}
+#endif
+#endif
+
#ifndef __GFP_NOWARN
#define __GFP_NOWARN 0
#endif
@@ -82,6 +101,21 @@
#define TX_TEST_IDX req_cons /* conservative: not seen all our requests? */
#endif
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+static grant_ref_t gref_tx_head, gref_tx_terminal;
+static grant_ref_t grant_tx_ref[NETIF_TX_RING_SIZE + 1];
+#endif
+
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+static grant_ref_t gref_rx_head, gref_rx_terminal;
+static grant_ref_t grant_rx_ref[NETIF_RX_RING_SIZE + 1];
+#endif
+
+#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
+static domid_t rdomid = 0;
+#define GRANT_INVALID_REF (0xFFFF)
+#endif
+
static void network_tx_buf_gc(struct net_device *dev);
static void network_alloc_rx_buffers(struct net_device *dev);
@@ -322,6 +356,14 @@
for (i = np->tx_resp_cons; i != prod; i++) {
id = np->tx->ring[MASK_NETIF_TX_IDX(i)].resp.id;
skb = np->tx_skbs[id];
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ if (gnttab_query_foreign_access(grant_tx_ref[id]) != 0) {
+ printk(KERN_ALERT "netfront: query foreign access\n");
+ }
+ gnttab_end_foreign_access(grant_tx_ref[id], GNTMAP_readonly);
+ gnttab_release_grant_reference(&gref_tx_head, grant_tx_ref[id]);
+ grant_tx_ref[id] = GRANT_INVALID_REF;
+#endif
ADD_ID_TO_FREELIST(np->tx_skbs, id);
dev_kfree_skb_irq(skb);
}
@@ -356,6 +398,9 @@
struct sk_buff *skb;
int i, batch_target;
NETIF_RING_IDX req_prod = np->rx->req_prod;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ int ref;
+#endif
if (unlikely(np->backend_state != BEST_CONNECTED))
return;
@@ -388,7 +433,16 @@
np->rx_skbs[id] = skb;
np->rx->ring[MASK_NETIF_RX_IDX(req_prod + i)].req.id = id;
-
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ if ((ref = gnttab_claim_grant_reference(&gref_rx_head, gref_rx_terminal)) < 0) {
+ printk(KERN_ALERT "#### netfront can't claim rx reference\n");
+ BUG();
+ }
+ grant_rx_ref[id] = ref;
+ gnttab_grant_foreign_transfer_ref(ref, rdomid,
+ virt_to_machine(skb->head) >> PAGE_SHIFT);
+ np->rx->ring[MASK_NETIF_RX_IDX(req_prod + i)].req.gref = ref;
+#endif
rx_pfn_array[i] = virt_to_machine(skb->head) >> PAGE_SHIFT;
/* Remove this page from pseudo phys map before passing back to Xen. */
@@ -438,6 +492,10 @@
struct net_private *np = netdev_priv(dev);
netif_tx_request_t *tx;
NETIF_RING_IDX i;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ int ref; /* signed: gnttab_claim_grant_reference() returns <0 on failure */
+ unsigned long mfn;
+#endif
if (unlikely(np->tx_full)) {
printk(KERN_ALERT "%s: full queue wasn't stopped!\n", dev->name);
@@ -472,7 +530,18 @@
tx = &np->tx->ring[MASK_NETIF_TX_IDX(i)].req;
tx->id = id;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ if ((ref = gnttab_claim_grant_reference(&gref_tx_head, gref_tx_terminal)) < 0) {
+ printk(KERN_ALERT "#### netfront can't claim tx grant reference\n");
+ BUG();
+ }
+ mfn = virt_to_machine(skb->data) >> PAGE_SHIFT;
+ gnttab_grant_foreign_access_ref(ref, rdomid, mfn, GNTMAP_readonly);
+ tx->addr = (ref << PAGE_SHIFT) | ((unsigned long)skb->data & ~PAGE_MASK);
+ grant_tx_ref[id] = ref;
+#else
tx->addr = virt_to_machine(skb->data);
+#endif
tx->size = skb->len;
tx->csum_blank = (skb->ip_summed == CHECKSUM_HW);
@@ -532,6 +601,10 @@
int work_done, budget, more_to_do = 1;
struct sk_buff_head rxq;
unsigned long flags;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ unsigned long mfn;
+ grant_ref_t ref;
+#endif
spin_lock(&np->rx_lock);
@@ -544,7 +617,6 @@
if ((budget = *pbudget) > dev->quota)
budget = dev->quota;
-
rp = np->rx->resp_prod;
rmb(); /* Ensure we see queued responses up to 'rp'. */
@@ -552,7 +624,6 @@
(i != rp) && (work_done < budget);
i++, work_done++) {
rx = &np->rx->ring[MASK_NETIF_RX_IDX(i)].resp;
-
/*
* An error here is very odd. Usually indicates a backend bug,
* low-memory condition, or that we didn't have reservation headroom.
@@ -567,11 +638,23 @@
continue;
}
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ ref = grant_rx_ref[rx->id];
+ grant_rx_ref[rx->id] = GRANT_INVALID_REF;
+
+ mfn = gnttab_end_foreign_transfer(ref);
+ gnttab_release_grant_reference(&gref_rx_head, ref);
+#endif
+
skb = np->rx_skbs[rx->id];
ADD_ID_TO_FREELIST(np->rx_skbs, rx->id);
/* NB. We handle skb overflow later. */
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ skb->data = skb->head + rx->addr;
+#else
skb->data = skb->head + (rx->addr & ~PAGE_MASK);
+#endif
skb->len = rx->status;
skb->tail = skb->data + skb->len;
@@ -582,18 +665,33 @@
np->stats.rx_bytes += rx->status;
/* Remap the page. */
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ mmu->ptr = mfn << PAGE_SHIFT | MMU_MACHPHYS_UPDATE;
+#else
mmu->ptr = (rx->addr & PAGE_MASK) | MMU_MACHPHYS_UPDATE;
+#endif
mmu->val = __pa(skb->head) >> PAGE_SHIFT;
mmu++;
mcl->op = __HYPERVISOR_update_va_mapping;
mcl->args[0] = (unsigned long)skb->head;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ mcl->args[1] = (mfn << PAGE_SHIFT) | __PAGE_KERNEL;
+#else
mcl->args[1] = (rx->addr & PAGE_MASK) | __PAGE_KERNEL;
+#endif
mcl->args[2] = 0;
mcl++;
phys_to_machine_mapping[__pa(skb->head) >> PAGE_SHIFT] =
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ mfn;
+#else
rx->addr >> PAGE_SHIFT;
-
+#endif
+#ifdef GRANT_DEBUG
+ printk(KERN_ALERT "#### rx_poll enqueue vdata=%08x mfn=%08x
ref=%04x\n",
+ skb->data, mfn, ref);
+#endif
__skb_queue_tail(&rxq, skb);
}
@@ -612,6 +710,11 @@
}
while ((skb = __skb_dequeue(&rxq)) != NULL) {
+#ifdef GRANT_DEBUG
+ printk(KERN_ALERT "#### rx_poll dequeue vdata=%08x mfn=%08x\n",
+ skb->data, virt_to_machine(skb->data)>>PAGE_SHIFT);
+ dump_packet('d', skb->data, (unsigned long)skb->data);
+#endif
/*
* Enough room in skbuff for the data we were passed? Also, Linux
* expects at least 16 bytes headroom in each receive buffer.
@@ -620,6 +723,7 @@
unlikely((skb->data - skb->head) < 16)) {
nskb = NULL;
+
/* Only copy the packet if it fits in the current MTU. */
if (skb->len <= (dev->mtu + ETH_HLEN)) {
if ((skb->tail > skb->end) && net_ratelimit())
@@ -650,7 +754,6 @@
/* Set the shared-info area, which is hidden behind the real data. */
init_skb_shinfo(skb);
-
/* Ethernet-specific work. Delayed to here as it peeks the header. */
skb->protocol = eth_type_trans(skb, dev);
@@ -923,6 +1026,9 @@
network_connect(dev, status);
np->evtchn = status->evtchn;
np->irq = bind_evtchn_to_irq(np->evtchn);
+#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
+ rdomid = status->domid;
+#endif
(void)request_irq(np->irq, netif_int, SA_SAMPLE_RANDOM, dev->name, dev);
netctrl_connected_count();
(void)send_fake_arp(dev);
@@ -966,10 +1072,18 @@
np->rx_max_target = RX_MAX_TARGET;
/* Initialise {tx,rx}_skbs to be a free chain containing every entry. */
- for (i = 0; i <= NETIF_TX_RING_SIZE; i++)
+ for (i = 0; i <= NETIF_TX_RING_SIZE; i++) {
np->tx_skbs[i] = (void *)(i+1);
- for (i = 0; i <= NETIF_RX_RING_SIZE; i++)
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ grant_tx_ref[i] = GRANT_INVALID_REF;
+#endif
+ }
+ for (i = 0; i <= NETIF_RX_RING_SIZE; i++) {
np->rx_skbs[i] = (void *)(i+1);
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ grant_rx_ref[i] = GRANT_INVALID_REF;
+#endif
+ }
dev->open = network_open;
dev->hard_start_xmit = network_start_xmit;
@@ -1271,6 +1385,22 @@
if (xen_start_info.flags & SIF_INITDOMAIN)
return 0;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ if (gnttab_alloc_grant_references(NETIF_TX_RING_SIZE,
+ &gref_tx_head, &gref_tx_terminal) < 0) {
+ printk(KERN_ALERT "#### netfront can't alloc tx grant refs\n");
+ return 1;
+ }
+ printk(KERN_ALERT "#### netfront tx using grant tables\n");
+#endif
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ if (gnttab_alloc_grant_references(NETIF_RX_RING_SIZE,
+ &gref_rx_head, &gref_rx_terminal) < 0) {
+ printk(KERN_ALERT "#### netfront can't alloc rx grant refs\n");
+ return 1;
+ }
+ printk(KERN_ALERT "#### netfront rx using grant tables\n");
+#endif
if ((err = xennet_proc_init()) != 0)
return err;
@@ -1290,6 +1420,16 @@
return err;
}
+static void netif_exit(void)
+{
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ gnttab_free_grant_references(NETIF_TX_RING_SIZE, gref_tx_head);
+#endif
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ gnttab_free_grant_references(NETIF_RX_RING_SIZE, gref_rx_head);
+#endif
+}
+
static void vif_suspend(struct net_private *np)
{
/* Avoid having tx/rx stuff happen until we're ready. */
@@ -1482,3 +1622,4 @@
#endif
module_init(netif_init);
+module_exit(netif_exit);
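
To summarise the frontend side, each grant reference now follows this
lifecycle (a sketch of the calls used above):

    /* tx path:
     *   init:        gnttab_alloc_grant_references(NETIF_TX_RING_SIZE,
     *                    &gref_tx_head, &gref_tx_terminal);
     *   per packet:  ref = gnttab_claim_grant_reference(&gref_tx_head,
     *                    gref_tx_terminal);
     *                gnttab_grant_foreign_access_ref(ref, rdomid, mfn,
     *                    GNTMAP_readonly);
     *   on response: gnttab_end_foreign_access(ref, GNTMAP_readonly);
     *                gnttab_release_grant_reference(&gref_tx_head, ref);
     *   exit:        gnttab_free_grant_references(NETIF_TX_RING_SIZE,
     *                    gref_tx_head);
     *
     * rx path: same claim/release pattern, but using
     * gnttab_grant_foreign_transfer_ref() before posting a buffer and
     * gnttab_end_foreign_transfer() to pick up the donated mfn. */
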
diff -Nru a/xen/common/grant_table.c b/xen/common/grant_table.c
--- a/xen/common/grant_table.c 2005-06-02 15:02:46 +01:00
+++ b/xen/common/grant_table.c 2005-06-02 15:02:46 +01:00
@@ -797,6 +797,146 @@
}
#endif
+static long
+gnttab_donate(gnttab_op_t *uop, unsigned int count)
+{
+ struct domain *d = current->domain;
+ struct domain *e;
+ struct pfn_info *page;
+ u32 _d, _nd, x, y;
+ int i;
+ int result = GNTST_okay;
+
+ for (i = 0; i < count; i++) {
+ gnttab_donate_t *gop = &uop[i].u.donate;
+#if GRANT_DEBUG
+ printk("gnttab_donate: i=%d mfn=%08x domid=%d gref=%08x\n",
+ i, gop->mfn, gop->domid, gop->handle);
+#endif
+ page = &frame_table[gop->mfn];
+
+ if (unlikely(IS_XEN_HEAP_FRAME(page))) {
+ printk("gnttab_donate: xen heap frame mfn=%08x\n", gop->mfn);
+ gop->status = GNTST_bad_virt_addr;
+ continue;
+ }
+ if (unlikely(!pfn_valid(page_to_pfn(page)))) {
+ printk("gnttab_donate: invalid pfn for mfn=%08x\n", gop->mfn);
+ gop->status = GNTST_bad_virt_addr;
+ continue;
+ }
+ if (unlikely((e = find_domain_by_id(gop->domid)) == NULL)) {
+ printk("gnttab_donate: can't find domain %d\n", gop->domid);
+ gop->status = GNTST_bad_domain;
+ continue;
+ }
+
+ spin_lock(&d->page_alloc_lock);
+
+ /*
+ * The tricky bit: atomically release ownership while
+ * there is just one benign reference to the page
+ * (PGC_allocated). If that reference disappears then the
+ * deallocation routine will safely spin.
+ */
+ _d = pickle_domptr(d);
+ _nd = page->u.inuse._domain;
+ y = page->count_info;
+ do {
+ x = y;
+ if (unlikely((x & (PGC_count_mask|PGC_allocated)) !=
+ (1 | PGC_allocated)) || unlikely(_nd != _d)) {
+ printk("gnttab_donate: Bad page values %p: ed=%p(%u), sd=%p,"
+ " caf=%08x, taf=%08x\n", page_to_pfn(page),
+ d, d->id, unpickle_domptr(_nd), x,
+ page->u.inuse.type_info);
+ spin_unlock(&d->page_alloc_lock);
+ put_domain(e);
+ return 0;
+ }
+ __asm__ __volatile__(
+ LOCK_PREFIX "cmpxchg8b %2"
+ : "=d" (_nd), "=a" (y),
+ "=m" (*(volatile u64 *)(&page->count_info))
+ : "0" (_d), "1" (x), "c" (NULL), "b" (x) );
+ } while (unlikely(_nd != _d) || unlikely(y != x));
+
+ /*
+ * Unlink from 'd'. At least one reference remains (now
+ * anonymous), so no one else is spinning to try to delete
+ * this page from 'd'.
+ */
+ d->tot_pages--;
+ list_del(&page->list);
+
+ spin_unlock(&d->page_alloc_lock);
+
+ spin_lock(&e->page_alloc_lock);
+
+ /*
+ * Check that 'e' will accept the page and has reservation
+ * headroom. Also, a domain mustn't have PGC_allocated
+ * pages when it is dying.
+ */
+#ifdef GRANT_DEBUG
+ if (unlikely(e->tot_pages >= e->max_pages)) {
+ printk("gnttab_dontate: no headroom tot_pages=%d max_pages=%d\n",
+ e->tot_pages, e->max_pages);
+ spin_unlock(&e->page_alloc_lock);
+ put_domain(e);
+ result = GNTST_general_error;
+ break;
+ }
+ if (unlikely(test_bit(DF_DYING, &e->d_flags))) {
+ printk("gnttab_donate: target domain is dying\n");
+ spin_unlock(&e->page_alloc_lock);
+ put_domain(e);
+ result = GNTST_general_error;
+ break;
+ }
+ if (unlikely(!gnttab_prepare_for_transfer(e, d, gop->handle))) {
+ printk("gnttab_donate: gnttab_prepare_for_transfer fails\n");
+ spin_unlock(&e->page_alloc_lock);
+ put_domain(e);
+ result = GNTST_general_error;
+ break;
+ }
+#else
+ ASSERT(e->tot_pages <= e->max_pages);
+ if (unlikely(test_bit(DF_DYING, &e->d_flags)) ||
+ unlikely(e->tot_pages == e->max_pages) ||
+ unlikely(!gnttab_prepare_for_transfer(e, d, gop->handle))) {
+ printk("gnttab_donate: Transferee has no reservation headroom
(%d,%d), or "
+ "provided a bad grant ref (%08x), or is dying (%p).\n",
+ e->tot_pages, e->max_pages, gop->handle, e->d_flags);
+ spin_unlock(&e->page_alloc_lock);
+ put_domain(e);
+ result = GNTST_general_error;
+ break;
+ }
+#endif
+ /* Okay, add the page to 'e'. */
+ if (unlikely(e->tot_pages++ == 0)) {
+ get_knownalive_domain(e);
+ }
+ list_add_tail(&page->list, &e->page_list);
+ page_set_owner(page, e);
+
+ spin_unlock(&e->page_alloc_lock);
+
+ /*
+ * Transfer is all done: tell the guest about its new page
+ * frame.
+ */
+ gnttab_notify_transfer(e, d, gop->handle, gop->mfn);
+
+ put_domain(e);
+
+ gop->status = GNTST_okay;
+ }
+ return result;
+}
+
long
do_grant_table_op(
unsigned int cmd, void *uop, unsigned int count)
@@ -831,6 +971,13 @@
rc = gnttab_dump_table((gnttab_dump_table_t *)uop);
break;
#endif
+ case GNTTABOP_donate:
+ if (unlikely(!array_access_ok(VERIFY_WRITE, uop, count,
+ sizeof(gnttab_op_t)))) {
+ goto out;
+ }
+ rc = gnttab_donate(uop, count);
+ break;
default:
rc = -ENOSYS;
break;
@@ -1066,6 +1213,10 @@
}
sha->frame = __mfn_to_gpfn(rd, frame);
sha->domid = rd->domain_id;
+#ifdef GRANT_DEBUG
+ printk("gnttab_notify: ref=%08x src=%08x dest=%08x mfn=%08x\n",
+ ref, frame, pfn, sha->frame);
+#endif
wmb();
sha->flags = ( GTF_accept_transfer | GTF_transfer_completed );
diff -Nru a/xen/include/public/grant_table.h b/xen/include/public/grant_table.h
--- a/xen/include/public/grant_table.h 2005-06-02 15:02:46 +01:00
+++ b/xen/include/public/grant_table.h 2005-06-02 15:02:46 +01:00
@@ -220,6 +220,19 @@
s16 status; /* 2: GNTST_* */
} PACKED gnttab_dump_table_t; /* 4 bytes */
+/*
+ * GNTTABOP_donate: Donate <frame> to a foreign domain. The
+ * foreign domain has previously registered the details of the transfer.
+ * These can be identified from <handle>, a grant reference.
+ */
+#define GNTTABOP_donate 4
+typedef struct {
+ memory_t mfn; /* 0 */
+ domid_t domid; /* 4 */
+ u16 handle; /* 8 */
+ s16 status; /* 10: GNTST_* */
+ u32 __pad;
+} PACKED gnttab_donate_t; /* 14 bytes */
/*
* Bitfield values for update_pin_status.flags.
@@ -273,6 +286,7 @@
gnttab_unmap_grant_ref_t unmap_grant_ref;
gnttab_setup_table_t setup_table;
gnttab_dump_table_t dump_table;
+ gnttab_donate_t donate;
u8 __dummy[24];
} PACKED u;
} PACKED gnttab_op_t; /* 32 bytes */
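
The donate operation pairs with a transfer grant registered in advance by
the receiver. Roughly (a sketch from the frontend's perspective;
'backend_domid' stands for whatever rdomid holds after connect):

    /* Receiver registers a transfer grant for the page it is posting: */
    gnttab_grant_foreign_transfer_ref(ref, backend_domid,
        virt_to_machine(skb->head) >> PAGE_SHIFT);
    /* The donor then issues GNTTABOP_donate against that reference (see
     * gnttab_donate above); once GTF_transfer_completed is set, the
     * receiver collects the new frame: */
    mfn = gnttab_end_foreign_transfer(ref);
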
diff -Nru a/xen/include/public/io/netif.h b/xen/include/public/io/netif.h
--- a/xen/include/public/io/netif.h 2005-06-02 15:02:46 +01:00
+++ b/xen/include/public/io/netif.h 2005-06-02 15:02:46 +01:00
@@ -25,10 +25,13 @@
typedef struct {
u16 id; /* 0: Echoed in response message. */
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ grant_ref_t gref; /* 2: Reference to incoming granted frame */
+#endif
} PACKED netif_rx_request_t; /* 2 bytes */
typedef struct {
- memory_t addr; /* 0: Machine address of packet. */
+ u32 addr; /* 0: Offset in page of start of received packet */
MEMORY_PADDING;
u16 csum_valid:1; /* Protocol checksum is validated? */
u16 id:15; /* 8: */
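
Note the knock-on effect of this last hunk: because the rx page is donated
in place rather than reassigned by machine address, the response's addr
field now only carries the offset of the packet within its page, and
netfront locates the payload as:

    skb->data = skb->head + rx->addr;  /* was skb->head + (rx->addr & ~PAGE_MASK) */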