WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

Re: [Xen-devel] netif & grant tables

To: Matt Chapman <matthewc@xxxxxx>
Subject: Re: [Xen-devel] netif & grant tables
From: Vincent Hanquez <vincent.hanquez@xxxxxxxxxxxx>
Date: Fri, 1 Jul 2005 12:08:42 +0200
Cc: xen-devel@xxxxxxxxxxxxxxxxxxx
Delivery-date: Fri, 01 Jul 2005 10:07:35 +0000
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
In-reply-to: <20050701022854.GA32494@xxxxxxxxxxxxxxx>
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
References: <20050701022854.GA32494@xxxxxxxxxxxxxxx>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
User-agent: Mutt/1.5.9i
On Thu, Jun 30, 2005 at 08:28:54PM -0600, Matt Chapman wrote:
> I'm currently looking at getting domU networking working
> on IA64, and to do this I need to make netback/netfront
> use grant tables.
> 
> I'm told that there's already a patch floating around,
> can someone tell me where to find it?

Hi Matt,

Here is the patch for having grant tables with netback and netfront.

diff -Nru a/linux-2.6.11-xen-sparse/arch/xen/Kconfig 
b/linux-2.6.11-xen-sparse/arch/xen/Kconfig
--- a/linux-2.6.11-xen-sparse/arch/xen/Kconfig  2005-06-02 15:02:46 +01:00
+++ b/linux-2.6.11-xen-sparse/arch/xen/Kconfig  2005-06-02 15:02:46 +01:00
@@ -97,6 +97,20 @@
          dedicated device-driver domain, or your master control domain
          (domain 0), then you almost certainly want to say Y here.
 
+config XEN_NETDEV_GRANT_TX
+        bool "Grant table substrate for net drivers tx path (DANGEROUS)"
+        default y
+        help
+          This introduces the use of grant tables as a data exchange mechanism
+          between the frontend and backend network drivers.
+
+config XEN_NETDEV_GRANT_RX
+        bool "Grant table substrate for net drivers rx path (DANGEROUS)"
+        default y
+        help
+          This introduces the use of grant tables as a data exchange mechanism
+          between the frontend and backend network drivers.
+
 config XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER
        bool "Pipelined transmitter (DANGEROUS)"
        depends on XEN_NETDEV_FRONTEND
diff -Nru a/linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c 
b/linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c
--- a/linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c  2005-06-02 15:02:46 
+01:00
+++ b/linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c  2005-06-02 15:02:46 
+01:00
@@ -165,8 +165,14 @@
     u16           flags;
 
     flags = shared[ref].flags;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+    /*
+     * Skip the ASSERT below: flags may be (GTF_accept_transfer |
+     * GTF_transfer_completed) if gnttab_donate executed without interruption.
+     */
+#else
     ASSERT(flags == (GTF_accept_transfer | GTF_transfer_committed));
-
+#endif
     /*
      * If a transfer is committed then wait for the frame address to appear.
      * Otherwise invalidate the grant entry against future use.
diff -Nru a/linux-2.6.11-xen-sparse/drivers/xen/netback/common.h 
b/linux-2.6.11-xen-sparse/drivers/xen/netback/common.h
--- a/linux-2.6.11-xen-sparse/drivers/xen/netback/common.h      2005-06-02 
15:02:46 +01:00
+++ b/linux-2.6.11-xen-sparse/drivers/xen/netback/common.h      2005-06-02 
15:02:46 +01:00
@@ -50,6 +50,9 @@
     /* Private indexes into shared ring. */
     NETIF_RING_IDX rx_req_cons;
     NETIF_RING_IDX rx_resp_prod; /* private version of shared variable */
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+    NETIF_RING_IDX rx_resp_prod_copy; /* private version of shared variable */
+#endif
     NETIF_RING_IDX tx_req_cons;
     NETIF_RING_IDX tx_resp_prod; /* private version of shared variable */
 
diff -Nru a/linux-2.6.11-xen-sparse/drivers/xen/netback/netback.c 
b/linux-2.6.11-xen-sparse/drivers/xen/netback/netback.c
--- a/linux-2.6.11-xen-sparse/drivers/xen/netback/netback.c     2005-06-02 
15:02:46 +01:00
+++ b/linux-2.6.11-xen-sparse/drivers/xen/netback/netback.c     2005-06-02 
15:02:46 +01:00
@@ -18,6 +18,24 @@
 #include <linux/delay.h>
 #endif
 
+#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
+#include <asm-xen/xen-public/grant_table.h>
+#include <asm-xen/gnttab.h>
+#ifdef GRANT_DEBUG
+static void
+dump_packet(int tag, u32 addr, unsigned char *p)
+{
+       int i;
+
+       printk(KERN_ALERT "#### rx_action %c %08x ", tag & 0xff, addr);
+       for (i = 0; i < 20; i++) {
+               printk("%02x", p[i]);
+       }
+       printk("\n");
+}
+#endif
+#endif
+
 static void netif_idx_release(u16 pending_idx);
 static void netif_page_release(struct page *page);
 static void make_tx_response(netif_t *netif, 
@@ -41,7 +59,9 @@
 static struct sk_buff_head rx_queue;
 static multicall_entry_t rx_mcl[NETIF_RX_RING_SIZE*2+1];
 static mmu_update_t rx_mmu[NETIF_RX_RING_SIZE];
+#ifndef CONFIG_XEN_NETDEV_GRANT_RX
 static struct mmuext_op rx_mmuext[NETIF_RX_RING_SIZE];
+#endif
 static unsigned char rx_notify[NR_EVENT_CHANNELS];
 
 /* Don't currently gate addition of an interface to the tx scheduling list. */
@@ -68,7 +88,21 @@
 static PEND_RING_IDX dealloc_prod, dealloc_cons;
 
 static struct sk_buff_head tx_queue;
+
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+static gnttab_op_t grant_tx_op[MAX_PENDING_REQS];
+static u16 grant_tx_ref[MAX_PENDING_REQS];
+#endif
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+static gnttab_op_t grant_rx_op[MAX_PENDING_REQS];
+#endif
+#ifndef CONFIG_XEN_NETDEV_GRANT_TX
 static multicall_entry_t tx_mcl[MAX_PENDING_REQS];
+#endif
+
+#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
+#define GRANT_INVALID_REF (0xFFFF)
+#endif
 
 static struct list_head net_schedule_list;
 static spinlock_t net_schedule_list_lock;
@@ -91,6 +125,7 @@
     return mfn;
 }
 
+#ifndef CONFIG_XEN_NETDEV_GRANT_RX
 static void free_mfn(unsigned long mfn)
 {
     unsigned long flags;
@@ -102,6 +137,7 @@
         BUG();
     spin_unlock_irqrestore(&mfn_lock, flags);
 }
+#endif
 
 static inline void maybe_schedule_tx_action(void)
 {
@@ -160,7 +196,17 @@
         dev_kfree_skb(skb);
         skb = nskb;
     }
-
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+#ifdef DEBUG_GRANT
+    printk(KERN_ALERT "#### be_xmit: req_prod=%d req_cons=%d id=%04x 
gr=%04x\n",
+           netif->rx->req_prod,
+           netif->rx_req_cons,
+           netif->rx->ring[
+                  MASK_NETIF_RX_IDX(netif->rx_req_cons)].req.id,
+           netif->rx->ring[
+                  MASK_NETIF_RX_IDX(netif->rx_req_cons)].req.gref);
+#endif
+#endif
     netif->rx_req_cons++;
     netif_get(netif);
 
@@ -201,7 +247,11 @@
     u16 size, id, evtchn;
     multicall_entry_t *mcl;
     mmu_update_t *mmu;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+    gnttab_op_t *gop;
+#else
     struct mmuext_op *mmuext;
+#endif
     unsigned long vdata, mdata, new_mfn;
     struct sk_buff_head rxq;
     struct sk_buff *skb;
@@ -212,7 +262,12 @@
 
     mcl = rx_mcl;
     mmu = rx_mmu;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+    gop = grant_rx_op;
+#else
     mmuext = rx_mmuext;
+#endif
+
     while ( (skb = skb_dequeue(&rx_queue)) != NULL )
     {
         netif   = netdev_priv(skb->dev);
@@ -228,7 +283,6 @@
             skb_queue_head(&rx_queue, skb);
             break;
         }
-
         /*
          * Set the new P2M table entry before reassigning the old data page.
          * Heed the comment in pgtable-2level.h:pte_page(). :-)
@@ -241,6 +295,14 @@
         mcl->args[2] = 0;
         mcl++;
 
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+        gop->u.donate.mfn = mdata >> PAGE_SHIFT;
+        gop->u.donate.domid = netif->domid;
+        gop->u.donate.handle = netif->rx->ring[
+        MASK_NETIF_RX_IDX(netif->rx_resp_prod_copy)].req.gref;
+        netif->rx_resp_prod_copy++;
+        gop++;
+#else
         mcl->op = __HYPERVISOR_mmuext_op;
         mcl->args[0] = (unsigned long)mmuext;
         mcl->args[1] = 1;
@@ -251,13 +313,16 @@
         mmuext->cmd = MMUEXT_REASSIGN_PAGE;
         mmuext->mfn = mdata >> PAGE_SHIFT;
         mmuext++;
-
+#endif
         mmu->ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
         mmu->val = __pa(vdata) >> PAGE_SHIFT;  
         mmu++;
 
         __skb_queue_tail(&rxq, skb);
 
+#ifdef DEBUG_GRANT
+        dump_packet('a', mdata, vdata);
+#endif
         /* Filled the batch queue? */
         if ( (mcl - rx_mcl) == ARRAY_SIZE(rx_mcl) )
             break;
@@ -273,12 +338,24 @@
     mcl->args[3] = DOMID_SELF;
     mcl++;
 
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+    mcl[-2].args[2] = UVMF_TLB_FLUSH|UVMF_ALL;
+#else
     mcl[-3].args[2] = UVMF_TLB_FLUSH|UVMF_ALL;
+#endif
     if ( unlikely(HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl) != 0) )
         BUG();
 
     mcl = rx_mcl;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+    if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_donate,
+                                           grant_rx_op, gop - grant_rx_op))) {
+        BUG();
+    }
+    gop = grant_rx_op;
+#else
     mmuext = rx_mmuext;
+#endif
     while ( (skb = __skb_dequeue(&rxq)) != NULL )
     {
         netif   = netdev_priv(skb->dev);
@@ -286,9 +363,12 @@
 
         /* Rederive the machine addresses. */
         new_mfn = mcl[0].args[1] >> PAGE_SHIFT;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+        mdata = (unsigned long)skb->data & ~PAGE_MASK;
+#else
         mdata   = ((mmuext[0].mfn << PAGE_SHIFT) |
                    ((unsigned long)skb->data & ~PAGE_MASK));
-        
+#endif
         atomic_set(&(skb_shinfo(skb)->dataref), 1);
         skb_shinfo(skb)->nr_frags = 0;
         skb_shinfo(skb)->frag_list = NULL;
@@ -302,13 +382,18 @@
 
         /* Check the reassignment error code. */
         status = NETIF_RSP_OKAY;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+        if (unlikely(gop->u.donate.status != 0)) {
+            BUG();
+        }
+#else
         if ( unlikely(mcl[1].result != 0) )
         {
             DPRINTK("Failed MMU update transferring to DOM%u\n", netif->domid);
             free_mfn(mdata >> PAGE_SHIFT);
             status = NETIF_RSP_ERROR;
         }
-
+#endif
         evtchn = netif->evtchn;
         id = netif->rx->ring[MASK_NETIF_RX_IDX(netif->rx_resp_prod)].req.id;
         if ( make_rx_response(netif, id, status, mdata,
@@ -321,9 +406,13 @@
 
         netif_put(netif);
         dev_kfree_skb(skb);
-
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+        mcl++;
+        gop++;
+#else
         mcl += 2;
         mmuext += 1;
+#endif
     }
 
     while ( notify_nr != 0 )
@@ -407,6 +496,7 @@
     netif_schedule_work(netif);
 }
 
+/* Called after netfront has transmitted */
 static void net_tx_action(unsigned long unused)
 {
     struct list_head *ent;
@@ -415,13 +505,36 @@
     netif_tx_request_t txreq;
     u16 pending_idx;
     NETIF_RING_IDX i;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+    gnttab_op_t *gop;
+#else
     multicall_entry_t *mcl;
+#endif
     PEND_RING_IDX dc, dp;
     unsigned int data_len;
 
+
     if ( (dc = dealloc_cons) == (dp = dealloc_prod) )
         goto skip_dealloc;
 
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+    /*
+     * Free up any grants we have finished using
+     */
+    gop = grant_tx_op;
+    while (dc != dp) {
+        pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
+        gop->u.unmap_grant_ref.host_virt_addr = MMAP_VADDR(pending_idx);
+        gop->u.unmap_grant_ref.dev_bus_addr = 0;
+        gop->u.unmap_grant_ref.handle = grant_tx_ref[pending_idx];
+        grant_tx_ref[pending_idx] = GRANT_INVALID_REF;
+        gop++;
+    }
+    if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
+                                           grant_tx_op, gop - grant_tx_op))) {
+        BUG();
+    }
+#else
     mcl = tx_mcl;
     while ( dc != dp )
     {
@@ -438,11 +551,14 @@
         BUG();
 
     mcl = tx_mcl;
+#endif
     while ( dealloc_cons != dp )
     {
+#ifndef CONFIG_XEN_NETDEV_GRANT_TX
         /* The update_va_mapping() must not fail. */
         if ( unlikely(mcl[0].result != 0) )
             BUG();
+#endif
 
         pending_idx = dealloc_ring[MASK_PEND_IDX(dealloc_cons++)];
 
@@ -466,11 +582,17 @@
         
         netif_put(netif);
 
+#ifndef CONFIG_XEN_NETDEV_GRANT_TX
         mcl++;
+#endif
     }
 
  skip_dealloc:
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+    gop = grant_tx_op;
+#else
     mcl = tx_mcl;
+#endif
     while ( (NR_PENDING_REQS < MAX_PENDING_REQS) &&
             !list_empty(&net_schedule_list) )
     {
@@ -492,7 +614,6 @@
         rmb(); /* Ensure that we see the request before we copy it. */
         memcpy(&txreq, &netif->tx->ring[MASK_NETIF_TX_IDX(i)].req, 
                sizeof(txreq));
-
         /* Credit-based scheduling. */
         if ( txreq.size > netif->remaining_credit )
         {
@@ -572,13 +693,20 @@
 
         /* Packets passed to netif_rx() must have some headroom. */
         skb_reserve(skb, 16);
-
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+        gop->u.map_grant_ref.host_virt_addr = MMAP_VADDR(pending_idx);
+        gop->u.map_grant_ref.dom = netif->domid;
+        gop->u.map_grant_ref.ref = txreq.addr >> PAGE_SHIFT;
+        gop->u.map_grant_ref.flags = GNTMAP_host_map | GNTMAP_readonly;
+        gop++;
+#else
         mcl[0].op = __HYPERVISOR_update_va_mapping_otherdomain;
         mcl[0].args[0] = MMAP_VADDR(pending_idx);
         mcl[0].args[1] = (txreq.addr & PAGE_MASK) | __PAGE_KERNEL;
         mcl[0].args[2] = 0;
         mcl[0].args[3] = netif->domid;
         mcl++;
+#endif
 
         memcpy(&pending_tx_info[pending_idx].req, &txreq, sizeof(txreq));
         pending_tx_info[pending_idx].netif = netif;
@@ -588,11 +716,26 @@
 
         pending_cons++;
 
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+        if ((gop - grant_tx_op) >= ARRAY_SIZE(grant_tx_op))
+            break;
+#else
         /* Filled the batch queue? */
         if ( (mcl - tx_mcl) == ARRAY_SIZE(tx_mcl) )
             break;
+#endif
     }
 
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+    if (gop == grant_tx_op) {
+        return;
+    }
+    if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
+                                           grant_tx_op, gop - grant_tx_op))) {
+        BUG();
+    }
+    gop = grant_tx_op;
+#else
     if ( mcl == tx_mcl )
         return;
 
@@ -600,6 +743,7 @@
         BUG();
 
     mcl = tx_mcl;
+#endif
     while ( (skb = __skb_dequeue(&tx_queue)) != NULL )
     {
         pending_idx = *((u16 *)skb->data);
@@ -607,6 +751,20 @@
         memcpy(&txreq, &pending_tx_info[pending_idx].req, sizeof(txreq));
 
         /* Check the remap error code. */
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+        if (unlikely(gop->u.map_grant_ref.dev_bus_addr == 0)) {
+            printk(KERN_ALERT "#### netback grant fails\n");
+            make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
+            netif_put(netif);
+            kfree_skb(skb);
+            gop++;
+            pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
+            continue;
+        }
+        phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT] =
+                             FOREIGN_FRAME(gop->u.map_grant_ref.dev_bus_addr);
+        grant_tx_ref[pending_idx] = gop->u.map_grant_ref.handle;
+#else
         if ( unlikely(mcl[0].result != 0) )
         {
             DPRINTK("Bad page frame\n");
@@ -620,6 +778,7 @@
 
         phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT] =
             FOREIGN_FRAME(txreq.addr >> PAGE_SHIFT);
+#endif
 
         data_len = (txreq.size > PKT_PROT_LEN) ? PKT_PROT_LEN : txreq.size;
 
@@ -627,7 +786,6 @@
         memcpy(skb->data, 
                (void *)(MMAP_VADDR(pending_idx)|(txreq.addr&~PAGE_MASK)),
                data_len);
-
         if ( data_len < txreq.size )
         {
             /* Append the packet payload as a fragment. */
@@ -661,7 +819,11 @@
         netif_rx(skb);
         netif->dev->last_rx = jiffies;
 
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+        gop++;
+#else
         mcl++;
+#endif
     }
 }
 
@@ -781,6 +943,12 @@
         return 0;
 
     printk("Initialising Xen netif backend\n");
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+    printk("#### netback tx using grant tables\n");
+#endif
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+    printk("#### netback rx using grant tables\n");
+#endif
 
     /* We can increase reservation by this much in net_rx_action(). */
     balloon_update_driver_allowance(NETIF_RX_RING_SIZE);
diff -Nru a/linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c 
b/linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c
--- a/linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c   2005-06-02 
15:02:46 +01:00
+++ b/linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c   2005-06-02 
15:02:46 +01:00
@@ -54,6 +54,25 @@
 #include <asm/page.h>
 #include <asm/uaccess.h>
 
+#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
+#include <asm-xen/xen-public/grant_table.h>
+#include <asm-xen/gnttab.h>
+#ifdef GRANT_DEBUG
+static void
+dump_packet(int tag, u32 addr, u32 ap)
+{
+    unsigned char *p = (unsigned char *)ap;
+    int i;
+    
+    printk(KERN_ALERT "#### rx_poll   %c %08x ", tag & 0xff, addr);
+    for (i = 0; i < 20; i++) {
+        printk("%02x", p[i]);
+    }
+    printk("\n");
+}
+#endif
+#endif
+
 #ifndef __GFP_NOWARN
 #define __GFP_NOWARN 0
 #endif
@@ -82,6 +101,21 @@
 #define TX_TEST_IDX req_cons  /* conservative: not seen all our requests? */
 #endif
 
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+static grant_ref_t gref_tx_head, gref_tx_terminal;
+static grant_ref_t grant_tx_ref[NETIF_TX_RING_SIZE + 1];
+#endif
+
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+static grant_ref_t gref_rx_head, gref_rx_terminal;
+static grant_ref_t grant_rx_ref[NETIF_RX_RING_SIZE + 1];
+#endif
+
+#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
+static domid_t rdomid = 0;
+#define GRANT_INVALID_REF      (0xFFFF)
+#endif
+
 static void network_tx_buf_gc(struct net_device *dev);
 static void network_alloc_rx_buffers(struct net_device *dev);
 
@@ -322,6 +356,14 @@
         for (i = np->tx_resp_cons; i != prod; i++) {
             id  = np->tx->ring[MASK_NETIF_TX_IDX(i)].resp.id;
             skb = np->tx_skbs[id];
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+            if (gnttab_query_foreign_access(grant_tx_ref[id]) != 0) {
+                printk(KERN_ALERT "netfront: query foreign access\n");
+            }
+            gnttab_end_foreign_access(grant_tx_ref[id], GNTMAP_readonly);
+            gnttab_release_grant_reference(&gref_tx_head, grant_tx_ref[id]);
+            grant_tx_ref[id] = GRANT_INVALID_REF;
+#endif
             ADD_ID_TO_FREELIST(np->tx_skbs, id);
             dev_kfree_skb_irq(skb);
         }
@@ -356,6 +398,9 @@
     struct sk_buff *skb;
     int i, batch_target;
     NETIF_RING_IDX req_prod = np->rx->req_prod;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+    int ref;
+#endif
 
     if (unlikely(np->backend_state != BEST_CONNECTED))
         return;
@@ -388,7 +433,16 @@
         np->rx_skbs[id] = skb;
         
         np->rx->ring[MASK_NETIF_RX_IDX(req_prod + i)].req.id = id;
-        
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+        if ((ref = gnttab_claim_grant_reference(&gref_rx_head, 
gref_rx_terminal)) < 0) {
+            printk(KERN_ALERT "#### netfront can't claim rx reference\n");
+            BUG();
+        }
+        grant_rx_ref[id] = ref;
+        gnttab_grant_foreign_transfer_ref(ref, rdomid,
+        virt_to_machine(skb->head) >> PAGE_SHIFT);
+        np->rx->ring[MASK_NETIF_RX_IDX(req_prod + i)].req.gref = ref;
+#endif
         rx_pfn_array[i] = virt_to_machine(skb->head) >> PAGE_SHIFT;
 
        /* Remove this page from pseudo phys map before passing back to Xen. */
@@ -438,6 +492,10 @@
     struct net_private *np = netdev_priv(dev);
     netif_tx_request_t *tx;
     NETIF_RING_IDX i;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+    unsigned int ref;
+    unsigned long mfn;
+#endif
 
     if (unlikely(np->tx_full)) {
         printk(KERN_ALERT "%s: full queue wasn't stopped!\n", dev->name);
@@ -472,7 +530,18 @@
     tx = &np->tx->ring[MASK_NETIF_TX_IDX(i)].req;
 
     tx->id   = id;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+    if ((ref = gnttab_claim_grant_reference(&gref_tx_head, gref_tx_terminal)) 
< 0) {
+        printk(KERN_ALERT "#### netfront can't claim tx grant reference\n");
+        BUG();
+    }
+    mfn = virt_to_machine(skb->data) >> PAGE_SHIFT;
+    gnttab_grant_foreign_access_ref(ref, rdomid, mfn, GNTMAP_readonly);
+    tx->addr = (ref << PAGE_SHIFT) | ((unsigned long)skb->data & ~PAGE_MASK);
+    grant_tx_ref[id] = ref;
+#else
     tx->addr = virt_to_machine(skb->data);
+#endif
     tx->size = skb->len;
     tx->csum_blank = (skb->ip_summed == CHECKSUM_HW);
 
@@ -532,6 +601,10 @@
     int work_done, budget, more_to_do = 1;
     struct sk_buff_head rxq;
     unsigned long flags;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+    unsigned long mfn;
+    grant_ref_t ref;
+#endif
 
     spin_lock(&np->rx_lock);
 
@@ -544,7 +617,6 @@
 
     if ((budget = *pbudget) > dev->quota)
         budget = dev->quota;
-
     rp = np->rx->resp_prod;
     rmb(); /* Ensure we see queued responses up to 'rp'. */
 
@@ -552,7 +624,6 @@
                    (i != rp) && (work_done < budget);
                    i++, work_done++) {
         rx = &np->rx->ring[MASK_NETIF_RX_IDX(i)].resp;
-
         /*
          * An error here is very odd. Usually indicates a backend bug,
          * low-memory condition, or that we didn't have reservation headroom.
@@ -567,11 +638,23 @@
             continue;
         }
 
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+        ref = grant_rx_ref[rx->id];
+        grant_rx_ref[rx->id] = GRANT_INVALID_REF;
+
+        mfn = gnttab_end_foreign_transfer(ref);
+        gnttab_release_grant_reference(&gref_rx_head, ref);
+#endif
+
         skb = np->rx_skbs[rx->id];
         ADD_ID_TO_FREELIST(np->rx_skbs, rx->id);
 
         /* NB. We handle skb overflow later. */
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+        skb->data = skb->head + rx->addr;
+#else
         skb->data = skb->head + (rx->addr & ~PAGE_MASK);
+#endif
         skb->len  = rx->status;
         skb->tail = skb->data + skb->len;
 
@@ -582,18 +665,33 @@
         np->stats.rx_bytes += rx->status;
 
         /* Remap the page. */
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+        mmu->ptr = mfn << PAGE_SHIFT | MMU_MACHPHYS_UPDATE;
+#else
         mmu->ptr  = (rx->addr & PAGE_MASK) | MMU_MACHPHYS_UPDATE;
+#endif
         mmu->val  = __pa(skb->head) >> PAGE_SHIFT;
         mmu++;
         mcl->op = __HYPERVISOR_update_va_mapping;
         mcl->args[0] = (unsigned long)skb->head;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+        mcl->args[1] = (mfn << PAGE_SHIFT) | __PAGE_KERNEL;
+#else
         mcl->args[1] = (rx->addr & PAGE_MASK) | __PAGE_KERNEL;
+#endif
         mcl->args[2] = 0;
         mcl++;
 
         phys_to_machine_mapping[__pa(skb->head) >> PAGE_SHIFT] = 
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+            mfn;
+#else
             rx->addr >> PAGE_SHIFT;
-
+#endif
+#ifdef GRANT_DEBUG
+        printk(KERN_ALERT "#### rx_poll     enqueue vdata=%08x mfn=%08x 
ref=%04x\n",
+               skb->data, mfn, ref);
+#endif
         __skb_queue_tail(&rxq, skb);
     }
 
@@ -612,6 +710,11 @@
     }
 
     while ((skb = __skb_dequeue(&rxq)) != NULL) {
+#ifdef GRANT_DEBUG
+         printk(KERN_ALERT "#### rx_poll     dequeue vdata=%08x mfn=%08x\n",
+                skb->data, virt_to_machine(skb->data)>>PAGE_SHIFT);
+         dump_packet('d', skb->data, (unsigned long)skb->data);
+#endif
         /*
          * Enough room in skbuff for the data we were passed? Also, Linux 
          * expects at least 16 bytes headroom in each receive buffer.
@@ -620,6 +723,7 @@
                        unlikely((skb->data - skb->head) < 16)) {
             nskb = NULL;
 
+
             /* Only copy the packet if it fits in the current MTU. */
             if (skb->len <= (dev->mtu + ETH_HLEN)) {
                 if ((skb->tail > skb->end) && net_ratelimit())
@@ -650,7 +754,6 @@
         
         /* Set the shared-info area, which is hidden behind the real data. */
         init_skb_shinfo(skb);
-
         /* Ethernet-specific work. Delayed to here as it peeks the header. */
         skb->protocol = eth_type_trans(skb, dev);
 
@@ -923,6 +1026,9 @@
     network_connect(dev, status);
     np->evtchn = status->evtchn;
     np->irq = bind_evtchn_to_irq(np->evtchn);
+#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
+    rdomid = status->domid;
+#endif
     (void)request_irq(np->irq, netif_int, SA_SAMPLE_RANDOM, dev->name, dev);
     netctrl_connected_count();
     (void)send_fake_arp(dev);
@@ -966,10 +1072,18 @@
     np->rx_max_target = RX_MAX_TARGET;
 
     /* Initialise {tx,rx}_skbs to be a free chain containing every entry. */
-    for (i = 0; i <= NETIF_TX_RING_SIZE; i++)
+    for (i = 0; i <= NETIF_TX_RING_SIZE; i++) {
         np->tx_skbs[i] = (void *)(i+1);
-    for (i = 0; i <= NETIF_RX_RING_SIZE; i++)
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+        grant_tx_ref[i] = GRANT_INVALID_REF;
+#endif
+    }
+    for (i = 0; i <= NETIF_RX_RING_SIZE; i++) {
         np->rx_skbs[i] = (void *)(i+1);
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+        grant_rx_ref[i] = GRANT_INVALID_REF;
+#endif
+    }
 
     dev->open            = network_open;
     dev->hard_start_xmit = network_start_xmit;
@@ -1271,6 +1385,22 @@
 
     if (xen_start_info.flags & SIF_INITDOMAIN)
         return 0;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+    if (gnttab_alloc_grant_references(NETIF_TX_RING_SIZE,
+                                      &gref_tx_head, &gref_tx_terminal) < 0) {
+        printk(KERN_ALERT "#### netfront can't alloc tx grant refs\n");
+        return 1;
+    }
+    printk(KERN_ALERT "#### netfront tx using grant tables\n");
+#endif
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+    if (gnttab_alloc_grant_references(NETIF_RX_RING_SIZE,
+                                      &gref_rx_head, &gref_rx_terminal) < 0) {
+        printk(KERN_ALERT "#### netfront can't alloc rx grant refs\n");
+        return 1;
+    }
+    printk(KERN_ALERT "#### netfront rx using grant tables\n");
+#endif
 
     if ((err = xennet_proc_init()) != 0)
         return err;
@@ -1290,6 +1420,16 @@
     return err;
 }
 
+static void netif_exit(void)
+{
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+    gnttab_free_grant_references(NETIF_TX_RING_SIZE, gref_tx_head);
+#endif
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+    gnttab_free_grant_references(NETIF_RX_RING_SIZE, gref_rx_head);
+#endif
+}
+
 static void vif_suspend(struct net_private *np)
 {
     /* Avoid having tx/rx stuff happen until we're ready. */
@@ -1482,3 +1622,4 @@
 #endif
 
 module_init(netif_init);
+module_exit(netif_exit);
diff -Nru a/xen/common/grant_table.c b/xen/common/grant_table.c
--- a/xen/common/grant_table.c  2005-06-02 15:02:46 +01:00
+++ b/xen/common/grant_table.c  2005-06-02 15:02:46 +01:00
@@ -797,6 +797,146 @@
 }
 #endif
 
+static long
+gnttab_donate(gnttab_op_t *uop, unsigned int count)
+{
+    struct domain *d = current->domain;
+    struct domain *e;
+    struct pfn_info *page;
+    u32 _d, _nd, x, y;
+    int i;
+    int result = GNTST_okay;
+
+    for (i = 0; i < count; i++) {
+        gnttab_donate_t *gop = &uop[i].u.donate;
+#if GRANT_DEBUG
+        printk("gnttab_donate: i=%d mfn=%08x domid=%d gref=%08x\n",
+               i, gop->mfn, gop->domid, gop->handle);
+#endif
+        page = &frame_table[gop->mfn];
+
+        if (unlikely(IS_XEN_HEAP_FRAME(page))) { 
+            printk("gnttab_donate: xen heap frame mfn=%08x\n", gop->mfn);
+            gop->status = GNTST_bad_virt_addr;
+            continue;
+        }
+        if (unlikely(!pfn_valid(page_to_pfn(page)))) {
+            printk("gnttab_donate: invalid pfn for mfn=%08x\n", gop->mfn);
+            gop->status = GNTST_bad_virt_addr;
+            continue;
+        }
+        if (unlikely((e = find_domain_by_id(gop->domid)) == NULL)) {
+            printk("gnttab_donate: can't find domain %d\n", gop->domid);
+            gop->status = GNTST_bad_domain;
+            continue;
+        }
+
+        spin_lock(&d->page_alloc_lock);
+
+        /*
+         * The tricky bit: atomically release ownership while
+         * there is just one benign reference to the page
+         * (PGC_allocated). If that reference disappears then the
+         * deallocation routine will safely spin.
+         */
+        _d  = pickle_domptr(d);
+        _nd = page->u.inuse._domain;
+        y   = page->count_info;
+        do {
+            x = y;
+            if (unlikely((x & (PGC_count_mask|PGC_allocated)) !=
+                         (1 | PGC_allocated)) || unlikely(_nd != _d)) {
+                printk("gnttab_donate: Bad page values %p: ed=%p(%u), sd=%p,"
+                        " caf=%08x, taf=%08x\n", page_to_pfn(page),
+                        d, d->id, unpickle_domptr(_nd), x, 
+                        page->u.inuse.type_info);
+                spin_unlock(&d->page_alloc_lock);
+                put_domain(e);
+                return 0;
+            }
+            __asm__ __volatile__(
+                LOCK_PREFIX "cmpxchg8b %2"
+                : "=d" (_nd), "=a" (y),
+                "=m" (*(volatile u64 *)(&page->count_info))
+                : "0" (_d), "1" (x), "c" (NULL), "b" (x) );
+        } while (unlikely(_nd != _d) || unlikely(y != x));
+
+        /*
+         * Unlink from 'd'. At least one reference remains (now
+         * anonymous), so no one else is spinning to try to delete
+         * this page from 'd'.
+         */
+        d->tot_pages--;
+        list_del(&page->list);
+
+        spin_unlock(&d->page_alloc_lock);
+
+        spin_lock(&e->page_alloc_lock);
+
+        /*
+         * Check that 'e' will accept the page and has reservation
+         * headroom.  Also, a domain mustn't have PGC_allocated
+         * pages when it is dying.
+         */
+#ifdef GRANT_DEBUG
+        if (unlikely(e->tot_pages >= e->max_pages)) {
+            printk("gnttab_donate: no headroom tot_pages=%d max_pages=%d\n",
+                   e->tot_pages, e->max_pages);
+            spin_unlock(&e->page_alloc_lock);
+            put_domain(e);
+            result = GNTST_general_error;
+            break;
+        }
+        if (unlikely(test_bit(DF_DYING, &e->d_flags))) {
+            printk("gnttab_donate: target domain is dying\n");
+            spin_unlock(&e->page_alloc_lock);
+            put_domain(e);
+            result = GNTST_general_error;
+            break;
+        }
+        if (unlikely(!gnttab_prepare_for_transfer(e, d, gop->handle))) {
+            printk("gnttab_donate: gnttab_prepare_for_transfer fails\n");
+            spin_unlock(&e->page_alloc_lock);
+            put_domain(e);
+            result = GNTST_general_error;
+            break;
+        }
+#else
+        ASSERT(e->tot_pages <= e->max_pages);
+        if (unlikely(test_bit(DF_DYING, &e->d_flags)) ||
+            unlikely(e->tot_pages == e->max_pages) ||
+            unlikely(!gnttab_prepare_for_transfer(e, d, gop->handle))) {
+            printk("gnttab_donate: Transferee has no reservation headroom 
(%d,%d), or "
+                    "provided a bad grant ref (%08x), or is dying (%p).\n",
+                    e->tot_pages, e->max_pages, gop->handle, e->d_flags);
+            spin_unlock(&e->page_alloc_lock);
+            put_domain(e);
+            result = GNTST_general_error;
+            break;
+        }
+#endif
+        /* Okay, add the page to 'e'. */
+        if (unlikely(e->tot_pages++ == 0)) {
+            get_knownalive_domain(e);
+        }
+        list_add_tail(&page->list, &e->page_list);
+        page_set_owner(page, e);
+
+        spin_unlock(&e->page_alloc_lock);
+
+        /*
+         * Transfer is all done: tell the guest about its new page
+         * frame.
+         */
+        gnttab_notify_transfer(e, d, gop->handle, gop->mfn);
+        
+        put_domain(e);
+
+        gop->status = GNTST_okay;
+    }
+    return result;
+}
+
 long 
 do_grant_table_op(
     unsigned int cmd, void *uop, unsigned int count)
@@ -831,6 +971,13 @@
         rc = gnttab_dump_table((gnttab_dump_table_t *)uop);
         break;
 #endif
+    case GNTTABOP_donate:
+        if (unlikely(!array_access_ok(VERIFY_WRITE, uop, count,
+                                      sizeof(gnttab_op_t)))) {
+            goto out;
+        }
+        rc = gnttab_donate(uop, count);
+        break;
     default:
         rc = -ENOSYS;
         break;
@@ -1066,6 +1213,10 @@
     }
     sha->frame = __mfn_to_gpfn(rd, frame);
     sha->domid = rd->domain_id;
+#ifdef GRANT_DEBUG
+    printk("gnttab_notify: ref=%08x src=%08x dest=%08x mfn=%08x\n",
+           ref, frame, pfn, sha->frame);
+#endif
     wmb();
     sha->flags = ( GTF_accept_transfer | GTF_transfer_completed );
 
diff -Nru a/xen/include/public/grant_table.h b/xen/include/public/grant_table.h
--- a/xen/include/public/grant_table.h  2005-06-02 15:02:46 +01:00
+++ b/xen/include/public/grant_table.h  2005-06-02 15:02:46 +01:00
@@ -220,6 +220,19 @@
     s16         status;               /* 2: GNTST_* */
 } PACKED gnttab_dump_table_t; /* 4 bytes */
 
+/*
+ * GNTTABOP_donate: Donate <frame> to a foreign domain.  The
+ * foreign domain has previously registered the details of the transfer.
+ * These can be identified from <handle>, a grant reference.
+ */
+#define GNTTABOP_donate                4
+typedef struct {
+    memory_t    mfn;                 /*  0 */
+    domid_t     domid;               /*  4 */
+    u16         handle;               /*  8 */
+    s16         status;               /*  10: GNTST_* */
+    u32         __pad;
+} PACKED gnttab_donate_t;            /*  14 bytes */
 
 /*
  * Bitfield values for update_pin_status.flags.
@@ -273,6 +286,7 @@
         gnttab_unmap_grant_ref_t  unmap_grant_ref;
         gnttab_setup_table_t      setup_table;
         gnttab_dump_table_t       dump_table;
+        gnttab_donate_t           donate;
         u8                        __dummy[24];
     } PACKED u;
 } PACKED gnttab_op_t; /* 32 bytes */
diff -Nru a/xen/include/public/io/netif.h b/xen/include/public/io/netif.h
--- a/xen/include/public/io/netif.h     2005-06-02 15:02:46 +01:00
+++ b/xen/include/public/io/netif.h     2005-06-02 15:02:46 +01:00
@@ -25,10 +25,13 @@
 
 typedef struct {
     u16       id;    /*  0: Echoed in response message.        */
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+    grant_ref_t gref;  /* 2: Reference to incoming granted frame */
+#endif
 } PACKED netif_rx_request_t; /* 2 bytes */
 
 typedef struct {
-    memory_t addr;   /*  0: Machine address of packet.              */
+    u32      addr;   /*  0: Offset in page of start of received packet  */
     MEMORY_PADDING;
     u16      csum_valid:1; /* Protocol checksum is validated?       */
     u16      id:15;  /*  8:  */


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel