WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

[Xen-devel] RFC: [2/2] [NET] back: Add lazy copying

To: Keir Fraser <keir@xxxxxxxxxxxxx>, Xen Development Mailing List <xen-devel@xxxxxxxxxxxxxxxxxxx>
Subject: [Xen-devel] RFC: [2/2] [NET] back: Add lazy copying
From: Herbert Xu <herbert@xxxxxxxxxxxxxxxxxxx>
Date: Tue, 20 Mar 2007 15:56:57 +1100
Delivery-date: Mon, 19 Mar 2007 21:55:59 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
In-reply-to: <20070320044625.GA17463@xxxxxxxxxxxxxxxxxxx>
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
References: <20070320044625.GA17463@xxxxxxxxxxxxxxxxxxx>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
User-agent: Mutt/1.5.9i
Hi:

[NET] back: Add lazy copying

This patch adds lazy copying using the new unmap_and_replace grant
table operation.

We keep a list of pending entries sorted by arrival order.  We'll
process this list every time net_tx_action is invoked.  We ensure
that net_tx_action is invoked within one second of the arrival of
the first packet in the list.

When we process the list any entry that has been around for more
than half a second is copied.  This allows us to free the grant
table entry and return it to domU.

If the new grant table operation is not available (e.g., old HV
or architectures that don't support it yet) we simply copy each
packet as we receive it using skb_linearize.  We also disable
SG/TSO if this is the case.

By default the new code is disabled.  In order to enable it,
the module needs to be loaded with the argument copy_skb=1.

Signed-off-by: Herbert Xu <herbert@xxxxxxxxxxxxxxxxxxx> 

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu 许志壬 <herbert@xxxxxxxxxxxxxxxxxxx>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
--
diff -r 3ac19fda0bc2 linux-2.6-xen-sparse/drivers/xen/netback/common.h
--- a/linux-2.6-xen-sparse/drivers/xen/netback/common.h Fri Mar 02 12:11:52 2007 +0000
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/common.h Tue Mar 20 14:08:40 2007 +1100
@@ -102,6 +102,14 @@ typedef struct netif_st {
        wait_queue_head_t waiting_to_free;
 } netif_t;
 
+enum {
+       NETBK_DONT_COPY_SKB,
+       NETBK_DELAYED_COPY_SKB,
+       NETBK_ALWAYS_COPY_SKB,
+};
+
+extern int netbk_copy_skb_mode;
+
 #define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
 #define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
 
diff -r 3ac19fda0bc2 linux-2.6-xen-sparse/drivers/xen/netback/netback.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c        Fri Mar 02 12:11:52 2007 +0000
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c        Tue Mar 20 14:08:40 2007 +1100
@@ -46,6 +46,11 @@ struct netbk_rx_meta {
        int copy:1;
 };
 
+struct netbk_tx_pending_inuse {
+       struct list_head list;
+       unsigned long alloc_time;
+};
+
 static void netif_idx_release(u16 pending_idx);
 static void netif_page_release(struct page *page);
 static void make_tx_response(netif_t *netif, 
@@ -65,6 +70,7 @@ static DECLARE_TASKLET(net_rx_tasklet, n
 static DECLARE_TASKLET(net_rx_tasklet, net_rx_action, 0);
 
 static struct timer_list net_timer;
+static struct timer_list netbk_tx_pending_timer;
 
 #define MAX_PENDING_REQS 256
 
@@ -92,6 +98,10 @@ static u16 dealloc_ring[MAX_PENDING_REQS
 static u16 dealloc_ring[MAX_PENDING_REQS];
 static PEND_RING_IDX dealloc_prod, dealloc_cons;
 
+/* Doubly-linked list of in-use pending entries. */
+static struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS];
+static LIST_HEAD(pending_inuse_head);
+
 static struct sk_buff_head tx_queue;
 
 static grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
@@ -104,6 +114,13 @@ static spinlock_t net_schedule_list_lock
 #define MAX_MFN_ALLOC 64
 static unsigned long mfn_list[MAX_MFN_ALLOC];
 static unsigned int alloc_index = 0;
+
+/* Setting this allows the safe use of this driver without netloop. */
+static int MODPARM_copy_skb;
+module_param_named(copy_skb, MODPARM_copy_skb, bool, 0);
+MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop");
+
+int netbk_copy_skb_mode;
 
 static inline unsigned long alloc_mfn(void)
 {
@@ -710,6 +727,11 @@ static void net_alarm(unsigned long unus
        tasklet_schedule(&net_rx_tasklet);
 }
 
+static void netbk_tx_pending_timeout(unsigned long unused)
+{
+       tasklet_schedule(&net_tx_tasklet);
+}
+
 struct net_device_stats *netif_be_get_stats(struct net_device *dev)
 {
        netif_t *netif = netdev_priv(dev);
@@ -803,46 +825,140 @@ static void tx_credit_callback(unsigned 
        netif_schedule_work(netif);
 }
 
+/* Perform a delayed copy.  This is slow-path only. */
+static int copy_pending_req(PEND_RING_IDX pending_idx)
+{
+       struct gnttab_unmap_and_replace unmap;
+       mmu_update_t mmu;
+       struct page *page;
+       struct page *new_page;
+       void *new_addr;
+       void *addr;
+       unsigned long pfn;
+       unsigned long new_mfn;
+       int err;
+
+       page = mmap_pages[pending_idx];
+       if (!get_page_unless_zero(page))
+               return -ENOENT;
+
+       new_page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
+       if (!new_page)
+               return -ENOMEM;
+
+       new_addr = page_address(new_page);
+       addr = page_address(page);
+       memcpy(new_addr, addr, PAGE_SIZE);
+
+       pfn = page_to_pfn(page);
+       new_mfn = virt_to_mfn(new_addr);
+
+       if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+               set_phys_to_machine(pfn, new_mfn);
+               set_phys_to_machine(page_to_pfn(new_page), INVALID_P2M_ENTRY);
+       }
+
+       gnttab_set_replace_op(&unmap, (unsigned long)addr,
+                             (unsigned long)new_addr,
+                             grant_tx_handle[pending_idx]);
+
+       err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace,
+                                       &unmap, 1);
+       BUG_ON(err);
+       BUG_ON(unmap.status);
+
+       if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+               mmu.ptr = ((maddr_t)new_mfn << PAGE_SHIFT) |
+                         MMU_MACHPHYS_UPDATE;
+               mmu.val = pfn;
+               err = HYPERVISOR_mmu_update(&mmu, 1, NULL, DOMID_SELF);
+               BUG_ON(err);
+       }
+
+       ClearPageForeign(page);
+       put_page(page);
+
+       SetPageForeign(new_page, netif_page_release);
+       new_page->index = pending_idx;
+       mmap_pages[pending_idx] = new_page;
+
+       return 0;
+}
+
 inline static void net_tx_action_dealloc(void)
 {
+       struct netbk_tx_pending_inuse *inuse, *n;
        gnttab_unmap_grant_ref_t *gop;
        u16 pending_idx;
        PEND_RING_IDX dc, dp;
        netif_t *netif;
        int ret;
+       LIST_HEAD(list);
 
        dc = dealloc_cons;
-       dp = dealloc_prod;
-
-       /* Ensure we see all indexes enqueued by netif_idx_release(). */
-       smp_rmb();
+       gop = tx_unmap_ops;
 
        /*
         * Free up any grants we have finished using
         */
-       gop = tx_unmap_ops;
-       while (dc != dp) {
-               pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
-               gnttab_set_unmap_op(gop, idx_to_kaddr(pending_idx),
-                                   GNTMAP_host_map,
-                                   grant_tx_handle[pending_idx]);
-               gop++;
-       }
+       do {
+               dp = dealloc_prod;
+
+               /* Ensure we see all indices enqueued by netif_idx_release(). */
+               smp_rmb();
+
+               while (dc != dp) {
+                       pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
+                       list_move_tail(&pending_inuse[pending_idx].list, &list);
+                       gnttab_set_unmap_op(gop, idx_to_kaddr(pending_idx),
+                                           GNTMAP_host_map,
+                                           grant_tx_handle[pending_idx]);
+                       gop++;
+               }
+
+               if (netbk_copy_skb_mode != NETBK_DELAYED_COPY_SKB ||
+                   list_empty(&pending_inuse_head))
+                       break;
+
+               /* Copy any entries that have been pending for too long. */
+               list_for_each_entry_safe(inuse, n, &pending_inuse_head, list) {
+                       if (time_after(inuse->alloc_time + HZ / 2, jiffies))
+                               break;
+
+                       switch (copy_pending_req(inuse - pending_inuse)) {
+                       case 0:
+                               list_move_tail(&inuse->list, &list);
+                               /* fall through */
+                       case -ENOENT:
+                               continue;
+                       }
+
+                       break;
+               }
+       } while (dp != dealloc_prod);
+
+       dealloc_cons = dc;
+
        ret = HYPERVISOR_grant_table_op(
                GNTTABOP_unmap_grant_ref, tx_unmap_ops, gop - tx_unmap_ops);
        BUG_ON(ret);
 
-       while (dealloc_cons != dp) {
-               pending_idx = dealloc_ring[MASK_PEND_IDX(dealloc_cons++)];
+       list_for_each_entry_safe(inuse, n, &list, list) {
+               pending_idx = inuse - pending_inuse;
 
                netif = pending_tx_info[pending_idx].netif;
 
                make_tx_response(netif, &pending_tx_info[pending_idx].req, 
                                 NETIF_RSP_OKAY);
 
+               /* Ready for next use. */
+               init_page_count(mmap_pages[pending_idx]);
+
                pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
 
                netif_put(netif);
+
+               list_del_init(&inuse->list);
        }
 }
 
@@ -1014,6 +1130,11 @@ static void netbk_fill_frags(struct sk_b
                unsigned long pending_idx;
 
                pending_idx = (unsigned long)frag->page;
+
+               pending_inuse[pending_idx].alloc_time = jiffies;
+               list_add_tail(&pending_inuse[pending_idx].list,
+                             &pending_inuse_head);
+
                txp = &pending_tx_info[pending_idx].req;
                frag->page = virt_to_page(idx_to_kaddr(pending_idx));
                frag->size = txp->size;
@@ -1302,8 +1423,24 @@ static void net_tx_action(unsigned long 
                netif->stats.rx_bytes += skb->len;
                netif->stats.rx_packets++;
 
+               if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) &&
+                   unlikely(skb_linearize(skb))) {
+                       DPRINTK("Can't linearize skb in net_tx_action.\n");
+                       kfree_skb(skb);
+                       continue;
+               }
+
                netif_rx(skb);
                netif->dev->last_rx = jiffies;
+       }
+
+       if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
+           !list_empty(&pending_inuse_head)) {
+               struct netbk_tx_pending_inuse *oldest;
+
+               oldest = list_entry(pending_inuse_head.next,
+                                   struct netbk_tx_pending_inuse, list);
+               mod_timer(&netbk_tx_pending_timer, oldest->alloc_time + HZ);
        }
 }
 
@@ -1324,9 +1461,6 @@ static void netif_idx_release(u16 pendin
 
 static void netif_page_release(struct page *page)
 {
-       /* Ready for next use. */
-       init_page_count(page);
-
        netif_idx_release(page->index);
 }
 
@@ -1448,6 +1582,10 @@ static int __init netback_init(void)
        net_timer.data = 0;
        net_timer.function = net_alarm;
 
+       init_timer(&netbk_tx_pending_timer);
+       netbk_tx_pending_timer.data = 0;
+       netbk_tx_pending_timer.function = netbk_tx_pending_timeout;
+
        mmap_pages = alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
        if (mmap_pages == NULL) {
                printk("%s: out of memory\n", __FUNCTION__);
@@ -1458,6 +1596,7 @@ static int __init netback_init(void)
                page = mmap_pages[i];
                SetPageForeign(page, netif_page_release);
                page->index = i;
+               INIT_LIST_HEAD(&pending_inuse[i].list);
        }
 
        pending_cons = 0;
@@ -1467,6 +1606,15 @@ static int __init netback_init(void)
 
        spin_lock_init(&net_schedule_list_lock);
        INIT_LIST_HEAD(&net_schedule_list);
+
+       netbk_copy_skb_mode = NETBK_DONT_COPY_SKB;
+       if (MODPARM_copy_skb) {
+               if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace,
+                                             NULL, 0))
+                       netbk_copy_skb_mode = NETBK_ALWAYS_COPY_SKB;
+               else
+                       netbk_copy_skb_mode = NETBK_DELAYED_COPY_SKB;
+       }
 
        netif_xenbus_init();
 
diff -r 3ac19fda0bc2 linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Fri Mar 02 12:11:52 2007 +0000
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Tue Mar 20 14:08:40 2007 +1100
@@ -62,6 +62,7 @@ static int netback_probe(struct xenbus_d
        const char *message;
        struct xenbus_transaction xbt;
        int err;
+       int sg;
        struct backend_info *be = kzalloc(sizeof(struct backend_info),
                                          GFP_KERNEL);
        if (!be) {
@@ -73,6 +74,10 @@ static int netback_probe(struct xenbus_d
        be->dev = dev;
        dev->dev.driver_data = be;
 
+       sg = 1;
+       if (netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB)
+               sg = 0;
+
        do {
                err = xenbus_transaction_start(&xbt);
                if (err) {
@@ -80,14 +85,14 @@ static int netback_probe(struct xenbus_d
                        goto fail;
                }
 
-               err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", 1);
+               err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", sg);
                if (err) {
                        message = "writing feature-sg";
                        goto abort_transaction;
                }
 
                err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4",
-                                   "%d", 1);
+                                   "%d", sg);
                if (err) {
                        message = "writing feature-gso-tcpv4";
                        goto abort_transaction;

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel