Hi:
[NET] back: Transmit SG packets if supported
This patch adds scatter-gather (SG) transmission support to the backend.
This allows the MTU to be raised right away and opens up the possibility
of TSO in the future.
Signed-off-by: Herbert Xu <herbert@xxxxxxxxxxxxxxxxxxx>
Cheers,
--
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu 许志壬 <herbert@xxxxxxxxxxxxxxxxxxx>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
--
diff -r 05d6393e9d18 -r 4f008474675a linux-2.6-xen-sparse/drivers/xen/netback/common.h
--- a/linux-2.6-xen-sparse/drivers/xen/netback/common.h Fri Jul 28 18:20:45 2006 +1000
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/common.h Fri Jul 28 18:22:22 2006 +1000
@@ -129,4 +129,10 @@ static inline int netbk_can_queue(struct
return netif->can_queue;
}
+static inline int netbk_can_sg(struct net_device *dev)
+{
+ netif_t *netif = netdev_priv(dev);
+ return netif->features & NETIF_F_SG;
+}
+
#endif /* __NETIF__BACKEND__COMMON_H__ */
diff -r 05d6393e9d18 -r 4f008474675a linux-2.6-xen-sparse/drivers/xen/netback/interface.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/interface.c Fri Jul 28 18:20:45 2006 +1000
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/interface.c Fri Jul 28 18:22:22 2006 +1000
@@ -62,10 +62,34 @@ static int net_close(struct net_device *
return 0;
}
+static int netbk_change_mtu(struct net_device *dev, int mtu)
+{
+ int max = netbk_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN;
+
+ if (mtu > max)
+ return -EINVAL;
+ dev->mtu = mtu;
+ return 0;
+}
+
+static int netbk_set_sg(struct net_device *dev, u32 data)
+{
+ if (data) {
+ netif_t *netif = netdev_priv(dev);
+
+ if (!(netif->features & NETIF_F_SG))
+ return -ENOSYS;
+ }
+
+ return ethtool_op_set_sg(dev, data);
+}
+
static struct ethtool_ops network_ethtool_ops =
{
.get_tx_csum = ethtool_op_get_tx_csum,
.set_tx_csum = ethtool_op_set_tx_csum,
+ .get_sg = ethtool_op_get_sg,
+ .set_sg = netbk_set_sg,
.get_link = ethtool_op_get_link,
};
@@ -101,6 +125,7 @@ netif_t *netif_alloc(domid_t domid, unsi
dev->get_stats = netif_be_get_stats;
dev->open = net_open;
dev->stop = net_close;
+ dev->change_mtu = netbk_change_mtu;
dev->features = NETIF_F_IP_CSUM;
SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
diff -r 05d6393e9d18 -r 4f008474675a linux-2.6-xen-sparse/drivers/xen/netback/netback.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Fri Jul 28 18:20:45 2006 +1000
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Fri Jul 28 18:22:22 2006 +1000
@@ -40,6 +40,11 @@
/*#define NETBE_DEBUG_INTERRUPT*/
+struct netbk_rx_meta {
+ skb_frag_t frag;
+ int id;
+};
+
static void netif_idx_release(u16 pending_idx);
static void netif_page_release(struct page *page);
static void make_tx_response(netif_t *netif,
@@ -100,21 +105,27 @@ static unsigned long mfn_list[MAX_MFN_AL
static unsigned long mfn_list[MAX_MFN_ALLOC];
static unsigned int alloc_index = 0;
-static unsigned long alloc_mfn(void)
-{
- unsigned long mfn = 0;
+static inline unsigned long alloc_mfn(void)
+{
+ return mfn_list[--alloc_index];
+}
+
+static int check_mfn(int nr)
+{
struct xen_memory_reservation reservation = {
- .nr_extents = MAX_MFN_ALLOC,
.extent_order = 0,
.domid = DOMID_SELF
};
- set_xen_guest_handle(reservation.extent_start, mfn_list);
- if ( unlikely(alloc_index == 0) )
- alloc_index = HYPERVISOR_memory_op(
- XENMEM_increase_reservation, &reservation);
- if ( alloc_index != 0 )
- mfn = mfn_list[--alloc_index];
- return mfn;
+
+ if (likely(alloc_index >= nr))
+ return 0;
+
+ set_xen_guest_handle(reservation.extent_start, mfn_list + alloc_index);
+ reservation.nr_extents = MAX_MFN_ALLOC - alloc_index;
+ alloc_index += HYPERVISOR_memory_op(XENMEM_increase_reservation,
+ &reservation);
+
+ return alloc_index >= nr ? 0 : -ENOMEM;
}
static inline void maybe_schedule_tx_action(void)
@@ -136,12 +147,87 @@ static inline int is_xen_skb(struct sk_b
return (cp == skbuff_cachep);
}
+static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
+{
+ struct skb_shared_info *ninfo;
+ struct sk_buff *nskb;
+ unsigned long offset;
+ int ret;
+ int len;
+ int headlen;
+
+ nskb = alloc_skb(SKB_MAX_HEAD(0), GFP_ATOMIC);
+ if (unlikely(!nskb))
+ goto err;
+
+ skb_reserve(nskb, 16);
+ headlen = nskb->end - nskb->data;
+ if (headlen > skb_headlen(skb))
+ headlen = skb_headlen(skb);
+ ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen);
+ BUG_ON(ret);
+
+ ninfo = skb_shinfo(nskb);
+ ninfo->gso_size = skb_shinfo(skb)->gso_size;
+ ninfo->gso_type = skb_shinfo(skb)->gso_type;
+
+ offset = headlen;
+ len = skb->len - headlen;
+
+ nskb->len = skb->len;
+ nskb->data_len = len;
+ nskb->truesize += len;
+
+ while (len) {
+ struct page *page;
+ int copy;
+ int zero;
+
+ if (unlikely(ninfo->nr_frags >= MAX_SKB_FRAGS)) {
+ dump_stack();
+ goto err_free;
+ }
+
+ copy = len >= PAGE_SIZE ? PAGE_SIZE : len;
+ zero = len >= PAGE_SIZE ? 0 : __GFP_ZERO;
+
+ page = alloc_page(GFP_ATOMIC | zero);
+ if (unlikely(!page))
+ goto err_free;
+
+ ret = skb_copy_bits(skb, offset, page_address(page), copy);
+ BUG_ON(ret);
+
+ ninfo->frags[ninfo->nr_frags].page = page;
+ ninfo->frags[ninfo->nr_frags].page_offset = 0;
+ ninfo->frags[ninfo->nr_frags].size = copy;
+ ninfo->nr_frags++;
+
+ offset += copy;
+ len -= copy;
+ }
+
+ offset = nskb->data - skb->data;
+
+ nskb->h.raw = skb->h.raw + offset;
+ nskb->nh.raw = skb->nh.raw + offset;
+ nskb->mac.raw = skb->mac.raw + offset;
+
+ return nskb;
+
+err_free:
+ kfree_skb(nskb);
+err:
+ return NULL;
+}
+
static inline int netbk_queue_full(netif_t *netif)
{
RING_IDX peek = netif->rx_req_cons_peek;
- return netif->rx.sring->req_prod - peek <= 0 ||
- netif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek <= 0;
+ return netif->rx.sring->req_prod - peek <= MAX_SKB_FRAGS ||
+ netif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek <=
+ MAX_SKB_FRAGS;
}
int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
@@ -163,20 +249,12 @@ int netif_be_start_xmit(struct sk_buff *
* We do not copy the packet unless:
* 1. The data is shared; or
* 2. The data is not allocated from our special cache.
- * NB. We also couldn't cope with fragmented packets, but we won't get
- * any because we not advertise the NETIF_F_SG feature.
+ * 3. The data is fragmented.
*/
- if (skb_shared(skb) || skb_cloned(skb) || !is_xen_skb(skb)) {
- int hlen = skb->data - skb->head;
- int ret;
- struct sk_buff *nskb = dev_alloc_skb(hlen + skb->len);
+ if (skb_cloned(skb) || skb_is_nonlinear(skb) || !is_xen_skb(skb)) {
+ struct sk_buff *nskb = netbk_copy_skb(skb);
if ( unlikely(nskb == NULL) )
goto drop;
- skb_reserve(nskb, hlen);
- __skb_put(nskb, skb->len);
- ret = skb_copy_bits(skb, -hlen, nskb->data - hlen,
- skb->len + hlen);
- BUG_ON(ret);
/* Copy only the header fields we use in this driver. */
nskb->dev = skb->dev;
nskb->ip_summed = skb->ip_summed;
@@ -185,7 +263,7 @@ int netif_be_start_xmit(struct sk_buff *
skb = nskb;
}
- netif->rx_req_cons_peek++;
+ netif->rx_req_cons_peek += skb_shinfo(skb)->nr_frags + 1;
netif_get(netif);
if (netbk_can_queue(dev) && netbk_queue_full(netif))
@@ -221,116 +299,80 @@ int xen_network_done(void)
}
#endif
-static void net_rx_action(unsigned long unused)
-{
- netif_t *netif = NULL;
- s8 status;
- u16 size, id, irq, flags;
- multicall_entry_t *mcl;
- mmu_update_t *mmu;
- gnttab_transfer_t *gop;
- unsigned long vdata, old_mfn, new_mfn;
- struct sk_buff_head rxq;
- struct sk_buff *skb;
- int notify_nr = 0;
- int ret;
+static u16 netbk_gop_frag(netif_t *netif, struct page *page, int count, int i)
+{
+ multicall_entry_t *mcl = rx_mcl + count;
+ mmu_update_t *mmu = rx_mmu + count;
+ gnttab_transfer_t *gop = grant_rx_op + count;
+ netif_rx_request_t *req;
+ unsigned long old_mfn, new_mfn;
+
+ old_mfn = virt_to_mfn(page_address(page));
+
+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+ new_mfn = alloc_mfn();
+
+ /*
+ * Set the new P2M table entry before reassigning
+ * the old data page. Heed the comment in
+ * pgtable-2level.h:pte_page(). :-)
+ */
+ set_phys_to_machine(page_to_pfn(page), new_mfn);
+
+ MULTI_update_va_mapping(mcl, (unsigned long)page_address(page),
+ pfn_pte_ma(new_mfn, PAGE_KERNEL), 0);
+
+ mmu->ptr = ((maddr_t)new_mfn << PAGE_SHIFT) |
+ MMU_MACHPHYS_UPDATE;
+ mmu->val = page_to_pfn(page);
+ }
+
+ req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons + i);
+ gop->mfn = old_mfn;
+ gop->domid = netif->domid;
+ gop->ref = req->gref;
+ return req->id;
+}
+
+static void netbk_gop_skb(struct sk_buff *skb, struct netbk_rx_meta *meta,
+ int count)
+{
+ netif_t *netif = netdev_priv(skb->dev);
+ int nr_frags = skb_shinfo(skb)->nr_frags;
+ int i;
+
+ for (i = 0; i < nr_frags; i++) {
+ meta[++count].frag = skb_shinfo(skb)->frags[i];
+ meta[count].id = netbk_gop_frag(netif, meta[count].frag.page,
+ count, i + 1);
+ }
+
/*
- * Putting hundreds of bytes on the stack is considered rude.
- * Static works because a tasklet can only be on one CPU at any time.
+ * This must occur at the end to ensure that we don't trash
+ * skb_shinfo until we're done.
*/
- static u16 notify_list[NET_RX_RING_SIZE];
-
- skb_queue_head_init(&rxq);
-
- mcl = rx_mcl;
- mmu = rx_mmu;
- gop = grant_rx_op;
-
- while ((skb = skb_dequeue(&rx_queue)) != NULL) {
- netif = netdev_priv(skb->dev);
- vdata = (unsigned long)skb->data;
- old_mfn = virt_to_mfn(vdata);
-
- if (!xen_feature(XENFEAT_auto_translated_physmap)) {
- /* Memory squeeze? Back off for an arbitrary while. */
- if ((new_mfn = alloc_mfn()) == 0) {
- if ( net_ratelimit() )
- WPRINTK("Memory squeeze in netback "
- "driver.\n");
- mod_timer(&net_timer, jiffies + HZ);
- skb_queue_head(&rx_queue, skb);
- break;
- }
- /*
- * Set the new P2M table entry before reassigning
- * the old data page. Heed the comment in
- * pgtable-2level.h:pte_page(). :-)
- */
- set_phys_to_machine(
- __pa(skb->data) >> PAGE_SHIFT,
- new_mfn);
-
- MULTI_update_va_mapping(mcl, vdata,
- pfn_pte_ma(new_mfn,
- PAGE_KERNEL), 0);
- mcl++;
-
- mmu->ptr = ((maddr_t)new_mfn << PAGE_SHIFT) |
- MMU_MACHPHYS_UPDATE;
- mmu->val = __pa(vdata) >> PAGE_SHIFT;
- mmu++;
- }
-
- gop->mfn = old_mfn;
- gop->domid = netif->domid;
- gop->ref = RING_GET_REQUEST(
- &netif->rx, netif->rx.req_cons)->gref;
- netif->rx.req_cons++;
- gop++;
-
- __skb_queue_tail(&rxq, skb);
-
- /* Filled the batch queue? */
- if ((gop - grant_rx_op) == ARRAY_SIZE(grant_rx_op))
- break;
- }
-
- if (!xen_feature(XENFEAT_auto_translated_physmap)) {
- if (mcl == rx_mcl)
- return;
-
- mcl[-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
-
- if (mmu - rx_mmu) {
- mcl->op = __HYPERVISOR_mmu_update;
- mcl->args[0] = (unsigned long)rx_mmu;
- mcl->args[1] = mmu - rx_mmu;
- mcl->args[2] = 0;
- mcl->args[3] = DOMID_SELF;
- mcl++;
- }
-
- ret = HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl);
- BUG_ON(ret != 0);
- }
-
- ret = HYPERVISOR_grant_table_op(GNTTABOP_transfer, grant_rx_op,
- gop - grant_rx_op);
- BUG_ON(ret != 0);
-
- mcl = rx_mcl;
- gop = grant_rx_op;
- while ((skb = __skb_dequeue(&rxq)) != NULL) {
- netif = netdev_priv(skb->dev);
- size = skb->tail - skb->data;
-
- atomic_set(&(skb_shinfo(skb)->dataref), 1);
- skb_shinfo(skb)->nr_frags = 0;
- skb_shinfo(skb)->frag_list = NULL;
-
- netif->stats.tx_bytes += size;
- netif->stats.tx_packets++;
-
+ meta[count - nr_frags].id = netbk_gop_frag(netif,
+ virt_to_page(skb->data),
+ count - nr_frags, 0);
+ netif->rx.req_cons += nr_frags + 1;
+}
+
+static inline void netbk_free_pages(int nr_frags, struct netbk_rx_meta *meta)
+{
+ int i;
+
+ for (i = 0; i < nr_frags; i++)
+ put_page(meta[i].frag.page);
+}
+
+static int netbk_check_gop(int nr_frags, domid_t domid, int count)
+{
+ multicall_entry_t *mcl = rx_mcl + count;
+ gnttab_transfer_t *gop = grant_rx_op + count;
+ int status = NETIF_RSP_OKAY;
+ int i;
+
+ for (i = 0; i <= nr_frags; i++) {
if (!xen_feature(XENFEAT_auto_translated_physmap)) {
/* The update_va_mapping() must not fail. */
BUG_ON(mcl->result != 0);
@@ -338,10 +380,9 @@ static void net_rx_action(unsigned long
}
/* Check the reassignment error code. */
- status = NETIF_RSP_OKAY;
if (gop->status != 0) {
DPRINTK("Bad status %d from grant transfer to DOM%u\n",
- gop->status, netif->domid);
+ gop->status, domid);
/*
* Page no longer belongs to us unless GNTST_bad_page,
* but that should be a fatal error anyway.
@@ -349,17 +390,128 @@ static void net_rx_action(unsigned long
BUG_ON(gop->status == GNTST_bad_page);
status = NETIF_RSP_ERROR;
}
- irq = netif->irq;
- id = RING_GET_REQUEST(&netif->rx, netif->rx.rsp_prod_pvt)->id;
- flags = 0;
+ gop++;
+ }
+
+ return status;
+}
+
+static void netbk_add_frag_responses(netif_t *netif, int status,
+ struct netbk_rx_meta *meta, int nr_frags)
+{
+ int i;
+
+ for (i = 0; i < nr_frags; i++) {
+ int id = meta[i].id;
+ int flags = (i == nr_frags - 1) ? 0 : NETRXF_more_data;
+
+ make_rx_response(netif, id, status, meta[i].frag.page_offset,
+ meta[i].frag.size, flags);
+ }
+}
+
+static void net_rx_action(unsigned long unused)
+{
+ netif_t *netif = NULL;
+ s8 status;
+ u16 id, irq, flags;
+ multicall_entry_t *mcl;
+ struct sk_buff_head rxq;
+ struct sk_buff *skb;
+ int notify_nr = 0;
+ int ret;
+ int nr_frags;
+ int count;
+
+ /*
+ * Putting hundreds of bytes on the stack is considered rude.
+ * Static works because a tasklet can only be on one CPU at any time.
+ */
+ static u16 notify_list[NET_RX_RING_SIZE];
+ static struct netbk_rx_meta meta[NET_RX_RING_SIZE];
+
+ skb_queue_head_init(&rxq);
+
+ count = 0;
+
+ while ((skb = skb_dequeue(&rx_queue)) != NULL) {
+ nr_frags = skb_shinfo(skb)->nr_frags;
+ *(int *)skb->cb = nr_frags;
+
+ if (!xen_feature(XENFEAT_auto_translated_physmap) &&
+ check_mfn(nr_frags + 1)) {
+ /* Memory squeeze? Back off for an arbitrary while. */
+ if ( net_ratelimit() )
+ WPRINTK("Memory squeeze in netback "
+ "driver.\n");
+ mod_timer(&net_timer, jiffies + HZ);
+ skb_queue_head(&rx_queue, skb);
+ break;
+ }
+
+ netbk_gop_skb(skb, meta, count);
+
+ count += nr_frags + 1;
+
+ __skb_queue_tail(&rxq, skb);
+
+ /* Filled the batch queue? */
+ if (count + MAX_SKB_FRAGS >= NET_RX_RING_SIZE)
+ break;
+ }
+
+ if (!count)
+ return;
+
+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+ mcl = rx_mcl + count;
+
+ mcl[-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
+
+ mcl->op = __HYPERVISOR_mmu_update;
+ mcl->args[0] = (unsigned long)rx_mmu;
+ mcl->args[1] = count;
+ mcl->args[2] = 0;
+ mcl->args[3] = DOMID_SELF;
+
+ ret = HYPERVISOR_multicall(rx_mcl, count + 1);
+ BUG_ON(ret != 0);
+ }
+
+ ret = HYPERVISOR_grant_table_op(GNTTABOP_transfer, grant_rx_op, count);
+ BUG_ON(ret != 0);
+
+ count = 0;
+ while ((skb = __skb_dequeue(&rxq)) != NULL) {
+ nr_frags = *(int *)skb->cb;
+
+ atomic_set(&(skb_shinfo(skb)->dataref), 1);
+ skb_shinfo(skb)->nr_frags = 0;
+ skb_shinfo(skb)->frag_list = NULL;
+
+ netif = netdev_priv(skb->dev);
+ netif->stats.tx_bytes += skb->len;
+ netif->stats.tx_packets++;
+
+ netbk_free_pages(nr_frags, meta + count + 1);
+ status = netbk_check_gop(nr_frags, netif->domid, count);
+
+ id = meta[count].id;
+ flags = nr_frags ? NETRXF_more_data : 0;
+
if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
flags |= NETRXF_csum_blank | NETRXF_data_validated;
else if (skb->proto_data_valid) /* remote but checksummed? */
flags |= NETRXF_data_validated;
- if (make_rx_response(netif, id, status,
- (unsigned long)skb->data & ~PAGE_MASK,
- size, flags) &&
- (rx_notify[irq] == 0)) {
+
+ make_rx_response(netif, id, status, offset_in_page(skb->data),
+ skb_headlen(skb), flags);
+ netbk_add_frag_responses(netif, status, meta + count + 1,
+ nr_frags);
+
+ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret);
+ irq = netif->irq;
+ if (ret && !rx_notify[irq]) {
rx_notify[irq] = 1;
notify_list[notify_nr++] = irq;
}
@@ -370,7 +522,7 @@ static void net_rx_action(unsigned long
netif_put(netif);
dev_kfree_skb(skb);
- gop++;
+ count += nr_frags + 1;
}
while (notify_nr != 0) {
@@ -1040,7 +1192,6 @@ static int make_rx_response(netif_t *net
{
RING_IDX i = netif->rx.rsp_prod_pvt;
netif_rx_response_t *resp;
- int notify;
resp = RING_GET_RESPONSE(&netif->rx, i);
resp->offset = offset;
@@ -1051,9 +1202,8 @@ static int make_rx_response(netif_t *net
resp->status = (s16)st;
netif->rx.rsp_prod_pvt = ++i;
- RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, notify);
-
- return notify;
+
+ return 0;
}
#ifdef NETBE_DEBUG_INTERRUPT
diff -r 05d6393e9d18 -r 4f008474675a linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Fri Jul 28 18:20:45 2006 +1000
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Fri Jul 28 18:22:22 2006 +1000
@@ -377,6 +377,13 @@ static int connect_rings(struct backend_
/* Must be non-zero for pfifo_fast to work. */
be->netif->dev->tx_queue_len = 1;
+ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg", "%d", &val) < 0)
+ val = 0;
+ if (val) {
+ be->netif->features |= NETIF_F_SG;
+ be->netif->dev->features |= NETIF_F_SG;
+ }
+
/* Map the shared frame, irq etc. */
err = netif_map(be->netif, tx_ring_ref, rx_ring_ref, evtchn);
if (err) {
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|