
[Xen-devel] [PATCH 12/22] Scatter-gather support.

Signed-off-by: Steven Smith <steven.smith@xxxxxxxxxx>
---
 drivers/xen/netchannel2/chan.c             |   27 ++++++++++--
 drivers/xen/netchannel2/netchannel2_core.h |   35 +++++++++++++---
 drivers/xen/netchannel2/offload.c          |   59 ++++++++++++++++++++++++++++
 drivers/xen/netchannel2/recv_packet.c      |   23 +++++++++++
 drivers/xen/netchannel2/rscb.c             |   18 ++++++--
 drivers/xen/netchannel2/xmit_packet.c      |   43 ++++++++++++--------
 include/xen/interface/io/netchannel2.h     |   24 ++++++++++-
 7 files changed, 191 insertions(+), 38 deletions(-)
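The heart of the transmit-side change is the fragment-limit check added to
prepare_xmit_allocate_grant() in rscb.c below.  Distilled into a standalone
sketch (illustrative only, not part of the patch: check_frag_limit() is a
made-up helper name, the other identifiers are the patch's own, and the
sketch assumes the patch's netchannel2_core.h):

#include <linux/skbuff.h>
#include "netchannel2_core.h"

static enum prepare_xmit_result
check_frag_limit(struct netchannel2_ring_pair *ncrp, struct sk_buff *skb,
                 unsigned inline_prefix_size)
{
        unsigned nr_fragments = nfrags_skb(skb, inline_prefix_size);

        if (nr_fragments <= ncrp->max_fragments_per_tx_packet)
                return PREP_XMIT_OKAY;

        /* The other end can't accept this many fragments; try to
           flatten the skb into a single linear buffer. */
        if (skb_linearize(skb) < 0)
                return PREP_XMIT_DROP;  /* allocation failure */

        /* nfrags_skb() counts pages, so even a linearised skb can
           exceed a very small advertised limit. */
        nr_fragments = nfrags_skb(skb, inline_prefix_size);
        if (nr_fragments > ncrp->max_fragments_per_tx_packet)
                return PREP_XMIT_DROP;

        return PREP_XMIT_OKAY;
}

The re-check after skb_linearize() matters because linearising only removes
paged fragments; a large linear payload still spans several pages.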

diff --git a/drivers/xen/netchannel2/chan.c b/drivers/xen/netchannel2/chan.c
index af8d028..ae9bdb0 100644
--- a/drivers/xen/netchannel2/chan.c
+++ b/drivers/xen/netchannel2/chan.c
@@ -85,6 +85,10 @@ retry:
                case NETCHANNEL2_MSG_SET_OFFLOAD:
                        nc2_handle_set_offload(nc, ncrp, &hdr);
                        break;
+               case NETCHANNEL2_MSG_SET_MAX_FRAGMENTS_PER_PACKET:
+                       nc2_handle_set_max_fragments_per_packet(nc, ncrp,
+                                                               &hdr);
+                       break;
                case NETCHANNEL2_MSG_PAD:
                        break;
                default:
@@ -137,6 +141,8 @@ static void flush_rings(struct netchannel2_ring_pair *ncrp)
        send_finish_packet_messages(ncrp);
        if (ncrp->need_advertise_max_packets)
                advertise_max_packets(ncrp);
+       if (ncrp->need_advertise_max_fragments_per_packet)
+               advertise_max_fragments_per_packet(ncrp);
        if (nc->need_advertise_offloads)
                advertise_offloads(nc);
 
@@ -460,6 +466,8 @@ static void _nc2_attach_rings(struct netchannel2_ring_pair *ncrp,
        ncrp->is_attached = 1;
 
        ncrp->need_advertise_max_packets = 1;
+       ncrp->need_advertise_max_fragments_per_packet = 1;
+       ncrp->max_fragments_per_tx_packet = 1;
 }
 
 /* Attach a netchannel2 structure to a ring pair.  The endpoint is
@@ -546,8 +554,9 @@ void nc2_detach_rings(struct netchannel2 *nc)
        nc->rings.irq = -1;
 
        /* Disable all offloads */
-       nc->net_device->features &= ~NETIF_F_IP_CSUM;
+       nc->net_device->features &= ~(NETIF_F_IP_CSUM | NETIF_F_SG);
        nc->allow_tx_csum_offload = 0;
+       nc->rings.max_fragments_per_tx_packet = 1;
 }
 
 #if defined(CONFIG_XEN_NETDEV2_BACKEND)
@@ -657,17 +666,25 @@ static int process_ring(struct napi_struct *napi,
                        skb = skb_peek_tail(&nc->pending_skbs);
                        if (!skb)
                                break;
-                       if (prepare_xmit_allocate_resources(nc, skb) < 0) {
-                               /* Still stuck */
+                       switch (prepare_xmit_allocate_resources(nc, skb)) {
+                       case PREP_XMIT_OKAY:
+                               __skb_unlink(skb, &nc->pending_skbs);
+                               queue_packet_to_interface(skb, ncrp);
+                               break;
+                       case PREP_XMIT_BUSY:
+                               goto still_stuck;
+                       case PREP_XMIT_DROP:
+                               __skb_unlink(skb, &nc->pending_skbs);
+                               release_tx_packet(ncrp, skb);
                                break;
                        }
-                       __skb_unlink(skb, &nc->pending_skbs);
-                       queue_packet_to_interface(skb, ncrp);
                }
                if (skb_queue_empty(&nc->pending_skbs)) {
                        nc->is_stopped = 0;
                        netif_wake_queue(nc->net_device);
                }
+still_stuck:
+               ;
        }
 
        spin_unlock(&ncrp->lock);
diff --git a/drivers/xen/netchannel2/netchannel2_core.h b/drivers/xen/netchannel2/netchannel2_core.h
index 7e00daf..b3b063c 100644
--- a/drivers/xen/netchannel2/netchannel2_core.h
+++ b/drivers/xen/netchannel2/netchannel2_core.h
@@ -199,6 +199,15 @@ struct netchannel2_ring_pair {
           filtering rules would suppress the event. */
        uint8_t delayed_kick;
 
+       /* Set if we need to send a SET_MAX_FRAGMENTS_PER_PACKET
+        * message. */
+       uint8_t need_advertise_max_fragments_per_packet;
+
+       /* The maximum number of fragments which can be used in any
+          given packet.  We have to linearise anything which is more
+          fragmented than this. */
+       uint32_t max_fragments_per_tx_packet;
+
        /* A list of packet IDs which we need to return to the other
           end as soon as there is space on the ring.  Protected by
           the lock. */
@@ -308,10 +317,18 @@ struct sk_buff *handle_receiver_copy_packet(struct netchannel2 *nc,
                                            unsigned nr_frags,
                                            unsigned frags_off);
 
-int prepare_xmit_allocate_small(struct netchannel2_ring_pair *ncrp,
-                                                      struct sk_buff *skb);
-int prepare_xmit_allocate_grant(struct netchannel2_ring_pair *ncrp,
-                               struct sk_buff *skb);
+enum prepare_xmit_result {
+       PREP_XMIT_OKAY = 0,
+       PREP_XMIT_BUSY = -1,
+       PREP_XMIT_DROP = -2,
+};
+
+enum prepare_xmit_result prepare_xmit_allocate_small(
+       struct netchannel2_ring_pair *ncrp,
+       struct sk_buff *skb);
+enum prepare_xmit_result prepare_xmit_allocate_grant(
+       struct netchannel2_ring_pair *ncrp,
+       struct sk_buff *skb);
 void xmit_grant(struct netchannel2_ring_pair *ncrp,
                struct sk_buff *skb,
                volatile void *msg);
@@ -347,9 +364,9 @@ void nc2_rscb_on_gntcopy_fail(void *ctxt, gnttab_copy_t *gop);
 
 int nc2_start_xmit(struct sk_buff *skb, struct net_device *dev);
 int nc2_really_start_xmit(struct netchannel2_ring_pair *ncrp,
-                          struct sk_buff *skb);
-int prepare_xmit_allocate_resources(struct netchannel2 *nc,
-                                   struct sk_buff *skb);
+                         struct sk_buff *skb);
+enum prepare_xmit_result prepare_xmit_allocate_resources(struct netchannel2 *nc,
+                                                        struct sk_buff *skb);
 void nc2_handle_finish_packet_msg(struct netchannel2 *nc,
                                  struct netchannel2_ring_pair *ncrp,
                                  struct netchannel2_msg_hdr *hdr);
@@ -363,6 +380,10 @@ void nc2_handle_packet_msg(struct netchannel2 *nc,
                           struct netchannel2_msg_hdr *hdr,
                           struct sk_buff_head *pending_rx_queue);
 void advertise_max_packets(struct netchannel2_ring_pair *ncrp);
+void nc2_handle_set_max_fragments_per_packet(struct netchannel2 *nc,
+                                            struct netchannel2_ring_pair *ncrp,
+                                            struct netchannel2_msg_hdr *hdr);
+void advertise_max_fragments_per_packet(struct netchannel2_ring_pair *ncrp);
 void receive_pending_skbs(struct sk_buff_head *rx_queue);
 void nc2_queue_purge(struct netchannel2_ring_pair *ncrp,
                     struct sk_buff_head *queue);
diff --git a/drivers/xen/netchannel2/offload.c b/drivers/xen/netchannel2/offload.c
index 90d0a54..552b0ad 100644
--- a/drivers/xen/netchannel2/offload.c
+++ b/drivers/xen/netchannel2/offload.c
@@ -5,6 +5,7 @@
 #include "netchannel2_core.h"
 
 static int nc2_set_tx_csum(struct net_device *nd, u32 val);
+static int nc2_set_sg(struct net_device *nd, u32 val);
 
 /* ---------------- Interface to the other domain ----------------------- */
 void nc2_handle_set_offload(struct netchannel2 *nc,
@@ -25,6 +26,14 @@ void nc2_handle_set_offload(struct netchannel2 *nc,
        if (msg.csum != nc->allow_tx_csum_offload) {
                nc->allow_tx_csum_offload = msg.csum;
                nc2_set_tx_csum(nc->net_device, msg.csum);
+               /* Linux doesn't support scatter-gather mode without
+                  TX csum offload.  We therefore need to disable SG
+                  support whenever the remote turns off csum support.
+                  We also elect to enable SG support whenever the
+                  remote turns on csum support, since that's more
+                  likely to be useful than requiring the user to
+                  manually enable it every time. */
+               nc2_set_sg(nc->net_device, msg.csum);
        }
 }
 
@@ -47,6 +56,37 @@ void advertise_offloads(struct netchannel2 *nc)
        }
 }
 
+/* Not really offload-related, but it interacts with checksum offload
+   and is easiest to do here. */
+void nc2_handle_set_max_fragments_per_packet(struct netchannel2 *nc,
+                                            struct netchannel2_ring_pair *ncrp,
+                                            struct netchannel2_msg_hdr *hdr)
+{
+       struct netchannel2_msg_set_max_fragments_per_packet msg;
+
+       if (hdr->size != sizeof(msg)) {
+               pr_debug("Set max fragments per packet message had strange size 
%d\n",
+                        hdr->size);
+               return;
+       }
+       nc2_copy_from_ring(&ncrp->cons_ring, &msg, sizeof(msg));
+       if (msg.max_frags_per_packet < 1) {
+               pr_debug("set max fragments per packet to %d?\n",
+                        msg.max_frags_per_packet);
+               return;
+       }
+       if (ncrp == &nc->rings &&
+           ncrp->max_fragments_per_tx_packet == 1 &&
+           msg.max_frags_per_packet > 1) {
+               /* Turning on scatter-gather mode.  Linux only
+                  supports it if you've got TX csum offload,
+                  though. */
+               if (nc->net_device->features & NETIF_F_IP_CSUM)
+                       nc->net_device->features |= NETIF_F_SG;
+       }
+       ncrp->max_fragments_per_tx_packet = msg.max_frags_per_packet;
+}
+
 
 
 /* ---------------------- Ethtool interface ---------------------------- */
@@ -85,9 +125,28 @@ static int nc2_set_tx_csum(struct net_device *nd, u32 val)
        return ethtool_op_set_tx_csum(nd, val);
 }
 
+/* ethtool set_sg() handler.  Linux makes sure that TX csum offload is
+   only enabled when scatter-gather mode is, so we don't have to worry
+   about that here. */
+static int nc2_set_sg(struct net_device *nd, u32 val)
+{
+       struct netchannel2 *nc = netdev_priv(nd);
+
+       if (nc->rings.max_fragments_per_tx_packet <= 1)
+               return -EOPNOTSUPP;
+
+       if (val)
+               nd->features |= NETIF_F_SG;
+       else
+               nd->features &= ~NETIF_F_SG;
+       return 0;
+}
+
 struct ethtool_ops nc2_ethtool_ops = {
        .get_tx_csum = ethtool_op_get_tx_csum,
        .set_tx_csum = nc2_set_tx_csum,
        .get_rx_csum = nc2_get_rx_csum,
        .set_rx_csum = nc2_set_rx_csum,
+       .get_sg      = ethtool_op_get_sg,
+       .set_sg      = nc2_set_sg,
 };
diff --git a/drivers/xen/netchannel2/recv_packet.c b/drivers/xen/netchannel2/recv_packet.c
index 0d4e593..958a3a6 100644
--- a/drivers/xen/netchannel2/recv_packet.c
+++ b/drivers/xen/netchannel2/recv_packet.c
@@ -83,6 +83,13 @@ void nc2_handle_packet_msg(struct netchannel2 *nc,
        frags_bytes = hdr->size - sizeof(msg) - msg.prefix_size;
        nr_frags = frags_bytes / sizeof(struct netchannel2_fragment);
 
+       if (nr_frags > MAX_SKB_FRAGS) {
+               pr_debug("otherend misbehaving: %d frags > %ld\n",
+                        nr_frags, MAX_SKB_FRAGS);
+               nc->stats.tx_errors++;
+               return;
+       }
+
        switch (msg.type) {
        case NC2_PACKET_TYPE_small:
                if (nr_frags != 0) {
@@ -218,6 +225,22 @@ void advertise_max_packets(struct netchannel2_ring_pair *ncrp)
        ncrp->pending_time_sensitive_messages = 1;
 }
 
+void advertise_max_fragments_per_packet(struct netchannel2_ring_pair *ncrp)
+{
+       struct netchannel2_msg_set_max_fragments_per_packet msg;
+
+       if (!nc2_can_send_payload_bytes(&ncrp->prod_ring, sizeof(msg)))
+               return;
+       msg.max_frags_per_packet = MAX_SKB_FRAGS;
+       nc2_send_message(&ncrp->prod_ring,
+                        NETCHANNEL2_MSG_SET_MAX_FRAGMENTS_PER_PACKET,
+                        0,
+                        &msg,
+                        sizeof(msg));
+       ncrp->need_advertise_max_fragments_per_packet = 0;
+       ncrp->pending_time_sensitive_messages = 1;
+}
+
 void receive_pending_skbs(struct sk_buff_head *pending_rx_queue)
 {
        struct sk_buff *skb;
diff --git a/drivers/xen/netchannel2/rscb.c b/drivers/xen/netchannel2/rscb.c
index 8984f90..8ad5454 100644
--- a/drivers/xen/netchannel2/rscb.c
+++ b/drivers/xen/netchannel2/rscb.c
@@ -229,8 +229,8 @@ static inline int nfrags_skb(struct sk_buff *skb, int prefix_size)
                + skb_shinfo(skb)->nr_frags;
 }
 
-int prepare_xmit_allocate_grant(struct netchannel2_ring_pair *ncrp,
-                               struct sk_buff *skb)
+enum prepare_xmit_result prepare_xmit_allocate_grant(struct netchannel2_ring_pair *ncrp,
+                                                    struct sk_buff *skb)
 {
        struct skb_cb_overlay *skb_co = get_skb_overlay(skb);
        unsigned nr_fragments;
@@ -239,7 +239,7 @@ int prepare_xmit_allocate_grant(struct netchannel2_ring_pair *ncrp,
        unsigned inline_prefix_size;
 
        if (allocate_txp_slot(ncrp, skb) < 0)
-               return -1;
+               return PREP_XMIT_BUSY;
 
        /* We're going to have to get the remote to issue a grant copy
           hypercall anyway, so there's no real benefit to shoving the
@@ -256,6 +256,14 @@ int prepare_xmit_allocate_grant(struct netchannel2_ring_pair *ncrp,
                 * policy grant. */
                BUG_ON(nr_fragments == 0);
 
+               if (nr_fragments > ncrp->max_fragments_per_tx_packet) {
+                       if (skb_linearize(skb) < 0)
+                               return PREP_XMIT_DROP;
+                       nr_fragments = nfrags_skb(skb, inline_prefix_size);
+                       if (nr_fragments > ncrp->max_fragments_per_tx_packet)
+                               return PREP_XMIT_DROP;
+               }
+
                skb_co->nr_fragments = nr_fragments;
        }
 
@@ -267,14 +275,14 @@ int prepare_xmit_allocate_grant(struct netchannel2_ring_pair *ncrp,
                release_txp_slot(ncrp, skb);
                /* Leave skb_co->nr_fragments set, so that we don't
                   have to recompute it next time around. */
-               return -1;
+               return PREP_XMIT_BUSY;
        }
        skb_co->gref_pool = gref_pool;
        skb_co->inline_prefix_size = inline_prefix_size;
 
        skb_co->type = NC2_PACKET_TYPE_receiver_copy;
 
-       return 0;
+       return PREP_XMIT_OKAY;
 }
 
 static void prepare_subpage_grant(struct netchannel2_ring_pair *ncrp,
diff --git a/drivers/xen/netchannel2/xmit_packet.c b/drivers/xen/netchannel2/xmit_packet.c
index 5b0ba6b..5cebca6 100644
--- a/drivers/xen/netchannel2/xmit_packet.c
+++ b/drivers/xen/netchannel2/xmit_packet.c
@@ -21,8 +21,9 @@ static enum transmit_policy transmit_policy(struct netchannel2 *nc,
    transmitted in the ring.  This is only called for small, linear
    SKBs.  It always succeeds, but has an int return type for symmetry
    with the other prepare_xmit_*() functions. */
-int prepare_xmit_allocate_small(struct netchannel2_ring_pair *ncrp,
-                               struct sk_buff *skb)
+enum prepare_xmit_result prepare_xmit_allocate_small(
+       struct netchannel2_ring_pair *ncrp,
+       struct sk_buff *skb)
 {
        struct skb_cb_overlay *skb_co = get_skb_overlay(skb);
 
@@ -33,7 +34,7 @@ int prepare_xmit_allocate_small(struct netchannel2_ring_pair *ncrp,
        skb_co->gref_pool = 0;
        skb_co->inline_prefix_size = skb->len;
 
-       return 0;
+       return PREP_XMIT_OKAY;
 }
 
 /* Figure out how much space @tp will take up on the ring. */
@@ -56,13 +57,13 @@ static unsigned get_transmitted_packet_msg_size(struct sk_buff *skb)
    allocated.  The expected case is that the caller will arrange for
    us to retry the allocation later, in which case we'll pick up the
    already-allocated buffers. */
-int prepare_xmit_allocate_resources(struct netchannel2 *nc,
-                                   struct sk_buff *skb)
+enum prepare_xmit_result prepare_xmit_allocate_resources(struct netchannel2 *nc,
+                                                        struct sk_buff *skb)
 {
        struct skb_cb_overlay *skb_co = get_skb_overlay(skb);
        enum transmit_policy policy;
        unsigned msg_size;
-       int r;
+       enum prepare_xmit_result r;
 
        if (skb_co->policy == transmit_policy_unknown) {
                policy = transmit_policy(nc, skb);
@@ -76,18 +77,18 @@ int prepare_xmit_allocate_resources(struct netchannel2 *nc,
                default:
                        BUG();
                        /* Shut the compiler up. */
-                       r = -1;
+                       r = PREP_XMIT_BUSY;
                }
-               if (r < 0)
+               if (r != PREP_XMIT_OKAY)
                        return r;
                skb_co->policy = policy;
        }
 
        msg_size = get_transmitted_packet_msg_size(skb);
        if (nc2_reserve_payload_bytes(&nc->rings.prod_ring, msg_size))
-               return 0;
+               return PREP_XMIT_OKAY;
 
-       return -1;
+       return PREP_XMIT_BUSY;
 }
 
 static void set_offload_flags(struct sk_buff *skb,
@@ -221,21 +222,27 @@ int nc2_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
        spin_lock_bh(&nc->rings.lock);
 
-       if (!nc->rings.is_attached) {
-               spin_unlock_bh(&nc->rings.lock);
-               dev_kfree_skb(skb);
-               nc->stats.tx_dropped++;
-               return NETDEV_TX_OK;
-       }
+       if (!nc->rings.is_attached)
+               goto out_drop;
 
        r = prepare_xmit_allocate_resources(nc, skb);
-       if (r < 0)
-               goto out_busy;
+       if (r != PREP_XMIT_OKAY) {
+               if (r == PREP_XMIT_BUSY)
+                       goto out_busy;
+               else
+                       goto out_drop;
+       }
        queue_packet_to_interface(skb, &nc->rings);
        spin_unlock_bh(&nc->rings.lock);
 
        return NETDEV_TX_OK;
 
+out_drop:
+       spin_unlock_bh(&nc->rings.lock);
+       dev_kfree_skb(skb);
+       nc->stats.tx_dropped++;
+       return NETDEV_TX_OK;
+
 out_busy:
        /* Some more buffers may have arrived, so kick the worker
         * thread to go and have a look. */
diff --git a/include/xen/interface/io/netchannel2.h b/include/xen/interface/io/netchannel2.h
index 5a56eb9..11bb469 100644
--- a/include/xen/interface/io/netchannel2.h
+++ b/include/xen/interface/io/netchannel2.h
@@ -26,6 +26,11 @@ struct netchannel2_msg_set_max_packets {
  * NETCHANNEL2_MAX_INLINE_BYTES.  Packets may contain no more than
  * NETCHANNEL2_MAX_PACKET_BYTES bytes of data, including all fragments
  * and the prefix.
+ *
+ * If a SET_MAX_FRAGMENTS_PER_PACKET message has been received, the
+ * number of fragments in the packet should respect that limit.
+ * Otherwise, there should be at most one fragment in the packet
+ * (there may be zero if the entire packet fits in the inline prefix).
  */
 #define NETCHANNEL2_MSG_PACKET 2
 #define NETCHANNEL2_MAX_PACKET_BYTES 65536
@@ -55,10 +60,8 @@ struct netchannel2_msg_packet {
        uint16_t pad2;
        uint16_t csum_start;
        uint16_t csum_offset;
-       /* Variable-size array.  The number of elements is determined
+       /* Variable-size array.  The number of elements is determined
           by the size of the message. */
-       /* Until we support scatter-gather, this will be either 0 or 1
-          element. */
        struct netchannel2_fragment frags[0];
 };
 
@@ -141,4 +144,19 @@ struct netchannel2_msg_set_offload {
        uint16_t reserved;
 };
 
+/* Set the maximum number of fragments which can be used in any packet
+ * (not including the inline prefix).  Until this is sent, there can
+ * be at most one such fragment per packet.  The maximum must not be
+ * set to zero. */
+/* Note that there is no acknowledgement for this message, and so if
+ * an endpoint tries to reduce the number of fragments then it may
+ * continue to receive over-fragmented packets for some time.  The
+ * receiving endpoint is expected to deal with this.
+ */
+#define NETCHANNEL2_MSG_SET_MAX_FRAGMENTS_PER_PACKET 5
+struct netchannel2_msg_set_max_fragments_per_packet {
+       struct netchannel2_msg_hdr hdr;
+       uint32_t max_frags_per_packet;
+};
+
 #endif /* !__NETCHANNEL2_H__ */
-- 
1.6.3.1
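
A note on the offload.c hunks: Linux permits NETIF_F_SG only when TX
checksum offload is available, which is why the patch ties scatter-gather
to the peer's csum setting.  Restated as an illustrative sketch (not the
patch's code: update_sg_feature() is a made-up name; the feature flags and
the more-than-one-fragment condition are the patch's own):

#include <linux/netdevice.h>

/* SG is on only when the peer allows TX csum offload (a Linux
 * requirement for NETIF_F_SG) and has advertised room for more
 * than one fragment per packet. */
static void update_sg_feature(struct net_device *nd,
                              int peer_allows_csum,
                              uint32_t max_frags_per_tx_packet)
{
        if (peer_allows_csum && max_frags_per_tx_packet > 1)
                nd->features |= NETIF_F_SG;
        else
                nd->features &= ~NETIF_F_SG;
}

The real code spreads this across nc2_set_sg(), nc2_handle_set_offload()
and nc2_handle_set_max_fragments_per_packet(), since the two inputs change
independently.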



<Prev in Thread] Current Thread [Next in Thread>