Add scatter-gather transmit support to netchannel2.  Each endpoint
advertises the maximum number of fragments it can accept in a single
packet with a new SET_MAX_FRAGMENTS_PER_PACKET message, and the
transmit path linearises (or drops) anything more fragmented than the
peer's advertised limit.

Signed-off-by: Steven Smith <steven.smith@xxxxxxxxxx>
---
drivers/xen/netchannel2/chan.c | 27 ++++++++++--
drivers/xen/netchannel2/netchannel2_core.h | 35 +++++++++++++---
drivers/xen/netchannel2/offload.c | 59 ++++++++++++++++++++++++++++
drivers/xen/netchannel2/recv_packet.c | 23 +++++++++++
drivers/xen/netchannel2/rscb.c | 18 ++++++--
drivers/xen/netchannel2/xmit_packet.c | 43 ++++++++++++--------
include/xen/interface/io/netchannel2.h | 24 ++++++++++-
7 files changed, 191 insertions(+), 38 deletions(-)
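
As a reviewer's aid (not part of the patch): the transmit-side policy
added below can be summarised by this minimal user-space model.  The
PREP_XMIT_* values mirror the enum added to netchannel2_core.h;
check_frag_limit() is a hypothetical stand-in for the
skb_linearize()/nfrags_skb() logic in prepare_xmit_allocate_grant().

#include <stdio.h>

enum prepare_xmit_result {
	PREP_XMIT_OKAY = 0,
	PREP_XMIT_BUSY = -1,
	PREP_XMIT_DROP = -2,
};

/* Decide what to do with a packet of nr_frags fragments when the
 * peer has advertised peer_limit fragments per packet.  The driver
 * recounts fragments after linearising (a linear skb can still span
 * several pages); this model assumes it collapses to one. */
static enum prepare_xmit_result check_frag_limit(unsigned nr_frags,
						 unsigned peer_limit,
						 int linearize_ok)
{
	if (nr_frags <= peer_limit)
		return PREP_XMIT_OKAY;
	if (!linearize_ok)
		return PREP_XMIT_DROP;
	return peer_limit >= 1 ? PREP_XMIT_OKAY : PREP_XMIT_DROP;
}

int main(void)
{
	/* Until a SET_MAX_FRAGMENTS_PER_PACKET message arrives, the
	 * limit is 1. */
	printf("%d\n", check_frag_limit(4, 1, 1)); /* 0: linearised */
	printf("%d\n", check_frag_limit(4, 1, 0)); /* -2: dropped */
	return 0;
}
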
diff --git a/drivers/xen/netchannel2/chan.c b/drivers/xen/netchannel2/chan.c
index af8d028..ae9bdb0 100644
--- a/drivers/xen/netchannel2/chan.c
+++ b/drivers/xen/netchannel2/chan.c
@@ -85,6 +85,10 @@ retry:
case NETCHANNEL2_MSG_SET_OFFLOAD:
nc2_handle_set_offload(nc, ncrp, &hdr);
break;
+ case NETCHANNEL2_MSG_SET_MAX_FRAGMENTS_PER_PACKET:
+ nc2_handle_set_max_fragments_per_packet(nc, ncrp,
+ &hdr);
+ break;
case NETCHANNEL2_MSG_PAD:
break;
default:
@@ -137,6 +141,8 @@ static void flush_rings(struct netchannel2_ring_pair *ncrp)
send_finish_packet_messages(ncrp);
if (ncrp->need_advertise_max_packets)
advertise_max_packets(ncrp);
+ if (ncrp->need_advertise_max_fragments_per_packet)
+ advertise_max_fragments_per_packet(ncrp);
if (nc->need_advertise_offloads)
advertise_offloads(nc);
@@ -460,6 +466,8 @@ static void _nc2_attach_rings(struct netchannel2_ring_pair *ncrp,
ncrp->is_attached = 1;
ncrp->need_advertise_max_packets = 1;
+ ncrp->need_advertise_max_fragments_per_packet = 1;
+ ncrp->max_fragments_per_tx_packet = 1;
}
/* Attach a netchannel2 structure to a ring pair. The endpoint is
@@ -546,8 +554,9 @@ void nc2_detach_rings(struct netchannel2 *nc)
nc->rings.irq = -1;
/* Disable all offloads */
- nc->net_device->features &= ~NETIF_F_IP_CSUM;
+ nc->net_device->features &= ~(NETIF_F_IP_CSUM | NETIF_F_SG);
nc->allow_tx_csum_offload = 0;
+ nc->rings.max_fragments_per_tx_packet = 1;
}
#if defined(CONFIG_XEN_NETDEV2_BACKEND)
@@ -657,17 +666,25 @@ static int process_ring(struct napi_struct *napi,
skb = skb_peek_tail(&nc->pending_skbs);
if (!skb)
break;
- if (prepare_xmit_allocate_resources(nc, skb) < 0) {
- /* Still stuck */
+ switch (prepare_xmit_allocate_resources(nc, skb)) {
+ case PREP_XMIT_OKAY:
+ __skb_unlink(skb, &nc->pending_skbs);
+ queue_packet_to_interface(skb, ncrp);
+ break;
+ case PREP_XMIT_BUSY:
+ goto still_stuck;
+ case PREP_XMIT_DROP:
+ __skb_unlink(skb, &nc->pending_skbs);
+ release_tx_packet(ncrp, skb);
break;
}
- __skb_unlink(skb, &nc->pending_skbs);
- queue_packet_to_interface(skb, ncrp);
}
if (skb_queue_empty(&nc->pending_skbs)) {
nc->is_stopped = 0;
netif_wake_queue(nc->net_device);
}
+still_stuck:
+ ;
}
spin_unlock(&ncrp->lock);
diff --git a/drivers/xen/netchannel2/netchannel2_core.h b/drivers/xen/netchannel2/netchannel2_core.h
index 7e00daf..b3b063c 100644
--- a/drivers/xen/netchannel2/netchannel2_core.h
+++ b/drivers/xen/netchannel2/netchannel2_core.h
@@ -199,6 +199,15 @@ struct netchannel2_ring_pair {
filtering rules would suppress the event. */
uint8_t delayed_kick;
+ /* Set if we need to send a SET_MAX_FRAGMENTS_PER_PACKET
+ * message. */
+ uint8_t need_advertise_max_fragments_per_packet;
+
+ /* The maximum number of fragments which can be used in any
+ given packet. We have to linearise anything which is more
+ fragmented than this. */
+ uint32_t max_fragments_per_tx_packet;
+
/* A list of packet IDs which we need to return to the other
end as soon as there is space on the ring. Protected by
the lock. */
@@ -308,10 +317,18 @@ struct sk_buff *handle_receiver_copy_packet(struct netchannel2 *nc,
unsigned nr_frags,
unsigned frags_off);
-int prepare_xmit_allocate_small(struct netchannel2_ring_pair *ncrp,
- struct sk_buff *skb);
-int prepare_xmit_allocate_grant(struct netchannel2_ring_pair *ncrp,
- struct sk_buff *skb);
+enum prepare_xmit_result {
+ PREP_XMIT_OKAY = 0,
+ PREP_XMIT_BUSY = -1,
+ PREP_XMIT_DROP = -2,
+};
+
+enum prepare_xmit_result prepare_xmit_allocate_small(
+ struct netchannel2_ring_pair *ncrp,
+ struct sk_buff *skb);
+enum prepare_xmit_result prepare_xmit_allocate_grant(
+ struct netchannel2_ring_pair *ncrp,
+ struct sk_buff *skb);
void xmit_grant(struct netchannel2_ring_pair *ncrp,
struct sk_buff *skb,
volatile void *msg);
@@ -347,9 +364,9 @@ void nc2_rscb_on_gntcopy_fail(void *ctxt, gnttab_copy_t *gop);
int nc2_start_xmit(struct sk_buff *skb, struct net_device *dev);
int nc2_really_start_xmit(struct netchannel2_ring_pair *ncrp,
- struct sk_buff *skb);
-int prepare_xmit_allocate_resources(struct netchannel2 *nc,
- struct sk_buff *skb);
+ struct sk_buff *skb);
+enum prepare_xmit_result prepare_xmit_allocate_resources(struct netchannel2 *nc,
+ struct sk_buff *skb);
void nc2_handle_finish_packet_msg(struct netchannel2 *nc,
struct netchannel2_ring_pair *ncrp,
struct netchannel2_msg_hdr *hdr);
@@ -363,6 +380,10 @@ void nc2_handle_packet_msg(struct netchannel2 *nc,
struct netchannel2_msg_hdr *hdr,
struct sk_buff_head *pending_rx_queue);
void advertise_max_packets(struct netchannel2_ring_pair *ncrp);
+void nc2_handle_set_max_fragments_per_packet(struct netchannel2 *nc,
+ struct netchannel2_ring_pair *ncrp,
+ struct netchannel2_msg_hdr *hdr);
+void advertise_max_fragments_per_packet(struct netchannel2_ring_pair *ncrp);
void receive_pending_skbs(struct sk_buff_head *rx_queue);
void nc2_queue_purge(struct netchannel2_ring_pair *ncrp,
struct sk_buff_head *queue);
diff --git a/drivers/xen/netchannel2/offload.c b/drivers/xen/netchannel2/offload.c
index 90d0a54..552b0ad 100644
--- a/drivers/xen/netchannel2/offload.c
+++ b/drivers/xen/netchannel2/offload.c
@@ -5,6 +5,7 @@
#include "netchannel2_core.h"
static int nc2_set_tx_csum(struct net_device *nd, u32 val);
+static int nc2_set_sg(struct net_device *nd, u32 val);
/* ---------------- Interface to the other domain ----------------------- */
void nc2_handle_set_offload(struct netchannel2 *nc,
@@ -25,6 +26,14 @@ void nc2_handle_set_offload(struct netchannel2 *nc,
if (msg.csum != nc->allow_tx_csum_offload) {
nc->allow_tx_csum_offload = msg.csum;
nc2_set_tx_csum(nc->net_device, msg.csum);
+ /* Linux doesn't support scatter-gather mode without
+ TX csum offload. We therefore need to disable SG
+ support whenever the remote turns off csum support.
+ We also elect to enable SG support whenever the
+ remote turns on csum support, since that's more
+ likely to be useful than requiring the user to
+ manually enable it every time. */
+ nc2_set_sg(nc->net_device, msg.csum);
}
}
@@ -47,6 +56,37 @@ void advertise_offloads(struct netchannel2 *nc)
}
}
+/* Not really offload-related, but it interacts with checksum offload
+ and is easiest to do here. */
+void nc2_handle_set_max_fragments_per_packet(struct netchannel2 *nc,
+ struct netchannel2_ring_pair *ncrp,
+ struct netchannel2_msg_hdr *hdr)
+{
+ struct netchannel2_msg_set_max_fragments_per_packet msg;
+
+ if (hdr->size != sizeof(msg)) {
+ pr_debug("Set max fragments per packet message had strange size
%d\n",
+ hdr->size);
+ return;
+ }
+ nc2_copy_from_ring(&ncrp->cons_ring, &msg, sizeof(msg));
+ if (msg.max_frags_per_packet < 1) {
+ pr_debug("set max fragments per packet to %d?\n",
+ msg.max_frags_per_packet);
+ return;
+ }
+ if (ncrp == &nc->rings &&
+ ncrp->max_fragments_per_tx_packet == 1 &&
+ msg.max_frags_per_packet > 1) {
+ /* Turning on scatter-gather mode. Linux only
+ supports it if you've got TX csum offload,
+ though. */
+ if (nc->net_device->features & NETIF_F_IP_CSUM)
+ nc->net_device->features |= NETIF_F_SG;
+ }
+ ncrp->max_fragments_per_tx_packet = msg.max_frags_per_packet;
+}
+
/* ---------------------- Ethtool interface ---------------------------- */
@@ -85,9 +125,28 @@ static int nc2_set_tx_csum(struct net_device *nd, u32 val)
return ethtool_op_set_tx_csum(nd, val);
}
+/* ethtool set_sg() handler. Linux makes sure that scatter-gather
+ mode is only enabled when TX csum offload is, so we don't have to
+ worry about that here. */
+static int nc2_set_sg(struct net_device *nd, u32 val)
+{
+ struct netchannel2 *nc = netdev_priv(nd);
+
+ if (nc->rings.max_fragments_per_tx_packet <= 1)
+ return -EOPNOTSUPP;
+
+ if (val)
+ nd->features |= NETIF_F_SG;
+ else
+ nd->features &= ~NETIF_F_SG;
+ return 0;
+}
+
struct ethtool_ops nc2_ethtool_ops = {
.get_tx_csum = ethtool_op_get_tx_csum,
.set_tx_csum = nc2_set_tx_csum,
.get_rx_csum = nc2_get_rx_csum,
.set_rx_csum = nc2_set_rx_csum,
+ .get_sg = ethtool_op_get_sg,
+ .set_sg = nc2_set_sg,
};
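
The ethtool plumbing above ties scatter-gather to TX csum offload.
Here is a small user-space model of the rule, again purely
illustrative: FEAT_* and set_sg_model() are made-up stand-ins for
NETIF_F_IP_CSUM/NETIF_F_SG and nc2_set_sg().

#include <errno.h>
#include <stdio.h>

#define FEAT_CSUM (1u << 0)	/* stand-in for NETIF_F_IP_CSUM */
#define FEAT_SG   (1u << 1)	/* stand-in for NETIF_F_SG */

/* SG is refused until the peer has advertised more than one fragment
 * per packet; the ethtool core separately guarantees that SG is only
 * on while TX csum offload is on. */
static int set_sg_model(unsigned *features, unsigned peer_max_frags,
			int val)
{
	if (peer_max_frags <= 1)
		return -EOPNOTSUPP;
	if (val)
		*features |= FEAT_SG;
	else
		*features &= ~FEAT_SG;
	return 0;
}

int main(void)
{
	unsigned features = FEAT_CSUM;

	printf("%d\n", set_sg_model(&features, 1, 1));	/* -EOPNOTSUPP */
	printf("%d\n", set_sg_model(&features, 17, 1));	/* 0: SG on */
	return 0;
}

From user space the toggle corresponds to "ethtool -K <dev> sg
on|off", which reaches nc2_set_sg() through the ethtool_ops table.
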
diff --git a/drivers/xen/netchannel2/recv_packet.c b/drivers/xen/netchannel2/recv_packet.c
index 0d4e593..958a3a6 100644
--- a/drivers/xen/netchannel2/recv_packet.c
+++ b/drivers/xen/netchannel2/recv_packet.c
@@ -83,6 +83,13 @@ void nc2_handle_packet_msg(struct netchannel2 *nc,
frags_bytes = hdr->size - sizeof(msg) - msg.prefix_size;
nr_frags = frags_bytes / sizeof(struct netchannel2_fragment);
+ if (nr_frags > MAX_SKB_FRAGS) {
+ pr_debug("otherend misbehaving: %d frags > %ld\n",
+ nr_frags, MAX_SKB_FRAGS);
+ nc->stats.tx_errors++;
+ return;
+ }
+
switch (msg.type) {
case NC2_PACKET_TYPE_small:
if (nr_frags != 0) {
@@ -218,6 +225,22 @@ void advertise_max_packets(struct netchannel2_ring_pair *ncrp)
ncrp->pending_time_sensitive_messages = 1;
}
+void advertise_max_fragments_per_packet(struct netchannel2_ring_pair *ncrp)
+{
+ struct netchannel2_msg_set_max_fragments_per_packet msg;
+
+ if (!nc2_can_send_payload_bytes(&ncrp->prod_ring, sizeof(msg)))
+ return;
+ msg.max_frags_per_packet = MAX_SKB_FRAGS;
+ nc2_send_message(&ncrp->prod_ring,
+ NETCHANNEL2_MSG_SET_MAX_FRAGMENTS_PER_PACKET,
+ 0,
+ &msg,
+ sizeof(msg));
+ ncrp->need_advertise_max_fragments_per_packet = 0;
+ ncrp->pending_time_sensitive_messages = 1;
+}
+
void receive_pending_skbs(struct sk_buff_head *pending_rx_queue)
{
struct sk_buff *skb;
diff --git a/drivers/xen/netchannel2/rscb.c b/drivers/xen/netchannel2/rscb.c
index 8984f90..8ad5454 100644
--- a/drivers/xen/netchannel2/rscb.c
+++ b/drivers/xen/netchannel2/rscb.c
@@ -229,8 +229,8 @@ static inline int nfrags_skb(struct sk_buff *skb, int prefix_size)
+ skb_shinfo(skb)->nr_frags;
}
-int prepare_xmit_allocate_grant(struct netchannel2_ring_pair *ncrp,
- struct sk_buff *skb)
+enum prepare_xmit_result prepare_xmit_allocate_grant(struct netchannel2_ring_pair *ncrp,
+ struct sk_buff *skb)
{
struct skb_cb_overlay *skb_co = get_skb_overlay(skb);
unsigned nr_fragments;
@@ -239,7 +239,7 @@ int prepare_xmit_allocate_grant(struct netchannel2_ring_pair *ncrp,
unsigned inline_prefix_size;
if (allocate_txp_slot(ncrp, skb) < 0)
- return -1;
+ return PREP_XMIT_BUSY;
/* We're going to have to get the remote to issue a grant copy
hypercall anyway, so there's no real benefit to shoving the
@@ -256,6 +256,14 @@ int prepare_xmit_allocate_grant(struct netchannel2_ring_pair *ncrp,
* policy grant. */
BUG_ON(nr_fragments == 0);
+ if (nr_fragments > ncrp->max_fragments_per_tx_packet) {
+ if (skb_linearize(skb) < 0)
+ return PREP_XMIT_DROP;
+ nr_fragments = nfrags_skb(skb, inline_prefix_size);
+ if (nr_fragments > ncrp->max_fragments_per_tx_packet)
+ return PREP_XMIT_DROP;
+ }
+
skb_co->nr_fragments = nr_fragments;
}
@@ -267,14 +275,14 @@ int prepare_xmit_allocate_grant(struct netchannel2_ring_pair *ncrp,
release_txp_slot(ncrp, skb);
/* Leave skb_co->nr_fragments set, so that we don't
have to recompute it next time around. */
- return -1;
+ return PREP_XMIT_BUSY;
}
skb_co->gref_pool = gref_pool;
skb_co->inline_prefix_size = inline_prefix_size;
skb_co->type = NC2_PACKET_TYPE_receiver_copy;
- return 0;
+ return PREP_XMIT_OKAY;
}
static void prepare_subpage_grant(struct netchannel2_ring_pair *ncrp,
diff --git a/drivers/xen/netchannel2/xmit_packet.c b/drivers/xen/netchannel2/xmit_packet.c
index 5b0ba6b..5cebca6 100644
--- a/drivers/xen/netchannel2/xmit_packet.c
+++ b/drivers/xen/netchannel2/xmit_packet.c
@@ -21,8 +21,9 @@ static enum transmit_policy transmit_policy(struct netchannel2 *nc,
transmitted in the ring. This is only called for small, linear
- SKBs. It always succeeds, but has an int return type for symmetry
- with the other prepare_xmit_*() functions. */
+ SKBs. It always succeeds, but returns a prepare_xmit_result for
+ symmetry with the other prepare_xmit_*() functions. */
-int prepare_xmit_allocate_small(struct netchannel2_ring_pair *ncrp,
- struct sk_buff *skb)
+enum prepare_xmit_result prepare_xmit_allocate_small(
+ struct netchannel2_ring_pair *ncrp,
+ struct sk_buff *skb)
{
struct skb_cb_overlay *skb_co = get_skb_overlay(skb);
@@ -33,7 +34,7 @@ int prepare_xmit_allocate_small(struct netchannel2_ring_pair *ncrp,
skb_co->gref_pool = 0;
skb_co->inline_prefix_size = skb->len;
- return 0;
+ return PREP_XMIT_OKAY;
}
/* Figure out how much space @tp will take up on the ring. */
@@ -56,13 +57,13 @@ static unsigned get_transmitted_packet_msg_size(struct sk_buff *skb)
allocated. The expected case is that the caller will arrange for
us to retry the allocation later, in which case we'll pick up the
already-allocated buffers. */
-int prepare_xmit_allocate_resources(struct netchannel2 *nc,
- struct sk_buff *skb)
+enum prepare_xmit_result prepare_xmit_allocate_resources(struct netchannel2 *nc,
+ struct sk_buff *skb)
{
struct skb_cb_overlay *skb_co = get_skb_overlay(skb);
enum transmit_policy policy;
unsigned msg_size;
- int r;
+ enum prepare_xmit_result r;
if (skb_co->policy == transmit_policy_unknown) {
policy = transmit_policy(nc, skb);
@@ -76,18 +77,18 @@ int prepare_xmit_allocate_resources(struct netchannel2 *nc,
default:
BUG();
/* Shut the compiler up. */
- r = -1;
+ r = PREP_XMIT_BUSY;
}
- if (r < 0)
+ if (r != PREP_XMIT_OKAY)
return r;
skb_co->policy = policy;
}
msg_size = get_transmitted_packet_msg_size(skb);
if (nc2_reserve_payload_bytes(&nc->rings.prod_ring, msg_size))
- return 0;
+ return PREP_XMIT_OKAY;
- return -1;
+ return PREP_XMIT_BUSY;
}
static void set_offload_flags(struct sk_buff *skb,
@@ -221,21 +222,27 @@ int nc2_start_xmit(struct sk_buff *skb, struct net_device *dev)
spin_lock_bh(&nc->rings.lock);
- if (!nc->rings.is_attached) {
- spin_unlock_bh(&nc->rings.lock);
- dev_kfree_skb(skb);
- nc->stats.tx_dropped++;
- return NETDEV_TX_OK;
- }
+ if (!nc->rings.is_attached)
+ goto out_drop;
r = prepare_xmit_allocate_resources(nc, skb);
- if (r < 0)
- goto out_busy;
+ if (r != PREP_XMIT_OKAY) {
+ if (r == PREP_XMIT_BUSY)
+ goto out_busy;
+ else
+ goto out_drop;
+ }
queue_packet_to_interface(skb, &nc->rings);
spin_unlock_bh(&nc->rings.lock);
return NETDEV_TX_OK;
+out_drop:
+ spin_unlock_bh(&nc->rings.lock);
+ dev_kfree_skb(skb);
+ nc->stats.tx_dropped++;
+ return NETDEV_TX_OK;
+
out_busy:
/* Some more buffers may have arrived, so kick the worker
* thread to go and have a look. */
diff --git a/include/xen/interface/io/netchannel2.h b/include/xen/interface/io/netchannel2.h
index 5a56eb9..11bb469 100644
--- a/include/xen/interface/io/netchannel2.h
+++ b/include/xen/interface/io/netchannel2.h
@@ -26,6 +26,11 @@ struct netchannel2_msg_set_max_packets {
* NETCHANNEL2_MAX_INLINE_BYTES. Packets may contain no more than
* NETCHANNEL2_MAX_PACKET_BYTES bytes of data, including all fragments
* and the prefix.
+ *
+ * If a SET_MAX_FRAGMENTS_PER_PACKET message has been received, the
+ * number of fragments in the packet should respect that limit.
+ * Otherwise, there should be at most one fragment in the packet
+ * (there may be zero if the entire packet fits in the inline prefix).
*/
#define NETCHANNEL2_MSG_PACKET 2
#define NETCHANNEL2_MAX_PACKET_BYTES 65536
@@ -55,10 +60,8 @@ struct netchannel2_msg_packet {
uint16_t pad2;
uint16_t csum_start;
uint16_t csum_offset;
- /* Variable-size array. The number of elements is determined
+ /* Variable-size array. The number of elements is determined
by the size of the message. */
- /* Until we support scatter-gather, this will be either 0 or 1
- element. */
struct netchannel2_fragment frags[0];
};
@@ -141,4 +144,19 @@ struct netchannel2_msg_set_offload {
uint16_t reserved;
};
+/* Set the maximum number of fragments which can be used in any packet
+ * (not including the inline prefix). Until this is sent, there can
+ * be at most one such fragment per packet. The maximum must not be
+ * set to zero. */
+/* Note that there is no acknowledgement for this message, and so if
+ * an endpoint tries to reduce the number of fragments then it may
+ * continue to receive over-fragmented packets for some time. The
+ * receiving endpoint is expected to deal with this.
+ */
+#define NETCHANNEL2_MSG_SET_MAX_FRAGMENTS_PER_PACKET 5
+struct netchannel2_msg_set_max_fragments_per_packet {
+ struct netchannel2_msg_hdr hdr;
+ uint32_t max_frags_per_packet;
+};
+
#endif /* !__NETCHANNEL2_H__ */
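
One last illustrative note (not part of the patch): a PACKET message
carries no explicit fragment count; the receiver derives it from the
message size, which is why the MAX_SKB_FRAGS check added to
recv_packet.c matters.  A trimmed user-space model of that
calculation, with frag_model standing in for struct
netchannel2_fragment:

#include <stdint.h>
#include <stdio.h>

/* Trimmed stand-in for struct netchannel2_fragment (12 bytes). */
struct frag_model {
	uint32_t pre_size, post_size;
	uint16_t off, size;
};

/* Everything after the fixed message body and the inline prefix must
 * be an integral number of fragment descriptors
 * (cf. nc2_handle_packet_msg). */
static uint32_t nr_frags(uint32_t msg_size, uint32_t body_size,
			 uint32_t prefix_size)
{
	return (msg_size - body_size - prefix_size)
		/ sizeof(struct frag_model);
}

int main(void)
{
	/* A 72-byte message with a 24-byte fixed body and no inline
	 * prefix carries (72 - 24) / 12 = 4 fragments. */
	printf("%u\n", nr_frags(72, 24, 0));
	return 0;
}

All three inputs are under the peer's control, which is why the
receiver validates the result instead of trusting it.
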
--
1.6.3.1
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel