# HG changeset patch
# User Jimi Xenidis <jimix@xxxxxxxxxxxxxx>
# Node ID 11ee20d418ea813709da4c86dbc4ae28efb17f36
# Parent fe1a31c06cbe548d3eb59b4e0fc16fc102a16344
[LINUX][XEN] backport net drivers
This is simply a copy of the network driver files from the Sparse tree
that have not yet made it into our tree.
Signed-off-by: Jimi Xenidis <jimix@xxxxxxxxxxxxxx>
---
drivers/xen/netback/common.h | 25
drivers/xen/netback/interface.c | 85 +-
drivers/xen/netback/loopback.c | 8
drivers/xen/netback/netback.c | 834 ++++++++++++++++++++------
drivers/xen/netback/xenbus.c | 87 ++
drivers/xen/netfront/netfront.c | 1242 ++++++++++++++++++++++++++++------------
6 files changed, 1696 insertions(+), 585 deletions(-)
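Note on the xenbus feature negotiation that several hunks below rely on: each
optional capability (feature-sg, feature-gso-tcpv4, feature-rx-notify,
request-rx-copy, ...) is a boolean key that one end writes under its own
xenstore node and the other end reads from otherend, treating an absent key
as 0. A minimal illustrative sketch of the backend-side read, mirroring the
connect_rings() hunk in netback/xenbus.c, follows; it is not part of the
patch, and read_feature_flag is a hypothetical helper name. Only the
xenbus_scanf() call that already appears in the hunks is assumed.

	/* Illustrative sketch only -- not applied by this patch. */
	#include <xen/xenbus.h>

	/* Read an optional boolean feature key published by the other end;
	 * an absent key means the feature is not supported. */
	static int read_feature_flag(struct xenbus_device *dev, const char *key)
	{
		int val;

		if (xenbus_scanf(XBT_NIL, dev->otherend, key, "%d", &val) < 0)
			val = 0;	/* key missing: treat as unsupported */
		return val;
	}

	/* Usage, as connect_rings() does for scatter-gather:
	 *	if (read_feature_flag(dev, "feature-sg"))
	 *		be->netif->features |= NETIF_F_SG;
	 */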
diff -r fe1a31c06cbe -r 11ee20d418ea drivers/xen/netback/common.h
--- a/drivers/xen/netback/common.h Sun Oct 08 12:23:50 2006 -0400
+++ b/drivers/xen/netback/common.h Sun Oct 08 12:28:37 2006 -0400
@@ -64,9 +64,9 @@ typedef struct netif_st {
/* Physical parameters of the comms window. */
grant_handle_t tx_shmem_handle;
- grant_ref_t tx_shmem_ref;
+ grant_ref_t tx_shmem_ref;
grant_handle_t rx_shmem_handle;
- grant_ref_t rx_shmem_ref;
+ grant_ref_t rx_shmem_ref;
unsigned int evtchn;
unsigned int irq;
@@ -75,6 +75,13 @@ typedef struct netif_st {
netif_rx_back_ring_t rx;
struct vm_struct *tx_comms_area;
struct vm_struct *rx_comms_area;
+
+ /* Set of features that can be turned on in dev->features. */
+ int features;
+
+ /* Internal feature information. */
+ int can_queue:1; /* can queue packets for receiver? */
+ int copying_receiver:1; /* copy packets to receiver? */
/* Allow netif_be_start_xmit() to peek ahead in the rx request ring. */
RING_IDX rx_req_cons_peek;
@@ -86,8 +93,6 @@ typedef struct netif_st {
struct timer_list credit_timeout;
/* Miscellaneous private stuff. */
- enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
- int active;
struct list_head list; /* scheduling list */
atomic_t refcnt;
struct net_device *dev;
@@ -121,4 +126,16 @@ struct net_device_stats *netif_be_get_st
struct net_device_stats *netif_be_get_stats(struct net_device *dev);
irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs);
+static inline int netbk_can_queue(struct net_device *dev)
+{
+ netif_t *netif = netdev_priv(dev);
+ return netif->can_queue;
+}
+
+static inline int netbk_can_sg(struct net_device *dev)
+{
+ netif_t *netif = netdev_priv(dev);
+ return netif->features & NETIF_F_SG;
+}
+
#endif /* __NETIF__BACKEND__COMMON_H__ */
diff -r fe1a31c06cbe -r 11ee20d418ea drivers/xen/netback/interface.c
--- a/drivers/xen/netback/interface.c Sun Oct 08 12:23:50 2006 -0400
+++ b/drivers/xen/netback/interface.c Sun Oct 08 12:28:37 2006 -0400
@@ -36,46 +36,75 @@
static void __netif_up(netif_t *netif)
{
- struct net_device *dev = netif->dev;
- netif_tx_lock_bh(dev);
- netif->active = 1;
- netif_tx_unlock_bh(dev);
enable_irq(netif->irq);
netif_schedule_work(netif);
}
static void __netif_down(netif_t *netif)
{
- struct net_device *dev = netif->dev;
disable_irq(netif->irq);
- netif_tx_lock_bh(dev);
- netif->active = 0;
- netif_tx_unlock_bh(dev);
netif_deschedule_work(netif);
}
static int net_open(struct net_device *dev)
{
netif_t *netif = netdev_priv(dev);
- if (netif->status == CONNECTED)
+ if (netif_carrier_ok(dev))
__netif_up(netif);
- netif_start_queue(dev);
return 0;
}
static int net_close(struct net_device *dev)
{
netif_t *netif = netdev_priv(dev);
- netif_stop_queue(dev);
- if (netif->status == CONNECTED)
+ if (netif_carrier_ok(dev))
__netif_down(netif);
return 0;
+}
+
+static int netbk_change_mtu(struct net_device *dev, int mtu)
+{
+ int max = netbk_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN;
+
+ if (mtu > max)
+ return -EINVAL;
+ dev->mtu = mtu;
+ return 0;
+}
+
+static int netbk_set_sg(struct net_device *dev, u32 data)
+{
+ if (data) {
+ netif_t *netif = netdev_priv(dev);
+
+ if (!(netif->features & NETIF_F_SG))
+ return -ENOSYS;
+ }
+
+ return ethtool_op_set_sg(dev, data);
+}
+
+static int netbk_set_tso(struct net_device *dev, u32 data)
+{
+ if (data) {
+ netif_t *netif = netdev_priv(dev);
+
+ if (!(netif->features & NETIF_F_TSO))
+ return -ENOSYS;
+ }
+
+ return ethtool_op_set_tso(dev, data);
}
static struct ethtool_ops network_ethtool_ops =
{
.get_tx_csum = ethtool_op_get_tx_csum,
.set_tx_csum = ethtool_op_set_tx_csum,
+ .get_sg = ethtool_op_get_sg,
+ .set_sg = netbk_set_sg,
+ .get_tso = ethtool_op_get_tso,
+ .set_tso = netbk_set_tso,
+ .get_link = ethtool_op_get_link,
};
netif_t *netif_alloc(domid_t domid, unsigned int handle, u8 be_mac[ETH_ALEN])
@@ -92,11 +121,12 @@ netif_t *netif_alloc(domid_t domid, unsi
return ERR_PTR(-ENOMEM);
}
+ netif_carrier_off(dev);
+
netif = netdev_priv(dev);
memset(netif, 0, sizeof(*netif));
netif->domid = domid;
netif->handle = handle;
- netif->status = DISCONNECTED;
atomic_set(&netif->refcnt, 1);
init_waitqueue_head(&netif->waiting_to_free);
netif->dev = dev;
@@ -109,12 +139,16 @@ netif_t *netif_alloc(domid_t domid, unsi
dev->get_stats = netif_be_get_stats;
dev->open = net_open;
dev->stop = net_close;
+ dev->change_mtu = netbk_change_mtu;
dev->features = NETIF_F_IP_CSUM;
SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
- /* Disable queuing. */
- dev->tx_queue_len = 0;
+ /*
+ * Reduce default TX queuelen so that each guest interface only
+ * allows it to eat around 6.4MB of host memory.
+ */
+ dev->tx_queue_len = 100;
for (i = 0; i < ETH_ALEN; i++)
if (be_mac[i] != 0)
@@ -255,11 +289,9 @@ int netif_map(netif_t *netif, unsigned l
netif->rx_req_cons_peek = 0;
netif_get(netif);
- wmb(); /* Other CPUs see new state before interface is started. */
rtnl_lock();
- netif->status = CONNECTED;
- wmb();
+ netif_carrier_on(netif->dev);
if (netif_running(netif->dev))
__netif_up(netif);
rtnl_unlock();
@@ -295,20 +327,13 @@ static void netif_free(netif_t *netif)
void netif_disconnect(netif_t *netif)
{
- switch (netif->status) {
- case CONNECTED:
+ if (netif_carrier_ok(netif->dev)) {
rtnl_lock();
- netif->status = DISCONNECTING;
- wmb();
+ netif_carrier_off(netif->dev);
if (netif_running(netif->dev))
__netif_down(netif);
rtnl_unlock();
netif_put(netif);
- /* fall through */
- case DISCONNECTED:
- netif_free(netif);
- break;
- default:
- BUG();
- }
-}
+ }
+ netif_free(netif);
+}
diff -r fe1a31c06cbe -r 11ee20d418ea drivers/xen/netback/loopback.c
--- a/drivers/xen/netback/loopback.c Sun Oct 08 12:23:50 2006 -0400
+++ b/drivers/xen/netback/loopback.c Sun Oct 08 12:28:37 2006 -0400
@@ -125,6 +125,11 @@ static struct ethtool_ops network_ethtoo
{
.get_tx_csum = ethtool_op_get_tx_csum,
.set_tx_csum = ethtool_op_set_tx_csum,
+ .get_sg = ethtool_op_get_sg,
+ .set_sg = ethtool_op_set_sg,
+ .get_tso = ethtool_op_get_tso,
+ .set_tso = ethtool_op_set_tso,
+ .get_link = ethtool_op_get_link,
};
/*
@@ -152,6 +157,7 @@ static void loopback_construct(struct ne
dev->features = (NETIF_F_HIGHDMA |
NETIF_F_LLTX |
+ NETIF_F_TSO |
NETIF_F_SG |
NETIF_F_IP_CSUM);
@@ -212,7 +218,7 @@ static int __init make_loopback(int i)
return err;
}
-static void __init clean_loopback(int i)
+static void __exit clean_loopback(int i)
{
struct net_device *dev1, *dev2;
char dev_name[IFNAMSIZ];
diff -r fe1a31c06cbe -r 11ee20d418ea drivers/xen/netback/netback.c
--- a/drivers/xen/netback/netback.c Sun Oct 08 12:23:50 2006 -0400
+++ b/drivers/xen/netback/netback.c Sun Oct 08 12:28:37 2006 -0400
@@ -40,17 +40,23 @@
/*#define NETBE_DEBUG_INTERRUPT*/
+struct netbk_rx_meta {
+ skb_frag_t frag;
+ int id;
+ int copy:1;
+};
+
static void netif_idx_release(u16 pending_idx);
static void netif_page_release(struct page *page);
static void make_tx_response(netif_t *netif,
- u16 id,
+ netif_tx_request_t *txp,
s8 st);
-static int make_rx_response(netif_t *netif,
- u16 id,
- s8 st,
- u16 offset,
- u16 size,
- u16 flags);
+static netif_rx_response_t *make_rx_response(netif_t *netif,
+ u16 id,
+ s8 st,
+ u16 offset,
+ u16 size,
+ u16 flags);
static void net_tx_action(unsigned long unused);
static DECLARE_TASKLET(net_tx_tasklet, net_tx_action, 0);
@@ -63,13 +69,11 @@ static struct timer_list net_timer;
#define MAX_PENDING_REQS 256
static struct sk_buff_head rx_queue;
-static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1];
-static mmu_update_t rx_mmu[NET_RX_RING_SIZE];
-static gnttab_transfer_t grant_rx_op[NET_RX_RING_SIZE];
-static unsigned char rx_notify[NR_IRQS];
static unsigned long mmap_vstart;
#define MMAP_VADDR(_req) (mmap_vstart + ((_req) * PAGE_SIZE))
+
+static void *rx_mmap_area;
#define PKT_PROT_LEN 64
@@ -97,27 +101,30 @@ static spinlock_t net_schedule_list_lock
static spinlock_t net_schedule_list_lock;
#define MAX_MFN_ALLOC 64
-static unsigned long mfn_list[MAX_MFN_ALLOC];
+static xen_pfn_t mfn_list[MAX_MFN_ALLOC];
static unsigned int alloc_index = 0;
-static DEFINE_SPINLOCK(mfn_lock);
-
-static unsigned long alloc_mfn(void)
-{
- unsigned long mfn = 0, flags;
+
+static inline unsigned long alloc_mfn(void)
+{
+ return mfn_list[--alloc_index];
+}
+
+static int check_mfn(int nr)
+{
struct xen_memory_reservation reservation = {
- .nr_extents = MAX_MFN_ALLOC,
.extent_order = 0,
.domid = DOMID_SELF
};
- set_xen_guest_handle(reservation.extent_start, (xen_pfn_t *)mfn_list);
- spin_lock_irqsave(&mfn_lock, flags);
- if ( unlikely(alloc_index == 0) )
- alloc_index = HYPERVISOR_memory_op(
- XENMEM_increase_reservation, &reservation);
- if ( alloc_index != 0 )
- mfn = mfn_list[--alloc_index];
- spin_unlock_irqrestore(&mfn_lock, flags);
- return mfn;
+
+ if (likely(alloc_index >= nr))
+ return 0;
+
+ set_xen_guest_handle(reservation.extent_start, mfn_list + alloc_index);
+ reservation.nr_extents = MAX_MFN_ALLOC - alloc_index;
+ alloc_index += HYPERVISOR_memory_op(XENMEM_increase_reservation,
+ &reservation);
+
+ return alloc_index >= nr ? 0 : -ENOMEM;
}
static inline void maybe_schedule_tx_action(void)
@@ -139,6 +146,123 @@ static inline int is_xen_skb(struct sk_b
return (cp == skbuff_cachep);
}
+/*
+ * We can flip without copying the packet unless:
+ * 1. The data is not allocated from our special cache; or
+ * 2. The main data area is shared; or
+ * 3. One or more fragments are shared; or
+ * 4. There are chained fragments.
+ */
+static inline int is_flippable_skb(struct sk_buff *skb)
+{
+ int frag;
+
+ if (!is_xen_skb(skb) || skb_cloned(skb))
+ return 0;
+
+ for (frag = 0; frag < skb_shinfo(skb)->nr_frags; frag++) {
+ if (page_count(skb_shinfo(skb)->frags[frag].page) > 1)
+ return 0;
+ }
+
+ if (skb_shinfo(skb)->frag_list != NULL)
+ return 0;
+
+ return 1;
+}
+
+static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
+{
+ struct skb_shared_info *ninfo;
+ struct sk_buff *nskb;
+ unsigned long offset;
+ int ret;
+ int len;
+ int headlen;
+
+ BUG_ON(skb_shinfo(skb)->frag_list != NULL);
+
+ nskb = alloc_skb(SKB_MAX_HEAD(0), GFP_ATOMIC);
+ if (unlikely(!nskb))
+ goto err;
+
+ skb_reserve(nskb, 16);
+ headlen = nskb->end - nskb->data;
+ if (headlen > skb_headlen(skb))
+ headlen = skb_headlen(skb);
+ ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen);
+ BUG_ON(ret);
+
+ ninfo = skb_shinfo(nskb);
+ ninfo->gso_size = skb_shinfo(skb)->gso_size;
+ ninfo->gso_type = skb_shinfo(skb)->gso_type;
+
+ offset = headlen;
+ len = skb->len - headlen;
+
+ nskb->len = skb->len;
+ nskb->data_len = len;
+ nskb->truesize += len;
+
+ while (len) {
+ struct page *page;
+ int copy;
+ int zero;
+
+ if (unlikely(ninfo->nr_frags >= MAX_SKB_FRAGS)) {
+ dump_stack();
+ goto err_free;
+ }
+
+ copy = len >= PAGE_SIZE ? PAGE_SIZE : len;
+ zero = len >= PAGE_SIZE ? 0 : __GFP_ZERO;
+
+ page = alloc_page(GFP_ATOMIC | zero);
+ if (unlikely(!page))
+ goto err_free;
+
+ ret = skb_copy_bits(skb, offset, page_address(page), copy);
+ BUG_ON(ret);
+
+ ninfo->frags[ninfo->nr_frags].page = page;
+ ninfo->frags[ninfo->nr_frags].page_offset = 0;
+ ninfo->frags[ninfo->nr_frags].size = copy;
+ ninfo->nr_frags++;
+
+ offset += copy;
+ len -= copy;
+ }
+
+ offset = nskb->data - skb->data;
+
+ nskb->h.raw = skb->h.raw + offset;
+ nskb->nh.raw = skb->nh.raw + offset;
+ nskb->mac.raw = skb->mac.raw + offset;
+
+ return nskb;
+
+ err_free:
+ kfree_skb(nskb);
+ err:
+ return NULL;
+}
+
+static inline int netbk_max_required_rx_slots(netif_t *netif)
+{
+ if (netif->features & (NETIF_F_SG|NETIF_F_TSO))
+ return MAX_SKB_FRAGS + 2; /* header + extra_info + frags */
+ return 1; /* all in one */
+}
+
+static inline int netbk_queue_full(netif_t *netif)
+{
+ RING_IDX peek = netif->rx_req_cons_peek;
+ RING_IDX needed = netbk_max_required_rx_slots(netif);
+
+ return ((netif->rx.sring->req_prod - peek) < needed) ||
+ ((netif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) < needed);
+}
+
int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
netif_t *netif = netdev_priv(dev);
@@ -146,30 +270,24 @@ int netif_be_start_xmit(struct sk_buff *
BUG_ON(skb->dev != dev);
/* Drop the packet if the target domain has no receive buffers. */
- if (!netif->active ||
- (netif->rx_req_cons_peek == netif->rx.sring->req_prod) ||
- ((netif->rx_req_cons_peek - netif->rx.rsp_prod_pvt) ==
- NET_RX_RING_SIZE))
+ if (unlikely(!netif_running(dev) || !netif_carrier_ok(dev)))
goto drop;
- /*
- * We do not copy the packet unless:
- * 1. The data is shared; or
- * 2. The data is not allocated from our special cache.
- * NB. We also couldn't cope with fragmented packets, but we won't get
- * any because we not advertise the NETIF_F_SG feature.
- */
- if (skb_shared(skb) || skb_cloned(skb) || !is_xen_skb(skb)) {
- int hlen = skb->data - skb->head;
- int ret;
- struct sk_buff *nskb = dev_alloc_skb(hlen + skb->len);
+ if (unlikely(netbk_queue_full(netif))) {
+ /* Not a BUG_ON() -- misbehaving netfront can trigger this. */
+ if (netbk_can_queue(dev))
+ DPRINTK("Queue full but not stopped!\n");
+ goto drop;
+ }
+
+ /* Copy the packet here if it's destined for a flipping
+ interface but isn't flippable (e.g. extra references to
+ data)
+ */
+ if (!netif->copying_receiver && !is_flippable_skb(skb)) {
+ struct sk_buff *nskb = netbk_copy_skb(skb);
if ( unlikely(nskb == NULL) )
goto drop;
- skb_reserve(nskb, hlen);
- __skb_put(nskb, skb->len);
- ret = skb_copy_bits(skb, -hlen, nskb->data - hlen,
- skb->len + hlen);
- BUG_ON(ret);
/* Copy only the header fields we use in this driver. */
nskb->dev = skb->dev;
nskb->ip_summed = skb->ip_summed;
@@ -178,8 +296,17 @@ int netif_be_start_xmit(struct sk_buff *
skb = nskb;
}
- netif->rx_req_cons_peek++;
+ netif->rx_req_cons_peek += skb_shinfo(skb)->nr_frags + 1 +
+ !!skb_shinfo(skb)->gso_size;
netif_get(netif);
+
+ if (netbk_can_queue(dev) && netbk_queue_full(netif)) {
+ netif->rx.sring->req_event = netif->rx_req_cons_peek +
+ netbk_max_required_rx_slots(netif);
+ mb(); /* request notification /then/ check & stop the queue */
+ if (netbk_queue_full(netif))
+ netif_stop_queue(dev);
+ }
skb_queue_tail(&rx_queue, skb);
tasklet_schedule(&net_rx_tasklet);
@@ -203,7 +330,7 @@ static void xen_network_done_notify(void
/*
* Add following to poll() function in NAPI driver (Tigon3 is example):
* if ( xen_network_done() )
- * tg3_enable_ints(tp);
+ * tg3_enable_ints(tp);
*/
int xen_network_done(void)
{
@@ -211,148 +338,371 @@ int xen_network_done(void)
}
#endif
-static void net_rx_action(unsigned long unused)
-{
- netif_t *netif = NULL;
- s8 status;
- u16 size, id, irq, flags;
+struct netrx_pending_operations {
+ unsigned trans_prod, trans_cons;
+ unsigned mmu_prod, mmu_cons;
+ unsigned mcl_prod, mcl_cons;
+ unsigned copy_prod, copy_cons;
+ unsigned meta_prod, meta_cons;
+ mmu_update_t *mmu;
+ gnttab_transfer_t *trans;
+ gnttab_copy_t *copy;
multicall_entry_t *mcl;
+ struct netbk_rx_meta *meta;
+};
+
+/* Set up the grant operations for this fragment. If it's a flipping
+ interface, we also set up the unmap request from here. */
+static u16 netbk_gop_frag(netif_t *netif, struct netbk_rx_meta *meta,
+ int i, struct netrx_pending_operations *npo,
+ struct page *page, unsigned long size,
+ unsigned long offset)
+{
mmu_update_t *mmu;
gnttab_transfer_t *gop;
- unsigned long vdata, old_mfn, new_mfn;
+ gnttab_copy_t *copy_gop;
+ multicall_entry_t *mcl;
+ netif_rx_request_t *req;
+ unsigned long old_mfn, new_mfn;
+
+ old_mfn = virt_to_mfn(page_address(page));
+
+ req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons + i);
+ if (netif->copying_receiver) {
+ /* The fragment needs to be copied rather than
+ flipped. */
+ meta->copy = 1;
+ copy_gop = npo->copy + npo->copy_prod++;
+ copy_gop->source.domid = DOMID_SELF;
+ copy_gop->source.offset = offset;
+ copy_gop->source.u.gmfn = old_mfn;
+ copy_gop->dest.domid = netif->domid;
+ copy_gop->dest.offset = 0;
+ copy_gop->dest.u.ref = req->gref;
+ copy_gop->len = size;
+ copy_gop->flags = GNTCOPY_dest_gref;
+ } else {
+ meta->copy = 0;
+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+ new_mfn = alloc_mfn();
+
+ /*
+ * Set the new P2M table entry before
+ * reassigning the old data page. Heed the
+ * comment in pgtable-2level.h:pte_page(). :-)
+ */
+ set_phys_to_machine(page_to_pfn(page), new_mfn);
+
+ mcl = npo->mcl + npo->mcl_prod++;
+ MULTI_update_va_mapping(mcl,
+ (unsigned long)page_address(page),
+ pfn_pte_ma(new_mfn, PAGE_KERNEL),
+ 0);
+
+ mmu = npo->mmu + npo->mmu_prod++;
+ mmu->ptr = ((maddr_t)new_mfn << PAGE_SHIFT) |
+ MMU_MACHPHYS_UPDATE;
+ mmu->val = page_to_pfn(page);
+ }
+
+ gop = npo->trans + npo->trans_prod++;
+ gop->mfn = old_mfn;
+ gop->domid = netif->domid;
+ gop->ref = req->gref;
+ }
+ return req->id;
+}
+
+static void netbk_gop_skb(struct sk_buff *skb,
+ struct netrx_pending_operations *npo)
+{
+ netif_t *netif = netdev_priv(skb->dev);
+ int nr_frags = skb_shinfo(skb)->nr_frags;
+ int i;
+ int extra;
+ struct netbk_rx_meta *head_meta, *meta;
+
+ head_meta = npo->meta + npo->meta_prod++;
+ head_meta->frag.page_offset = skb_shinfo(skb)->gso_type;
+ head_meta->frag.size = skb_shinfo(skb)->gso_size;
+ extra = !!head_meta->frag.size + 1;
+
+ for (i = 0; i < nr_frags; i++) {
+ meta = npo->meta + npo->meta_prod++;
+ meta->frag = skb_shinfo(skb)->frags[i];
+ meta->id = netbk_gop_frag(netif, meta, i + extra, npo,
+ meta->frag.page,
+ meta->frag.size,
+ meta->frag.page_offset);
+ }
+
+ /*
+ * This must occur at the end to ensure that we don't trash
+ * skb_shinfo until we're done.
+ */
+ head_meta->id = netbk_gop_frag(netif, head_meta, 0, npo,
+ virt_to_page(skb->data),
+ skb_headlen(skb),
+ offset_in_page(skb->data));
+
+ netif->rx.req_cons += nr_frags + extra;
+}
+
+static inline void netbk_free_pages(int nr_frags, struct netbk_rx_meta *meta)
+{
+ int i;
+
+ for (i = 0; i < nr_frags; i++)
+ put_page(meta[i].frag.page);
+}
+
+/* This is a twin to netbk_gop_skb. Assume that netbk_gop_skb was
+ used to set up the operations on the top of
+ netrx_pending_operations, which have since been done. Check that
+ they didn't give any errors and advance over them. */
+static int netbk_check_gop(int nr_frags, domid_t domid,
+ struct netrx_pending_operations *npo)
+{
+ multicall_entry_t *mcl;
+ gnttab_transfer_t *gop;
+ gnttab_copy_t *copy_op;
+ int status = NETIF_RSP_OKAY;
+ int i;
+
+ for (i = 0; i <= nr_frags; i++) {
+ if (npo->meta[npo->meta_cons + i].copy) {
+ copy_op = npo->copy + npo->copy_cons++;
+ if (copy_op->status != GNTST_okay) {
+ DPRINTK("Bad status %d from copy to DOM%d.\n",
+ copy_op->status, domid);
+ status = NETIF_RSP_ERROR;
+ }
+ } else {
+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+ mcl = npo->mcl + npo->mcl_cons++;
+ /* The update_va_mapping() must not fail. */
+ BUG_ON(mcl->result != 0);
+ }
+
+ gop = npo->trans + npo->trans_cons++;
+ /* Check the reassignment error code. */
+ if (gop->status != 0) {
+ DPRINTK("Bad status %d from grant transfer to
DOM%u\n",
+ gop->status, domid);
+ /*
+ * Page no longer belongs to us unless
+ * GNTST_bad_page, but that should be
+ * a fatal error anyway.
+ */
+ BUG_ON(gop->status == GNTST_bad_page);
+ status = NETIF_RSP_ERROR;
+ }
+ }
+ }
+
+ return status;
+}
+
+static void netbk_add_frag_responses(netif_t *netif, int status,
+ struct netbk_rx_meta *meta, int nr_frags)
+{
+ int i;
+ unsigned long offset;
+
+ for (i = 0; i < nr_frags; i++) {
+ int id = meta[i].id;
+ int flags = (i == nr_frags - 1) ? 0 : NETRXF_more_data;
+
+ if (meta[i].copy)
+ offset = 0;
+ else
+ offset = meta[i].frag.page_offset;
+ make_rx_response(netif, id, status, offset,
+ meta[i].frag.size, flags);
+ }
+}
+
+static void net_rx_action(unsigned long unused)
+{
+ netif_t *netif = NULL;
+ s8 status;
+ u16 id, irq, flags;
+ netif_rx_response_t *resp;
+ multicall_entry_t *mcl;
struct sk_buff_head rxq;
struct sk_buff *skb;
- u16 notify_list[NET_RX_RING_SIZE];
int notify_nr = 0;
int ret;
+ int nr_frags;
+ int count;
+ unsigned long offset;
+
+ /*
+ * Putting hundreds of bytes on the stack is considered rude.
+ * Static works because a tasklet can only be on one CPU at any time.
+ */
+ static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+3];
+ static mmu_update_t rx_mmu[NET_RX_RING_SIZE];
+ static gnttab_transfer_t grant_trans_op[NET_RX_RING_SIZE];
+ static gnttab_copy_t grant_copy_op[NET_RX_RING_SIZE];
+ static unsigned char rx_notify[NR_IRQS];
+ static u16 notify_list[NET_RX_RING_SIZE];
+ static struct netbk_rx_meta meta[NET_RX_RING_SIZE];
+
+ struct netrx_pending_operations npo = {
+ mmu: rx_mmu,
+ trans: grant_trans_op,
+ copy: grant_copy_op,
+ mcl: rx_mcl,
+ meta: meta};
skb_queue_head_init(&rxq);
- mcl = rx_mcl;
- mmu = rx_mmu;
- gop = grant_rx_op;
+ count = 0;
while ((skb = skb_dequeue(&rx_queue)) != NULL) {
- netif = netdev_priv(skb->dev);
- vdata = (unsigned long)skb->data;
- old_mfn = virt_to_mfn(vdata);
-
- if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+ nr_frags = skb_shinfo(skb)->nr_frags;
+ *(int *)skb->cb = nr_frags;
+
+ if (!xen_feature(XENFEAT_auto_translated_physmap) &&
+ check_mfn(nr_frags + 1)) {
/* Memory squeeze? Back off for an arbitrary while. */
- if ((new_mfn = alloc_mfn()) == 0) {
- if ( net_ratelimit() )
- WPRINTK("Memory squeeze in netback "
- "driver.\n");
- mod_timer(&net_timer, jiffies + HZ);
- skb_queue_head(&rx_queue, skb);
- break;
- }
- /*
- * Set the new P2M table entry before reassigning
- * the old data page. Heed the comment in
- * pgtable-2level.h:pte_page(). :-)
- */
- set_phys_to_machine(
- __pa(skb->data) >> PAGE_SHIFT,
- new_mfn);
-
- MULTI_update_va_mapping(mcl, vdata,
- pfn_pte_ma(new_mfn,
- PAGE_KERNEL), 0);
- mcl++;
-
- mmu->ptr = ((maddr_t)new_mfn << PAGE_SHIFT) |
- MMU_MACHPHYS_UPDATE;
- mmu->val = __pa(vdata) >> PAGE_SHIFT;
- mmu++;
- }
-
- gop->mfn = old_mfn;
- gop->domid = netif->domid;
- gop->ref = RING_GET_REQUEST(
- &netif->rx, netif->rx.req_cons)->gref;
- netif->rx.req_cons++;
- gop++;
+ if ( net_ratelimit() )
+ WPRINTK("Memory squeeze in netback "
+ "driver.\n");
+ mod_timer(&net_timer, jiffies + HZ);
+ skb_queue_head(&rx_queue, skb);
+ break;
+ }
+
+ netbk_gop_skb(skb, &npo);
+
+ count += nr_frags + 1;
__skb_queue_tail(&rxq, skb);
/* Filled the batch queue? */
- if ((gop - grant_rx_op) == ARRAY_SIZE(grant_rx_op))
+ if (count + MAX_SKB_FRAGS >= NET_RX_RING_SIZE)
break;
}
- if (!xen_feature(XENFEAT_auto_translated_physmap)) {
- if (mcl == rx_mcl)
- return;
-
+ if (npo.mcl_prod &&
+ !xen_feature(XENFEAT_auto_translated_physmap)) {
+ mcl = npo.mcl + npo.mcl_prod++;
+
+ BUG_ON(mcl[-1].op != __HYPERVISOR_update_va_mapping);
mcl[-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
- if (mmu - rx_mmu) {
- mcl->op = __HYPERVISOR_mmu_update;
- mcl->args[0] = (unsigned long)rx_mmu;
- mcl->args[1] = mmu - rx_mmu;
- mcl->args[2] = 0;
- mcl->args[3] = DOMID_SELF;
- mcl++;
- }
-
- ret = HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl);
- BUG_ON(ret != 0);
- }
-
- ret = HYPERVISOR_grant_table_op(GNTTABOP_transfer, grant_rx_op,
- gop - grant_rx_op);
+ mcl->op = __HYPERVISOR_mmu_update;
+ mcl->args[0] = (unsigned long)rx_mmu;
+ mcl->args[1] = npo.mmu_prod;
+ mcl->args[2] = 0;
+ mcl->args[3] = DOMID_SELF;
+ }
+
+ if (npo.trans_prod) {
+ mcl = npo.mcl + npo.mcl_prod++;
+ mcl->op = __HYPERVISOR_grant_table_op;
+ mcl->args[0] = GNTTABOP_transfer;
+ mcl->args[1] = (unsigned long)grant_trans_op;
+ mcl->args[2] = npo.trans_prod;
+ }
+
+ if (npo.copy_prod) {
+ mcl = npo.mcl + npo.mcl_prod++;
+ mcl->op = __HYPERVISOR_grant_table_op;
+ mcl->args[0] = GNTTABOP_copy;
+ mcl->args[1] = (unsigned long)grant_copy_op;
+ mcl->args[2] = npo.copy_prod;
+ }
+
+ /* Nothing to do? */
+ if (!npo.mcl_prod)
+ return;
+
+ BUG_ON(npo.copy_prod > NET_RX_RING_SIZE);
+ BUG_ON(npo.mmu_prod > NET_RX_RING_SIZE);
+ BUG_ON(npo.trans_prod > NET_RX_RING_SIZE);
+ BUG_ON(npo.mcl_prod > NET_RX_RING_SIZE+3);
+ BUG_ON(npo.meta_prod > NET_RX_RING_SIZE);
+
+ ret = HYPERVISOR_multicall(npo.mcl, npo.mcl_prod);
BUG_ON(ret != 0);
- mcl = rx_mcl;
- gop = grant_rx_op;
while ((skb = __skb_dequeue(&rxq)) != NULL) {
- netif = netdev_priv(skb->dev);
- size = skb->tail - skb->data;
-
- atomic_set(&(skb_shinfo(skb)->dataref), 1);
- skb_shinfo(skb)->nr_frags = 0;
- skb_shinfo(skb)->frag_list = NULL;
-
- netif->stats.tx_bytes += size;
+ nr_frags = *(int *)skb->cb;
+
+ netif = netdev_priv(skb->dev);
+ /* We can't rely on skb_release_data to release the
+ pages used by fragments for us, since it tries to
+ touch the pages in the fraglist. If we're in
+ flipping mode, that doesn't work. In copying mode,
+ we still have access to all of the pages, and so
+ it's safe to let release_data deal with it. */
+ /* (Freeing the fragments is safe since we copy
+ non-linear skbs destined for flipping interfaces) */
+ if (!netif->copying_receiver) {
+ atomic_set(&(skb_shinfo(skb)->dataref), 1);
+ skb_shinfo(skb)->frag_list = NULL;
+ skb_shinfo(skb)->nr_frags = 0;
+ netbk_free_pages(nr_frags, meta + npo.meta_cons + 1);
+ }
+
+ netif->stats.tx_bytes += skb->len;
netif->stats.tx_packets++;
- if (!xen_feature(XENFEAT_auto_translated_physmap)) {
- /* The update_va_mapping() must not fail. */
- BUG_ON(mcl->result != 0);
- mcl++;
- }
-
- /* Check the reassignment error code. */
- status = NETIF_RSP_OKAY;
- if (gop->status != 0) {
- DPRINTK("Bad status %d from grant transfer to DOM%u\n",
- gop->status, netif->domid);
- /*
- * Page no longer belongs to us unless GNTST_bad_page,
- * but that should be a fatal error anyway.
- */
- BUG_ON(gop->status == GNTST_bad_page);
- status = NETIF_RSP_ERROR;
- }
- irq = netif->irq;
- id = RING_GET_REQUEST(&netif->rx, netif->rx.rsp_prod_pvt)->id;
- flags = 0;
+ status = netbk_check_gop(nr_frags, netif->domid, &npo);
+
+ id = meta[npo.meta_cons].id;
+ flags = nr_frags ? NETRXF_more_data : 0;
+
if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
flags |= NETRXF_csum_blank | NETRXF_data_validated;
else if (skb->proto_data_valid) /* remote but checksummed? */
flags |= NETRXF_data_validated;
- if (make_rx_response(netif, id, status,
- (unsigned long)skb->data & ~PAGE_MASK,
- size, flags) &&
- (rx_notify[irq] == 0)) {
+
+ if (meta[npo.meta_cons].copy)
+ offset = 0;
+ else
+ offset = offset_in_page(skb->data);
+ resp = make_rx_response(netif, id, status, offset,
+ skb_headlen(skb), flags);
+
+ if (meta[npo.meta_cons].frag.size) {
+ struct netif_extra_info *gso =
+ (struct netif_extra_info *)
+ RING_GET_RESPONSE(&netif->rx,
+ netif->rx.rsp_prod_pvt++);
+
+ resp->flags |= NETRXF_extra_info;
+
+ gso->u.gso.size = meta[npo.meta_cons].frag.size;
+ gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
+ gso->u.gso.pad = 0;
+ gso->u.gso.features = 0;
+
+ gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
+ gso->flags = 0;
+ }
+
+ netbk_add_frag_responses(netif, status,
+ meta + npo.meta_cons + 1,
+ nr_frags);
+
+ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret);
+ irq = netif->irq;
+ if (ret && !rx_notify[irq]) {
rx_notify[irq] = 1;
notify_list[notify_nr++] = irq;
}
+ if (netif_queue_stopped(netif->dev) &&
+ !netbk_queue_full(netif))
+ netif_wake_queue(netif->dev);
+
netif_put(netif);
dev_kfree_skb(skb);
- gop++;
+ npo.meta_cons += nr_frags + 1;
}
while (notify_nr != 0) {
@@ -403,7 +753,9 @@ static void add_to_net_schedule_list_tai
return;
spin_lock_irq(&net_schedule_list_lock);
- if (!__on_net_schedule_list(netif) && netif->active) {
+ if (!__on_net_schedule_list(netif) &&
+ likely(netif_running(netif->dev) &&
+ netif_carrier_ok(netif->dev))) {
list_add_tail(&netif->list, &net_schedule_list);
netif_get(netif);
}
@@ -481,7 +833,7 @@ inline static void net_tx_action_dealloc
netif = pending_tx_info[pending_idx].netif;
- make_tx_response(netif, pending_tx_info[pending_idx].req.id,
+ make_tx_response(netif, &pending_tx_info[pending_idx].req,
NETIF_RSP_OKAY);
pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
@@ -490,14 +842,16 @@ inline static void net_tx_action_dealloc
}
}
-static void netbk_tx_err(netif_t *netif, RING_IDX end)
+static void netbk_tx_err(netif_t *netif, netif_tx_request_t *txp, RING_IDX end)
{
RING_IDX cons = netif->tx.req_cons;
do {
- netif_tx_request_t *txp = RING_GET_REQUEST(&netif->tx, cons);
- make_tx_response(netif, txp->id, NETIF_RSP_ERROR);
- } while (++cons < end);
+ make_tx_response(netif, txp, NETIF_RSP_ERROR);
+ if (cons >= end)
+ break;
+ txp = RING_GET_REQUEST(&netif->tx, cons++);
+ } while (1);
netif->tx.req_cons = cons;
netif_schedule_work(netif);
netif_put(netif);
@@ -508,7 +862,7 @@ static int netbk_count_requests(netif_t
{
netif_tx_request_t *first = txp;
RING_IDX cons = netif->tx.req_cons;
- int frags = 1;
+ int frags = 0;
while (txp->flags & NETTXF_more_data) {
if (frags >= work_to_do) {
@@ -543,7 +897,7 @@ static gnttab_map_grant_ref_t *netbk_get
skb_frag_t *frags = shinfo->frags;
netif_tx_request_t *txp;
unsigned long pending_idx = *((u16 *)skb->data);
- RING_IDX cons = netif->tx.req_cons + 1;
+ RING_IDX cons = netif->tx.req_cons;
int i, start;
/* Skip first skb fragment if it is on same page as header fragment. */
@@ -581,7 +935,7 @@ static int netbk_tx_check_mop(struct sk_
err = mop->status;
if (unlikely(err)) {
txp = &pending_tx_info[pending_idx].req;
- make_tx_response(netif, txp->id, NETIF_RSP_ERROR);
+ make_tx_response(netif, txp, NETIF_RSP_ERROR);
pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
netif_put(netif);
} else {
@@ -614,7 +968,7 @@ static int netbk_tx_check_mop(struct sk_
/* Error on this fragment: respond to client with an error. */
txp = &pending_tx_info[pending_idx].req;
- make_tx_response(netif, txp->id, NETIF_RSP_ERROR);
+ make_tx_response(netif, txp, NETIF_RSP_ERROR);
pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
netif_put(netif);
@@ -661,6 +1015,57 @@ static void netbk_fill_frags(struct sk_b
}
}
+int netbk_get_extras(netif_t *netif, struct netif_extra_info *extras,
+ int work_to_do)
+{
+ struct netif_extra_info *extra;
+ RING_IDX cons = netif->tx.req_cons;
+
+ do {
+ if (unlikely(work_to_do-- <= 0)) {
+ DPRINTK("Missing extra info\n");
+ return -EBADR;
+ }
+
+ extra = (struct netif_extra_info *)
+ RING_GET_REQUEST(&netif->tx, cons);
+ if (unlikely(!extra->type ||
+ extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
+ netif->tx.req_cons = ++cons;
+ DPRINTK("Invalid extra type: %d\n", extra->type);
+ return -EINVAL;
+ }
+
+ memcpy(&extras[extra->type - 1], extra, sizeof(*extra));
+ netif->tx.req_cons = ++cons;
+ } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE);
+
+ return work_to_do;
+}
+
+static int netbk_set_skb_gso(struct sk_buff *skb, struct netif_extra_info *gso)
+{
+ if (!gso->u.gso.size) {
+ DPRINTK("GSO size must not be zero.\n");
+ return -EINVAL;
+ }
+
+ /* Currently only TCPv4 S.O. is supported. */
+ if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
+ DPRINTK("Bad GSO type %d.\n", gso->u.gso.type);
+ return -EINVAL;
+ }
+
+ skb_shinfo(skb)->gso_size = gso->u.gso.size;
+ skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
+
+ /* Header must be checked, and gso_segs computed. */
+ skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
+ skb_shinfo(skb)->gso_segs = 0;
+
+ return 0;
+}
+
/* Called after netfront has transmitted */
static void net_tx_action(unsigned long unused)
{
@@ -668,6 +1073,7 @@ static void net_tx_action(unsigned long
struct sk_buff *skb;
netif_t *netif;
netif_tx_request_t txreq;
+ struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
u16 pending_idx;
RING_IDX i;
gnttab_map_grant_ref_t *mop;
@@ -726,23 +1132,37 @@ static void net_tx_action(unsigned long
}
netif->remaining_credit -= txreq.size;
+ work_to_do--;
+ netif->tx.req_cons = ++i;
+
+ memset(extras, 0, sizeof(extras));
+ if (txreq.flags & NETTXF_extra_info) {
+ work_to_do = netbk_get_extras(netif, extras,
+ work_to_do);
+ i = netif->tx.req_cons;
+ if (unlikely(work_to_do < 0)) {
+ netbk_tx_err(netif, &txreq, i);
+ continue;
+ }
+ }
+
ret = netbk_count_requests(netif, &txreq, work_to_do);
if (unlikely(ret < 0)) {
- netbk_tx_err(netif, i - ret);
+ netbk_tx_err(netif, &txreq, i - ret);
continue;
}
i += ret;
- if (unlikely(ret > MAX_SKB_FRAGS + 1)) {
+ if (unlikely(ret > MAX_SKB_FRAGS)) {
DPRINTK("Too many frags\n");
- netbk_tx_err(netif, i);
+ netbk_tx_err(netif, &txreq, i);
continue;
}
if (unlikely(txreq.size < ETH_HLEN)) {
DPRINTK("Bad packet size: %d\n", txreq.size);
- netbk_tx_err(netif, i);
- continue;
+ netbk_tx_err(netif, &txreq, i);
+ continue;
}
/* No crossing a page as the payload mustn't fragment. */
@@ -750,25 +1170,36 @@ static void net_tx_action(unsigned long
DPRINTK("txreq.offset: %x, size: %u, end: %lu\n",
txreq.offset, txreq.size,
(txreq.offset &~PAGE_MASK) + txreq.size);
- netbk_tx_err(netif, i);
+ netbk_tx_err(netif, &txreq, i);
continue;
}
pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
data_len = (txreq.size > PKT_PROT_LEN &&
- ret < MAX_SKB_FRAGS + 1) ?
+ ret < MAX_SKB_FRAGS) ?
PKT_PROT_LEN : txreq.size;
skb = alloc_skb(data_len+16, GFP_ATOMIC);
if (unlikely(skb == NULL)) {
DPRINTK("Can't allocate a skb in start_xmit.\n");
- netbk_tx_err(netif, i);
+ netbk_tx_err(netif, &txreq, i);
break;
}
/* Packets passed to netif_rx() must have some headroom. */
skb_reserve(skb, 16);
+
+ if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
+ struct netif_extra_info *gso;
+ gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
+
+ if (netbk_set_skb_gso(skb, gso)) {
+ kfree_skb(skb);
+ netbk_tx_err(netif, &txreq, i);
+ continue;
+ }
+ }
gnttab_set_map_op(mop, MMAP_VADDR(pending_idx),
GNTMAP_host_map | GNTMAP_readonly,
@@ -782,11 +1213,14 @@ static void net_tx_action(unsigned long
__skb_put(skb, data_len);
- skb_shinfo(skb)->nr_frags = ret - 1;
+ skb_shinfo(skb)->nr_frags = ret;
if (data_len < txreq.size) {
skb_shinfo(skb)->nr_frags++;
skb_shinfo(skb)->frags[0].page =
(void *)(unsigned long)pending_idx;
+ } else {
+ /* Discriminate from any valid pending_idx value. */
+ skb_shinfo(skb)->frags[0].page = (void *)~0UL;
}
__skb_queue_tail(&tx_queue, skb);
@@ -884,21 +1318,32 @@ static void netif_page_release(struct pa
u16 pending_idx = page - virt_to_page(mmap_vstart);
/* Ready for next use. */
- init_page_count(page);
+ set_page_count(page, 1);
netif_idx_release(pending_idx);
}
+static void netif_rx_page_release(struct page *page)
+{
+ /* Ready for next use. */
+ set_page_count(page, 1);
+}
+
irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs)
{
netif_t *netif = dev_id;
+
add_to_net_schedule_list_tail(netif);
maybe_schedule_tx_action();
+
+ if (netif_queue_stopped(netif->dev) && !netbk_queue_full(netif))
+ netif_wake_queue(netif->dev);
+
return IRQ_HANDLED;
}
static void make_tx_response(netif_t *netif,
- u16 id,
+ netif_tx_request_t *txp,
s8 st)
{
RING_IDX i = netif->tx.rsp_prod_pvt;
@@ -906,8 +1351,11 @@ static void make_tx_response(netif_t *ne
int notify;
resp = RING_GET_RESPONSE(&netif->tx, i);
- resp->id = id;
+ resp->id = txp->id;
resp->status = st;
+
+ if (txp->flags & NETTXF_extra_info)
+ RING_GET_RESPONSE(&netif->tx, ++i)->status = NETIF_RSP_NULL;
netif->tx.rsp_prod_pvt = ++i;
RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify);
@@ -924,16 +1372,15 @@ static void make_tx_response(netif_t *ne
#endif
}
-static int make_rx_response(netif_t *netif,
- u16 id,
- s8 st,
- u16 offset,
- u16 size,
- u16 flags)
+static netif_rx_response_t *make_rx_response(netif_t *netif,
+ u16 id,
+ s8 st,
+ u16 offset,
+ u16 size,
+ u16 flags)
{
RING_IDX i = netif->rx.rsp_prod_pvt;
netif_rx_response_t *resp;
- int notify;
resp = RING_GET_RESPONSE(&netif->rx, i);
resp->offset = offset;
@@ -944,9 +1391,8 @@ static int make_rx_response(netif_t *net
resp->status = (s16)st;
netif->rx.rsp_prod_pvt = ++i;
- RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, notify);
-
- return notify;
+
+ return resp;
}
#ifdef NETBE_DEBUG_INTERRUPT
@@ -1002,13 +1448,25 @@ static int __init netback_init(void)
net_timer.function = net_alarm;
page = balloon_alloc_empty_page_range(MAX_PENDING_REQS);
- BUG_ON(page == NULL);
+ if (page == NULL)
+ return -ENOMEM;
+
mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
for (i = 0; i < MAX_PENDING_REQS; i++) {
page = virt_to_page(MMAP_VADDR(i));
- init_page_count(page);
+ set_page_count(page, 1);
SetPageForeign(page, netif_page_release);
+ }
+
+ page = balloon_alloc_empty_page_range(NET_RX_RING_SIZE);
+ BUG_ON(page == NULL);
+ rx_mmap_area = pfn_to_kaddr(page_to_pfn(page));
+
+ for (i = 0; i < NET_RX_RING_SIZE; i++) {
+ page = virt_to_page(rx_mmap_area + (i * PAGE_SIZE));
+ set_page_count(page, 1);
+ SetPageForeign(page, netif_rx_page_release);
}
pending_cons = 0;
diff -r fe1a31c06cbe -r 11ee20d418ea drivers/xen/netback/xenbus.c
--- a/drivers/xen/netback/xenbus.c Sun Oct 08 12:23:50 2006 -0400
+++ b/drivers/xen/netback/xenbus.c Sun Oct 08 12:28:37 2006 -0400
@@ -101,6 +101,19 @@ static int netback_probe(struct xenbus_d
goto abort_transaction;
}
+ err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4",
+ "%d", 1);
+ if (err) {
+ message = "writing feature-gso-tcpv4";
+ goto abort_transaction;
+ }
+
+ err = xenbus_printf(xbt, dev->nodename, "feature-rx-copy",
"%d", 1);
+ if (err) {
+ message = "writing feature-copying";
+ goto abort_transaction;
+ }
+
err = xenbus_transaction_end(xbt, 0);
} while (err == -EAGAIN);
@@ -215,16 +228,31 @@ static void frontend_changed(struct xenb
{
struct backend_info *be = dev->dev.driver_data;
- DPRINTK("");
+ DPRINTK("%s", xenbus_strstate(frontend_state));
be->frontend_state = frontend_state;
switch (frontend_state) {
case XenbusStateInitialising:
+ if (dev->state == XenbusStateClosed) {
+ printk("%s: %s: prepare for reconnect\n",
+ __FUNCTION__, dev->nodename);
+ if (be->netif) {
+ netif_disconnect(be->netif);
+ be->netif = NULL;
+ }
+ xenbus_switch_state(dev, XenbusStateInitWait);
+ }
+ break;
+
case XenbusStateInitialised:
break;
case XenbusStateConnected:
+ if (!be->netif) {
+ /* reconnect: setup be->netif */
+ backend_changed(&be->backend_watch, NULL, 0);
+ }
maybe_connect(be);
break;
@@ -233,13 +261,18 @@ static void frontend_changed(struct xenb
break;
case XenbusStateClosed:
+ xenbus_switch_state(dev, XenbusStateClosed);
+#ifdef JX
+ if (xenbus_dev_is_online(dev))
+ break;
+#endif
+ /* fall through if not online */
+ case XenbusStateUnknown:
if (be->netif != NULL)
kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
device_unregister(&dev->dev);
break;
- case XenbusStateUnknown:
- case XenbusStateInitWait:
default:
xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
frontend_state);
@@ -342,8 +375,9 @@ static int connect_rings(struct backend_
{
struct xenbus_device *dev = be->dev;
unsigned long tx_ring_ref, rx_ring_ref;
- unsigned int evtchn;
+ unsigned int evtchn, rx_copy;
int err;
+ int val;
DPRINTK("");
@@ -356,6 +390,51 @@ static int connect_rings(struct backend_
"reading %s/ring-ref and event-channel",
dev->otherend);
return err;
+ }
+
+ err = xenbus_scanf(XBT_NIL, dev->otherend, "request-rx-copy", "%u",
+ &rx_copy);
+ if (err == -ENOENT) {
+ err = 0;
+ rx_copy = 0;
+ }
+ if (err < 0) {
+ xenbus_dev_fatal(dev, err, "reading %s/request-rx-copy",
+ dev->otherend);
+ return err;
+ }
+ be->netif->copying_receiver = !!rx_copy;
+
+ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-rx-notify", "%d",
+ &val) < 0)
+ val = 0;
+ if (val)
+ be->netif->can_queue = 1;
+ else
+ /* Must be non-zero for pfifo_fast to work. */
+ be->netif->dev->tx_queue_len = 1;
+
+ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg", "%d", &val) < 0)
+ val = 0;
+ if (val) {
+ be->netif->features |= NETIF_F_SG;
+ be->netif->dev->features |= NETIF_F_SG;
+ }
+
+ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4", "%d",
+ &val) < 0)
+ val = 0;
+ if (val) {
+ be->netif->features |= NETIF_F_TSO;
+ be->netif->dev->features |= NETIF_F_TSO;
+ }
+
+ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-no-csum-offload",
+ "%d", &val) < 0)
+ val = 0;
+ if (val) {
+ be->netif->features &= ~NETIF_F_IP_CSUM;
+ be->netif->dev->features &= ~NETIF_F_IP_CSUM;
}
/* Map the shared frame, irq etc. */
diff -r fe1a31c06cbe -r 11ee20d418ea drivers/xen/netfront/netfront.c
--- a/drivers/xen/netfront/netfront.c Sun Oct 08 12:23:50 2006 -0400
+++ b/drivers/xen/netfront/netfront.c Sun Oct 08 12:28:37 2006 -0400
@@ -46,11 +46,11 @@
#include <linux/ethtool.h>
#include <linux/in.h>
#include <linux/if_ether.h>
+#include <linux/io.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
#include <net/arp.h>
#include <net/route.h>
-#include <asm/io.h>
#include <asm/uaccess.h>
#include <xen/evtchn.h>
#include <xen/xenbus.h>
@@ -58,21 +58,31 @@
#include <xen/interface/memory.h>
#include <xen/balloon.h>
#include <asm/page.h>
+#include <asm/maddr.h>
#include <asm/uaccess.h>
#include <xen/interface/grant_table.h>
#include <xen/gnttab.h>
+#define RX_COPY_THRESHOLD 256
+
+/* If we don't have GSO, fake things up so that we never try to use it. */
+#ifndef NETIF_F_GSO
+#define netif_needs_gso(dev, skb) 0
+#define dev_disable_gso_features(dev) ((void)0)
+#else
+#define HAVE_GSO 1
+static inline void dev_disable_gso_features(struct net_device *dev)
+{
+ /* Turn off all GSO bits except ROBUST. */
+ dev->features &= (1 << NETIF_F_GSO_SHIFT) - 1;
+ dev->features |= NETIF_F_GSO_ROBUST;
+}
+#endif
+
#define GRANT_INVALID_REF 0
#define NET_TX_RING_SIZE __RING_SIZE((struct netif_tx_sring *)0, PAGE_SIZE)
#define NET_RX_RING_SIZE __RING_SIZE((struct netif_rx_sring *)0, PAGE_SIZE)
-
-static inline void init_skb_shinfo(struct sk_buff *skb)
-{
- atomic_set(&(skb_shinfo(skb)->dataref), 1);
- skb_shinfo(skb)->nr_frags = 0;
- skb_shinfo(skb)->frag_list = NULL;
-}
struct netfront_info {
struct list_head list;
@@ -88,6 +98,7 @@ struct netfront_info {
unsigned int handle;
unsigned int evtchn, irq;
+ unsigned int copying_receiver;
/* Receive-ring batched refills. */
#define RX_MIN_TARGET 8
@@ -99,30 +110,35 @@ struct netfront_info {
struct timer_list rx_refill_timer;
/*
- * {tx,rx}_skbs store outstanding skbuffs. The first entry in each
- * array is an index into a chain of free entries.
+ * {tx,rx}_skbs store outstanding skbuffs. The first entry in tx_skbs
+ * is an index into a chain of free entries.
*/
struct sk_buff *tx_skbs[NET_TX_RING_SIZE+1];
- struct sk_buff *rx_skbs[NET_RX_RING_SIZE+1];
+ struct sk_buff *rx_skbs[NET_RX_RING_SIZE];
#define TX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)
grant_ref_t gref_tx_head;
grant_ref_t grant_tx_ref[NET_TX_RING_SIZE + 1];
grant_ref_t gref_rx_head;
- grant_ref_t grant_rx_ref[NET_TX_RING_SIZE + 1];
+ grant_ref_t grant_rx_ref[NET_TX_RING_SIZE];
struct xenbus_device *xbdev;
int tx_ring_ref;
int rx_ring_ref;
u8 mac[ETH_ALEN];
- unsigned long rx_pfn_array[NET_RX_RING_SIZE];
+ xen_pfn_t rx_pfn_array[NET_RX_RING_SIZE];
struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1];
struct mmu_update rx_mmu[NET_RX_RING_SIZE];
};
+struct netfront_rx_info {
+ struct netif_rx_response rx;
+ struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
+};
+
/*
- * Access macros for acquiring freeing slots in {tx,rx}_skbs[].
+ * Access macros for acquiring freeing slots in tx_skbs[].
*/
static inline void add_id_to_freelist(struct sk_buff **list, unsigned short id)
@@ -136,6 +152,29 @@ static inline unsigned short get_id_from
unsigned int id = (unsigned int)(unsigned long)list[0];
list[0] = list[id];
return id;
+}
+
+static inline int xennet_rxidx(RING_IDX idx)
+{
+ return idx & (NET_RX_RING_SIZE - 1);
+}
+
+static inline struct sk_buff *xennet_get_rx_skb(struct netfront_info *np,
+ RING_IDX ri)
+{
+ int i = xennet_rxidx(ri);
+ struct sk_buff *skb = np->rx_skbs[i];
+ np->rx_skbs[i] = NULL;
+ return skb;
+}
+
+static inline grant_ref_t xennet_get_rx_ref(struct netfront_info *np,
+ RING_IDX ri)
+{
+ int i = xennet_rxidx(ri);
+ grant_ref_t ref = np->grant_rx_ref[i];
+ np->grant_rx_ref[i] = GRANT_INVALID_REF;
+ return ref;
}
#define DPRINTK(fmt, args...) \
@@ -148,12 +187,13 @@ static inline unsigned short get_id_from
static int talk_to_backend(struct xenbus_device *, struct netfront_info *);
static int setup_device(struct xenbus_device *, struct netfront_info *);
-static struct net_device *create_netdev(int, struct xenbus_device *);
+static struct net_device *create_netdev(int, int, struct xenbus_device *);
static void netfront_closing(struct xenbus_device *);
static void end_access(int, void *);
static void netif_disconnect_backend(struct netfront_info *);
+static int open_netdev(struct netfront_info *);
static void close_netdev(struct netfront_info *);
static void netif_free(struct netfront_info *);
@@ -190,6 +230,7 @@ static int __devinit netfront_probe(stru
struct net_device *netdev;
struct netfront_info *info;
unsigned int handle;
+ unsigned feature_rx_copy;
err = xenbus_scanf(XBT_NIL, dev->nodename, "handle", "%u", &handle);
if (err != 1) {
@@ -197,7 +238,24 @@ static int __devinit netfront_probe(stru
return err;
}
- netdev = create_netdev(handle, dev);
+#ifdef CONFIG_PPC_XEN
+ err = xenbus_scanf(XBT_NIL, dev->otherend, "feature-rx-copy", "%u",
+ &feature_rx_copy);
+ BUG_ON(err != 1);
+ if (err != 1) {
+ xenbus_dev_fatal(dev, err, "reading feature-rx-copy");
+ return err;
+ }
+ BUG_ON(!feature_rx_copy);
+ if (!feature_rx_copy) {
+ xenbus_dev_fatal(dev, 0, "need a copy-capable backend");
+ return -EINVAL;
+ }
+#else
+ feature_rx_copy = 0;
+#endif
+
+ netdev = create_netdev(handle, feature_rx_copy, dev);
if (IS_ERR(netdev)) {
err = PTR_ERR(netdev);
xenbus_dev_fatal(dev, err, "creating netdev");
@@ -208,15 +266,22 @@ static int __devinit netfront_probe(stru
dev->dev.driver_data = info;
err = talk_to_backend(dev, info);
- if (err) {
- xennet_sysfs_delif(info->netdev);
- unregister_netdev(netdev);
- free_netdev(netdev);
- dev->dev.driver_data = NULL;
- return err;
- }
+ if (err)
+ goto fail_backend;
+
+ err = open_netdev(info);
+ if (err)
+ goto fail_open;
return 0;
+
+ fail_open:
+ xennet_sysfs_delif(info->netdev);
+ unregister_netdev(netdev);
+ fail_backend:
+ free_netdev(netdev);
+ dev->dev.driver_data = NULL;
+ return err;
}
@@ -303,6 +368,33 @@ again:
goto abort_transaction;
}
+ err = xenbus_printf(xbt, dev->nodename, "request-rx-copy", "%u",
+ info->copying_receiver);
+ if (err) {
+ message = "writing request-rx-copy";
+ goto abort_transaction;
+ }
+
+ err = xenbus_printf(xbt, dev->nodename, "feature-rx-notify", "%d", 1);
+ if (err) {
+ message = "writing feature-rx-notify";
+ goto abort_transaction;
+ }
+
+ err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", 1);
+ if (err) {
+ message = "writing feature-sg";
+ goto abort_transaction;
+ }
+
+#ifdef HAVE_GSO
+ err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", "%d", 1);
+ if (err) {
+ message = "writing feature-gso-tcpv4";
+ goto abort_transaction;
+ }
+#endif
+
err = xenbus_transaction_end(xbt, 0);
if (err) {
if (err == -EAGAIN)
@@ -374,7 +466,8 @@ static int setup_device(struct xenbus_de
memcpy(netdev->dev_addr, info->mac, ETH_ALEN);
err = bind_evtchn_to_irqhandler(info->evtchn, netif_int,
- SA_SAMPLE_RANDOM, netdev->name, netdev);
+ SA_SAMPLE_RANDOM, netdev->name,
+ netdev);
if (err < 0)
goto fail;
info->irq = err;
@@ -395,7 +488,7 @@ static void backend_changed(struct xenbu
struct netfront_info *np = dev->dev.driver_data;
struct net_device *netdev = np->netdev;
- DPRINTK("\n");
+ DPRINTK("%s\n", xenbus_strstate(backend_state));
switch (backend_state) {
case XenbusStateInitialising:
@@ -453,8 +546,14 @@ static int network_open(struct net_devic
memset(&np->stats, 0, sizeof(np->stats));
- network_alloc_rx_buffers(dev);
- np->rx.sring->rsp_event = np->rx.rsp_cons + 1;
+ spin_lock(&np->rx_lock);
+ if (netif_carrier_ok(dev)) {
+ network_alloc_rx_buffers(dev);
+ np->rx.sring->rsp_event = np->rx.rsp_cons + 1;
+ if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx))
+ netif_rx_schedule(dev);
+ }
+ spin_unlock(&np->rx_lock);
netif_start_queue(dev);
@@ -463,7 +562,7 @@ static int network_open(struct net_devic
static inline int netfront_tx_slot_available(struct netfront_info *np)
{
- return RING_FREE_REQUESTS(&np->tx) >= MAX_SKB_FRAGS + 1;
+ return RING_FREE_REQUESTS(&np->tx) >= MAX_SKB_FRAGS + 2;
}
static inline void network_maybe_wake_tx(struct net_device *dev)
@@ -483,15 +582,20 @@ static void network_tx_buf_gc(struct net
struct netfront_info *np = netdev_priv(dev);
struct sk_buff *skb;
- if (unlikely(!netif_carrier_ok(dev)))
- return;
+ BUG_ON(!netif_carrier_ok(dev));
do {
prod = np->tx.sring->rsp_prod;
rmb(); /* Ensure we see responses up to 'rp'. */
for (cons = np->tx.rsp_cons; cons != prod; cons++) {
- id = RING_GET_RESPONSE(&np->tx, cons)->id;
+ struct netif_tx_response *txrsp;
+
+ txrsp = RING_GET_RESPONSE(&np->tx, cons);
+ if (txrsp->status == NETIF_RSP_NULL)
+ continue;
+
+ id = txrsp->id;
skb = np->tx_skbs[id];
if (unlikely(gnttab_query_foreign_access(
np->grant_tx_ref[id]) != 0)) {
@@ -540,10 +644,15 @@ static void network_alloc_rx_buffers(str
unsigned short id;
struct netfront_info *np = netdev_priv(dev);
struct sk_buff *skb;
- int i, batch_target;
+ struct page *page;
+ int i, batch_target, notify;
RING_IDX req_prod = np->rx.req_prod_pvt;
struct xen_memory_reservation reservation;
grant_ref_t ref;
+ unsigned long pfn;
+ void *vaddr;
+ int nr_flips;
+ netif_rx_request_t *req;
if (unlikely(!netif_carrier_ok(dev)))
return;
@@ -557,28 +666,41 @@ static void network_alloc_rx_buffers(str
batch_target = np->rx_target - (req_prod - np->rx.rsp_cons);
for (i = skb_queue_len(&np->rx_batch); i < batch_target; i++) {
/*
- * Subtract dev_alloc_skb headroom (16 bytes) and shared info
- * tailroom then round down to SKB_DATA_ALIGN boundary.
+ * Allocate an skb and a page. Do not use __dev_alloc_skb as
+ * that will allocate page-sized buffers which is not
+ * necessary here.
+ * 16 bytes added as necessary headroom for netif_receive_skb.
*/
- skb = __dev_alloc_skb(
- ((PAGE_SIZE - sizeof(struct skb_shared_info)) &
- (-SKB_DATA_ALIGN(1))) - 16,
- GFP_ATOMIC|__GFP_NOWARN);
- if (skb == NULL) {
+ skb = alloc_skb(RX_COPY_THRESHOLD + 16,
+ GFP_ATOMIC | __GFP_NOWARN);
+ if (unlikely(!skb))
+ goto no_skb;
+
+ page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
+ if (!page) {
+ kfree_skb(skb);
+no_skb:
/* Any skbuffs queued for refill? Force them out. */
if (i != 0)
goto refill;
/* Could not allocate any skbuffs. Try again later. */
mod_timer(&np->rx_refill_timer,
jiffies + (HZ/10));
- return;
+ break;
}
+
+ skb_reserve(skb, 16); /* mimic dev_alloc_skb() */
+ skb_shinfo(skb)->frags[0].page = page;
+ skb_shinfo(skb)->nr_frags = 1;
__skb_queue_tail(&np->rx_batch, skb);
}
/* Is the batch large enough to be worthwhile? */
- if (i < (np->rx_target/2))
+ if (i < (np->rx_target/2)) {
+ if (req_prod > np->rx.sring->req_prod)
+ goto push;
return;
+ }
/* Adjust our fill target if we risked running out of buffers. */
if (((req_prod - np->rx.sring->rsp_prod) < (np->rx_target / 4)) &&
@@ -586,70 +708,93 @@ static void network_alloc_rx_buffers(str
np->rx_target = np->rx_max_target;
refill:
- for (i = 0; ; i++) {
+ for (nr_flips = i = 0; ; i++) {
if ((skb = __skb_dequeue(&np->rx_batch)) == NULL)
break;
skb->dev = dev;
- id = get_id_from_freelist(np->rx_skbs);
-
+ id = xennet_rxidx(req_prod + i);
+
+ BUG_ON(np->rx_skbs[id]);
np->rx_skbs[id] = skb;
- RING_GET_REQUEST(&np->rx, req_prod + i)->id = id;
ref = gnttab_claim_grant_reference(&np->gref_rx_head);
BUG_ON((signed short)ref < 0);
np->grant_rx_ref[id] = ref;
- gnttab_grant_foreign_transfer_ref(ref,
- np->xbdev->otherend_id,
- __pa(skb->head)>>PAGE_SHIFT);
- RING_GET_REQUEST(&np->rx, req_prod + i)->gref = ref;
- np->rx_pfn_array[i] = virt_to_mfn(skb->head);
+
+ pfn = page_to_pfn(skb_shinfo(skb)->frags[0].page);
+ vaddr = page_address(skb_shinfo(skb)->frags[0].page);
+
+ req = RING_GET_REQUEST(&np->rx, req_prod + i);
+ if (!np->copying_receiver) {
+ gnttab_grant_foreign_transfer_ref(ref,
+ np->xbdev->otherend_id,
+ pfn);
+ np->rx_pfn_array[nr_flips] = pfn_to_mfn(pfn);
+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+ /* Remove this page before passing
+ * back to Xen. */
+ set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+ MULTI_update_va_mapping(np->rx_mcl+i,
+ (unsigned long)vaddr,
+ __pte(0), 0);
+ }
+ nr_flips++;
+ } else {
+ gnttab_grant_foreign_access_ref(ref,
+ np->xbdev->otherend_id,
+ pfn,
+ 0);
+ }
+
+ req->id = id;
+ req->gref = ref;
+ }
+
+ if ( nr_flips != 0 ) {
+ /* Tell the balloon driver what is going on. */
+ balloon_update_driver_allowance(i);
+
+ set_xen_guest_handle(reservation.extent_start,
+ np->rx_pfn_array);
+ reservation.nr_extents = nr_flips;
+ reservation.extent_order = 0;
+ reservation.address_bits = 0;
+ reservation.domid = DOMID_SELF;
if (!xen_feature(XENFEAT_auto_translated_physmap)) {
- /* Remove this page before passing back to Xen. */
- set_phys_to_machine(__pa(skb->head) >> PAGE_SHIFT,
- INVALID_P2M_ENTRY);
- MULTI_update_va_mapping(np->rx_mcl+i,
- (unsigned long)skb->head,
- __pte(0), 0);
+ /* After all PTEs have been zapped, flush the TLB. */
+ np->rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] =
+ UVMF_TLB_FLUSH|UVMF_ALL;
+
+ /* Give away a batch of pages. */
+ np->rx_mcl[i].op = __HYPERVISOR_memory_op;
+ np->rx_mcl[i].args[0] = XENMEM_decrease_reservation;
+ np->rx_mcl[i].args[1] = (unsigned long)&reservation;
+
+ /* Zap PTEs and give away pages in one big
+ * multicall. */
+ (void)HYPERVISOR_multicall(np->rx_mcl, i+1);
+
+ /* Check return status of HYPERVISOR_memory_op(). */
+ if (unlikely(np->rx_mcl[i].result != i))
+ panic("Unable to reduce memory reservation\n");
+ } else {
+ if (HYPERVISOR_memory_op(XENMEM_decrease_reservation,
+ &reservation) != i)
+ panic("Unable to reduce memory reservation\n");
}
- }
-
- /* Tell the ballon driver what is going on. */
- balloon_update_driver_allowance(i);
-
- set_xen_guest_handle(reservation.extent_start,
- (xen_pfn_t *)np->rx_pfn_array);
- reservation.nr_extents = i;
- reservation.extent_order = 0;
- reservation.address_bits = 0;
- reservation.domid = DOMID_SELF;
-
- if (!xen_feature(XENFEAT_auto_translated_physmap)) {
- /* After all PTEs have been zapped, flush the TLB. */
- np->rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] =
- UVMF_TLB_FLUSH|UVMF_ALL;
-
- /* Give away a batch of pages. */
- np->rx_mcl[i].op = __HYPERVISOR_memory_op;
- np->rx_mcl[i].args[0] = XENMEM_decrease_reservation;
- np->rx_mcl[i].args[1] = (unsigned long)&reservation;
-
- /* Zap PTEs and give away pages in one big multicall. */
- (void)HYPERVISOR_multicall(np->rx_mcl, i+1);
-
- /* Check return status of HYPERVISOR_memory_op(). */
- if (unlikely(np->rx_mcl[i].result != i))
- panic("Unable to reduce memory reservation\n");
- } else
- if (HYPERVISOR_memory_op(XENMEM_decrease_reservation,
- &reservation) != i)
- panic("Unable to reduce memory reservation\n");
+ } else {
+ wmb();
+ }
/* Above is a suitable barrier to ensure backend will see requests. */
np->rx.req_prod_pvt = req_prod + i;
- RING_PUSH_REQUESTS(&np->rx);
+ push:
+ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->rx, notify);
+ if (notify)
+ notify_remote_via_irq(np->irq);
}
static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev,
@@ -720,6 +865,7 @@ static int network_start_xmit(struct sk_
unsigned short id;
struct netfront_info *np = netdev_priv(dev);
struct netif_tx_request *tx;
+ struct netif_extra_info *extra;
char *data = skb->data;
RING_IDX i;
grant_ref_t ref;
@@ -740,7 +886,8 @@ static int network_start_xmit(struct sk_
spin_lock_irq(&np->tx_lock);
if (unlikely(!netif_carrier_ok(dev) ||
- (frags > 1 && !xennet_can_sg(dev)))) {
+ (frags > 1 && !xennet_can_sg(dev)) ||
+ netif_needs_gso(dev, skb))) {
spin_unlock_irq(&np->tx_lock);
goto drop;
}
@@ -763,10 +910,35 @@ static int network_start_xmit(struct sk_
tx->size = len;
tx->flags = 0;
+ extra = NULL;
+
if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
tx->flags |= NETTXF_csum_blank | NETTXF_data_validated;
+#ifdef CONFIG_XEN
if (skb->proto_data_valid) /* remote but checksummed? */
tx->flags |= NETTXF_data_validated;
+#endif
+
+#ifdef HAVE_GSO
+ if (skb_shinfo(skb)->gso_size) {
+ struct netif_extra_info *gso = (struct netif_extra_info *)
+ RING_GET_REQUEST(&np->tx, ++i);
+
+ if (extra)
+ extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE;
+ else
+ tx->flags |= NETTXF_extra_info;
+
+ gso->u.gso.size = skb_shinfo(skb)->gso_size;
+ gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
+ gso->u.gso.pad = 0;
+ gso->u.gso.features = 0;
+
+ gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
+ gso->flags = 0;
+ extra = gso;
+ }
+#endif
np->tx.req_prod_pvt = i + 1;
@@ -802,191 +974,411 @@ static irqreturn_t netif_int(int irq, vo
unsigned long flags;
spin_lock_irqsave(&np->tx_lock, flags);
- network_tx_buf_gc(dev);
+
+ if (likely(netif_carrier_ok(dev))) {
+ network_tx_buf_gc(dev);
+ /* Under tx_lock: protects access to rx shared-ring indexes. */
+ if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx))
+ netif_rx_schedule(dev);
+ }
+
spin_unlock_irqrestore(&np->tx_lock, flags);
- if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx) &&
- likely(netif_running(dev)))
- netif_rx_schedule(dev);
-
return IRQ_HANDLED;
}
-
-static int netif_poll(struct net_device *dev, int *pbudget)
-{
- struct netfront_info *np = netdev_priv(dev);
- struct sk_buff *skb, *nskb;
- struct netif_rx_response *rx;
- RING_IDX i, rp;
- struct mmu_update *mmu = np->rx_mmu;
- struct multicall_entry *mcl = np->rx_mcl;
- int work_done, budget, more_to_do = 1;
- struct sk_buff_head rxq;
- unsigned long flags;
- unsigned long mfn;
- grant_ref_t ref;
-
- spin_lock(&np->rx_lock);
-
- if (unlikely(!netif_carrier_ok(dev))) {
- spin_unlock(&np->rx_lock);
- return 0;
- }
-
- skb_queue_head_init(&rxq);
-
- if ((budget = *pbudget) > dev->quota)
- budget = dev->quota;
- rp = np->rx.sring->rsp_prod;
- rmb(); /* Ensure we see queued responses up to 'rp'. */
-
- for (i = np->rx.rsp_cons, work_done = 0;
- (i != rp) && (work_done < budget);
- i++, work_done++) {
- rx = RING_GET_RESPONSE(&np->rx, i);
+static void xennet_move_rx_slot(struct netfront_info *np, struct sk_buff *skb,
+ grant_ref_t ref)
+{
+ int new = xennet_rxidx(np->rx.req_prod_pvt);
+
+ BUG_ON(np->rx_skbs[new]);
+ np->rx_skbs[new] = skb;
+ np->grant_rx_ref[new] = ref;
+ RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->id = new;
+ RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->gref = ref;
+ np->rx.req_prod_pvt++;
+}
+
+int xennet_get_extras(struct netfront_info *np,
+ struct netif_extra_info *extras, RING_IDX rp)
+
+{
+ struct netif_extra_info *extra;
+ RING_IDX cons = np->rx.rsp_cons;
+ int err = 0;
+
+ do {
+ struct sk_buff *skb;
+ grant_ref_t ref;
+
+ if (unlikely(cons + 1 == rp)) {
+ if (net_ratelimit())
+ WPRINTK("Missing extra info\n");
+ err = -EBADR;
+ break;
+ }
+
+ extra = (struct netif_extra_info *)
+ RING_GET_RESPONSE(&np->rx, ++cons);
+
+ if (unlikely(!extra->type ||
+ extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
+ if (net_ratelimit())
+ WPRINTK("Invalid extra type: %d\n",
+ extra->type);
+ err = -EINVAL;
+ } else {
+ memcpy(&extras[extra->type - 1], extra,
+ sizeof(*extra));
+ }
+
+ skb = xennet_get_rx_skb(np, cons);
+ ref = xennet_get_rx_ref(np, cons);
+ xennet_move_rx_slot(np, skb, ref);
+ } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE);
+
+ np->rx.rsp_cons = cons;
+ return err;
+}
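
xennet_get_extras() files each extra into a per-type slot (extras[extra->type - 1]) and keeps consuming ring entries while the MORE flag is set. A minimal, self-contained sketch of that chaining convention (simplified structures and type values, not the driver's own) is:

#include <stdio.h>

#define TYPE_NONE	0
#define TYPE_GSO	1
#define TYPE_MAX	2
#define FLAG_MORE	(1 << 0)

struct extra {
	int type;
	int flags;
	int value;
};

/* Consume a chain of extras from 'in' into a per-type table, mirroring
 * the way the frontend indexes extras[extra->type - 1]. */
static int collect_extras(const struct extra *in, int n,
			  struct extra out[TYPE_MAX - 1])
{
	int i = 0;

	do {
		if (i >= n)
			return -1;	/* "Missing extra info" */
		if (in[i].type <= TYPE_NONE || in[i].type >= TYPE_MAX)
			return -1;	/* invalid extra type */
		out[in[i].type - 1] = in[i];
	} while (in[i++].flags & FLAG_MORE);

	return i;			/* slots consumed */
}

int main(void)
{
	struct extra chain[] = { { TYPE_GSO, 0, 1448 } };
	struct extra table[TYPE_MAX - 1] = { { 0 } };
	int used = collect_extras(chain, 1, table);

	printf("consumed %d extra slot(s), gso value %d\n",
	       used, table[TYPE_GSO - 1].value);
	return 0;
}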
+
+static int xennet_get_responses(struct netfront_info *np,
+ struct netfront_rx_info *rinfo, RING_IDX rp,
+ struct sk_buff_head *list,
+ int *pages_flipped_p)
+{
+ int pages_flipped = *pages_flipped_p;
+ struct mmu_update *mmu;
+ struct multicall_entry *mcl;
+ struct netif_rx_response *rx = &rinfo->rx;
+ struct netif_extra_info *extras = rinfo->extras;
+ RING_IDX cons = np->rx.rsp_cons;
+ struct sk_buff *skb = xennet_get_rx_skb(np, cons);
+ grant_ref_t ref = xennet_get_rx_ref(np, cons);
+ int max = MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD);
+ int frags = 1;
+ int err = 0;
+ unsigned long ret;
+
+ if (rx->flags & NETRXF_extra_info) {
+ err = xennet_get_extras(np, extras, rp);
+ cons = np->rx.rsp_cons;
+ }
+
+ for (;;) {
+ unsigned long mfn;
+
+ if (unlikely(rx->status < 0 ||
+ rx->offset + rx->status > PAGE_SIZE)) {
+ if (net_ratelimit())
+ WPRINTK("rx->offset: %x, size: %u\n",
+ rx->offset, rx->status);
+ err = -EINVAL;
+ goto next;
+ }
/*
* This definitely indicates a bug, either in this driver or in
* the backend driver. In future this should flag the bad
 * situation to the system controller to reboot the backend.
*/
- if ((ref = np->grant_rx_ref[rx->id]) == GRANT_INVALID_REF) {
+ if (ref == GRANT_INVALID_REF) {
WPRINTK("Bad rx response id %d.\n", rx->id);
+ err = -EINVAL;
+ goto next;
+ }
+
+ if (!np->copying_receiver) {
+ /* Memory pressure, insufficient buffer
+ * headroom, ... */
+ if (!(mfn = gnttab_end_foreign_transfer_ref(ref))) {
+ if (net_ratelimit())
+ WPRINTK("Unfulfilled rx req "
+ "(id=%d, st=%d).\n",
+ rx->id, rx->status);
+ xennet_move_rx_slot(np, skb, ref);
+ err = -ENOMEM;
+ goto next;
+ }
+
+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+ /* Remap the page. */
+ struct page *page =
+ skb_shinfo(skb)->frags[0].page;
+ unsigned long pfn = page_to_pfn(page);
+ void *vaddr = page_address(page);
+
+ mcl = np->rx_mcl + pages_flipped;
+ mmu = np->rx_mmu + pages_flipped;
+
+ MULTI_update_va_mapping(mcl,
+ (unsigned long)vaddr,
+ pfn_pte_ma(mfn,
+ PAGE_KERNEL),
+ 0);
+ mmu->ptr = ((maddr_t)mfn << PAGE_SHIFT)
+ | MMU_MACHPHYS_UPDATE;
+ mmu->val = pfn;
+
+ set_phys_to_machine(pfn, mfn);
+ }
+ pages_flipped++;
+ } else {
+ ret = gnttab_end_foreign_access_ref(ref, 0);
+ BUG_ON(!ret);
+ }
+
+ gnttab_release_grant_reference(&np->gref_rx_head, ref);
+
+ __skb_queue_tail(list, skb);
+
+next:
+ if (!(rx->flags & NETRXF_more_data))
+ break;
+
+ if (cons + frags == rp) {
+ if (net_ratelimit())
+ WPRINTK("Need more frags\n");
+ err = -ENOENT;
+ break;
+ }
+
+ rx = RING_GET_RESPONSE(&np->rx, cons + frags);
+ skb = xennet_get_rx_skb(np, cons + frags);
+ ref = xennet_get_rx_ref(np, cons + frags);
+ frags++;
+ }
+
+ if (unlikely(frags > max)) {
+ if (net_ratelimit())
+ WPRINTK("Too many frags\n");
+ err = -E2BIG;
+ }
+
+ *pages_flipped_p = pages_flipped;
+
+ return err;
+}
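
A fragmented packet arrives as a chain of rx responses linked by NETRXF_more_data, and the loop above bounds that chain at MAX_SKB_FRAGS plus one extra slot whenever the head fits under RX_COPY_THRESHOLD. A small stand-alone model of the walk and its termination conditions (hypothetical MODEL_ constants, not the driver's) is:

#include <stdio.h>

#define MODEL_MAX_SKB_FRAGS	18
#define MODEL_COPY_THRESHOLD	256
#define MODEL_MORE_DATA		(1 << 0)

struct rx_resp {
	int status;	/* bytes delivered in this slot */
	int flags;
};

/* Walk one response chain, mirroring xennet_get_responses(): stop on
 * !more_data, on running out of published responses, or on exceeding
 * the fragment budget. */
static int walk_chain(const struct rx_resp *resp, int avail, int *bytes)
{
	int max = MODEL_MAX_SKB_FRAGS +
		  (resp[0].status <= MODEL_COPY_THRESHOLD);
	int frags = 1;

	*bytes = 0;
	for (;;) {
		*bytes += resp[frags - 1].status;
		if (!(resp[frags - 1].flags & MODEL_MORE_DATA))
			break;
		if (frags == avail)
			return -1;	/* "Need more frags" */
		frags++;
	}
	return (frags > max) ? -2 : frags;	/* -2: "Too many frags" */
}

int main(void)
{
	struct rx_resp chain[] = {
		{ 200, MODEL_MORE_DATA },	/* small head ... */
		{ 4096, 0 },			/* ... plus one page frag */
	};
	int bytes, n = walk_chain(chain, 2, &bytes);

	printf("chain of %d slots, %d bytes\n", n, bytes);
	return 0;
}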
+
+static RING_IDX xennet_fill_frags(struct netfront_info *np,
+ struct sk_buff *skb,
+ struct sk_buff_head *list)
+{
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+ int nr_frags = shinfo->nr_frags;
+ RING_IDX cons = np->rx.rsp_cons;
+ skb_frag_t *frag = shinfo->frags + nr_frags;
+ struct sk_buff *nskb;
+
+ while ((nskb = __skb_dequeue(list))) {
+ struct netif_rx_response *rx =
+ RING_GET_RESPONSE(&np->rx, ++cons);
+
+ frag->page = skb_shinfo(nskb)->frags[0].page;
+ frag->page_offset = rx->offset;
+ frag->size = rx->status;
+
+ skb->data_len += rx->status;
+
+ skb_shinfo(nskb)->nr_frags = 0;
+ kfree_skb(nskb);
+
+ frag++;
+ nr_frags++;
+ }
+
+ shinfo->nr_frags = nr_frags;
+ return cons;
+}
+
+static int xennet_set_skb_gso(struct sk_buff *skb,
+ struct netif_extra_info *gso)
+{
+ if (!gso->u.gso.size) {
+ if (net_ratelimit())
+ WPRINTK("GSO size must not be zero.\n");
+ return -EINVAL;
+ }
+
+ /* Currently only TCPv4 S.O. is supported. */
+ if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
+ if (net_ratelimit())
+ WPRINTK("Bad GSO type %d.\n", gso->u.gso.type);
+ return -EINVAL;
+ }
+
+#ifdef HAVE_GSO
+ skb_shinfo(skb)->gso_size = gso->u.gso.size;
+ skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
+
+ /* Header must be checked, and gso_segs computed. */
+ skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
+ skb_shinfo(skb)->gso_segs = 0;
+
+ return 0;
+#else
+ if (net_ratelimit())
+ WPRINTK("GSO unsupported by this kernel.\n");
+ return -EINVAL;
+#endif
+}
+
+static int netif_poll(struct net_device *dev, int *pbudget)
+{
+ struct netfront_info *np = netdev_priv(dev);
+ struct sk_buff *skb;
+ struct netfront_rx_info rinfo;
+ struct netif_rx_response *rx = &rinfo.rx;
+ struct netif_extra_info *extras = rinfo.extras;
+ RING_IDX i, rp;
+ struct multicall_entry *mcl;
+ int work_done, budget, more_to_do = 1;
+ struct sk_buff_head rxq;
+ struct sk_buff_head errq;
+ struct sk_buff_head tmpq;
+ unsigned long flags;
+ unsigned int len;
+ int pages_flipped = 0;
+ int err;
+
+ spin_lock(&np->rx_lock);
+
+ if (unlikely(!netif_carrier_ok(dev))) {
+ spin_unlock(&np->rx_lock);
+ return 0;
+ }
+
+ skb_queue_head_init(&rxq);
+ skb_queue_head_init(&errq);
+ skb_queue_head_init(&tmpq);
+
+ if ((budget = *pbudget) > dev->quota)
+ budget = dev->quota;
+ rp = np->rx.sring->rsp_prod;
+ rmb(); /* Ensure we see queued responses up to 'rp'. */
+
+ for (i = np->rx.rsp_cons, work_done = 0;
+ (i != rp) && (work_done < budget);
+ np->rx.rsp_cons = ++i, work_done++) {
+ memcpy(rx, RING_GET_RESPONSE(&np->rx, i), sizeof(*rx));
+ memset(extras, 0, sizeof(rinfo.extras));
+
+ err = xennet_get_responses(np, &rinfo, rp, &tmpq,
+ &pages_flipped);
+
+ if (unlikely(err)) {
+err:
+ i = np->rx.rsp_cons + skb_queue_len(&tmpq) - 1;
work_done--;
+ while ((skb = __skb_dequeue(&tmpq)))
+ __skb_queue_tail(&errq, skb);
+ np->stats.rx_errors++;
continue;
}
- /* Memory pressure, insufficient buffer headroom, ... */
- if ((mfn = gnttab_end_foreign_transfer_ref(ref)) == 0) {
- if (net_ratelimit())
- WPRINTK("Unfulfilled rx req (id=%d, st=%d).\n",
- rx->id, rx->status);
- RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->id =
- rx->id;
- RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->gref =
- ref;
- np->rx.req_prod_pvt++;
- RING_PUSH_REQUESTS(&np->rx);
- work_done--;
- continue;
+ skb = __skb_dequeue(&tmpq);
+
+ if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
+ struct netif_extra_info *gso;
+ gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
+
+ if (unlikely(xennet_set_skb_gso(skb, gso))) {
+ __skb_queue_head(&tmpq, skb);
+ goto err;
+ }
}
- gnttab_release_grant_reference(&np->gref_rx_head, ref);
- np->grant_rx_ref[rx->id] = GRANT_INVALID_REF;
-
- skb = np->rx_skbs[rx->id];
- add_id_to_freelist(np->rx_skbs, rx->id);
-
- /* NB. We handle skb overflow later. */
- skb->data = skb->head + rx->offset;
- skb->len = rx->status;
- skb->tail = skb->data + skb->len;
+ skb->nh.raw = (void *)skb_shinfo(skb)->frags[0].page;
+ skb->h.raw = skb->nh.raw + rx->offset;
+
+ len = rx->status;
+ if (len > RX_COPY_THRESHOLD)
+ len = RX_COPY_THRESHOLD;
+ skb_put(skb, len);
+
+ if (rx->status > len) {
+ skb_shinfo(skb)->frags[0].page_offset =
+ rx->offset + len;
+ skb_shinfo(skb)->frags[0].size = rx->status - len;
+ skb->data_len = rx->status - len;
+ } else {
+ skb_shinfo(skb)->frags[0].page = NULL;
+ skb_shinfo(skb)->nr_frags = 0;
+ }
+
+ i = xennet_fill_frags(np, skb, &tmpq);
+
+ /*
+ * Truesize should approximate the size of true data plus
+ * any supervisor overheads. Adding hypervisor overheads
+ * has been shown to significantly reduce achievable
+ * bandwidth with the default receive buffer size. It is
+ * therefore not wise to account for it here.
+ *
+ * After alloc_skb(RX_COPY_THRESHOLD), truesize is set to
+ * RX_COPY_THRESHOLD + the supervisor overheads. Here, we
+ * add the size of the data pulled in xennet_fill_frags().
+ *
+ * We also adjust for any unused space in the main data
+ * area by subtracting (RX_COPY_THRESHOLD - len). This is
+ * especially important with drivers which split incoming
+ * packets into header and data, using only 66 bytes of
+ * the main data area (see the e1000 driver for example.)
+ * On such systems, without this last adjustment, our
+ * achievable receive throughput using the standard receive
+ * buffer size was cut by 25%(!!!).
+ */
+ skb->truesize += skb->data_len - (RX_COPY_THRESHOLD - len);
+ skb->len += skb->data_len;
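
To make the adjustment above concrete, assuming RX_COPY_THRESHOLD is 256 bytes and a header-splitting backend that, like the e1000 case mentioned in the comment, delivers only 66 bytes in the first slot: len = 66, the remaining 1434 bytes of a 1500-byte packet land in fragments, and truesize grows by 1434 - (256 - 66) = 1244 rather than the full 1434. A trivial check of that arithmetic (the threshold value is an assumption, not taken from this hunk):

#include <stdio.h>

#define MODEL_RX_COPY_THRESHOLD	256	/* assumed to match the driver */

/* Mirror the truesize adjustment: count the fragment data that was
 * attached, minus whatever part of the pre-allocated linear area went
 * unused because the first slot was short. */
static int truesize_delta(int first_slot_bytes, int frag_bytes)
{
	int len = first_slot_bytes;

	if (len > MODEL_RX_COPY_THRESHOLD)
		len = MODEL_RX_COPY_THRESHOLD;

	return frag_bytes - (MODEL_RX_COPY_THRESHOLD - len);
}

int main(void)
{
	/* Header-split backend: 66-byte head, 1434 bytes in fragments. */
	printf("delta = %d\n", truesize_delta(66, 1434));	/* 1244 */
	/* Full first slot: no unused linear area to give back. */
	printf("delta = %d\n", truesize_delta(300, 1200));	/* 1200 */
	return 0;
}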
/*
* Old backends do not assert data_validated but we
* can infer it from csum_blank so test both flags.
*/
- if (rx->flags & (NETRXF_data_validated|NETRXF_csum_blank)) {
+ if (rx->flags & (NETRXF_data_validated|NETRXF_csum_blank))
skb->ip_summed = CHECKSUM_UNNECESSARY;
- skb->proto_data_valid = 1;
- } else {
+ else
skb->ip_summed = CHECKSUM_NONE;
- skb->proto_data_valid = 0;
+#ifdef CONFIG_XEN
+ skb->proto_data_valid = (skb->ip_summed != CHECKSUM_NONE);
+ skb->proto_csum_blank = !!(rx->flags & NETRXF_csum_blank);
+#endif
+ np->stats.rx_packets++;
+ np->stats.rx_bytes += skb->len;
+
+ __skb_queue_tail(&rxq, skb);
+ }
+
+ if (pages_flipped) {
+ /* Some pages are no longer absent... */
+ balloon_update_driver_allowance(-pages_flipped);
+
+ /* Do all the remapping work and M2P updates. */
+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+ mcl = np->rx_mcl + pages_flipped;
+ mcl->op = __HYPERVISOR_mmu_update;
+ mcl->args[0] = (unsigned long)np->rx_mmu;
+ mcl->args[1] = pages_flipped;
+ mcl->args[2] = 0;
+ mcl->args[3] = DOMID_SELF;
+ (void)HYPERVISOR_multicall(np->rx_mcl,
+ pages_flipped + 1);
}
- skb->proto_csum_blank = !!(rx->flags & NETRXF_csum_blank);
-
- np->stats.rx_packets++;
- np->stats.rx_bytes += rx->status;
-
- if (!xen_feature(XENFEAT_auto_translated_physmap)) {
- /* Remap the page. */
- MULTI_update_va_mapping(mcl, (unsigned long)skb->head,
- pfn_pte_ma(mfn, PAGE_KERNEL),
- 0);
- mcl++;
- mmu->ptr = ((maddr_t)mfn << PAGE_SHIFT)
- | MMU_MACHPHYS_UPDATE;
- mmu->val = __pa(skb->head) >> PAGE_SHIFT;
- mmu++;
-
- set_phys_to_machine(__pa(skb->head) >> PAGE_SHIFT,
- mfn);
- }
-
- __skb_queue_tail(&rxq, skb);
- }
-
- /* Some pages are no longer absent... */
- balloon_update_driver_allowance(-work_done);
-
- /* Do all the remapping work, and M2P updates, in one big hypercall. */
- if (likely((mcl - np->rx_mcl) != 0)) {
- mcl->op = __HYPERVISOR_mmu_update;
- mcl->args[0] = (unsigned long)np->rx_mmu;
- mcl->args[1] = mmu - np->rx_mmu;
- mcl->args[2] = 0;
- mcl->args[3] = DOMID_SELF;
- mcl++;
- (void)HYPERVISOR_multicall(np->rx_mcl, mcl - np->rx_mcl);
- }
+ }
+
+ while ((skb = __skb_dequeue(&errq)))
+ kfree_skb(skb);
while ((skb = __skb_dequeue(&rxq)) != NULL) {
- if (skb->len > (dev->mtu + ETH_HLEN + 4)) {
- if (net_ratelimit())
- printk(KERN_INFO "Received packet too big for "
- "MTU (%d > %d)\n",
- skb->len - ETH_HLEN - 4, dev->mtu);
- skb->len = 0;
- skb->tail = skb->data;
- init_skb_shinfo(skb);
- dev_kfree_skb(skb);
- continue;
- }
-
- /*
- * Enough room in skbuff for the data we were passed? Also,
- * Linux expects at least 16 bytes headroom in each rx buffer.
- */
- if (unlikely(skb->tail > skb->end) ||
- unlikely((skb->data - skb->head) < 16)) {
- if (net_ratelimit()) {
- if (skb->tail > skb->end)
- printk(KERN_INFO "Received packet "
- "is %zd bytes beyond tail.\n",
- skb->tail - skb->end);
- else
- printk(KERN_INFO "Received packet "
- "is %zd bytes before head.\n",
- 16 - (skb->data - skb->head));
- }
-
- nskb = __dev_alloc_skb(skb->len + 2,
- GFP_ATOMIC|__GFP_NOWARN);
- if (nskb != NULL) {
- skb_reserve(nskb, 2);
- skb_put(nskb, skb->len);
- memcpy(nskb->data, skb->data, skb->len);
- /* Copy any other fields we already set up. */
- nskb->dev = skb->dev;
- nskb->ip_summed = skb->ip_summed;
- nskb->proto_data_valid = skb->proto_data_valid;
- nskb->proto_csum_blank = skb->proto_csum_blank;
- }
-
- /* Reinitialise and then destroy the old skbuff. */
- skb->len = 0;
- skb->tail = skb->data;
- init_skb_shinfo(skb);
- dev_kfree_skb(skb);
-
- /* Switch old for new, if we copied the buffer. */
- if ((skb = nskb) == NULL)
- continue;
- }
-
- /* Set the shinfo area, which is hidden behind the data. */
- init_skb_shinfo(skb);
+ struct page *page = (struct page *)skb->nh.raw;
+ void *vaddr = page_address(page);
+
+ memcpy(skb->data, vaddr + (skb->h.raw - skb->nh.raw),
+ skb_headlen(skb));
+
+ if (page != skb_shinfo(skb)->frags[0].page)
+ __free_page(page);
+
/* Ethernet work: Delayed to here as it peeks the header. */
skb->protocol = eth_type_trans(skb, dev);
@@ -994,8 +1386,6 @@ static int netif_poll(struct net_device
netif_receive_skb(skb);
dev->last_rx = jiffies;
}
-
- np->rx.rsp_cons = i;
/* If we get a callback with very few responses, reduce fill target. */
/* NB. Note exponential increase, linear decrease. */
@@ -1024,74 +1414,12 @@ static int netif_poll(struct net_device
return more_to_do;
}
-
-static int network_close(struct net_device *dev)
-{
- struct netfront_info *np = netdev_priv(dev);
- netif_stop_queue(np->netdev);
- return 0;
-}
-
-
-static struct net_device_stats *network_get_stats(struct net_device *dev)
-{
- struct netfront_info *np = netdev_priv(dev);
- return &np->stats;
-}
-
-static int xennet_change_mtu(struct net_device *dev, int mtu)
-{
- int max = xennet_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN;
-
- if (mtu > max)
- return -EINVAL;
- dev->mtu = mtu;
- return 0;
-}
-
-static int xennet_set_sg(struct net_device *dev, u32 data)
-{
- if (data) {
- struct netfront_info *np = netdev_priv(dev);
- int val;
-
- if (xenbus_scanf(XBT_NIL, np->xbdev->otherend, "feature-sg",
- "%d", &val) < 0)
- val = 0;
- if (!val)
- return -ENOSYS;
- } else if (dev->mtu > ETH_DATA_LEN)
- dev->mtu = ETH_DATA_LEN;
-
- return ethtool_op_set_sg(dev, data);
-}
-
-static void xennet_set_features(struct net_device *dev)
-{
- xennet_set_sg(dev, 1);
-}
-
-static void network_connect(struct net_device *dev)
-{
- struct netfront_info *np = netdev_priv(dev);
- int i, requeue_idx;
+static void netif_release_tx_bufs(struct netfront_info *np)
+{
struct sk_buff *skb;
-
- xennet_set_features(dev);
-
- spin_lock_irq(&np->tx_lock);
- spin_lock(&np->rx_lock);
-
- /*
- * Recovery procedure:
- * NB. Freelist index entries are always going to be less than
- * PAGE_OFFSET, whereas pointers to skbs will always be equal or
- * greater than PAGE_OFFSET: we use this property to distinguish
- * them.
- */
-
- /* Step 1: Discard all pending TX packet fragments. */
- for (requeue_idx = 0, i = 1; i <= NET_TX_RING_SIZE; i++) {
+ int i;
+
+ for (i = 1; i <= NET_TX_RING_SIZE; i++) {
if ((unsigned long)np->tx_skbs[i] < PAGE_OFFSET)
continue;
@@ -1104,22 +1432,218 @@ static void network_connect(struct net_d
add_id_to_freelist(np->tx_skbs, i);
dev_kfree_skb_irq(skb);
}
+}
+
+static void netif_release_rx_bufs(struct netfront_info *np)
+{
+ struct mmu_update *mmu = np->rx_mmu;
+ struct multicall_entry *mcl = np->rx_mcl;
+ struct sk_buff_head free_list;
+ struct sk_buff *skb;
+ unsigned long mfn;
+ int xfer = 0, noxfer = 0, unused = 0;
+ int id, ref;
+
+ if (np->copying_receiver) {
+ printk("%s: fix me for copying receiver.\n", __FUNCTION__);
+ return;
+ }
+
+ skb_queue_head_init(&free_list);
+
+ spin_lock(&np->rx_lock);
+
+ for (id = 0; id < NET_RX_RING_SIZE; id++) {
+ if ((ref = np->grant_rx_ref[id]) == GRANT_INVALID_REF) {
+ unused++;
+ continue;
+ }
+
+ skb = np->rx_skbs[id];
+ mfn = gnttab_end_foreign_transfer_ref(ref);
+ gnttab_release_grant_reference(&np->gref_rx_head, ref);
+ np->grant_rx_ref[id] = GRANT_INVALID_REF;
+ add_id_to_freelist(np->rx_skbs, id);
+
+ if (0 == mfn) {
+ struct page *page = skb_shinfo(skb)->frags[0].page;
+ balloon_release_driver_page(page);
+ skb_shinfo(skb)->nr_frags = 0;
+ dev_kfree_skb(skb);
+ noxfer++;
+ continue;
+ }
+
+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+ /* Remap the page. */
+ struct page *page = skb_shinfo(skb)->frags[0].page;
+ unsigned long pfn = page_to_pfn(page);
+ void *vaddr = page_address(page);
+
+ MULTI_update_va_mapping(mcl, (unsigned long)vaddr,
+ pfn_pte_ma(mfn, PAGE_KERNEL),
+ 0);
+ mcl++;
+ mmu->ptr = ((maddr_t)mfn << PAGE_SHIFT)
+ | MMU_MACHPHYS_UPDATE;
+ mmu->val = pfn;
+ mmu++;
+
+ set_phys_to_machine(pfn, mfn);
+ }
+ __skb_queue_tail(&free_list, skb);
+ xfer++;
+ }
+
+ printk("%s: %d xfer, %d noxfer, %d unused\n",
+ __FUNCTION__, xfer, noxfer, unused);
+
+ if (xfer) {
+ /* Some pages are no longer absent... */
+ balloon_update_driver_allowance(-xfer);
+
+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+ /* Do all the remapping work and M2P updates. */
+ mcl->op = __HYPERVISOR_mmu_update;
+ mcl->args[0] = (unsigned long)np->rx_mmu;
+ mcl->args[1] = mmu - np->rx_mmu;
+ mcl->args[2] = 0;
+ mcl->args[3] = DOMID_SELF;
+ mcl++;
+ HYPERVISOR_multicall(np->rx_mcl, mcl - np->rx_mcl);
+ }
+ }
+
+ while ((skb = __skb_dequeue(&free_list)) != NULL)
+ dev_kfree_skb(skb);
+
+ spin_unlock(&np->rx_lock);
+}
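
Both the receive path and the release path above defer the per-page remaps and finish with a single mmu_update entry appended to the multicall batch, so only one hypercall is issued for the whole set of pages. A self-contained sketch of that batch-then-flush pattern (plain arrays and printf standing in for the hypercall structures, purely illustrative) is:

#include <stdio.h>

#define MODEL_BATCH_MAX	64

struct model_op {
	const char *what;
	unsigned long arg;
};

static struct model_op batch[MODEL_BATCH_MAX];
static unsigned int batch_len;

/* Queue per-page operations the way np->rx_mcl collects
 * MULTI_update_va_mapping entries plus a final mmu_update. */
static void queue_op(const char *what, unsigned long arg)
{
	if (batch_len < MODEL_BATCH_MAX)
		batch[batch_len++] = (struct model_op){ what, arg };
}

/* "Submit" the whole batch once, as HYPERVISOR_multicall() would. */
static void submit_batch(void)
{
	unsigned int i;

	for (i = 0; i < batch_len; i++)
		printf("op %u: %s(0x%lx)\n", i, batch[i].what, batch[i].arg);
	printf("submitted %u ops in one call\n", batch_len);
	batch_len = 0;
}

int main(void)
{
	unsigned long pfn;

	for (pfn = 0x1000; pfn < 0x1003; pfn++)
		queue_op("update_va_mapping", pfn);	/* one per page */
	queue_op("mmu_update", 0);			/* one M2P flush */
	submit_batch();
	return 0;
}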
+
+static int network_close(struct net_device *dev)
+{
+ struct netfront_info *np = netdev_priv(dev);
+ netif_stop_queue(np->netdev);
+ return 0;
+}
+
+
+static struct net_device_stats *network_get_stats(struct net_device *dev)
+{
+ struct netfront_info *np = netdev_priv(dev);
+ return &np->stats;
+}
+
+static int xennet_change_mtu(struct net_device *dev, int mtu)
+{
+ int max = xennet_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN;
+
+ if (mtu > max)
+ return -EINVAL;
+ dev->mtu = mtu;
+ return 0;
+}
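
For reference, with scatter/gather negotiated the limit computed above is 65535 - ETH_HLEN = 65521 bytes, otherwise ETH_DATA_LEN (1500). A trivial check using the standard values for those constants:

#include <stdio.h>

#define MODEL_ETH_HLEN		14
#define MODEL_ETH_DATA_LEN	1500

/* Mirror xennet_change_mtu()'s upper bound. */
static int max_mtu(int can_sg)
{
	return can_sg ? 65535 - MODEL_ETH_HLEN : MODEL_ETH_DATA_LEN;
}

int main(void)
{
	printf("sg: %d, no sg: %d\n", max_mtu(1), max_mtu(0));	/* 65521, 1500 */
	return 0;
}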
+
+static int xennet_set_sg(struct net_device *dev, u32 data)
+{
+ if (data) {
+ struct netfront_info *np = netdev_priv(dev);
+ int val;
+
+ if (xenbus_scanf(XBT_NIL, np->xbdev->otherend, "feature-sg",
+ "%d", &val) < 0)
+ val = 0;
+ if (!val)
+ return -ENOSYS;
+ } else if (dev->mtu > ETH_DATA_LEN)
+ dev->mtu = ETH_DATA_LEN;
+
+ return ethtool_op_set_sg(dev, data);
+}
+
+static int xennet_set_tso(struct net_device *dev, u32 data)
+{
+#ifdef HAVE_GSO
+ if (data) {
+ struct netfront_info *np = netdev_priv(dev);
+ int val;
+
+ if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
+ "feature-gso-tcpv4", "%d", &val) < 0)
+ val = 0;
+ if (!val)
+ return -ENOSYS;
+ }
+
+ return ethtool_op_set_tso(dev, data);
+#else
+ return -ENOSYS;
+#endif
+}
+
+static void xennet_set_features(struct net_device *dev)
+{
+ dev_disable_gso_features(dev);
+ xennet_set_sg(dev, 0);
+
+ /* We need checksum offload to enable scatter/gather and TSO. */
+ if (!(dev->features & NETIF_F_IP_CSUM))
+ return;
+
+ if (!xennet_set_sg(dev, 1))
+ xennet_set_tso(dev, 1);
+}
+
+static void network_connect(struct net_device *dev)
+{
+ struct netfront_info *np = netdev_priv(dev);
+ int i, requeue_idx;
+ struct sk_buff *skb;
+ grant_ref_t ref;
+ netif_rx_request_t *req;
+
+ xennet_set_features(dev);
+
+ spin_lock_irq(&np->tx_lock);
+ spin_lock(&np->rx_lock);
+
+ /*
+ * Recovery procedure:
+ * NB. Freelist index entries are always going to be less than
+ * PAGE_OFFSET, whereas pointers to skbs will always be equal or
+ * greater than PAGE_OFFSET: we use this property to distinguish
+ * them.
+ */
+
+ /* Step 1: Discard all pending TX packet fragments. */
+ netif_release_tx_bufs(np);
/* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */
- for (requeue_idx = 0, i = 1; i <= NET_RX_RING_SIZE; i++) {
- if ((unsigned long)np->rx_skbs[i] < PAGE_OFFSET)
+ for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) {
+ if (!np->rx_skbs[i])
continue;
- gnttab_grant_foreign_transfer_ref(
- np->grant_rx_ref[i], np->xbdev->otherend_id,
- __pa(np->rx_skbs[i]->data) >> PAGE_SHIFT);
- RING_GET_REQUEST(&np->rx, requeue_idx)->gref =
- np->grant_rx_ref[i];
- RING_GET_REQUEST(&np->rx, requeue_idx)->id = i;
+
+ skb = np->rx_skbs[requeue_idx] = xennet_get_rx_skb(np, i);
+ ref = np->grant_rx_ref[requeue_idx] = xennet_get_rx_ref(np, i);
+ req = RING_GET_REQUEST(&np->rx, requeue_idx);
+
+ if (!np->copying_receiver) {
+ gnttab_grant_foreign_transfer_ref(
+ ref, np->xbdev->otherend_id,
+ page_to_pfn(skb_shinfo(skb)->frags->page));
+ } else {
+ gnttab_grant_foreign_access_ref(
+ ref, np->xbdev->otherend_id,
+ page_to_pfn(skb_shinfo(skb)->frags->page),
+ 0);
+ }
+ req->gref = ref;
+ req->id = requeue_idx;
+
requeue_idx++;
}
np->rx.req_prod_pvt = requeue_idx;
- RING_PUSH_REQUESTS(&np->rx);
/*
* Step 3: All public and private state should now be sane. Get
@@ -1139,6 +1663,8 @@ static void netif_uninit(struct net_devi
static void netif_uninit(struct net_device *dev)
{
struct netfront_info *np = netdev_priv(dev);
+ netif_release_tx_bufs(np);
+ netif_release_rx_bufs(np);
gnttab_free_grant_references(np->gref_tx_head);
gnttab_free_grant_references(np->gref_rx_head);
}
@@ -1149,6 +1675,9 @@ static struct ethtool_ops network_ethtoo
.set_tx_csum = ethtool_op_set_tx_csum,
.get_sg = ethtool_op_get_sg,
.set_sg = xennet_set_sg,
+ .get_tso = ethtool_op_get_tso,
+ .set_tso = xennet_set_tso,
+ .get_link = ethtool_op_get_link,
};
#ifdef CONFIG_SYSFS
@@ -1294,13 +1823,8 @@ static void network_set_multicast_list(s
{
}
-/** Create a network device.
- * @param handle device handle
- * @param val return parameter for created device
- * @return 0 on success, error code otherwise
- */
-static struct net_device * __devinit create_netdev(int handle,
- struct xenbus_device *dev)
+static struct net_device * __devinit
+create_netdev(int handle, int copying_receiver, struct xenbus_device *dev)
{
int i, err = 0;
struct net_device *netdev = NULL;
@@ -1313,9 +1837,10 @@ static struct net_device * __devinit cre
return ERR_PTR(-ENOMEM);
}
- np = netdev_priv(netdev);
- np->handle = handle;
- np->xbdev = dev;
+ np = netdev_priv(netdev);
+ np->handle = handle;
+ np->xbdev = dev;
+ np->copying_receiver = copying_receiver;
netif_carrier_off(netdev);
@@ -1337,8 +1862,8 @@ static struct net_device * __devinit cre
np->grant_tx_ref[i] = GRANT_INVALID_REF;
}
- for (i = 0; i <= NET_RX_RING_SIZE; i++) {
- np->rx_skbs[i] = (void *)((unsigned long) i+1);
+ for (i = 0; i < NET_RX_RING_SIZE; i++) {
+ np->rx_skbs[i] = NULL;
np->grant_rx_ref[i] = GRANT_INVALID_REF;
}
@@ -1372,27 +1897,9 @@ static struct net_device * __devinit cre
SET_MODULE_OWNER(netdev);
SET_NETDEV_DEV(netdev, &dev->dev);
- err = register_netdev(netdev);
- if (err) {
- printk(KERN_WARNING "%s> register_netdev err=%d\n",
- __FUNCTION__, err);
- goto exit_free_rx;
- }
-
- err = xennet_sysfs_addif(netdev);
- if (err) {
- /* This can be non-fatal: it only means no tuning parameters */
- printk(KERN_WARNING "%s> add sysfs failed err=%d\n",
- __FUNCTION__, err);
- }
-
np->netdev = netdev;
-
return netdev;
-
- exit_free_rx:
- gnttab_free_grant_references(np->gref_rx_head);
exit_free_tx:
gnttab_free_grant_references(np->gref_tx_head);
exit:
@@ -1431,10 +1938,9 @@ static void netfront_closing(struct xenb
{
struct netfront_info *info = dev->dev.driver_data;
- DPRINTK("netfront_closing: %s removed\n", dev->nodename);
+ DPRINTK("%s\n", dev->nodename);
close_netdev(info);
-
xenbus_switch_state(dev, XenbusStateClosed);
}
@@ -1451,6 +1957,26 @@ static int __devexit netfront_remove(str
return 0;
}
+
+static int open_netdev(struct netfront_info *info)
+{
+ int err;
+
+ err = register_netdev(info->netdev);
+ if (err) {
+ printk(KERN_WARNING "%s: register_netdev err=%d\n",
+ __FUNCTION__, err);
+ return err;
+ }
+
+ err = xennet_sysfs_addif(info->netdev);
+ if (err) {
+ /* This can be non-fatal: it only means no tuning parameters */
+ printk(KERN_WARNING "%s: add sysfs failed err=%d\n",
+ __FUNCTION__, err);
+ }
+ return 0;
+}
static void close_netdev(struct netfront_info *info)
{
@@ -1529,7 +2055,7 @@ static int __init netif_init(void)
if (!is_running_on_xen())
return -ENODEV;
- if (xen_start_info->flags & SIF_INITDOMAIN)
+ if (is_initial_xendomain())
return 0;
IPRINTK("Initialising virtual ethernet driver.\n");